Commit d5f09f56e0
Changed files (4)
src
codegen
test
behavior
x86_64
src/codegen/x86_64/abi.zig
@@ -110,7 +110,9 @@ pub const Class = enum {
}
};
-pub fn classifyWindows(ty: Type, zcu: *Zcu, target: *const std.Target) Class {
+pub const Context = enum { ret, arg, other };
+
+pub fn classifyWindows(ty: Type, zcu: *Zcu, target: *const std.Target, ctx: Context) Class {
// https://docs.microsoft.com/en-gb/cpp/build/x64-calling-convention?view=vs-2017
// "There's a strict one-to-one correspondence between a function call's arguments
// and the registers used for those arguments. Any argument that doesn't fit in 8
@@ -148,8 +150,9 @@ pub fn classifyWindows(ty: Type, zcu: *Zcu, target: *const std.Target) Class {
},
.float => switch (ty.floatBits(target)) {
- 16, 32, 64, 128 => .sse,
+ 16, 32, 64 => .sse,
80 => .memory,
+ 128 => if (ctx == .arg) .memory else .sse,
else => unreachable,
},
.vector => .sse,
@@ -166,8 +169,6 @@ pub fn classifyWindows(ty: Type, zcu: *Zcu, target: *const std.Target) Class {
};
}
-pub const Context = enum { ret, arg, other };
-
/// There are a maximum of 8 possible return slots. Returned values are in
/// the beginning of the array; unused slots are filled with .none.
pub fn classifySystemV(ty: Type, zcu: *Zcu, target: *const std.Target, ctx: Context) [8]Class {
src/codegen/x86_64/CodeGen.zig
@@ -2292,7 +2292,7 @@ fn genBodyBlock(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}
fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
- @setEvalBranchQuota(29_600);
+ @setEvalBranchQuota(31_000);
const pt = cg.pt;
const zcu = pt.zcu;
const ip = &zcu.intern_pool;
@@ -4168,6 +4168,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -4201,6 +4202,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -4212,7 +4246,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
@@ -4227,15 +4261,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -4247,7 +4282,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
@@ -4262,15 +4297,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -4282,7 +4318,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
@@ -4297,13 +4333,121 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
} }) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{
@@ -14775,6 +14919,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -14808,6 +14953,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__subtf3" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -14819,7 +14997,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__subtf3" } },
@@ -14834,15 +15012,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -14854,7 +15033,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__subtf3" } },
@@ -14869,15 +15048,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -14889,7 +15069,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__subtf3" } },
@@ -14904,13 +15084,121 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__subtf3" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__subtf3" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__subtf3" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
} }) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{
@@ -24415,6 +24703,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -24448,6 +24737,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -24459,7 +24781,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
@@ -24474,15 +24796,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -24494,7 +24817,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
@@ -24509,15 +24832,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -24529,7 +24853,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
@@ -24544,13 +24868,121 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
} }) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{
@@ -26350,18 +26782,53 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .add, .tmp0p, .sa(.src0, .add_elem_size), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
- }, .{
- .required_features = .{ .f16c, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .word, .is = .word } },
- .{ .scalar_float = .{ .of = .word, .is = .word } },
- .any,
+ } }) catch |err| switch (err) {
+ error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{
+ @tagName(air_tag),
+ ty.fmt(pt),
+ ops[0].tracking(cg),
+ ops[1].tracking(cg),
+ }),
+ else => |e| return e,
+ };
+ res[0].wrapInt(cg) catch |err| switch (err) {
+ error.SelectFailed => return cg.fail("failed to select {s} wrap {f} {f}", .{
+ @tagName(air_tag),
+ cg.typeOf(bin_op.lhs).fmt(pt),
+ res[0].tracking(cg),
+ }),
+ else => |e| return e,
+ };
+ try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
+ },
+ .mul_sat => |air_tag| {
+ const bin_op = air_datas[@intFromEnum(inst)].bin_op;
+ var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
+ var res: [1]Temp = undefined;
+ cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{
+ .src_constraints = .{ .{ .exact_signed_int = 8 }, .{ .exact_signed_int = 8 }, .any },
+ .patterns = &.{
+ .{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
},
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, .i_, .mul, .src1b, ._, ._, ._ },
+ .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .sa(.src0, .add_smax), ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte }, .any },
.patterns = &.{
- .{ .src = .{ .to_sse, .to_sse, .none } },
+ .{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
},
.extra_temps = .{
- .{ .type = .f32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } },
+ .{ .type = .i8, .kind = .{ .rc = .gphi } },
.unused,
.unused,
.unused,
@@ -26373,30 +26840,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ },
- .{ ._, .v_ps, .cvtph2, .tmp0x, .src1q, ._, ._ },
- .{ ._, .v_ss, .mul, .dst0x, .dst0x, .tmp0d, ._ },
- .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ .{ ._, .i_, .mul, .src1b, ._, ._, ._ },
+ .{ ._, ._c, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .dst0d, ._, ._ },
+ .{ ._, ._r, .sa, .tmp0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp0b, .dst0h, ._, ._ },
+ .{ ._, ._e, .j, .@"0f", ._, ._, ._ },
+ .{ .@"1:", ._r, .sa, .dst0w, .ui(15), ._, ._ },
+ .{ ._, ._, .xor, .dst0b, .sa(.src0, .add_smax), ._, ._ },
} },
}, .{
- .required_features = .{ .sse, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .word, .is = .word } },
- .{ .scalar_float = .{ .of = .word, .is = .word } },
- .any,
- },
+ .src_constraints = .{ .{ .exact_unsigned_int = 8 }, .{ .exact_unsigned_int = 8 }, .any },
.patterns = &.{
- .{ .src = .{
- .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } },
- .{ .to_param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } },
- .none,
- } },
+ .{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
},
- .call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .usize, .kind = .{ .extern_func = "__mulhf3" } },
+ .{ .type = .u8, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
.unused,
.unused,
.unused,
@@ -26409,25 +26873,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .mul, .src1b, ._, ._, ._ },
+ .{ ._, ._, .sbb, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .@"or", .dst0b, .tmp0b, ._, ._ },
} },
}, .{
- .required_features = .{ .f16c, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .qword, .is = .word } },
- .{ .scalar_float = .{ .of = .qword, .is = .word } },
- .any,
- },
+ .required_features = .{ .cmov, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any },
.patterns = &.{
- .{ .src = .{ .mem, .mem, .none } },
- .{ .src = .{ .to_sse, .mem, .none } },
- .{ .src = .{ .mem, .to_sse, .none } },
- .{ .src = .{ .to_sse, .to_sse, .none } },
+ .{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
},
.extra_temps = .{
- .{ .type = .vector_4_f32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } },
+ .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
.unused,
.unused,
.unused,
@@ -26439,28 +26900,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ },
- .{ ._, .v_ps, .cvtph2, .tmp0x, .src1q, ._, ._ },
- .{ ._, .v_ps, .mul, .dst0x, .dst0x, .tmp0x, ._ },
- .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ },
+ .{ ._, ._, .mul, .src1b, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .cmp, .dst0w, .tmp0w, ._, ._ },
+ .{ ._, ._a, .cmov, .dst0d, .tmp0d, ._, ._ },
} },
}, .{
- .required_features = .{ .f16c, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .xword, .is = .word } },
- .{ .scalar_float = .{ .of = .xword, .is = .word } },
- .any,
- },
+ .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any },
.patterns = &.{
- .{ .src = .{ .mem, .mem, .none } },
- .{ .src = .{ .to_sse, .mem, .none } },
- .{ .src = .{ .mem, .to_sse, .none } },
- .{ .src = .{ .to_sse, .to_sse, .none } },
+ .{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
},
.extra_temps = .{
- .{ .type = .vector_8_f32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } },
+ .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
.unused,
.unused,
.unused,
@@ -26472,27 +26928,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, .v_ps, .cvtph2, .dst0y, .src0x, ._, ._ },
- .{ ._, .v_ps, .cvtph2, .tmp0y, .src1x, ._, ._ },
- .{ ._, .v_ps, .mul, .dst0y, .dst0y, .tmp0y, ._ },
- .{ ._, .v_, .cvtps2ph, .dst0x, .dst0y, .rm(.{}), ._ },
+ .{ ._, ._, .mul, .src1b, ._, ._, ._ },
+ .{ ._, ._, .cmp, .dst0w, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._na, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ },
} },
}, .{
- .required_features = .{ .f16c, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .word } },
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .word } },
- .any,
- },
+ .required_features = .{ .fast_imm16, null, null, null },
+ .src_constraints = .{ .{ .exact_signed_int = 16 }, .{ .exact_signed_int = 16 }, .any },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
+ .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
},
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
- .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .{ .type = .i16, .kind = .{ .reg = .dx } },
+ .unused,
+ .unused,
.unused,
.unused,
.unused,
@@ -26502,33 +26957,28 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .mem, .unused },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_ps, .cvtph2, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, .v_ps, .cvtph2, .tmp2y, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, .v_ps, .mul, .tmp1y, .tmp1y, .tmp2y, ._ },
- .{ ._, .v_, .cvtps2ph, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1y, .rm(.{}), ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, .i_, .mul, .src1w, ._, ._, ._ },
+ .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ },
+ .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ },
+ .{ ._, ._, .xor, .dst0w, .sa(.src0, .add_smax), ._, ._ },
} },
}, .{
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
- .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
- .any,
- },
+ .src_constraints = .{ .{ .exact_signed_int = 16 }, .{ .exact_signed_int = 16 }, .any },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
+ .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
},
- .call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__mulhf3" } },
+ .{ .type = .i16, .kind = .{ .reg = .dx } },
+ .unused,
+ .unused,
+ .unused,
.unused,
.unused,
.unused,
@@ -26537,34 +26987,29 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .mem, .unused },
- .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ },
- .{ ._, .vp_w, .insr, .tmp1x, .tmp2x, .memia(.src0w, .tmp0, .add_unaligned_size), .ui(0) },
- .{ ._, .vp_w, .insr, .tmp2x, .tmp2x, .memia(.src1w, .tmp0, .add_unaligned_size), .ui(0) },
- .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, .vp_w, .extr, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1x, .ui(0), ._ },
- .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, .i_, .mul, .src1w, ._, ._, ._ },
+ .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ },
+ .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ },
+ .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ },
} },
}, .{
- .required_features = .{ .sse4_1, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
- .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
- .any,
- },
+ .required_features = .{ .fast_imm16, null, null, null },
+ .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word }, .any },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
+ .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
},
- .call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__mulhf3" } },
+ .{ .type = .i16, .kind = .{ .reg = .dx } },
+ .{ .type = .i16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
+ .unused,
+ .unused,
.unused,
.unused,
.unused,
@@ -26573,36 +27018,33 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .mem, .unused },
- .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
- .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ },
- .{ ._, .p_w, .insr, .tmp1x, .memia(.src0w, .tmp0, .add_unaligned_size), .ui(0), ._ },
- .{ ._, .p_w, .insr, .tmp2x, .memia(.src1w, .tmp0, .add_unaligned_size), .ui(0), ._ },
- .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, .p_w, .extr, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1x, .ui(0), ._ },
- .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, .i_, .mul, .src1w, ._, ._, ._ },
+ .{ ._, ._c, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
+ .{ ._, ._r, .sa, .tmp1w, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp1w, .tmp0w, ._, ._ },
+ .{ ._, ._e, .j, .@"0f", ._, ._, ._ },
+ .{ .@"1:", ._, .mov, .dst0d, .tmp0d, ._, ._ },
+ .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ },
+ .{ ._, ._, .xor, .dst0w, .sa(.src0, .add_smax), ._, ._ },
} },
}, .{
- .required_features = .{ .sse2, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
- .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
- .any,
- },
+ .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word }, .any },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
+ .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
},
- .call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__mulhf3" } },
- .{ .type = .f16, .kind = .{ .reg = .ax } },
+ .{ .type = .i16, .kind = .{ .reg = .dx } },
+ .{ .type = .i16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
+ .unused,
+ .unused,
+ .unused,
.unused,
.unused,
.unused,
@@ -26610,154 +27052,90 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .mem, .unused },
- .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
- .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ },
- .{ ._, .p_w, .insr, .tmp1x, .memia(.src0w, .tmp0, .add_unaligned_size), .ui(0), ._ },
- .{ ._, .p_w, .insr, .tmp2x, .memia(.src1w, .tmp0, .add_unaligned_size), .ui(0), ._ },
- .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, .p_w, .extr, .tmp4d, .tmp1x, .ui(0), ._ },
- .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp4w, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, .i_, .mul, .src1w, ._, ._, ._ },
+ .{ ._, ._c, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
+ .{ ._, ._r, .sa, .tmp1w, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp1w, .tmp0w, ._, ._ },
+ .{ ._, ._e, .j, .@"0f", ._, ._, ._ },
+ .{ .@"1:", ._, .mov, .dst0d, .tmp0d, ._, ._ },
+ .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ },
+ .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ },
} },
}, .{
- .required_features = .{ .sse, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
- .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
- .any,
- },
+ .src_constraints = .{ .{ .exact_unsigned_int = 16 }, .{ .exact_unsigned_int = 16 }, .any },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
+ .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
},
- .call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f16, .kind = .{ .reg = .ax } },
- .{ .type = .f32, .kind = .mem },
- .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__mulhf3" } },
+ .{ .type = .u16, .kind = .{ .reg = .dx } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
.unused,
.unused,
.unused,
.unused,
.unused,
- },
- .dst_temps = .{ .mem, .unused },
- .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
- .each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._, .mov, .mem(.tmp2d), .tmp1d, ._, ._ },
- .{ ._, ._ss, .mov, .tmp3x, .mem(.tmp2d), ._, ._ },
- .{ ._, ._, .movzx, .tmp1d, .memia(.src1w, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._, .mov, .mem(.tmp2d), .tmp1d, ._, ._ },
- .{ ._, ._ss, .mov, .tmp4x, .mem(.tmp2d), ._, ._ },
- .{ ._, ._, .call, .tmp5d, ._, ._, ._ },
- .{ ._, ._ss, .mov, .mem(.tmp2d), .tmp3x, ._, ._ },
- .{ ._, ._, .mov, .tmp1d, .mem(.tmp2d), ._, ._ },
- .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- } },
- }, .{
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .dword, .is = .dword } },
- .{ .scalar_float = .{ .of = .dword, .is = .dword } },
- .any,
- },
- .patterns = &.{
- .{ .src = .{ .to_sse, .mem, .none } },
- .{ .src = .{ .mem, .to_sse, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_sse, .to_sse, .none } },
- },
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
- .each = .{ .once = &.{
- .{ ._, .v_ss, .mul, .dst0x, .src0x, .src1d, ._ },
- } },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .dword, .is = .dword } },
- .{ .scalar_float = .{ .of = .dword, .is = .dword } },
- .any,
- },
- .patterns = &.{
- .{ .src = .{ .to_mut_sse, .mem, .none } },
- .{ .src = .{ .mem, .to_mut_sse, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_mut_sse, .to_sse, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._ss, .mul, .dst0x, .src1d, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .mul, .src1w, ._, ._, ._ },
+ .{ ._, ._, .sbb, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp0d, ._, ._ },
} },
}, .{
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .xword, .is = .dword } },
- .{ .scalar_float = .{ .of = .xword, .is = .dword } },
- .any,
- },
+ .required_features = .{ .bmi, .cmov, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
.patterns = &.{
- .{ .src = .{ .to_sse, .mem, .none } },
- .{ .src = .{ .mem, .to_sse, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_sse, .to_sse, .none } },
- },
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
- .each = .{ .once = &.{
- .{ ._, .v_ps, .mul, .dst0x, .src0x, .src1x, ._ },
- } },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .xword, .is = .dword } },
- .{ .scalar_float = .{ .of = .xword, .is = .dword } },
- .any,
+ .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
},
- .patterns = &.{
- .{ .src = .{ .to_mut_sse, .mem, .none } },
- .{ .src = .{ .mem, .to_mut_sse, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_mut_sse, .to_sse, .none } },
+ .extra_temps = .{
+ .{ .type = .u16, .kind = .{ .reg = .dx } },
+ .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._ps, .mul, .dst0x, .src1x, ._, ._ },
- } },
- }, .{
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .yword, .is = .dword } },
- .{ .scalar_float = .{ .of = .yword, .is = .dword } },
- .any,
- },
- .patterns = &.{
- .{ .src = .{ .to_sse, .mem, .none } },
- .{ .src = .{ .mem, .to_sse, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_sse, .to_sse, .none } },
- },
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
- .each = .{ .once = &.{
- .{ ._, .v_ps, .mul, .dst0y, .src0y, .src1y, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mul, .src1w, ._, ._, ._ },
+ .{ ._, ._, .andn, .tmp2d, .tmp1d, .dst0d, ._ },
+ .{ ._, ._, .@"or", .tmp2w, .tmp0w, ._, ._ },
+ .{ ._, ._nz, .cmov, .dst0d, .tmp1d, ._, ._ },
} },
}, .{
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } },
- .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } },
- .any,
- },
+ .required_features = .{ .cmov, .fast_imm16, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
+ .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
},
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } },
+ .{ .type = .u16, .kind = .{ .reg = .dx } },
+ .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
.unused,
.unused,
.unused,
@@ -26768,29 +27146,28 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .mem, .unused },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_ps, .mova, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, .v_ps, .mul, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_unaligned_size), ._ },
- .{ ._, .v_ps, .mova, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .mul, .src1w, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
+ .{ ._, ._, .@"and", .tmp1w, .sa(.src0, .add_2_smin), ._, ._ },
+ .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._nz, .cmov, .dst0d, .tmp0d, ._, ._ },
} },
}, .{
- .required_features = .{ .sse, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } },
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } },
- .any,
- },
+ .required_features = .{ .cmov, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
+ .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
},
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } },
+ .{ .type = .u16, .kind = .{ .reg = .dx } },
+ .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
.unused,
.unused,
.unused,
@@ -26801,61 +27178,28 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .mem, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._ps, .mul, .tmp1x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- } },
- }, .{
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .qword, .is = .qword } },
- .{ .scalar_float = .{ .of = .qword, .is = .qword } },
- .any,
- },
- .patterns = &.{
- .{ .src = .{ .to_sse, .mem, .none } },
- .{ .src = .{ .mem, .to_sse, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_sse, .to_sse, .none } },
- },
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
- .each = .{ .once = &.{
- .{ ._, .v_sd, .mul, .dst0x, .src0x, .src1q, ._ },
- } },
- }, .{
- .required_features = .{ .sse2, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .qword, .is = .qword } },
- .{ .scalar_float = .{ .of = .qword, .is = .qword } },
- .any,
- },
- .patterns = &.{
- .{ .src = .{ .to_mut_sse, .mem, .none } },
- .{ .src = .{ .mem, .to_mut_sse, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_mut_sse, .to_sse, .none } },
- },
.dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._sd, .mul, .dst0x, .src1q, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .mul, .src1w, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ },
+ .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._nz, .cmov, .dst0d, .tmp0d, ._, ._ },
} },
}, .{
- .required_features = .{ .x87, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .qword, .is = .qword } },
- .{ .scalar_float = .{ .of = .qword, .is = .qword } },
- .any,
- },
+ .required_features = .{ .fast_imm16, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
.patterns = &.{
- .{ .src = .{ .mem, .mem, .none } },
+ .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
},
.extra_temps = .{
- .{ .type = .f64, .kind = .{ .reg = .st6 } },
- .{ .type = .f64, .kind = .{ .reg = .st7 } },
+ .{ .type = .u16, .kind = .{ .reg = .dx } },
+ .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
.unused,
.unused,
.unused,
@@ -26866,73 +27210,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .mem, .unused },
- .each = .{ .once = &.{
- .{ ._, .f_, .ld, .src0q, ._, ._, ._ },
- .{ ._, .f_, .mul, .src1q, ._, ._, ._ },
- .{ ._, .f_p, .st, .dst0q, ._, ._, ._ },
- } },
- }, .{
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .xword, .is = .qword } },
- .{ .scalar_float = .{ .of = .xword, .is = .qword } },
- .any,
- },
- .patterns = &.{
- .{ .src = .{ .to_sse, .mem, .none } },
- .{ .src = .{ .mem, .to_sse, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_sse, .to_sse, .none } },
- },
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
- .each = .{ .once = &.{
- .{ ._, .v_pd, .mul, .dst0x, .src0x, .src1x, ._ },
- } },
- }, .{
- .required_features = .{ .sse2, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .xword, .is = .qword } },
- .{ .scalar_float = .{ .of = .xword, .is = .qword } },
- .any,
- },
- .patterns = &.{
- .{ .src = .{ .to_mut_sse, .mem, .none } },
- .{ .src = .{ .mem, .to_mut_sse, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_mut_sse, .to_sse, .none } },
- },
.dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._pd, .mul, .dst0x, .src1x, ._, ._ },
- } },
- }, .{
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .yword, .is = .qword } },
- .{ .scalar_float = .{ .of = .yword, .is = .qword } },
- .any,
- },
- .patterns = &.{
- .{ .src = .{ .to_sse, .mem, .none } },
- .{ .src = .{ .mem, .to_sse, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_sse, .to_sse, .none } },
- },
- .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
- .each = .{ .once = &.{
- .{ ._, .v_pd, .mul, .dst0y, .src0y, .src1y, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .mul, .src1w, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
+ .{ ._, ._, .@"and", .tmp1w, .sa(.src0, .add_2_smin), ._, ._ },
+ .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ },
} },
}, .{
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } },
- .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } },
- .any,
- },
+ .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
+ .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
},
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } },
+ .{ .type = .u16, .kind = .{ .reg = .dx } },
+ .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
.unused,
.unused,
.unused,
@@ -26943,29 +27241,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .mem, .unused },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_pd, .mova, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, .v_pd, .mul, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_unaligned_size), ._ },
- .{ ._, .v_pd, .mova, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .mul, .src1w, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ },
+ .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ },
} },
}, .{
- .required_features = .{ .sse2, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } },
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } },
- .any,
- },
+ .src_constraints = .{ .{ .exact_signed_int = 32 }, .{ .exact_signed_int = 32 }, .any },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
+ .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } },
},
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } },
+ .{ .type = .i32, .kind = .{ .reg = .edx } },
+ .unused,
.unused,
.unused,
.unused,
@@ -26976,30 +27272,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .mem, .unused },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._pd, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._pd, .mul, .tmp1x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._pd, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .i_, .mul, .src1d, ._, ._, ._ },
+ .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ },
+ .{ ._, ._r, .sa, .dst0d, .ui(31), ._, ._ },
+ .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ },
} },
}, .{
- .required_features = .{ .x87, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } },
- .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } },
- .any,
- },
+ .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword }, .any },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
+ .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } },
},
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f64, .kind = .{ .reg = .st6 } },
- .{ .type = .f64, .kind = .{ .reg = .st7 } },
+ .{ .type = .i32, .kind = .{ .reg = .edx } },
+ .{ .type = .i32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
+ .unused,
.unused,
.unused,
.unused,
@@ -27009,29 +27301,29 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .mem, .unused },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .f_, .ld, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._, ._ },
- .{ ._, .f_, .mul, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._, ._ },
- .{ ._, .f_p, .st, .memia(.dst0q, .tmp0, .add_unaligned_size), ._, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .i_, .mul, .src1d, ._, ._, ._ },
+ .{ ._, ._c, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
+ .{ ._, ._r, .sa, .tmp1d, .sia(-1, .src0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .cmp, .tmp1d, .tmp0d, ._, ._ },
+ .{ ._, ._e, .j, .@"0f", ._, ._, ._ },
+ .{ .@"1:", ._, .mov, .dst0d, .tmp0d, ._, ._ },
+ .{ ._, ._r, .sa, .dst0d, .ui(31), ._, ._ },
+ .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ },
} },
}, .{
- .required_features = .{ .x87, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
- .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
- .any,
- },
+ .src_constraints = .{ .{ .exact_unsigned_int = 32 }, .{ .exact_unsigned_int = 32 }, .any },
.patterns = &.{
- .{ .src = .{ .mem, .mem, .none } },
+ .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } },
},
.extra_temps = .{
- .{ .type = .f80, .kind = .{ .reg = .st6 } },
- .{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .u32, .kind = .{ .reg = .edx } },
+ .unused,
.unused,
.unused,
.unused,
@@ -27042,29 +27334,25 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .rc = .x87 }, .unused },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
- .{ ._, .f_, .ld, .src1t, ._, ._, ._ },
- .{ ._, .f_p, .mul, ._, ._, ._, ._ },
- .{ ._, .f_p, .st, .dst0t, ._, ._, ._ },
+ .{ ._, ._, .mul, .src1d, ._, ._, ._ },
+ .{ ._, ._, .sbb, .tmp0d, .tmp0d, ._, ._ },
+ .{ ._, ._, .@"or", .dst0d, .tmp0d, ._, ._ },
} },
}, .{
- .required_features = .{ .x87, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
- .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
- .any,
- },
+ .required_features = .{ .bmi, .cmov, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any },
.patterns = &.{
- .{ .src = .{ .to_x87, .mem, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .mem, .to_x87, .none } },
- .{ .src = .{ .to_x87, .to_x87, .none } },
+ .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } },
},
.extra_temps = .{
- .{ .type = .f80, .kind = .{ .reg = .st7 } },
- .unused,
- .unused,
+ .{ .type = .u32, .kind = .{ .reg = .edx } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
.unused,
.unused,
.unused,
@@ -27074,26 +27362,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .rc = .x87 }, .unused },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
- .{ ._, .f_, .mul, .tmp0t, .src1t, ._, ._ },
- .{ ._, .f_p, .st, .dst0t, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._, .mul, .src1d, ._, ._, ._ },
+ .{ ._, ._, .andn, .tmp2d, .tmp1d, .dst0d, ._ },
+ .{ ._, ._, .@"or", .tmp2d, .tmp0d, ._, ._ },
+ .{ ._, ._nz, .cmov, .dst0d, .tmp1d, ._, ._ },
} },
}, .{
- .required_features = .{ .x87, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
- .any,
- },
+ .required_features = .{ .cmov, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
+ .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } },
},
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f80, .kind = .{ .reg = .st6 } },
- .{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .u32, .kind = .{ .reg = .edx } },
+ .{ .type = .u32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
+ .unused,
.unused,
.unused,
.unused,
@@ -27103,35 +27392,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .mem, .unused },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .f_, .ld, .memia(.src0t, .tmp0, .add_unaligned_size), ._, ._, ._ },
- .{ ._, .f_, .ld, .memia(.src1t, .tmp0, .add_unaligned_size), ._, ._, ._ },
- .{ ._, .f_p, .mul, ._, ._, ._, ._ },
- .{ ._, .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .mul, .src1d, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ },
+ .{ ._, ._, .@"or", .tmp1d, .tmp0d, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ },
+ .{ ._, ._nz, .cmov, .dst0d, .tmp0d, ._, ._ },
} },
}, .{
- .required_features = .{ .sse, null, null, null },
- .src_constraints = .{
- .{ .scalar_float = .{ .of = .xword, .is = .xword } },
- .{ .scalar_float = .{ .of = .xword, .is = .xword } },
- .any,
- },
+ .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any },
.patterns = &.{
- .{ .src = .{
- .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } },
- .{ .to_param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } },
- .none,
- } },
+ .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } },
},
- .call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
- .unused,
+ .{ .type = .u32, .kind = .{ .reg = .edx } },
+ .{ .type = .u32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
.unused,
.unused,
.unused,
@@ -27143,775 +27423,25 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .clobbers = .{ .eflags = true },
.each = .{ .once = &.{
- .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .mul, .src1d, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ },
+ .{ ._, ._, .@"or", .tmp1d, .tmp0d, ._, ._ },
+ .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ },
} },
}, .{
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
- .any,
- },
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .exact_signed_int = 64 }, .{ .exact_signed_int = 64 }, .any },
.patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
+ .{ .src = .{ .{ .to_reg = .rax }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .rax }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .rax }, .to_gpr, .none } },
},
- .call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .mem, .unused },
- .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
- .each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- } },
- }, .{
- .required_features = .{ .sse2, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
- .any,
- },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
- },
- .call_frame = .{ .alignment = .@"16" },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .mem, .unused },
- .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
- .each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- } },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .src_constraints = .{
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
- .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
- .any,
- },
- .patterns = &.{
- .{ .src = .{ .to_mem, .to_mem, .none } },
- },
- .call_frame = .{ .alignment = .@"16" },
- .extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .mem, .unused },
- .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
- .each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
- } },
- } }) catch |err| switch (err) {
- error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{
- @tagName(air_tag),
- ty.fmt(pt),
- ops[0].tracking(cg),
- ops[1].tracking(cg),
- }),
- else => |e| return e,
- };
- res[0].wrapInt(cg) catch |err| switch (err) {
- error.SelectFailed => return cg.fail("failed to select {s} wrap {f} {f}", .{
- @tagName(air_tag),
- cg.typeOf(bin_op.lhs).fmt(pt),
- res[0].tracking(cg),
- }),
- else => |e| return e,
- };
- try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
- },
- .mul_sat => |air_tag| {
- const bin_op = air_datas[@intFromEnum(inst)].bin_op;
- var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
- var res: [1]Temp = undefined;
- cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{
- .src_constraints = .{ .{ .exact_signed_int = 8 }, .{ .exact_signed_int = 8 }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, .i_, .mul, .src1b, ._, ._, ._ },
- .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ },
- .{ ._, ._, .xor, .dst0b, .sa(.src0, .add_smax), ._, ._ },
- } },
- }, .{
- .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .i8, .kind = .{ .rc = .gphi } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, .i_, .mul, .src1b, ._, ._, ._ },
- .{ ._, ._c, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp0d, .dst0d, ._, ._ },
- .{ ._, ._r, .sa, .tmp0b, .sia(-1, .src0, .add_bit_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp0b, .dst0h, ._, ._ },
- .{ ._, ._e, .j, .@"0f", ._, ._, ._ },
- .{ .@"1:", ._r, .sa, .dst0w, .ui(15), ._, ._ },
- .{ ._, ._, .xor, .dst0b, .sa(.src0, .add_smax), ._, ._ },
- } },
- }, .{
- .src_constraints = .{ .{ .exact_unsigned_int = 8 }, .{ .exact_unsigned_int = 8 }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .u8, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .mul, .src1b, ._, ._, ._ },
- .{ ._, ._, .sbb, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, ._, .@"or", .dst0b, .tmp0b, ._, ._ },
- } },
- }, .{
- .required_features = .{ .cmov, null, null, null },
- .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .mul, .src1b, ._, ._, ._ },
- .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ },
- .{ ._, ._, .cmp, .dst0w, .tmp0w, ._, ._ },
- .{ ._, ._a, .cmov, .dst0d, .tmp0d, ._, ._ },
- } },
- }, .{
- .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .mul, .src1b, ._, ._, ._ },
- .{ ._, ._, .cmp, .dst0w, .ua(.src0, .add_umax), ._, ._ },
- .{ ._, ._na, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ },
- } },
- }, .{
- .required_features = .{ .fast_imm16, null, null, null },
- .src_constraints = .{ .{ .exact_signed_int = 16 }, .{ .exact_signed_int = 16 }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .i16, .kind = .{ .reg = .dx } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, .i_, .mul, .src1w, ._, ._, ._ },
- .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ },
- .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ },
- .{ ._, ._, .xor, .dst0w, .sa(.src0, .add_smax), ._, ._ },
- } },
- }, .{
- .src_constraints = .{ .{ .exact_signed_int = 16 }, .{ .exact_signed_int = 16 }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .i16, .kind = .{ .reg = .dx } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, .i_, .mul, .src1w, ._, ._, ._ },
- .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ },
- .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ },
- .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ },
- } },
- }, .{
- .required_features = .{ .fast_imm16, null, null, null },
- .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .i16, .kind = .{ .reg = .dx } },
- .{ .type = .i16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, .i_, .mul, .src1w, ._, ._, ._ },
- .{ ._, ._c, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
- .{ ._, ._r, .sa, .tmp1w, .sia(-1, .src0, .add_bit_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp1w, .tmp0w, ._, ._ },
- .{ ._, ._e, .j, .@"0f", ._, ._, ._ },
- .{ .@"1:", ._, .mov, .dst0d, .tmp0d, ._, ._ },
- .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ },
- .{ ._, ._, .xor, .dst0w, .sa(.src0, .add_smax), ._, ._ },
- } },
- }, .{
- .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .i16, .kind = .{ .reg = .dx } },
- .{ .type = .i16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, .i_, .mul, .src1w, ._, ._, ._ },
- .{ ._, ._c, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
- .{ ._, ._r, .sa, .tmp1w, .sia(-1, .src0, .add_bit_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp1w, .tmp0w, ._, ._ },
- .{ ._, ._e, .j, .@"0f", ._, ._, ._ },
- .{ .@"1:", ._, .mov, .dst0d, .tmp0d, ._, ._ },
- .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ },
- .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ },
- } },
- }, .{
- .src_constraints = .{ .{ .exact_unsigned_int = 16 }, .{ .exact_unsigned_int = 16 }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .u16, .kind = .{ .reg = .dx } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, ._, .mul, .src1w, ._, ._, ._ },
- .{ ._, ._, .sbb, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, ._, .@"or", .dst0d, .tmp0d, ._, ._ },
- } },
- }, .{
- .required_features = .{ .bmi, .cmov, null, null },
- .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .u16, .kind = .{ .reg = .dx } },
- .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, ._, .mov, .tmp1d, .ua(.src0, .add_umax), ._, ._ },
- .{ ._, ._, .mul, .src1w, ._, ._, ._ },
- .{ ._, ._, .andn, .tmp2d, .tmp1d, .dst0d, ._ },
- .{ ._, ._, .@"or", .tmp2w, .tmp0w, ._, ._ },
- .{ ._, ._nz, .cmov, .dst0d, .tmp1d, ._, ._ },
- } },
- }, .{
- .required_features = .{ .cmov, .fast_imm16, null, null },
- .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .u16, .kind = .{ .reg = .dx } },
- .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, ._, .mul, .src1w, ._, ._, ._ },
- .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
- .{ ._, ._, .@"and", .tmp1w, .sa(.src0, .add_2_smin), ._, ._ },
- .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ },
- .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ },
- .{ ._, ._nz, .cmov, .dst0d, .tmp0d, ._, ._ },
- } },
- }, .{
- .required_features = .{ .cmov, null, null, null },
- .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .u16, .kind = .{ .reg = .dx } },
- .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, ._, .mul, .src1w, ._, ._, ._ },
- .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
- .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ },
- .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ },
- .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ },
- .{ ._, ._nz, .cmov, .dst0d, .tmp0d, ._, ._ },
- } },
- }, .{
- .required_features = .{ .fast_imm16, null, null, null },
- .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .u16, .kind = .{ .reg = .dx } },
- .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, ._, .mul, .src1w, ._, ._, ._ },
- .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
- .{ ._, ._, .@"and", .tmp1w, .sa(.src0, .add_2_smin), ._, ._ },
- .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ },
- .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ },
- } },
- }, .{
- .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .u16, .kind = .{ .reg = .dx } },
- .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, ._, .mul, .src1w, ._, ._, ._ },
- .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
- .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ },
- .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ },
- .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ },
- } },
- }, .{
- .src_constraints = .{ .{ .exact_signed_int = 32 }, .{ .exact_signed_int = 32 }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .i32, .kind = .{ .reg = .edx } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, .i_, .mul, .src1d, ._, ._, ._ },
- .{ ._, ._nc, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ },
- .{ ._, ._r, .sa, .dst0d, .ui(31), ._, ._ },
- .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ },
- } },
- }, .{
- .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .i32, .kind = .{ .reg = .edx } },
- .{ .type = .i32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, .i_, .mul, .src1d, ._, ._, ._ },
- .{ ._, ._c, .j, .@"1f", ._, ._, ._ },
- .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
- .{ ._, ._r, .sa, .tmp1d, .sia(-1, .src0, .add_bit_size), ._, ._ },
- .{ ._, ._, .cmp, .tmp1d, .tmp0d, ._, ._ },
- .{ ._, ._e, .j, .@"0f", ._, ._, ._ },
- .{ .@"1:", ._, .mov, .dst0d, .tmp0d, ._, ._ },
- .{ ._, ._r, .sa, .dst0d, .ui(31), ._, ._ },
- .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ },
- } },
- }, .{
- .src_constraints = .{ .{ .exact_unsigned_int = 32 }, .{ .exact_unsigned_int = 32 }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .u32, .kind = .{ .reg = .edx } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .mul, .src1d, ._, ._, ._ },
- .{ ._, ._, .sbb, .tmp0d, .tmp0d, ._, ._ },
- .{ ._, ._, .@"or", .dst0d, .tmp0d, ._, ._ },
- } },
- }, .{
- .required_features = .{ .bmi, .cmov, null, null },
- .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .u32, .kind = .{ .reg = .edx } },
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .u32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp1d, .ua(.src0, .add_umax), ._, ._ },
- .{ ._, ._, .mul, .src1d, ._, ._, ._ },
- .{ ._, ._, .andn, .tmp2d, .tmp1d, .dst0d, ._ },
- .{ ._, ._, .@"or", .tmp2d, .tmp0d, ._, ._ },
- .{ ._, ._nz, .cmov, .dst0d, .tmp1d, ._, ._ },
- } },
- }, .{
- .required_features = .{ .cmov, null, null, null },
- .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .u32, .kind = .{ .reg = .edx } },
- .{ .type = .u32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .mul, .src1d, ._, ._, ._ },
- .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
- .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ },
- .{ ._, ._, .@"or", .tmp1d, .tmp0d, ._, ._ },
- .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ },
- .{ ._, ._nz, .cmov, .dst0d, .tmp0d, ._, ._ },
- } },
- }, .{
- .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .u32, .kind = .{ .reg = .edx } },
- .{ .type = .u32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } },
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- .unused,
- },
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
- .clobbers = .{ .eflags = true },
- .each = .{ .once = &.{
- .{ ._, ._, .mul, .src1d, ._, ._, ._ },
- .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ },
- .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ },
- .{ ._, ._, .@"or", .tmp1d, .tmp0d, ._, ._ },
- .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
- .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ },
- } },
- }, .{
- .required_features = .{ .@"64bit", null, null, null },
- .src_constraints = .{ .{ .exact_signed_int = 64 }, .{ .exact_signed_int = 64 }, .any },
- .patterns = &.{
- .{ .src = .{ .{ .to_reg = .rax }, .mem, .none } },
- .{ .src = .{ .mem, .{ .to_reg = .rax }, .none }, .commute = .{ 0, 1 } },
- .{ .src = .{ .{ .to_reg = .rax }, .to_gpr, .none } },
- },
- .extra_temps = .{
- .{ .type = .i64, .kind = .{ .reg = .rdx } },
+ .{ .type = .i64, .kind = .{ .reg = .rdx } },
.unused,
.unused,
.unused,
@@ -33431,6 +32961,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -33464,6 +32995,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -33475,7 +33039,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
@@ -33490,15 +33054,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -33510,7 +33075,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
@@ -33525,15 +33090,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -33545,7 +33111,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
@@ -33560,13 +33126,121 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
} }) else err: {
assert(air_tag == .div_exact);
@@ -34659,6 +34333,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -34693,6 +34368,112 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp1d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .usize, .kind = .{ .extern_func = "truncq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .usize, .kind = .{ .extern_func = "truncq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .usize, .kind = .{ .extern_func = "truncq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp0x), .dst0x, ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -34704,7 +34485,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
@@ -34719,16 +34500,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
.{ ._, ._, .call, .tmp4d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -34740,7 +34522,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
@@ -34755,16 +34537,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
.{ ._, ._, .call, .tmp4d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -34776,7 +34559,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
@@ -34791,14 +34574,131 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
.{ ._, ._, .call, .tmp4d, ._, ._, ._ },
- .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "truncq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp1x), .tmp5x, ._, ._ },
+ .{ ._, ._, .call, .tmp6d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "truncq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp1x), .tmp5x, ._, ._ },
+ .{ ._, ._, .call, .tmp6d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "truncq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp1x), .tmp5x, ._, ._ },
+ .{ ._, ._, .call, .tmp6d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
} }) else err: {
res[0] = ops[0].divTruncInts(&ops[1], cg) catch |err| break :err err;
@@ -35955,6 +35855,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -35993,6 +35894,124 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp1d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .usize, .kind = .{ .extern_func = switch (direction) {
+ else => unreachable,
+ .zero => "truncq",
+ .down => "floorq",
+ } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .usize, .kind = .{ .extern_func = switch (direction) {
+ else => unreachable,
+ .zero => "truncq",
+ .down => "floorq",
+ } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .usize, .kind = .{ .extern_func = switch (direction) {
+ else => unreachable,
+ .zero => "truncq",
+ .down => "floorq",
+ } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp0x), .dst0x, ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -36004,7 +36023,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
@@ -36023,16 +36042,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
.{ ._, ._, .call, .tmp4d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -36044,7 +36064,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
@@ -36063,16 +36083,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
.{ ._, ._, .call, .tmp4d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -36084,7 +36105,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
@@ -36103,14 +36124,143 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
.{ ._, ._, .call, .tmp4d, ._, ._, ._ },
- .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = switch (direction) {
+ else => unreachable,
+ .zero => "truncq",
+ .down => "floorq",
+ } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp1x), .tmp5x, ._, ._ },
+ .{ ._, ._, .call, .tmp6d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = switch (direction) {
+ else => unreachable,
+ .zero => "truncq",
+ .down => "floorq",
+ } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp1x), .tmp5x, ._, ._ },
+ .{ ._, ._, .call, .tmp6d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = switch (direction) {
+ else => unreachable,
+ .zero => "truncq",
+ .down => "floorq",
+ } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp1x), .tmp5x, ._, ._ },
+ .{ ._, ._, .call, .tmp6d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
} },
}) catch |err| switch (err) {
@@ -37438,6 +37588,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -37472,6 +37623,112 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp1d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .usize, .kind = .{ .extern_func = "floorq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .usize, .kind = .{ .extern_func = "floorq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .usize, .kind = .{ .extern_func = "floorq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp0x), .dst0x, ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -37483,7 +37740,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
@@ -37498,16 +37755,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
.{ ._, ._, .call, .tmp4d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -37519,7 +37777,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
@@ -37534,16 +37792,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
.{ ._, ._, .call, .tmp4d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -37555,7 +37814,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
@@ -37570,14 +37829,131 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
.{ ._, ._, .call, .tmp4d, ._, ._, ._ },
- .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "floorq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp1x), .tmp5x, ._, ._ },
+ .{ ._, ._, .call, .tmp6d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "floorq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp1x), .tmp5x, ._, ._ },
+ .{ ._, ._, .call, .tmp6d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "floorq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp1x), .tmp5x, ._, ._ },
+ .{ ._, ._, .call, .tmp6d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
} })) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{
@@ -39080,6 +39456,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -39113,6 +39490,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -39124,7 +39534,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
@@ -39139,15 +39549,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -39159,7 +39570,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
@@ -39174,15 +39585,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -39194,7 +39606,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
@@ -39209,13 +39621,121 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
- .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
} }) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{
@@ -39525,7 +40045,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.required_cc_abi = .sysv64,
- .required_features = .{ .cmov, null, null, null },
+ .required_features = .{ .@"64bit", .cmov, null, null },
.src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any },
.patterns = &.{
.{ .src = .{ .{ .to_param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .to_mem, .none } },
@@ -39565,6 +40085,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.required_cc_abi = .sysv64,
+ .required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any },
.patterns = &.{
.{ .src = .{ .{ .to_param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .to_mem, .none } },
@@ -39601,70 +40122,344 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .adc, .dst0q1, .src0q0, ._, ._ },
} },
}, .{
- .required_cc_abi = .sysv64,
- .src_constraints = .{ .{ .unsigned_int = .xword }, .{ .unsigned_int = .xword }, .any },
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .cmov, .avx, null },
+ .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any },
.patterns = &.{
- .{ .src = .{
- .{ .to_param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } },
- .{ .to_param_gpr_pair = .{ .cc = .ccc, .after = 2, .at = 2 } },
- .none,
- } },
+ .{ .src = .{ .to_mem, .to_mem, .none } },
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .usize, .kind = .{ .extern_func = "__umodti3" } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } },
+ .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .u64, .kind = .{ .reg = .r8 } },
+ .{ .type = .u64, .kind = .{ .reg = .r9 } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
+ .{ .type = .u64, .kind = .{ .reg = .r10 } },
.unused,
.unused,
.unused,
.unused,
.unused,
+ },
+ .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+ .{ ._, .v_q, .mov, .dst0q0, .tmp1x, ._, ._ },
+ .{ ._, ._, .mov, .tmp2q, .memd(.src1q, 8), ._, ._ },
+ .{ ._, .vp_q, .extr, .dst0q1, .tmp1x, .ui(1), ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ },
+ .{ ._, ._, .@"and", .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._, .xor, .tmp4q, .dst0q1, ._, ._ },
+ .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
+ .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nae, .cmov, .tmp2q, .tmp5q, ._, ._ },
+ .{ ._, ._ae, .cmov, .tmp5q, .mem(.src1q), ._, ._ },
+ .{ ._, ._, .add, .dst0q0, .tmp5q, ._, ._ },
+ .{ ._, ._, .adc, .dst0q1, .tmp2q, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .cmov, .sse4_1, null },
+ .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } },
+ .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .u64, .kind = .{ .reg = .r8 } },
+ .{ .type = .u64, .kind = .{ .reg = .r9 } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
+ .{ .type = .u64, .kind = .{ .reg = .r10 } },
.unused,
.unused,
.unused,
.unused,
.unused,
},
- .dst_temps = .{ .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
+ .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ },
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+ .{ ._, ._q, .mov, .dst0q0, .tmp1x, ._, ._ },
+ .{ ._, ._, .mov, .tmp2q, .memd(.src1q, 8), ._, ._ },
+ .{ ._, .p_q, .extr, .dst0q1, .tmp1x, .ui(1), ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ },
+ .{ ._, ._, .@"and", .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._, .xor, .tmp4q, .dst0q1, ._, ._ },
+ .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
+ .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nae, .cmov, .tmp2q, .tmp5q, ._, ._ },
+ .{ ._, ._ae, .cmov, .tmp5q, .mem(.src1q), ._, ._ },
+ .{ ._, ._, .add, .dst0q0, .tmp5q, ._, ._ },
+ .{ ._, ._, .adc, .dst0q1, .tmp2q, ._, ._ },
} },
}, .{
.required_cc_abi = .win64,
- .required_features = .{ .sse, null, null, null },
- .src_constraints = .{ .{ .unsigned_int = .xword }, .{ .unsigned_int = .xword }, .any },
+ .required_features = .{ .@"64bit", .cmov, .sse2, null },
+ .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem, .none } },
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__umodti3" } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } },
+ .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .u64, .kind = .{ .reg = .r8 } },
+ .{ .type = .u64, .kind = .{ .reg = .r9 } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
+ .{ .type = .u64, .kind = .{ .reg = .r10 } },
.unused,
.unused,
.unused,
.unused,
.unused,
+ },
+ .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+ .{ ._, ._q, .mov, .dst0q0, .tmp1x, ._, ._ },
+ .{ ._, ._, .mov, .tmp2q, .memd(.src1q, 8), ._, ._ },
+ .{ ._, .p_d, .shuf, .tmp1x, .tmp1x, .ui(0b11_10_11_10), ._ },
+ .{ ._, ._q, .mov, .dst0q1, .tmp1x, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ },
+ .{ ._, ._, .@"and", .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._, .xor, .tmp4q, .dst0q1, ._, ._ },
+ .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
+ .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nae, .cmov, .tmp2q, .tmp5q, ._, ._ },
+ .{ ._, ._ae, .cmov, .tmp5q, .mem(.src1q), ._, ._ },
+ .{ ._, ._, .add, .dst0q0, .tmp5q, ._, ._ },
+ .{ ._, ._, .adc, .dst0q1, .tmp2q, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .cmov, .sse, null },
+ .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } },
+ .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .i128, .kind = .mem },
+ .{ .type = .u64, .kind = .{ .reg = .r8 } },
+ .{ .type = .u64, .kind = .{ .reg = .r9 } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
+ .{ .type = .u64, .kind = .{ .reg = .r10 } },
+ .unused,
.unused,
.unused,
.unused,
},
- .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
- .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
- .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .mem(.tmp2x), .tmp1x, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .memd(.src1q, 8), ._, ._ },
+ .{ ._, ._, .mov, .dst0q0, .mem(.tmp2q), ._, ._ },
+ .{ ._, ._, .mov, .dst0q1, .memd(.tmp2q, 8), ._, ._ },
+ .{ ._, ._, .mov, .tmp4q, .ua(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .mov, .tmp5q, .tmp3q, ._, ._ },
+ .{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ },
+ .{ ._, ._, .xor, .tmp5q, .dst0q1, ._, ._ },
+ .{ ._, ._, .xor, .tmp6d, .tmp6d, ._, ._ },
+ .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp5q, .tmp4q, ._, ._ },
+ .{ ._, ._nae, .cmov, .tmp3q, .tmp6q, ._, ._ },
+ .{ ._, ._ae, .cmov, .tmp6q, .mem(.src1q), ._, ._ },
+ .{ ._, ._, .add, .dst0q0, .tmp6q, ._, ._ },
+ .{ ._, ._, .adc, .dst0q1, .tmp3q, ._, ._ },
} },
}, .{
.required_cc_abi = .win64,
- .required_features = .{ .sse, null, null, null },
+ .required_features = .{ .@"64bit", .avx, null, null },
+ .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } },
+ .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .u64, .kind = .{ .reg = .r8 } },
+ .{ .type = .u64, .kind = .{ .reg = .r9 } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+ .{ ._, .v_q, .mov, .dst0q0, .tmp1x, ._, ._ },
+ .{ ._, ._, .mov, .tmp2q, .memd(.src1q, 8), ._, ._ },
+ .{ ._, .vp_q, .extr, .dst0q1, .tmp1x, .ui(1), ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ },
+ .{ ._, ._, .@"and", .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._, .xor, .tmp4q, .dst0q1, ._, ._ },
+ .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .add, .dst0q0, .mem(.src1x), ._, ._ },
+ .{ ._, ._, .adc, .dst0q1, .tmp2q, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .sse4_1, null, null },
+ .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } },
+ .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .u64, .kind = .{ .reg = .r8 } },
+ .{ .type = .u64, .kind = .{ .reg = .r9 } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+ .{ ._, ._q, .mov, .dst0q0, .tmp1x, ._, ._ },
+ .{ ._, ._, .mov, .tmp2q, .memd(.src1q, 8), ._, ._ },
+ .{ ._, .p_q, .extr, .dst0q1, .tmp1x, .ui(1), ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ },
+ .{ ._, ._, .@"and", .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._, .xor, .tmp4q, .dst0q1, ._, ._ },
+ .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .add, .dst0q0, .mem(.src1x), ._, ._ },
+ .{ ._, ._, .adc, .dst0q1, .tmp2q, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .sse2, null, null },
+ .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } },
+ .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .u64, .kind = .{ .reg = .r8 } },
+ .{ .type = .u64, .kind = .{ .reg = .r9 } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+ .{ ._, ._q, .mov, .dst0q0, .tmp1x, ._, ._ },
+ .{ ._, ._, .mov, .tmp2q, .memd(.src1q, 8), ._, ._ },
+ .{ ._, .p_d, .shuf, .tmp1x, .tmp1x, .ui(0b11_10_11_10), ._ },
+ .{ ._, ._q, .mov, .dst0q1, .tmp1x, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ },
+ .{ ._, ._, .@"and", .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._, .xor, .tmp4q, .dst0q1, ._, ._ },
+ .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp4q, .tmp3q, ._, ._ },
+ .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .add, .dst0q0, .mem(.src1x), ._, ._ },
+ .{ ._, ._, .adc, .dst0q1, .tmp2q, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .sse, null, null },
+ .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } },
+ .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .i128, .kind = .mem },
+ .{ .type = .u64, .kind = .{ .reg = .r8 } },
+ .{ .type = .u64, .kind = .{ .reg = .r9 } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .mem(.tmp2x), .tmp1x, ._, ._ },
+ .{ ._, ._, .mov, .tmp3q, .memd(.src1q, 8), ._, ._ },
+ .{ ._, ._, .mov, .dst0q0, .mem(.tmp2q), ._, ._ },
+ .{ ._, ._, .mov, .dst0q1, .memd(.tmp2q, 8), ._, ._ },
+ .{ ._, ._, .mov, .tmp4q, .ua(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .mov, .tmp5q, .tmp3q, ._, ._ },
+ .{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ },
+ .{ ._, ._, .xor, .tmp5q, .dst0q1, ._, ._ },
+ .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp5q, .tmp4q, ._, ._ },
+ .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .add, .dst0q0, .mem(.src1x), ._, ._ },
+ .{ ._, ._, .adc, .dst0q1, .tmp3q, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.src_constraints = .{ .{ .unsigned_int = .xword }, .{ .unsigned_int = .xword }, .any },
.patterns = &.{
.{ .src = .{
- .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } },
- .{ .to_param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } },
+ .{ .to_param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } },
+ .{ .to_param_gpr_pair = .{ .cc = .ccc, .after = 2, .at = 2 } },
.none,
} },
},
@@ -39682,11 +40477,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .dst_temps = .{ .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .unsigned_int = .xword }, .{ .unsigned_int = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__umodti3" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ } },
}, .{
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{
@@ -41082,8 +41905,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f64, .kind = .{ .reg = .rdx } },
.{ .type = .f64, .kind = .mem },
.{ .type = .f64, .kind = .{ .reg = .rax } },
- .{ .type = .f64, .kind = .{ .reg = .st6 } },
.{ .type = .f64, .kind = .{ .reg = .st7 } },
+ .{ .type = .f64, .kind = .{ .reg = .st6 } },
.unused,
.unused,
},
@@ -41130,13 +41953,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f80, .kind = .{ .frame = .call_frame } },
.{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
.{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
.{ .type = .f80, .kind = .{ .reg = .rax } },
.unused,
.unused,
.unused,
.unused,
.unused,
- .unused,
},
.dst_temps = .{ .{ .reg = .st0 }, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
@@ -41145,17 +41968,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .v_dqa, .mov, .mem(.tmp1x), .tmp0x, ._, ._ },
.{ ._, .v_dqa, .mov, .tmp0x, .mem(.src1x), ._, ._ },
.{ ._, .v_dqa, .mov, .memd(.tmp1x, 16), .tmp0x, ._, ._ },
+ .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, .f_, .ld, .dst0t, ._, ._, ._ },
+ .{ ._, .f_, .ld, .tmp3t, ._, ._, ._ },
.{ ._, .f_p, .st, .mem(.tmp1t), ._, ._, ._ },
- .{ ._, ._, .movzx, .tmp4d, .memd(.tmp1w, 16 + 8), ._, ._ },
- .{ ._, ._, .@"and", .tmp4w, .sa(.src0, .add_smin), ._, ._ },
- .{ ._, ._, .xor, .tmp4w, .memd(.tmp1w, 8), ._, ._ },
+ .{ ._, ._, .movzx, .tmp5d, .memd(.src1w, 8), ._, ._ },
+ .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .xor, .tmp5w, .memd(.tmp1w, 8), ._, ._ },
.{ ._, ._, .cmp, .mem(.tmp1q), .si(1), ._, ._ },
- .{ ._, ._, .sbb, .tmp4w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
.{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
- .{ ._, .f_, .ld, .memd(.tmp1t, 16), ._, ._, ._ },
+ .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ },
.{ ._, .f_p, .add, ._, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ },
} },
}, .{
.required_abi = .gnu,
@@ -41175,13 +42000,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f80, .kind = .{ .frame = .call_frame } },
.{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
.{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
.{ .type = .f80, .kind = .{ .reg = .rax } },
.unused,
.unused,
.unused,
.unused,
.unused,
- .unused,
},
.dst_temps = .{ .{ .reg = .st0 }, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
@@ -41191,16 +42016,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .v_dqa, .mov, .tmp0x, .mem(.src1x), ._, ._ },
.{ ._, .v_dqa, .mov, .memd(.tmp1x, 16), .tmp0x, ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, .f_, .ld, .dst0t, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
+ .{ ._, .f_, .ld, .tmp3t, ._, ._, ._ },
.{ ._, .f_p, .st, .mem(.tmp1t), ._, ._, ._ },
- .{ ._, ._, .mov, .tmp4d, .sa(.src0, .add_smin), ._, ._ },
- .{ ._, ._, .@"and", .tmp4w, .memd(.tmp1w, 16 + 8), ._, ._ },
- .{ ._, ._, .xor, .tmp4w, .memd(.tmp1w, 8), ._, ._ },
+ .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .@"and", .tmp5w, .memd(.src1w, 8), ._, ._ },
+ .{ ._, ._, .xor, .tmp5w, .memd(.tmp1w, 8), ._, ._ },
.{ ._, ._, .cmp, .mem(.tmp1q), .si(1), ._, ._ },
- .{ ._, ._, .sbb, .tmp4w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
.{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
- .{ ._, .f_, .ld, .memd(.tmp1t, 16), ._, ._, ._ },
+ .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ },
.{ ._, .f_p, .add, ._, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ },
} },
}, .{
.required_abi = .gnu,
@@ -41220,13 +42047,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f80, .kind = .{ .frame = .call_frame } },
.{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
.{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
.{ .type = .f80, .kind = .{ .reg = .rax } },
.unused,
.unused,
.unused,
.unused,
.unused,
- .unused,
},
.dst_temps = .{ .{ .reg = .st0 }, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
@@ -41236,16 +42063,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._dqa, .mov, .tmp0x, .mem(.src1x), ._, ._ },
.{ ._, ._dqa, .mov, .memd(.tmp1x, 16), .tmp0x, ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, .f_, .ld, .dst0t, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
+ .{ ._, .f_, .ld, .tmp3t, ._, ._, ._ },
.{ ._, .f_p, .st, .mem(.tmp1t), ._, ._, ._ },
- .{ ._, ._, .movzx, .tmp4d, .memd(.tmp1w, 16 + 8), ._, ._ },
- .{ ._, ._, .@"and", .tmp4w, .sa(.src0, .add_smin), ._, ._ },
- .{ ._, ._, .xor, .tmp4w, .memd(.tmp1w, 8), ._, ._ },
+ .{ ._, ._, .movzx, .tmp5d, .memd(.src1w, 8), ._, ._ },
+ .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .xor, .tmp5w, .memd(.tmp1w, 8), ._, ._ },
.{ ._, ._, .cmp, .mem(.tmp1q), .si(1), ._, ._ },
- .{ ._, ._, .sbb, .tmp4w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
.{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
- .{ ._, .f_, .ld, .memd(.tmp1t, 16), ._, ._, ._ },
+ .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ },
.{ ._, .f_p, .add, ._, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ },
} },
}, .{
.required_abi = .gnu,
@@ -41265,13 +42094,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f80, .kind = .{ .frame = .call_frame } },
.{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
.{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
.{ .type = .f80, .kind = .{ .reg = .rax } },
.unused,
.unused,
.unused,
.unused,
.unused,
- .unused,
},
.dst_temps = .{ .{ .reg = .st0 }, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
@@ -41281,16 +42110,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._dqa, .mov, .tmp0x, .mem(.src1x), ._, ._ },
.{ ._, ._dqa, .mov, .memd(.tmp1x, 16), .tmp0x, ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, .f_, .ld, .dst0t, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
+ .{ ._, .f_, .ld, .tmp3t, ._, ._, ._ },
.{ ._, .f_p, .st, .mem(.tmp1t), ._, ._, ._ },
- .{ ._, ._, .mov, .tmp4d, .sa(.src0, .add_smin), ._, ._ },
- .{ ._, ._, .@"and", .tmp4w, .memd(.tmp1w, 16 + 8), ._, ._ },
- .{ ._, ._, .xor, .tmp4w, .memd(.tmp1w, 8), ._, ._ },
+ .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .@"and", .tmp5w, .memd(.src1w, 8), ._, ._ },
+ .{ ._, ._, .xor, .tmp5w, .memd(.tmp1w, 8), ._, ._ },
.{ ._, ._, .cmp, .mem(.tmp1q), .si(1), ._, ._ },
- .{ ._, ._, .sbb, .tmp4w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
.{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
- .{ ._, .f_, .ld, .memd(.tmp1t, 16), ._, ._, ._ },
+ .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ },
.{ ._, .f_p, .add, ._, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ },
} },
}, .{
.required_abi = .gnu,
@@ -41310,13 +42141,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f80, .kind = .{ .frame = .call_frame } },
.{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
.{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
.{ .type = .f80, .kind = .{ .reg = .rax } },
.unused,
.unused,
.unused,
.unused,
.unused,
- .unused,
},
.dst_temps = .{ .{ .reg = .st0 }, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
@@ -41326,16 +42157,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ps, .mova, .tmp0x, .mem(.src1x), ._, ._ },
.{ ._, ._ps, .mova, .memd(.tmp1x, 16), .tmp0x, ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, .f_, .ld, .dst0t, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
+ .{ ._, .f_, .ld, .tmp3t, ._, ._, ._ },
.{ ._, .f_p, .st, .mem(.tmp1t), ._, ._, ._ },
- .{ ._, ._, .movzx, .tmp4d, .memd(.tmp1w, 16 + 8), ._, ._ },
- .{ ._, ._, .@"and", .tmp4w, .sa(.src0, .add_smin), ._, ._ },
- .{ ._, ._, .xor, .tmp4w, .memd(.tmp1w, 8), ._, ._ },
+ .{ ._, ._, .movzx, .tmp5d, .memd(.src1w, 8), ._, ._ },
+ .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .xor, .tmp5w, .memd(.tmp1w, 8), ._, ._ },
.{ ._, ._, .cmp, .mem(.tmp1q), .si(1), ._, ._ },
- .{ ._, ._, .sbb, .tmp4w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
.{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
- .{ ._, .f_, .ld, .memd(.tmp1t, 16), ._, ._, ._ },
+ .{ ._, .f_, .ld, .memd(.src1t, 16), ._, ._, ._ },
.{ ._, .f_p, .add, ._, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ },
} },
}, .{
.required_abi = .gnu,
@@ -41355,13 +42188,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f80, .kind = .{ .frame = .call_frame } },
.{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
.{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
.{ .type = .f80, .kind = .{ .reg = .rax } },
.unused,
.unused,
.unused,
.unused,
.unused,
- .unused,
},
.dst_temps = .{ .{ .reg = .st0 }, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
@@ -41371,16 +42204,106 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ps, .mova, .tmp0x, .mem(.src1x), ._, ._ },
.{ ._, ._ps, .mova, .memd(.tmp1x, 16), .tmp0x, ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, .f_, .ld, .dst0t, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
+ .{ ._, .f_, .ld, .tmp3t, ._, ._, ._ },
.{ ._, .f_p, .st, .mem(.tmp1t), ._, ._, ._ },
- .{ ._, ._, .mov, .tmp4d, .sa(.src0, .add_smin), ._, ._ },
- .{ ._, ._, .@"and", .tmp4w, .memd(.tmp1w, 16 + 8), ._, ._ },
- .{ ._, ._, .xor, .tmp4w, .memd(.tmp1w, 8), ._, ._ },
+ .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .@"and", .tmp5w, .memd(.src1w, 8), ._, ._ },
+ .{ ._, ._, .xor, .tmp5w, .memd(.tmp1w, 8), ._, ._ },
.{ ._, ._, .cmp, .mem(.tmp1q), .si(1), ._, ._ },
- .{ ._, ._, .sbb, .tmp4w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
.{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
- .{ ._, .f_, .ld, .memd(.tmp1t, 16), ._, ._, ._ },
+ .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ },
.{ ._, .f_p, .add, ._, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ },
+ } },
+ }, .{
+ .required_abi = .gnu,
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .x87, .fast_imm16, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
+ .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .f80, .kind = .mem },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
+ .{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
+ .{ .type = .f80, .kind = .{ .reg = .rax } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .reg = .st0 }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp3p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ .{ ._, .f_, .ld, .mem(.tmp0t), ._, ._, ._ },
+ .{ ._, ._, .movzx, .tmp7d, .memd(.src1w, 8), ._, ._ },
+ .{ ._, ._, .@"and", .tmp7w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .xor, .tmp7w, .memd(.tmp0w, 8), ._, ._ },
+ .{ ._, ._, .cmp, .mem(.tmp0q), .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp7w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+ .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ },
+ .{ ._, .f_p, .add, ._, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ },
+ } },
+ }, .{
+ .required_abi = .gnu,
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .x87, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
+ .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .f80, .kind = .mem },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
+ .{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
+ .{ .type = .f80, .kind = .{ .reg = .rax } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .reg = .st0 }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp3p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ .{ ._, .f_, .ld, .mem(.tmp0t), ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp7d, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .@"and", .tmp7w, .memd(.src1w, 8), ._, ._ },
+ .{ ._, ._, .xor, .tmp7w, .memd(.tmp0w, 8), ._, ._ },
+ .{ ._, ._, .cmp, .mem(.tmp0q), .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp7w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+ .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ },
+ .{ ._, .f_p, .add, ._, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ },
} },
}, .{
.required_abi = .gnu,
@@ -41401,12 +42324,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f80, .kind = .{ .frame = .call_frame } },
.{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
.{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
.{ .type = .f80, .kind = .{ .reg = .rax } },
.unused,
.unused,
.unused,
.unused,
- .unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
@@ -41420,13 +42343,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
.{ ._, .f_, .ld, .tmp4t, ._, ._, ._ },
.{ ._, .f_p, .st, .mem(.tmp2t), ._, ._, ._ },
- .{ ._, ._, .movzx, .tmp5d, .memd(.tmp2w, 16 + 8), ._, ._ },
- .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ },
- .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ },
+ .{ ._, ._, .movzx, .tmp6d, .memid(.src1w, .tmp0, 8), ._, ._ },
+ .{ ._, ._, .@"and", .tmp6w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .xor, .tmp6w, .memd(.tmp2w, 8), ._, ._ },
.{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ },
- .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .sbb, .tmp6w, .sa(.src0, .add_smin), ._, ._ },
.{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
- .{ ._, .f_, .ld, .memd(.tmp2t, 16), ._, ._, ._ },
+ .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ },
.{ ._, .f_p, .add, ._, ._, ._, ._ },
.{ .@"1:", .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
@@ -41451,12 +42374,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f80, .kind = .{ .frame = .call_frame } },
.{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
.{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
.{ .type = .f80, .kind = .{ .reg = .rax } },
.unused,
.unused,
.unused,
.unused,
- .unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
@@ -41470,13 +42393,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
.{ ._, .f_, .ld, .tmp4t, ._, ._, ._ },
.{ ._, .f_p, .st, .mem(.tmp2t), ._, ._, ._ },
- .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ },
- .{ ._, ._, .@"and", .tmp5w, .memd(.tmp2w, 16 + 8), ._, ._ },
- .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ },
+ .{ ._, ._, .mov, .tmp6d, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .@"and", .tmp6w, .memid(.src1w, .tmp0, 8), ._, ._ },
+ .{ ._, ._, .xor, .tmp6w, .memd(.tmp2w, 8), ._, ._ },
.{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ },
- .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .sbb, .tmp6w, .sa(.src0, .add_smin), ._, ._ },
.{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
- .{ ._, .f_, .ld, .memd(.tmp2t, 16), ._, ._, ._ },
+ .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ },
.{ ._, .f_p, .add, ._, ._, ._, ._ },
.{ .@"1:", .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
@@ -41501,12 +42424,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f80, .kind = .{ .frame = .call_frame } },
.{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
.{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
.{ .type = .f80, .kind = .{ .reg = .rax } },
.unused,
.unused,
.unused,
.unused,
- .unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
@@ -41520,13 +42443,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
.{ ._, .f_, .ld, .tmp4t, ._, ._, ._ },
.{ ._, .f_p, .st, .mem(.tmp2t), ._, ._, ._ },
- .{ ._, ._, .movzx, .tmp5d, .memd(.tmp2w, 16 + 8), ._, ._ },
- .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ },
- .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ },
+ .{ ._, ._, .movzx, .tmp6d, .memid(.src1w, .tmp0, 8), ._, ._ },
+ .{ ._, ._, .@"and", .tmp6w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .xor, .tmp6w, .memd(.tmp2w, 8), ._, ._ },
.{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ },
- .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .sbb, .tmp6w, .sa(.src0, .add_smin), ._, ._ },
.{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
- .{ ._, .f_, .ld, .memd(.tmp2t, 16), ._, ._, ._ },
+ .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ },
.{ ._, .f_p, .add, ._, ._, ._, ._ },
.{ .@"1:", .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
@@ -41551,12 +42474,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f80, .kind = .{ .frame = .call_frame } },
.{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
.{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
.{ .type = .f80, .kind = .{ .reg = .rax } },
.unused,
.unused,
.unused,
.unused,
- .unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
@@ -41570,13 +42493,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
.{ ._, .f_, .ld, .tmp4t, ._, ._, ._ },
.{ ._, .f_p, .st, .mem(.tmp2t), ._, ._, ._ },
- .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ },
- .{ ._, ._, .@"and", .tmp5w, .memd(.tmp2w, 16 + 8), ._, ._ },
- .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ },
+ .{ ._, ._, .mov, .tmp6d, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .@"and", .tmp6w, .memid(.src1w, .tmp0, 8), ._, ._ },
+ .{ ._, ._, .xor, .tmp6w, .memd(.tmp2w, 8), ._, ._ },
.{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ },
- .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .sbb, .tmp6w, .sa(.src0, .add_smin), ._, ._ },
.{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
- .{ ._, .f_, .ld, .memd(.tmp2t, 16), ._, ._, ._ },
+ .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ },
.{ ._, .f_p, .add, ._, ._, ._, ._ },
.{ .@"1:", .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
@@ -41601,12 +42524,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f80, .kind = .{ .frame = .call_frame } },
.{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
.{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
.{ .type = .f80, .kind = .{ .reg = .rax } },
.unused,
.unused,
.unused,
.unused,
- .unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
@@ -41620,13 +42543,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
.{ ._, .f_, .ld, .tmp4t, ._, ._, ._ },
.{ ._, .f_p, .st, .mem(.tmp2t), ._, ._, ._ },
- .{ ._, ._, .movzx, .tmp5d, .memd(.tmp2w, 16 + 8), ._, ._ },
- .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ },
- .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ },
+ .{ ._, ._, .movzx, .tmp6d, .memid(.src1w, .tmp0, 8), ._, ._ },
+ .{ ._, ._, .@"and", .tmp6w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .xor, .tmp6w, .memd(.tmp2w, 8), ._, ._ },
.{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ },
- .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .sbb, .tmp6w, .sa(.src0, .add_smin), ._, ._ },
.{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
- .{ ._, .f_, .ld, .memd(.tmp2t, 16), ._, ._, ._ },
+ .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ },
.{ ._, .f_p, .add, ._, ._, ._, ._ },
.{ .@"1:", .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
@@ -41651,12 +42574,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f80, .kind = .{ .frame = .call_frame } },
.{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
.{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
.{ .type = .f80, .kind = .{ .reg = .rax } },
.unused,
.unused,
.unused,
.unused,
- .unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
@@ -41670,19 +42593,114 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
.{ ._, .f_, .ld, .tmp4t, ._, ._, ._ },
.{ ._, .f_p, .st, .mem(.tmp2t), ._, ._, ._ },
- .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ },
- .{ ._, ._, .@"and", .tmp5w, .memd(.tmp2w, 16 + 8), ._, ._ },
- .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ },
+ .{ ._, ._, .mov, .tmp6d, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .@"and", .tmp6w, .memid(.src1w, .tmp0, 8), ._, ._ },
+ .{ ._, ._, .xor, .tmp6w, .memd(.tmp2w, 8), ._, ._ },
.{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ },
- .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .sbb, .tmp6w, .sa(.src0, .add_smin), ._, ._ },
.{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
- .{ ._, .f_, .ld, .memd(.tmp2t, 16), ._, ._, ._ },
+ .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ },
.{ ._, .f_p, .add, ._, ._, ._, ._ },
.{ .@"1:", .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_abi = .gnu,
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .sse, .x87, .fast_imm16 },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 2, .at = 2 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
+ .{ .type = .f80, .kind = .{ .reg = .rax } },
+ .{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.dst0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp3p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ .{ ._, ._, .movzx, .tmp5d, .memid(.src1w, .tmp0, 8), ._, ._ },
+ .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ },
+ .{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
+ .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ },
+ .{ ._, .f_, .ld, .memi(.dst0t, .tmp0), ._, ._, ._ },
+ .{ ._, .f_p, .add, ._, ._, ._, ._ },
+ .{ ._, .f_p, .st, .memi(.dst0t, .tmp0), ._, ._, ._ },
+ .{ .@"1:", ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_abi = .gnu,
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .sse, .x87, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } },
+ .{ .type = .f80, .kind = .{ .reg = .rax } },
+ .{ .type = .f80, .kind = .{ .reg = .st7 } },
+ .{ .type = .f80, .kind = .{ .reg = .st6 } },
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.dst0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp3p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._, .@"and", .tmp5w, .memid(.src1w, .tmp0, 8), ._, ._ },
+ .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ },
+ .{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ },
+ .{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
+ .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ },
+ .{ ._, .f_, .ld, .memi(.dst0t, .tmp0), ._, ._, ._ },
+ .{ ._, .f_p, .add, ._, ._, ._, ._ },
+ .{ ._, .f_p, .st, .memi(.dst0t, .tmp0), ._, ._, ._ },
+ .{ .@"1:", ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .avx, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -41700,9 +42718,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.extra_temps = .{
.{ .type = .f128, .kind = .mem },
.{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
- .{ .type = .f128, .kind = .{ .reg = .rcx } },
- .{ .type = .f128, .kind = .{ .reg = .rdx } },
- .{ .type = .f128, .kind = .{ .reg = .rax } },
+ .{ .type = .u64, .kind = .{ .reg = .rcx } },
+ .{ .type = .u64, .kind = .{ .reg = .rdx } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
.{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
.unused,
.unused,
@@ -41728,6 +42746,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp5d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse4_1, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -41745,9 +42764,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.extra_temps = .{
.{ .type = .f128, .kind = .mem },
.{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
- .{ .type = .f128, .kind = .{ .reg = .rcx } },
- .{ .type = .f128, .kind = .{ .reg = .rdx } },
- .{ .type = .f128, .kind = .{ .reg = .rax } },
+ .{ .type = .u64, .kind = .{ .reg = .rcx } },
+ .{ .type = .u64, .kind = .{ .reg = .rdx } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
.{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
.unused,
.unused,
@@ -41773,6 +42792,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp5d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse2, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -41790,9 +42810,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.extra_temps = .{
.{ .type = .f128, .kind = .mem },
.{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
- .{ .type = .f128, .kind = .{ .reg = .rcx } },
- .{ .type = .f128, .kind = .{ .reg = .rdx } },
- .{ .type = .f128, .kind = .{ .reg = .rax } },
+ .{ .type = .u64, .kind = .{ .reg = .rcx } },
+ .{ .type = .u64, .kind = .{ .reg = .rdx } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
.{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
.unused,
.unused,
@@ -41805,8 +42825,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.each = .{ .once = &.{
.{ ._, ._dqa, .mov, .mem(.tmp0x), .src1x, ._, ._ },
.{ ._, ._, .call, .tmp1d, ._, ._, ._ },
- .{ ._, ._, .mov, .tmp2q, .ua(.src0, .add_smin), ._, ._ },
.{ ._, .p_d, .shuf, .src1x, .dst0x, .ui(0b11_10_11_10), ._ },
+ .{ ._, ._, .mov, .tmp2q, .ua(.src0, .add_smin), ._, ._ },
.{ ._, ._q, .mov, .tmp3q, .src1x, ._, ._ },
.{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ },
.{ ._, ._, .@"and", .tmp4q, .memd(.tmp0q, 8), ._, ._ },
@@ -41819,6 +42839,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp5d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -41836,9 +42857,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.extra_temps = .{
.{ .type = .f128, .kind = .mem },
.{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
- .{ .type = .f128, .kind = .{ .reg = .rdx } },
+ .{ .type = .u64, .kind = .{ .reg = .rdx } },
.{ .type = .f128, .kind = .mem },
- .{ .type = .f128, .kind = .{ .reg = .rax } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
.{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
.unused,
.unused,
@@ -41863,6 +42884,186 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp5d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .avx, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .f128, .kind = .{ .reg = .xmm1 } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_smin), ._, ._ },
+ .{ ._, .vp_q, .extr, .tmp1q, .dst0x, .ui(1), ._ },
+ .{ ._, ._, .mov, .tmp5q, .tmp0q, ._, ._ },
+ .{ ._, ._, .@"and", .tmp5q, .memd(.src1q, 8), ._, ._ },
+ .{ ._, ._, .xor, .tmp5q, .tmp1q, ._, ._ },
+ .{ ._, .v_q, .mov, .tmp1q, .dst0x, ._, ._ },
+ .{ ._, ._, .cmp, .tmp1q, .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp5q, .tmp0q, ._, ._ },
+ .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp6d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .sse4_1, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .f128, .kind = .{ .reg = .xmm1 } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_smin), ._, ._ },
+ .{ ._, .p_q, .extr, .tmp1q, .dst0x, .ui(1), ._ },
+ .{ ._, ._, .mov, .tmp5q, .tmp0q, ._, ._ },
+ .{ ._, ._, .@"and", .tmp5q, .memd(.src1q, 8), ._, ._ },
+ .{ ._, ._, .xor, .tmp5q, .tmp1q, ._, ._ },
+ .{ ._, ._q, .mov, .tmp1q, .dst0x, ._, ._ },
+ .{ ._, ._, .cmp, .tmp1q, .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp5q, .tmp0q, ._, ._ },
+ .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp6d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .sse2, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .f128, .kind = .{ .reg = .xmm1 } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, .p_d, .shuf, .tmp4x, .dst0x, .ui(0b11_10_11_10), ._ },
+ .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_smin), ._, ._ },
+ .{ ._, ._q, .mov, .tmp1q, .tmp4x, ._, ._ },
+ .{ ._, ._, .mov, .tmp5q, .tmp0q, ._, ._ },
+ .{ ._, ._, .@"and", .tmp5q, .memd(.src1q, 8), ._, ._ },
+ .{ ._, ._, .xor, .tmp5q, .tmp1q, ._, ._ },
+ .{ ._, ._q, .mov, .tmp1q, .dst0x, ._, ._ },
+ .{ ._, ._, .cmp, .tmp1q, .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp5q, .tmp0q, ._, ._ },
+ .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp6d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .sse, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } },
+ .{ .type = .f128, .kind = .mem },
+ .{ .type = .usize, .kind = .{ .reg = .rax } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+ .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_smin), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp0x), .dst0x, ._, ._ },
+ .{ ._, ._, .mov, .tmp4q, .tmp1q, ._, ._ },
+ .{ ._, ._, .@"and", .tmp4q, .memd(.src1q, 8), ._, ._ },
+ .{ ._, ._, .xor, .tmp4q, .lead(.tmp0q, 8), ._, ._ },
+ .{ ._, ._, .cmp, .lea(.tmp0q), .si(1), ._, ._ },
+ .{ ._, ._, .sbb, .tmp4q, .tmp1q, ._, ._ },
+ .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp5d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .avx, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -41909,6 +43110,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse4_1, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -41955,6 +43157,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse2, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -42002,6 +43205,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -46317,6 +47521,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -46350,6 +47555,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -46361,7 +47599,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } },
@@ -46376,15 +47614,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -46396,7 +47635,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } },
@@ -46411,15 +47650,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -46431,7 +47671,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } },
@@ -46446,13 +47686,121 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
- .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
} }) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{
@@ -50476,6 +51824,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -50509,6 +51858,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fminq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -50544,6 +51926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -50579,6 +51962,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -50613,6 +51997,114 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fminq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fminq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fminq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
} }) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{
@tagName(air_tag),
@@ -74864,6 +76356,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.patterns = &.{
@@ -74889,6 +76382,34 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp1d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.patterns = &.{
@@ -74896,7 +76417,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } },
.unused,
@@ -74911,14 +76432,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.patterns = &.{
@@ -74926,7 +76448,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } },
.unused,
@@ -74941,14 +76463,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.patterns = &.{
@@ -74956,7 +76479,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } },
.unused,
@@ -74971,12 +76494,105 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
} }) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {f} {f}", .{
@@ -75589,6 +77205,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.patterns = &.{
@@ -75614,6 +77231,34 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = @tagName(name) ++ "q" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp1d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.patterns = &.{
@@ -75644,6 +77289,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.patterns = &.{
@@ -75674,6 +77320,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.patterns = &.{
@@ -75703,6 +77350,99 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = @tagName(name) ++ "q" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = @tagName(name) ++ "q" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = @tagName(name) ++ "q" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
} },
}) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {f} {f}", .{
@@ -78312,6 +80052,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.patterns = &.{
@@ -78342,6 +80083,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = switch (direction) {
+ else => unreachable,
+ .down => "floorq",
+ .up => "ceilq",
+ .zero => "truncq",
+ } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp1d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.patterns = &.{
@@ -78349,7 +80123,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = switch (direction) {
else => unreachable,
@@ -78369,14 +80143,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.patterns = &.{
@@ -78384,7 +80159,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = switch (direction) {
else => unreachable,
@@ -78404,14 +80179,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.patterns = &.{
@@ -78419,7 +80195,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = switch (direction) {
else => unreachable,
@@ -78439,12 +80215,120 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
- .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
- .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = switch (direction) {
+ else => unreachable,
+ .down => "floorq",
+ .up => "ceilq",
+ .zero => "truncq",
+ } } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = switch (direction) {
+ else => unreachable,
+ .down => "floorq",
+ .up => "ceilq",
+ .zero => "truncq",
+ } } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = switch (direction) {
+ else => unreachable,
+ .down => "floorq",
+ .up => "ceilq",
+ .zero => "truncq",
+ } } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
} },
}) catch |err| switch (err) {
@@ -79063,7 +80947,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -79398,6 +81282,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .float = .xword }, .{ .float = .xword }, .any },
.patterns = &.{
@@ -79410,7 +81295,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -79430,6 +81315,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
.{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .float = .xword }, .{ .float = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .cc = switch (strict) {
+ true => .l,
+ false => .le,
+ } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp3d, .tmp3d, ._, ._ },
+ } },
} },
});
} else err: {
@@ -79575,7 +81492,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -79934,6 +81851,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
},
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .float = .xword }, .{ .float = .xword }, .any },
.patterns = &.{
@@ -79946,7 +81864,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -79963,6 +81881,35 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
.{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .float = .xword }, .{ .float = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .cc = .z }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .@"test", .tmp3d, .tmp3d, ._, ._ },
+ } },
} },
}) catch |err| break :err err;
switch (cmp_op) {
@@ -80018,14 +81965,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}
try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
},
- .cmp_vector, .cmp_vector_optimized => |air_tag| fallback: {
+ .cmp_vector, .cmp_vector_optimized => |air_tag| {
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
const vector_cmp = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data;
- switch (vector_cmp.compareOperator()) {
- .eq, .neq => {},
- .lt, .lte, .gte, .gt => if (cg.floatBits(cg.typeOf(vector_cmp.lhs).childType(zcu)) == null)
- break :fallback try cg.airCmpVector(inst),
- }
var ops = try cg.tempsFromOperands(inst, .{ vector_cmp.lhs, vector_cmp.rhs });
var res: [1]Temp = undefined;
(err: switch (vector_cmp.compareOperator()) {
@@ -80615,7 +82557,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -80659,7 +82601,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -80703,7 +82645,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -80748,7 +82690,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -80793,7 +82735,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.{ .type = .f32, .kind = .mem },
@@ -80840,7 +82782,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.{ .type = .f32, .kind = .mem },
@@ -80887,7 +82829,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -80940,7 +82882,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -80993,7 +82935,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -81047,7 +82989,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -81101,7 +83043,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.{ .type = .f32, .kind = .mem },
@@ -81157,7 +83099,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.{ .type = .f32, .kind = .mem },
@@ -81984,7 +83926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -82028,7 +83970,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -82072,7 +84014,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -82116,7 +84058,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -82160,7 +84102,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -82204,7 +84146,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -82248,7 +84190,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -82301,7 +84243,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -82354,7 +84296,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -82407,7 +84349,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -82460,7 +84402,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -82513,7 +84455,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -85125,7 +87067,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -85169,7 +87111,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -85213,7 +87155,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -85258,7 +87200,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -85303,7 +87245,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.{ .type = .f32, .kind = .mem },
@@ -85350,7 +87292,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.{ .type = .f32, .kind = .mem },
@@ -85397,7 +87339,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -85450,7 +87392,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -85503,7 +87445,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -85557,7 +87499,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -85611,7 +87553,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.{ .type = .f32, .kind = .mem },
@@ -85667,7 +87609,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.{ .type = .f32, .kind = .mem },
@@ -86508,7 +88450,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -86552,7 +88494,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -86596,7 +88538,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -86640,7 +88582,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -86684,7 +88626,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -86728,7 +88670,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u32, .kind = .{ .reg = .edx } },
.unused,
@@ -86772,7 +88714,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -86825,7 +88767,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -86878,7 +88820,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -86931,7 +88873,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -86984,7 +88926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -87037,7 +88979,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } },
- .{ .type = .i32, .kind = .{ .reg = .eax } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .u8, .kind = .{ .reg = .cl } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
@@ -88690,6 +90632,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .scalar_float = .{ .of = .word, .is = .word } }, .any },
@@ -88716,6 +90659,35 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .scalar_float = .{ .of = .word, .is = .word } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__trunctfhf2" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp1d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any },
@@ -88747,6 +90719,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse4_1, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any },
@@ -88778,6 +90751,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any },
@@ -88810,6 +90784,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any },
@@ -88819,7 +90794,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f64, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__trunctfhf2" } },
.{ .type = .f32, .kind = .mem },
.{ .type = .f16, .kind = .{ .reg = .ax } },
@@ -88843,6 +90818,138 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__trunctfhf2" } },
+ .{ .type = .f16, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-2, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"8", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, .vp_w, .extr, .memi(.dst0w, .tmp0), .tmp3x, .ui(0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse4_1, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__trunctfhf2" } },
+ .{ .type = .f16, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-2, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"8", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, .p_w, .extr, .memi(.dst0w, .tmp0), .tmp3x, .ui(0), ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__trunctfhf2" } },
+ .{ .type = .f16, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .f16, .kind = .{ .reg = .ax } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-2, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"8", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, .p_w, .extr, .tmp4d, .tmp3x, .ui(0), ._ },
+ .{ ._, ._, .mov, .memi(.dst0w, .tmp0), .tmp4w, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__trunctfhf2" } },
+ .{ .type = .f32, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .f32, .kind = .mem },
+ .{ .type = .f16, .kind = .{ .reg = .ax } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-2, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"8", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._ss, .mov, .mem(.tmp4d), .tmp3x, ._, ._ },
+ .{ ._, ._, .mov, .tmp5d, .mem(.tmp4d), ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0w, .tmp0), .tmp5w, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .scalar_float = .{ .of = .dword, .is = .dword } }, .any },
@@ -88869,6 +90976,35 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .scalar_float = .{ .of = .dword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__trunctfsf2" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp1d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .dword, .is = .dword } }, .any },
@@ -88900,6 +91036,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .dword, .is = .dword } }, .any },
@@ -88931,6 +91068,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .dword, .is = .dword } }, .any },
@@ -88962,6 +91100,71 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .dword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__trunctfsf2" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-4, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"4", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, .v_ss, .mov, .memi(.dst0d, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .dword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__trunctfsf2" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-4, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"4", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._ss, .mov, .memi(.dst0d, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .scalar_float = .{ .of = .qword, .is = .qword } }, .any },
@@ -88988,6 +91191,35 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .scalar_float = .{ .of = .qword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__trunctfdf2" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp1d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any },
@@ -89019,6 +91251,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any },
@@ -89050,6 +91283,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any },
@@ -89081,46 +91315,83 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_cc_abi = .sysv64,
- .required_features = .{ .sse, .x87, null, null },
- .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
- .dst_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, .any },
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any },
.patterns = &.{
- .{ .src = .{ .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .none, .none } },
+ .{ .src = .{ .to_mem, .none, .none } },
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .usize, .kind = .{ .extern_func = "__trunctfxf2" } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__trunctfdf2" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
.unused,
.unused,
.unused,
.unused,
.unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"2", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, .v_sd, .mov, .memi(.dst0q, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__trunctfdf2" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
- .dst_temps = .{ .{ .reg = .st0 }, .unused },
+ .dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"2", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._sd, .mov, .memi(.dst0q, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
.required_cc_abi = .win64,
.required_features = .{ .sse, null, null, null },
- .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
- .dst_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any },
.patterns = &.{
- .{ .src = .{ .{ .to_reg = .xmm1 }, .none, .none } },
+ .{ .src = .{ .to_mem, .none, .none } },
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__trunctfxf2" } },
- .unused,
- .unused,
+ .{ .type = .usize, .kind = .{ .extern_func = "__trunctfdf2" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -89132,21 +91403,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .lea, .tmp0p, .mem(.dst0), ._, ._ },
- .{ ._, ._, .call, .tmp1d, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"2", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._ps, .movl, .memi(.dst0q, .tmp0), .tmp3x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
.required_cc_abi = .sysv64,
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
- .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, .any },
+ .required_features = .{ .sse, .x87, null, null },
+ .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, .any },
.patterns = &.{
- .{ .src = .{ .to_mem, .none, .none } },
+ .{ .src = .{ .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .none, .none } },
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__trunctfxf2" } },
.unused,
.unused,
@@ -89156,31 +91429,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
.unused,
+ .unused,
+ .unused,
},
- .dst_temps = .{ .mem, .unused },
+ .dst_temps = .{ .{ .reg = .st0 }, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0d, .sia(-16, .dst0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
- .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
- .{ ._, .f_p, .st, .memi(.dst0t, .tmp0), ._, ._, ._ },
- .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
- .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
.required_cc_abi = .win64,
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
- .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, .any },
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__trunctfxf2" } },
.unused,
.unused,
@@ -89189,20 +91457,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
.unused,
+ .unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0d, .sia(-16, .dst0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", ._, .lea, .tmp1p, .memi(.dst0, .tmp0), ._, ._ },
- .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src0x, .tmp0), ._, ._ },
- .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
- .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
- .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.dst0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
} },
}, .{
.required_cc_abi = .sysv64,
- .required_features = .{ .sse2, null, null, null },
+ .required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, .any },
.patterns = &.{
@@ -89226,7 +91492,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-16, .dst0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
.{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
.{ ._, .f_p, .st, .memi(.dst0t, .tmp0), ._, ._, ._ },
@@ -89234,7 +91500,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_cc_abi = .win64,
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, .any },
@@ -89244,8 +91510,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__trunctfxf2" } },
.unused,
.unused,
@@ -89254,14 +91519,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
.unused,
+ .unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-16, .dst0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", ._, .lea, .tmp1p, .memi(.dst0, .tmp0), ._, ._ },
- .{ ._, ._dqa, .mov, .tmp2x, .memi(.src0x, .tmp0), ._, ._ },
- .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ },
+ .{ ._, .f_p, .st, .memi(.dst0t, .tmp0), ._, ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
@@ -89310,7 +91577,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__trunctfxf2" } },
.unused,
.unused,
@@ -89323,9 +91590,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .mov, .tmp0d, .sa(.dst0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .dst0, .add_unaligned_size), ._, ._ },
.{ .@"0:", ._, .lea, .tmp1p, .memi(.dst0, .tmp0), ._, ._ },
- .{ ._, ._ps, .mova, .tmp2x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp3d, ._, ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
@@ -110769,6 +113036,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, .slow_incdec, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any },
@@ -110802,6 +113070,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any },
@@ -110835,6 +113104,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, .slow_incdec, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any },
@@ -110868,6 +113138,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any },
@@ -110901,6 +113172,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, .slow_incdec, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any },
@@ -110934,6 +113206,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any },
@@ -110967,6 +113240,75 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, .slow_incdec, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixtfsi" } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .sia(-1, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2p, .tmp0p, ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp4b, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .lead(.tmp0, -16), ._, ._ },
+ .{ ._, ._, .sub, .tmp1d, .si(1), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixtfsi" } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .sia(-1, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2p, .tmp0p, ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp4b, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .lead(.tmp0, -16), ._, ._ },
+ .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
+ .{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any },
@@ -110998,6 +113340,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any },
@@ -111029,6 +113372,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any },
@@ -111060,6 +113404,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixtfsi" } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-2, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"8", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0w, .tmp0), .tmp3w, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .float = .xword }, .any, .any },
.dst_constraints = .{ .{ .signed_int = .dword }, .any },
@@ -111086,6 +113463,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .float = .xword }, .any, .any },
.dst_constraints = .{ .{ .unsigned_int = .dword }, .any },
@@ -111112,6 +113490,63 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .float = .xword }, .any, .any },
+ .dst_constraints = .{ .{ .signed_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixtfsi" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp1d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .float = .xword }, .any, .any },
+ .dst_constraints = .{ .{ .unsigned_int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfsi" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp1d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -111143,6 +113578,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -111174,6 +113610,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -111205,6 +113642,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -111236,6 +113674,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -111267,6 +113706,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -111298,6 +113738,71 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixtfsi" } },
+ .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-4, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"4", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0d, .tmp0), .tmp3d, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfsi" } },
+ .{ .type = .u32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-4, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"4", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0d, .tmp0), .tmp3d, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse, null, null },
.src_constraints = .{ .{ .float = .xword }, .any, .any },
.dst_constraints = .{ .{ .signed_int = .qword }, .any },
@@ -111324,6 +113829,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse, null, null },
.src_constraints = .{ .{ .float = .xword }, .any, .any },
.dst_constraints = .{ .{ .unsigned_int = .qword }, .any },
@@ -111350,6 +113856,63 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .sse, null, null },
+ .src_constraints = .{ .{ .float = .xword }, .any, .any },
+ .dst_constraints = .{ .{ .signed_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixtfdi" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp1d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .sse, null, null },
+ .src_constraints = .{ .{ .float = .xword }, .any, .any },
+ .dst_constraints = .{ .{ .unsigned_int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfdi" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp1d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .avx, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any },
@@ -111381,6 +113944,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .avx, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any },
@@ -111412,6 +113976,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse2, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any },
@@ -111443,6 +114008,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse2, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any },
@@ -111474,6 +114040,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any },
@@ -111505,6 +114072,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any },
@@ -111536,16 +114104,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_cc_abi = .sysv64,
- .required_features = .{ .sse, null, null, null },
- .src_constraints = .{ .{ .float = .xword }, .any, .any },
- .dst_constraints = .{ .{ .signed_int = .xword }, .any },
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .sse, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any },
.patterns = &.{
- .{ .src = .{ .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .none, .none } },
+ .{ .src = .{ .to_mem, .none, .none } },
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
- .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } },
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixtfdi" } },
+ .{ .type = .i64, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -111553,17 +114124,51 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
.unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"2", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", .sse, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfdi" } },
+ .{ .type = .u64, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
.unused,
.unused,
.unused,
},
- .dst_temps = .{ .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"2", .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp2d, ._, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_cc_abi = .win64,
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .float = .xword }, .any, .any },
.dst_constraints = .{ .{ .signed_int = .xword }, .any },
@@ -111584,7 +114189,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
- .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .dst_temps = .{ .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
@@ -111616,16 +114221,45 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.each = .{ .once = &.{
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .float = .xword }, .any, .any },
+ .dst_constraints = .{ .{ .signed_int = .xword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp1d, ._, ._, ._ },
+ } },
}, .{
.required_cc_abi = .win64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .float = .xword }, .any, .any },
.dst_constraints = .{ .{ .unsigned_int = .xword }, .any },
.patterns = &.{
- .{ .src = .{ .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .none, .none } },
+ .{ .src = .{ .to_mem, .none, .none } },
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } },
.unused,
.unused,
@@ -111636,12 +114270,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
.unused,
- .unused,
},
.dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
- .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp1d, ._, ._, ._ },
} },
}, .{
.required_cc_abi = .sysv64,
@@ -111677,10 +114311,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_cc_abi = .win64,
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
- .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -111688,8 +114322,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } },
- .unused,
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } },
+ .{ .type = .u128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -111704,15 +114338,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
.{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
.required_cc_abi = .sysv64,
- .required_features = .{ .avx, null, null, null },
+ .required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
- .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -111720,8 +114355,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } },
- .{ .type = .u128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } },
+ .{ .type = .i128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -111734,7 +114369,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
.{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ },
.{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ },
@@ -111742,8 +114377,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_cc_abi = .win64,
- .required_features = .{ .avx, null, null, null },
+ .required_cc_abi = .sysv64,
+ .required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any },
.patterns = &.{
@@ -111754,7 +114389,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } },
- .unused,
+ .{ .type = .u128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -111767,15 +114402,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
.required_cc_abi = .sysv64,
- .required_features = .{ .sse2, null, null, null },
+ .required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any },
.patterns = &.{
@@ -111799,7 +114435,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
.{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ },
.{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ },
@@ -111807,10 +114443,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_cc_abi = .win64,
- .required_features = .{ .sse2, null, null, null },
+ .required_cc_abi = .sysv64,
+ .required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
- .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
@@ -111818,8 +114454,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } },
- .unused,
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } },
+ .{ .type = .u128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -111832,26 +114468,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ },
+ .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_cc_abi = .sysv64,
- .required_features = .{ .sse2, null, null, null },
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
- .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } },
- .{ .type = .u128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } },
+ .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -111864,16 +114501,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
.required_cc_abi = .win64,
- .required_features = .{ .sse2, null, null, null },
+ .required_features = .{ .avx, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any },
.patterns = &.{
@@ -111882,9 +114518,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .f128, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } },
- .unused,
+ .{ .type = .u128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -111897,15 +114533,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_cc_abi = .sysv64,
- .required_features = .{ .sse, null, null, null },
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any },
.patterns = &.{
@@ -111914,9 +114550,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } },
- .{ .type = .i128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -111929,27 +114565,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
.required_cc_abi = .win64,
- .required_features = .{ .sse, null, null, null },
+ .required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
- .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } },
- .unused,
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } },
+ .{ .type = .u128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -111962,26 +114597,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .required_cc_abi = .sysv64,
+ .required_cc_abi = .win64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
- .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any },
+ .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .none, .none } },
},
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
- .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } },
- .{ .type = .u128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } },
+ .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -111994,10 +114629,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
- .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ },
- .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
@@ -112012,9 +114646,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.call_frame = .{ .alignment = .@"16" },
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
- .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } },
- .unused,
+ .{ .type = .u128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
.unused,
.unused,
.unused,
@@ -112027,13 +114661,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.clobbers = .{ .eflags = true, .caller_preserved = .ccc },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
- .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
.{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse, null, null },
.src_constraints = .{ .{ .float = .xword }, .any, .any },
.dst_constraints = .{ .{ .remainder_signed_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -112062,6 +114697,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse, null, null },
.src_constraints = .{ .{ .float = .xword }, .any, .any },
.dst_constraints = .{ .{ .remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -112090,6 +114726,67 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp2d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .float = .xword }, .any, .any },
+ .dst_constraints = .{ .{ .remainder_signed_int = .{ .of = .dword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixtfei" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.dst0), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .float = .xword }, .any, .any },
+ .dst_constraints = .{ .{ .remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfei" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.dst0), ._, ._ },
+ .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .avx, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .scalar_remainder_signed_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -112124,6 +114821,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .avx, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .scalar_remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -112158,6 +114856,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse2, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .scalar_remainder_signed_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -112192,6 +114891,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse2, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .scalar_remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -112226,6 +114926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .scalar_remainder_signed_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -112260,6 +114961,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .@"64bit", .sse, null, null },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
.dst_constraints = .{ .{ .scalar_remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
@@ -112293,6 +114995,76 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .scalar_remainder_signed_int = .{ .of = .dword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixtfei" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mema(.dst0, .add_unaligned_size_sub_elem_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2p, .tmp1p, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .sa(.dst0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp4p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp5d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .leaa(.tmp1, .sub_dst0_elem_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .dst_constraints = .{ .{ .scalar_remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfei" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mema(.dst0, .add_unaligned_size_sub_elem_size), ._, ._ },
+ .{ .@"0:", ._, .mov, .tmp2p, .tmp1p, ._, ._ },
+ .{ ._, ._, .mov, .tmp3d, .sa(.dst0, .add_bit_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp4p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp5d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .leaa(.tmp1, .sub_dst0_elem_size), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
} }) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{
@tagName(air_tag),
@@ -139664,6 +142436,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_p, .st, .dst0t, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -139695,6 +142468,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -139726,6 +142500,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -139756,6 +142531,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fminq" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fminq" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fminq" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
} },
.Max => comptime &.{ .{
.required_features = .{ .avx, null, null, null },
@@ -149792,6 +152669,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_p, .st, .dst0t, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -149823,6 +152701,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -149854,6 +152733,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -149884,6 +152764,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
} },
.Add => comptime &.{ .{
.required_features = .{ .avx, null, null, null },
@@ -154411,6 +157393,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_cw, .ld, .tmp1w, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -154442,6 +157425,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -154473,6 +157457,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -154503,6 +157488,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memia(.src0, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memia(.src0, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memia(.src0, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
} },
.Mul => comptime &.{ .{
.required_features = .{ .avx, null, null, null },
@@ -157989,6 +161076,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_cw, .ld, .tmp1w, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -158020,6 +161108,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -158051,6 +161140,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -158081,6 +161171,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memia(.src0, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memia(.src0, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memia(.src0, .tmp0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+ .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+ } },
} },
}) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s}.{s} {f} {f}", .{
@@ -159711,6 +162903,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_p, .st, .dst0t, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -159742,6 +162935,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -159773,6 +162967,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -159803,6 +162998,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fminq" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fminq" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fminq" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
} },
.Max => comptime &.{ .{
.required_features = .{ .f16c, null, null, null },
@@ -161403,6 +164700,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_p, .st, .dst0t, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -161434,6 +164732,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -161465,6 +164764,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -161495,6 +164795,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
} },
.Add => comptime &.{ .{
.required_features = .{ .f16c, .fast_hops, null, null },
@@ -163701,6 +167103,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_cw, .ld, .tmp1w, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -163732,6 +167135,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -163763,6 +167167,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -163793,6 +167198,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
} },
.Mul => comptime &.{ .{
.required_features = .{ .f16c, null, null, null },
@@ -165283,6 +168790,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .f_cw, .ld, .tmp1w, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -165314,6 +168822,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -165345,6 +168854,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.dst_constraints = .{ .{ .float = .xword }, .any },
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
@@ -165375,6 +168885,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .dst_constraints = .{ .{ .float = .xword }, .any },
+ .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .none, .none } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } },
+ .{ .type = .f128, .kind = .mem },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ },
+ .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },
+ } },
} },
}) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s}.{s} {f} {f}", .{
@@ -169007,6 +172619,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .xword, .is = .xword } },
@@ -169040,6 +172653,40 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .call, .tmp0d, ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .scalar_float = .{ .of = .xword, .is = .xword } },
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .to_mem } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaq" } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .mem(.src2), ._, ._ },
+ .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .avx, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -169076,6 +172723,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -169112,6 +172760,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
+ .required_cc_abi = .sysv64,
.required_features = .{ .sse, null, null, null },
.src_constraints = .{
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
@@ -169147,6 +172796,117 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .to_mem } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp3p, .memi(.src2, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .to_mem } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp3p, .memi(.src2, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_cc_abi = .win64,
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .to_mem } },
+ },
+ .call_frame = .{ .alignment = .@"16" },
+ .extra_temps = .{
+ .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } },
+ .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } },
+ .{ .type = .usize, .kind = .{ .extern_func = "fmaq" } },
+ .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } },
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ },
+ .{ ._, ._, .lea, .tmp3p, .memi(.src2, .tmp0), ._, ._ },
+ .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+ .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
} }) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f} {f}", .{
@tagName(air_tag),
@@ -170541,4887 +174301,78 @@ fn copyToRegisterWithInstTracking(
return MCValue{ .register = reg };
}
-fn airAlloc(self: *CodeGen, inst: Air.Inst.Index) !void {
- const result = MCValue{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } };
- return self.finishAir(inst, result, .{ .none, .none, .none });
-}
-
-fn airRetPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
- const result: MCValue = switch (self.ret_mcv.long) {
- else => unreachable,
- .none => .{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } },
- .load_frame => .{ .register_offset = .{
- .reg = (try self.copyToRegisterWithInstTracking(
- inst,
- self.typeOfIndex(inst),
- self.ret_mcv.long,
- )).register,
- .off = self.ret_mcv.short.indirect.off,
- } },
- };
- return self.finishAir(inst, result, .{ .none, .none, .none });
-}
-
-fn airFptrunc(self: *CodeGen, inst: Air.Inst.Index) !void {
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const dst_ty = self.typeOfIndex(inst);
- const dst_bits = dst_ty.floatBits(self.target);
- const src_ty = self.typeOf(ty_op.operand);
- const src_bits = src_ty.floatBits(self.target);
-
- const result = result: {
- if (switch (dst_bits) {
- 16 => switch (src_bits) {
- 32 => !self.hasFeature(.f16c),
- 64, 80, 128 => true,
- else => unreachable,
- },
- 32 => switch (src_bits) {
- 64 => false,
- 80, 128 => true,
- else => unreachable,
- },
- 64 => switch (src_bits) {
- 80, 128 => true,
- else => unreachable,
- },
- 80 => switch (src_bits) {
- 128 => true,
- else => unreachable,
- },
- else => unreachable,
- }) {
- var sym_buf: ["__trunc?f?f2".len]u8 = undefined;
- break :result try self.genCall(.{ .extern_func = .{
- .return_type = self.floatCompilerRtAbiType(dst_ty, src_ty).toIntern(),
- .param_types = &.{self.floatCompilerRtAbiType(src_ty, dst_ty).toIntern()},
- .sym = std.fmt.bufPrint(&sym_buf, "__trunc{c}f{c}f2", .{
- floatCompilerRtAbiName(src_bits),
- floatCompilerRtAbiName(dst_bits),
- }) catch unreachable,
- } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{});
- }
-
- const src_mcv = try self.resolveInst(ty_op.operand);
- const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_mcv
- else
- try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
- const dst_reg = dst_mcv.getReg().?.to128();
- const dst_lock = self.register_manager.lockReg(dst_reg);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- if (dst_bits == 16) {
- assert(self.hasFeature(.f16c));
- switch (src_bits) {
- 32 => {
- const mat_src_reg = if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(src_ty, src_mcv);
- try self.asmRegisterRegisterImmediate(
- .{ .v_, .cvtps2ph },
- dst_reg,
- mat_src_reg.to128(),
- bits.RoundMode.imm(.{}),
- );
- },
- else => unreachable,
- }
- } else {
- assert(src_bits == 64 and dst_bits == 32);
- if (self.hasFeature(.avx)) if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
- .{ .v_ss, .cvtsd2 },
- dst_reg,
- dst_reg,
- try src_mcv.mem(self, .{ .size = .qword }),
- ) else try self.asmRegisterRegisterRegister(
- .{ .v_ss, .cvtsd2 },
- dst_reg,
- dst_reg,
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
- ) else if (src_mcv.isBase()) try self.asmRegisterMemory(
- .{ ._ss, .cvtsd2 },
- dst_reg,
- try src_mcv.mem(self, .{ .size = .qword }),
- ) else try self.asmRegisterRegister(
- .{ ._ss, .cvtsd2 },
- dst_reg,
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
- );
- }
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn airFpext(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const dst_ty = self.typeOfIndex(inst);
- const dst_scalar_ty = dst_ty.scalarType(zcu);
- const dst_bits = dst_scalar_ty.floatBits(self.target);
- const src_ty = self.typeOf(ty_op.operand);
- const src_scalar_ty = src_ty.scalarType(zcu);
- const src_bits = src_scalar_ty.floatBits(self.target);
-
- const result = result: {
- if (switch (src_bits) {
- 16 => switch (dst_bits) {
- 32, 64 => !self.hasFeature(.f16c),
- 80, 128 => true,
- else => unreachable,
- },
- 32 => switch (dst_bits) {
- 64 => false,
- 80, 128 => true,
- else => unreachable,
- },
- 64 => switch (dst_bits) {
- 80, 128 => true,
- else => unreachable,
- },
- 80 => switch (dst_bits) {
- 128 => true,
- else => unreachable,
- },
- else => unreachable,
- }) {
- if (dst_ty.isVector(zcu)) break :result null;
- var sym_buf: ["__extend?f?f2".len]u8 = undefined;
- break :result try self.genCall(.{ .extern_func = .{
- .return_type = self.floatCompilerRtAbiType(dst_scalar_ty, src_scalar_ty).toIntern(),
- .param_types = &.{self.floatCompilerRtAbiType(src_scalar_ty, dst_scalar_ty).toIntern()},
- .sym = std.fmt.bufPrint(&sym_buf, "__extend{c}f{c}f2", .{
- floatCompilerRtAbiName(src_bits),
- floatCompilerRtAbiName(dst_bits),
- }) catch unreachable,
- } }, &.{src_scalar_ty}, &.{.{ .air_ref = ty_op.operand }}, .{});
- }
-
- const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));
- const src_mcv = try self.resolveInst(ty_op.operand);
- const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_mcv
- else
- try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
- const dst_reg = dst_mcv.getReg().?;
- const dst_alias = registerAlias(dst_reg, @intCast(@max(dst_ty.abiSize(zcu), 16)));
- const dst_lock = self.register_manager.lockReg(dst_reg);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- const vec_len = if (dst_ty.isVector(zcu)) dst_ty.vectorLen(zcu) else 1;
- if (src_bits == 16) {
- assert(self.hasFeature(.f16c));
- const mat_src_reg = if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(src_ty, src_mcv);
- try self.asmRegisterRegister(
- .{ .v_ps, .cvtph2 },
- dst_alias,
- registerAlias(mat_src_reg, src_abi_size),
- );
- switch (dst_bits) {
- 32 => {},
- 64 => try self.asmRegisterRegisterRegister(
- .{ .v_sd, .cvtss2 },
- dst_alias,
- dst_alias,
- dst_alias,
- ),
- else => unreachable,
- }
- } else {
- assert(src_bits == 32 and dst_bits == 64);
- if (self.hasFeature(.avx)) switch (vec_len) {
- 1 => if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
- .{ .v_sd, .cvtss2 },
- dst_alias,
- dst_alias,
- try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
- ) else try self.asmRegisterRegisterRegister(
- .{ .v_sd, .cvtss2 },
- dst_alias,
- dst_alias,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
- ),
- 2...4 => if (src_mcv.isBase()) try self.asmRegisterMemory(
- .{ .v_pd, .cvtps2 },
- dst_alias,
- try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
- ) else try self.asmRegisterRegister(
- .{ .v_pd, .cvtps2 },
- dst_alias,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
- ),
- else => break :result null,
- } else if (src_mcv.isBase()) try self.asmRegisterMemory(
- switch (vec_len) {
- 1 => .{ ._sd, .cvtss2 },
- 2 => .{ ._pd, .cvtps2 },
- else => break :result null,
- },
- dst_alias,
- try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
- ) else try self.asmRegisterRegister(
- switch (vec_len) {
- 1 => .{ ._sd, .cvtss2 },
- 2 => .{ ._pd, .cvtps2 },
- else => break :result null,
- },
- dst_alias,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
- );
- }
- break :result dst_mcv;
- } orelse return self.fail("TODO implement airFpext from {f} to {f}", .{
- src_ty.fmt(pt), dst_ty.fmt(pt),
- });
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const src_ty = self.typeOf(ty_op.operand);
- const dst_ty = self.typeOfIndex(inst);
-
- const result = @as(?MCValue, result: {
- const src_abi_size: u31 = @intCast(src_ty.abiSize(zcu));
- const dst_abi_size: u31 = @intCast(dst_ty.abiSize(zcu));
-
- const src_int_info = src_ty.intInfo(zcu);
- const dst_int_info = dst_ty.intInfo(zcu);
- const extend = switch (src_int_info.signedness) {
- .signed => dst_int_info,
- .unsigned => src_int_info,
- }.signedness;
-
- const src_mcv = try self.resolveInst(ty_op.operand);
- if (dst_ty.isVector(zcu)) {
- const max_abi_size = @max(dst_abi_size, src_abi_size);
- const has_avx = self.hasFeature(.avx);
-
- const dst_elem_abi_size = dst_ty.childType(zcu).abiSize(zcu);
- const src_elem_abi_size = src_ty.childType(zcu).abiSize(zcu);
- switch (std.math.order(dst_elem_abi_size, src_elem_abi_size)) {
- .lt => {
- if (max_abi_size > self.vectorSize(.int)) break :result null;
- const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) {
- else => break :result null,
- 1 => switch (src_elem_abi_size) {
- else => break :result null,
- 2 => switch (dst_int_info.signedness) {
- .signed => if (has_avx) .{ .vp_b, .ackssw } else .{ .p_b, .ackssw },
- .unsigned => if (has_avx) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw },
- },
- },
- 2 => switch (src_elem_abi_size) {
- else => break :result null,
- 4 => switch (dst_int_info.signedness) {
- .signed => if (has_avx) .{ .vp_w, .ackssd } else .{ .p_w, .ackssd },
- .unsigned => if (has_avx)
- .{ .vp_w, .ackusd }
- else if (self.hasFeature(.sse4_1))
- .{ .p_w, .ackusd }
- else
- break :result null,
- },
- },
- };
-
- const dst_mcv: MCValue = if (src_mcv.isRegister() and
- self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_mcv
- else if (has_avx and src_mcv.isRegister())
- .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
- else
- try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv);
- const dst_reg = dst_mcv.getReg().?;
- const dst_alias = registerAlias(dst_reg, dst_abi_size);
-
- if (has_avx) try self.asmRegisterRegisterRegister(
- mir_tag,
- dst_alias,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- dst_reg, src_abi_size),
- dst_alias,
- ) else try self.asmRegisterRegister(
- mir_tag,
- dst_alias,
- dst_alias,
- );
- break :result dst_mcv;
- },
- .eq => if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- break :result src_mcv
- else {
- const dst_mcv = try self.allocRegOrMem(inst, true);
- try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
- break :result dst_mcv;
- },
- .gt => if (self.hasFeature(.sse4_1)) {
- if (max_abi_size > self.vectorSize(.int)) break :result null;
- const mir_tag: Mir.Inst.FixedTag = .{ switch (dst_elem_abi_size) {
- else => break :result null,
- 2 => if (has_avx) .vp_w else .p_w,
- 4 => if (has_avx) .vp_d else .p_d,
- 8 => if (has_avx) .vp_q else .p_q,
- }, switch (src_elem_abi_size) {
- else => break :result null,
- 1 => switch (extend) {
- .signed => .movsxb,
- .unsigned => .movzxb,
- },
- 2 => switch (extend) {
- .signed => .movsxw,
- .unsigned => .movzxw,
- },
- 4 => switch (extend) {
- .signed => .movsxd,
- .unsigned => .movzxd,
- },
- } };
-
- const dst_mcv: MCValue = if (src_mcv.isRegister() and
- self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_mcv
- else
- .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) };
- const dst_reg = dst_mcv.getReg().?;
- const dst_alias = registerAlias(dst_reg, dst_abi_size);
-
- if (src_mcv.isBase()) try self.asmRegisterMemory(
- mir_tag,
- dst_alias,
- try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }),
- ) else try self.asmRegisterRegister(
- mir_tag,
- dst_alias,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
- );
- break :result dst_mcv;
- } else {
- const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) {
- else => break :result null,
- 2 => switch (src_elem_abi_size) {
- else => break :result null,
- 1 => .{ .p_, .unpcklbw },
- },
- 4 => switch (src_elem_abi_size) {
- else => break :result null,
- 2 => .{ .p_, .unpcklwd },
- },
- 8 => switch (src_elem_abi_size) {
- else => break :result null,
- 2 => .{ .p_, .unpckldq },
- },
- };
-
- const dst_mcv: MCValue = if (src_mcv.isRegister() and
- self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_mcv
- else
- try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
- const dst_reg = dst_mcv.getReg().?;
-
- const ext_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
- const ext_alias = registerAlias(ext_reg, src_abi_size);
- const ext_lock = self.register_manager.lockRegAssumeUnused(ext_reg);
- defer self.register_manager.unlockReg(ext_lock);
-
- try self.asmRegisterRegister(.{ .p_, .xor }, ext_alias, ext_alias);
- switch (extend) {
- .signed => try self.asmRegisterRegister(
- .{ switch (src_elem_abi_size) {
- else => unreachable,
- 1 => .p_b,
- 2 => .p_w,
- 4 => .p_d,
- }, .cmpgt },
- ext_alias,
- registerAlias(dst_reg, src_abi_size),
- ),
- .unsigned => {},
- }
- try self.asmRegisterRegister(
- mir_tag,
- registerAlias(dst_reg, dst_abi_size),
- registerAlias(ext_reg, dst_abi_size),
- );
- break :result dst_mcv;
- },
- }
- @compileError("unreachable");
- }
-
- const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty;
-
- const src_storage_bits: u16 = switch (src_mcv) {
- .register, .register_offset => 64,
- .register_pair => 128,
- .load_frame => |frame_addr| @intCast(self.getFrameAddrSize(frame_addr) * 8),
- else => src_int_info.bits,
- };
-
- const dst_mcv = if ((if (src_mcv.getReg()) |src_reg| src_reg.isClass(.general_purpose) else src_abi_size > 8) and
- dst_int_info.bits <= src_storage_bits and
- std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable ==
- std.math.divCeil(u32, src_storage_bits, 64) catch unreachable and
- self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
- const dst_mcv = try self.allocRegOrMem(inst, true);
- try self.genCopy(min_ty, dst_mcv, src_mcv, .{});
- break :dst dst_mcv;
- };
-
- if (dst_int_info.bits <= src_int_info.bits) break :result if (dst_mcv.isRegister())
- .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) }
- else
- dst_mcv;
-
- if (dst_mcv.isRegister()) {
- try self.truncateRegister(src_ty, dst_mcv.getReg().?);
- break :result .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) };
- }
-
- const src_limbs_len = std.math.divCeil(u31, src_abi_size, 8) catch unreachable;
- const dst_limbs_len = @divExact(dst_abi_size, 8);
-
- const high_mcv: MCValue = if (dst_mcv.isBase())
- dst_mcv.address().offset((src_limbs_len - 1) * 8).deref()
- else
- .{ .register = dst_mcv.register_pair[1] };
- const high_reg = if (high_mcv.isRegister())
- high_mcv.getReg().?
- else
- try self.copyToTmpRegister(switch (src_int_info.signedness) {
- .signed => .isize,
- .unsigned => .usize,
- }, high_mcv);
- const high_lock = self.register_manager.lockRegAssumeUnused(high_reg);
- defer self.register_manager.unlockReg(high_lock);
-
- const high_bits = src_int_info.bits % 64;
- if (high_bits > 0) {
- try self.truncateRegister(src_ty, high_reg);
- const high_ty: Type = if (dst_int_info.bits >= 64) .usize else dst_ty;
- try self.genCopy(high_ty, high_mcv, .{ .register = high_reg }, .{});
- }
-
- if (dst_limbs_len > src_limbs_len) try self.genInlineMemset(
- dst_mcv.address().offset(src_limbs_len * 8),
- switch (extend) {
- .signed => extend: {
- const extend_mcv = MCValue{ .register = high_reg };
- try self.genShiftBinOpMir(.{ ._r, .sa }, .isize, extend_mcv, .u8, .{ .immediate = 63 });
- break :extend extend_mcv;
- },
- .unsigned => .{ .immediate = 0 },
- },
- .{ .immediate = (dst_limbs_len - src_limbs_len) * 8 },
- .{},
- );
-
- break :result dst_mcv;
- }) orelse return self.fail("TODO implement airIntCast from {f} to {f}", .{
- src_ty.fmt(pt), dst_ty.fmt(pt),
- });
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn airTrunc(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-
- const dst_ty = self.typeOfIndex(inst);
- const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
- const src_ty = self.typeOf(ty_op.operand);
- const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));
-
- const result = result: {
- const src_mcv = try self.resolveInst(ty_op.operand);
- const src_lock =
- if (src_mcv.getReg()) |reg| self.register_manager.lockRegAssumeUnused(reg) else null;
- defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
-
- const dst_mcv = if (src_mcv.isRegister() and src_mcv.getReg().?.isClass(self.regClassForType(dst_ty)) and
- self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_mcv
- else if (dst_abi_size <= 8)
- try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv)
- else if (dst_abi_size <= 16 and !dst_ty.isVector(zcu)) dst: {
- const dst_regs =
- try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp);
- const dst_mcv: MCValue = .{ .register_pair = dst_regs };
- const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
- defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
-
- try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
- break :dst dst_mcv;
- } else dst: {
- const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, inst, true);
- try self.genCopy(src_ty, dst_mcv, src_mcv, .{});
- break :dst dst_mcv;
- };
-
- if (dst_ty.zigTypeTag(zcu) == .vector) {
- assert(src_ty.zigTypeTag(zcu) == .vector and dst_ty.vectorLen(zcu) == src_ty.vectorLen(zcu));
- const dst_elem_ty = dst_ty.childType(zcu);
- const dst_elem_abi_size: u32 = @intCast(dst_elem_ty.abiSize(zcu));
- const src_elem_ty = src_ty.childType(zcu);
- const src_elem_abi_size: u32 = @intCast(src_elem_ty.abiSize(zcu));
-
- const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_elem_abi_size) {
- 1 => switch (src_elem_abi_size) {
- 2 => switch (dst_ty.vectorLen(zcu)) {
- 1...8 => if (self.hasFeature(.avx)) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw },
- 9...16 => if (self.hasFeature(.avx2)) .{ .vp_b, .ackusw } else null,
- else => null,
- },
- else => null,
- },
- 2 => switch (src_elem_abi_size) {
- 4 => switch (dst_ty.vectorLen(zcu)) {
- 1...4 => if (self.hasFeature(.avx))
- .{ .vp_w, .ackusd }
- else if (self.hasFeature(.sse4_1))
- .{ .p_w, .ackusd }
- else
- null,
- 5...8 => if (self.hasFeature(.avx2)) .{ .vp_w, .ackusd } else null,
- else => null,
- },
- else => null,
- },
- else => null,
- }) orelse return self.fail("TODO implement airTrunc for {f}", .{dst_ty.fmt(pt)});
-
- const dst_info = dst_elem_ty.intInfo(zcu);
- const src_info = src_elem_ty.intInfo(zcu);
-
- const mask_val = try pt.intValue(src_elem_ty, @as(u64, std.math.maxInt(u64)) >> @intCast(64 - dst_info.bits));
-
- const splat_ty = try pt.vectorType(.{
- .len = @intCast(@divExact(@as(u64, if (src_abi_size > 16) 256 else 128), src_info.bits)),
- .child = src_elem_ty.ip_index,
- });
- const splat_abi_size: u32 = @intCast(splat_ty.abiSize(zcu));
-
- const splat_val = try pt.aggregateSplatValue(splat_ty, mask_val);
-
- const splat_mcv = try self.lowerValue(splat_val);
- const splat_addr_mcv: MCValue = switch (splat_mcv) {
- .memory, .indirect, .load_frame => splat_mcv.address(),
- else => .{ .register = try self.copyToTmpRegister(.usize, splat_mcv.address()) },
- };
-
- const dst_reg = dst_mcv.getReg().?;
- const dst_alias = registerAlias(dst_reg, src_abi_size);
- if (self.hasFeature(.avx)) {
- try self.asmRegisterRegisterMemory(
- .{ .vp_, .@"and" },
- dst_alias,
- dst_alias,
- try splat_addr_mcv.deref().mem(self, .{ .size = .fromSize(splat_abi_size) }),
- );
- if (src_abi_size > 16) {
- const temp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
- const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
- defer self.register_manager.unlockReg(temp_lock);
-
- try self.asmRegisterRegisterImmediate(
- .{ if (self.hasFeature(.avx2)) .v_i128 else .v_f128, .extract },
- registerAlias(temp_reg, dst_abi_size),
- dst_alias,
- .u(1),
- );
- try self.asmRegisterRegisterRegister(
- mir_tag,
- registerAlias(dst_reg, dst_abi_size),
- registerAlias(dst_reg, dst_abi_size),
- registerAlias(temp_reg, dst_abi_size),
- );
- } else try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, dst_alias);
- } else {
- try self.asmRegisterMemory(
- .{ .p_, .@"and" },
- dst_alias,
- try splat_addr_mcv.deref().mem(self, .{ .size = .fromSize(splat_abi_size) }),
- );
- try self.asmRegisterRegister(mir_tag, dst_alias, dst_alias);
- }
- break :result dst_mcv;
- }
-
- // when truncating a `u16` to `u5`, for example, those top 3 bits in the result
- // have to be removed. this only happens if the dst if not a power-of-two size.
- if (dst_abi_size <= 8) {
- if (self.regExtraBits(dst_ty) > 0) {
- try self.truncateRegister(dst_ty, dst_mcv.register.to64());
- }
- } else if (dst_abi_size <= 16) {
- const dst_info = dst_ty.intInfo(zcu);
- const high_ty = try pt.intType(dst_info.signedness, dst_info.bits - 64);
- if (self.regExtraBits(high_ty) > 0) {
- try self.truncateRegister(high_ty, dst_mcv.register_pair[1].to64());
- }
- }
-
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
/// Lowers the `slice` AIR instruction: spills the `{ ptr, len }` pair into a
/// fresh stack frame slot and yields the result as a `load_frame` value.
fn airSlice(self: *CodeGen, inst: Air.Inst.Index) !void {
    const zcu = self.pt.zcu;
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const operands = self.air.extraData(Air.Bin, ty_pl.payload).data;

    // Allocate a frame slot large enough for the whole slice value.
    const slice_ty = self.typeOfIndex(inst);
    const frame_index = try self.allocFrameIndex(.initSpill(slice_ty, zcu));

    // Store the pointer at offset 0, then the length immediately after it.
    const ptr_ty = self.typeOf(operands.lhs);
    try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, .{ .air_ref = operands.lhs }, .{});

    const len_ty = self.typeOf(operands.rhs);
    const len_offset: i32 = @intCast(ptr_ty.abiSize(zcu));
    try self.genSetMem(.{ .frame = frame_index }, len_offset, len_ty, .{ .air_ref = operands.rhs }, .{});

    return self.finishAir(
        inst,
        .{ .load_frame = .{ .index = frame_index } },
        .{ operands.lhs, operands.rhs, .none },
    );
}
-
/// Lowers a unary AIR instruction by delegating to `genUnOp` and finishing
/// the instruction with the resulting machine value.
fn airUnOp(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
    const data = self.air.instructions.items(.data)[@intFromEnum(inst)];
    const operand = data.ty_op.operand;
    const result = try self.genUnOp(inst, tag, operand);
    return self.finishAir(inst, result, .{ operand, .none, .none });
}
-
/// Lowers a binary AIR instruction via `genBinOp`, then masks off any bits
/// above the type's logical width when the destination is an ABI integer
/// whose ABI size exceeds its bit size (e.g. `u37` stored in 8 bytes).
fn airBinOp(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
    const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);

    const dst_ty = self.typeOfIndex(inst);
    if (dst_ty.isAbiInt(zcu)) {
        const abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
        const bit_size: u32 = @intCast(dst_ty.bitSize(zcu));
        if (abi_size * 8 > bit_size) {
            // There are padding bits above the value that must be cleared.
            const dst_lock = switch (dst_mcv) {
                .register => |dst_reg| self.register_manager.lockRegAssumeUnused(dst_reg),
                else => null,
            };
            defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);

            if (dst_mcv.isRegister()) {
                try self.truncateRegister(dst_ty, dst_mcv.getReg().?);
            } else {
                // Result lives in memory: load just the topmost (partial)
                // limb into a temporary, truncate it, and store it back.
                const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
                defer self.register_manager.unlockReg(tmp_lock);

                // Width of the partial top limb: 1..64 bits.
                const hi_ty = try pt.intType(.unsigned, @intCast((dst_ty.bitSize(zcu) - 1) % 64 + 1));
                const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref();
                try self.genSetReg(tmp_reg, hi_ty, hi_mcv, .{});
                try self.truncateRegister(dst_ty, tmp_reg);
                try self.genCopy(hi_ty, hi_mcv, .{ .register = tmp_reg }, .{});
            }
        }
    }
    return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
}
-
/// Lowers pointer arithmetic (`ptr_add`/`ptr_sub`) by delegating to
/// `genBinOp` with the instruction's lhs/rhs operand pair.
fn airPtrArithmetic(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const operands = self.air.extraData(Air.Bin, ty_pl.payload).data;
    const result = try self.genBinOp(inst, tag, operands.lhs, operands.rhs);
    return self.finishAir(inst, result, .{ operands.lhs, operands.rhs, .none });
}
-
/// Conservatively computes how many bits of `dst_air` can actually be
/// significant: narrower than the nominal type width when the value comes
/// from an `intcast` of a narrower type, or from an interned constant.
fn activeIntBits(self: *CodeGen, dst_air: Air.Inst.Ref) u16 {
    const pt = self.pt;
    const zcu = pt.zcu;

    const dst_ty = self.typeOf(dst_air);
    const dst_info = dst_ty.intInfo(zcu);

    if (dst_air.toIndex()) |inst| {
        const tags = self.air.instructions.items(.tag);
        const datas = self.air.instructions.items(.data);
        if (tags[@intFromEnum(inst)] == .intcast) {
            const src_ty = self.typeOf(datas[@intFromEnum(inst)].ty_op.operand);
            const src_info = src_ty.intInfo(zcu);
            // Crossing signedness costs or saves exactly the sign bit.
            const effective_bits: u16 = if (src_info.signedness == dst_info.signedness)
                src_info.bits
            else if (src_info.signedness == .signed)
                src_info.bits - 1
            else
                src_info.bits + 1;
            return @min(effective_bits, dst_info.bits);
        }
    } else if (dst_air.toInterned()) |ip_index| {
        var space: Value.BigIntSpace = undefined;
        const int = Value.fromInterned(ip_index).toBigInt(&space, zcu);
        // A positive constant needs one extra bit to stay non-negative when
        // interpreted as a signed value.
        return @as(u16, @intCast(int.bitCountTwosComp())) +
            @intFromBool(int.positive and dst_info.signedness == .signed);
    }
    return dst_info.bits;
}
-
/// Lowers integer and float multiply/divide AIR instructions.
/// Floats and vectors delegate to `genBinOp`. 128-bit integer division is
/// lowered to compiler-rt calls (`__divti3`/`__udivti3`/`__modti3`/
/// `__umodti3`), with extra fixups for signed `div_floor` and `mod`.
/// Everything else goes through `genMulDivBinOp` with rax/rcx/rdx spilled.
fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
    const result = result: {
        const dst_ty = self.typeOfIndex(inst);
        switch (dst_ty.zigTypeTag(zcu)) {
            .float, .vector => break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs),
            else => {},
        }
        const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));

        // For multiplies the source width may be narrowed to the widest
        // active operand (but at least half the destination width);
        // divisions always use the full destination width.
        const dst_info = dst_ty.intInfo(zcu);
        const src_ty = try pt.intType(dst_info.signedness, switch (tag) {
            else => unreachable,
            .mul, .mul_wrap => @max(
                self.activeIntBits(bin_op.lhs),
                self.activeIntBits(bin_op.rhs),
                dst_info.bits / 2,
            ),
            .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_info.bits,
        });
        const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));

        // 128-bit division cannot be done inline; call compiler-rt.
        if (dst_abi_size == 16 and src_abi_size == 16) switch (tag) {
            else => unreachable,
            .mul, .mul_wrap => {},
            .div_trunc, .div_floor, .div_exact, .rem, .mod => {
                const signed = dst_ty.isSignedInt(zcu);
                // Scratch buffer sized for the longest symbol name emitted below.
                var sym_buf: ["__udiv?i3".len]u8 = undefined;
                // Signed div_floor needs a runtime sign check: when the
                // operands' signs differ (and the remainder is nonzero),
                // truncating division must be adjusted downward by one.
                const signed_div_floor_state: struct {
                    frame_index: FrameIndex,
                    state: State,
                    reloc: Mir.Inst.Index,
                } = if (signed and tag == .div_floor) state: {
                    // Frame slot holds the "needs adjustment" flag (0/1).
                    const frame_index = try self.allocFrameIndex(.initType(.usize, zcu));
                    try self.asmMemoryImmediate(
                        .{ ._, .mov },
                        .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } },
                        .u(0),
                    );

                    const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                    const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
                    defer self.register_manager.unlockReg(tmp_lock);

                    // Load the high limb of lhs into tmp_reg.
                    const lhs_mcv = try self.resolveInst(bin_op.lhs);
                    const mat_lhs_mcv = switch (lhs_mcv) {
                        .load_nav, .load_uav, .load_lazy_sym => mat_lhs_mcv: {
                            // TODO clean this up!
                            const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address());
                            break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
                        },
                        else => lhs_mcv,
                    };
                    const mat_lhs_lock = switch (mat_lhs_mcv) {
                        .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
                        else => null,
                    };
                    defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
                    if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
                        .{ ._, .mov },
                        tmp_reg,
                        try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
                    ) else try self.asmRegisterRegister(
                        .{ ._, .mov },
                        tmp_reg,
                        mat_lhs_mcv.register_pair[1],
                    );

                    // XOR with the high limb of rhs: the sign bit of the
                    // result is set iff the operands' signs differ.
                    const rhs_mcv = try self.resolveInst(bin_op.rhs);
                    const mat_rhs_mcv = switch (rhs_mcv) {
                        .load_nav, .load_uav, .load_lazy_sym => mat_rhs_mcv: {
                            // TODO clean this up!
                            const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address());
                            break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
                        },
                        else => rhs_mcv,
                    };
                    const mat_rhs_lock = switch (mat_rhs_mcv) {
                        .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
                        else => null,
                    };
                    defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
                    if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
                        .{ ._, .xor },
                        tmp_reg,
                        try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
                    ) else try self.asmRegisterRegister(
                        .{ ._, .xor },
                        tmp_reg,
                        mat_rhs_mcv.register_pair[1],
                    );
                    // Skip the adjustment path when signs match (sign flag not set).
                    const state = try self.saveState();
                    const reloc = try self.asmJccReloc(.ns, undefined);

                    break :state .{ .frame_index = frame_index, .state = state, .reloc = reloc };
                } else undefined;
                // Pick the compiler-rt symbol: signed div_floor first computes
                // the remainder (__mod?i3) to detect whether adjustment is needed.
                const call_mcv = try self.genCall(
                    .{ .extern_func = .{
                        .return_type = dst_ty.toIntern(),
                        .param_types = &.{ src_ty.toIntern(), src_ty.toIntern() },
                        .sym = std.fmt.bufPrint(&sym_buf, "__{s}{s}{c}i3", .{
                            if (signed) "" else "u",
                            switch (tag) {
                                .div_trunc, .div_exact => "div",
                                .div_floor => if (signed) "mod" else "div",
                                .rem, .mod => "mod",
                                else => unreachable,
                            },
                            intCompilerRtAbiName(@intCast(dst_ty.bitSize(zcu))),
                        }) catch unreachable,
                    } },
                    &.{ src_ty, src_ty },
                    &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } },
                    .{},
                );
                break :result if (signed) switch (tag) {
                    .div_floor => {
                        // Record whether the remainder was nonzero (i.e. the
                        // truncated quotient must be decremented by one).
                        try self.asmRegisterRegister(
                            .{ ._, .@"or" },
                            call_mcv.register_pair[0],
                            call_mcv.register_pair[1],
                        );
                        try self.asmSetccMemory(.nz, .{
                            .base = .{ .frame = signed_div_floor_state.frame_index },
                            .mod = .{ .rm = .{ .size = .byte } },
                        });
                        try self.restoreState(signed_div_floor_state.state, &.{}, .{
                            .emit_instructions = true,
                            .update_tracking = true,
                            .resurrect = true,
                            .close_scope = true,
                        });
                        self.performReloc(signed_div_floor_state.reloc);
                        // Now compute the truncated quotient and subtract the
                        // saved adjustment flag (128-bit sub via sub/sbb).
                        const dst_mcv = try self.genCall(
                            .{ .extern_func = .{
                                .return_type = dst_ty.toIntern(),
                                .param_types = &.{ src_ty.toIntern(), src_ty.toIntern() },
                                .sym = std.fmt.bufPrint(&sym_buf, "__div{c}i3", .{
                                    intCompilerRtAbiName(@intCast(dst_ty.bitSize(zcu))),
                                }) catch unreachable,
                            } },
                            &.{ src_ty, src_ty },
                            &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } },
                            .{},
                        );
                        try self.asmRegisterMemory(
                            .{ ._, .sub },
                            dst_mcv.register_pair[0],
                            .{
                                .base = .{ .frame = signed_div_floor_state.frame_index },
                                .mod = .{ .rm = .{ .size = .qword } },
                            },
                        );
                        try self.asmRegisterImmediate(.{ ._, .sbb }, dst_mcv.register_pair[1], .u(0));
                        try self.freeValue(
                            .{ .load_frame = .{ .index = signed_div_floor_state.frame_index } },
                        );
                        break :result dst_mcv;
                    },
                    .mod => {
                        // __modti3 yields the C remainder; when it is negative
                        // (relative to rhs's sign convention), add rhs to get
                        // the mathematical modulus, selected via cmovs.
                        const dst_regs = call_mcv.register_pair;
                        const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
                        defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);

                        const tmp_regs =
                            try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp);
                        const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs);
                        defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);

                        const rhs_mcv = try self.resolveInst(bin_op.rhs);
                        const mat_rhs_mcv = switch (rhs_mcv) {
                            .load_nav, .load_uav, .load_lazy_sym => mat_rhs_mcv: {
                                // TODO clean this up!
                                const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address());
                                break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
                            },
                            else => rhs_mcv,
                        };
                        const mat_rhs_lock = switch (mat_rhs_mcv) {
                            .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
                            else => null,
                        };
                        defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);

                        // tmp = remainder + rhs (128-bit add via add/adc).
                        for (tmp_regs, dst_regs) |tmp_reg, dst_reg|
                            try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_reg);
                        if (mat_rhs_mcv.isBase()) {
                            try self.asmRegisterMemory(
                                .{ ._, .add },
                                tmp_regs[0],
                                try mat_rhs_mcv.mem(self, .{ .size = .qword }),
                            );
                            try self.asmRegisterMemory(
                                .{ ._, .adc },
                                tmp_regs[1],
                                try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
                            );
                        } else for (
                            [_]Mir.Inst.Tag{ .add, .adc },
                            tmp_regs,
                            mat_rhs_mcv.register_pair,
                        ) |op, tmp_reg, rhs_reg|
                            try self.asmRegisterRegister(.{ ._, op }, tmp_reg, rhs_reg);
                        // If the remainder is negative, select the adjusted value.
                        try self.asmRegisterRegister(.{ ._, .@"test" }, dst_regs[1], dst_regs[1]);
                        for (dst_regs, tmp_regs) |dst_reg, tmp_reg|
                            try self.asmCmovccRegisterRegister(.s, dst_reg, tmp_reg);
                        break :result call_mcv;
                    },
                    else => call_mcv,
                } else call_mcv;
            },
        };

        // Generic path: x86 mul/div clobbers rax/rdx (and rcx is used as a
        // scratch), so spill and reserve them first.
        try self.spillEflagsIfOccupied();
        try self.spillRegisters(&.{ .rax, .rcx, .rdx });
        const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx });
        defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);

        const lhs_mcv = try self.resolveInst(bin_op.lhs);
        const rhs_mcv = try self.resolveInst(bin_op.rhs);
        break :result try self.genMulDivBinOp(tag, inst, dst_ty, src_ty, lhs_mcv, rhs_mcv);
    };
    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
-
/// Lowers saturating addition (`add_sat`) for scalar integers of ABI size
/// <= 8 bytes; vectors and wider integers are still TODO.
/// Strategy: compute the saturation limit in a spare register, perform the
/// add, then `cmov` the limit over the result when overflow occurred.
fn airAddSat(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
    const ty = self.typeOf(bin_op.lhs);
    if (ty.zigTypeTag(zcu) == .vector or ty.abiSize(zcu) > 8) return self.fail(
        "TODO implement airAddSat for {f}",
        .{ty.fmt(pt)},
    );

    // Reuse lhs's register when possible, otherwise copy it in.
    const lhs_mcv = try self.resolveInst(bin_op.lhs);
    const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
        lhs_mcv
    else
        try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv);
    const dst_reg = dst_mcv.register;
    const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
    defer self.register_manager.unlockReg(dst_lock);

    const rhs_mcv = try self.resolveInst(bin_op.rhs);
    const rhs_lock = switch (rhs_mcv) {
        .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
        else => null,
    };
    defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

    const limit_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
    const limit_mcv = MCValue{ .register = limit_reg };
    const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
    defer self.register_manager.unlockReg(limit_lock);

    const reg_bits = self.regBitSize(ty);
    const reg_extra_bits = self.regExtraBits(ty);
    const cc: Condition = if (ty.isSignedInt(zcu)) cc: {
        // Signed: shift the value to the top of the register so the
        // hardware overflow flag reflects the narrow type's overflow.
        if (reg_extra_bits > 0) {
            try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits });
        }
        // limit = (lhs >> (bits-1)) ^ maxInt: yields the max value when lhs
        // is non-negative and the min value when lhs is negative.
        try self.genSetReg(limit_reg, ty, dst_mcv, .{});
        try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .u8, .{ .immediate = reg_bits - 1 });
        try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
            .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1,
        });
        if (reg_extra_bits > 0) {
            // rhs must be shifted by the same amount before adding.
            const shifted_rhs_reg = try self.copyToTmpRegister(ty, rhs_mcv);
            const shifted_rhs_mcv = MCValue{ .register = shifted_rhs_reg };
            const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg);
            defer self.register_manager.unlockReg(shifted_rhs_lock);

            try self.genShiftBinOpMir(.{ ._l, .sa }, ty, shifted_rhs_mcv, .u8, .{ .immediate = reg_extra_bits });
            try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, shifted_rhs_mcv);
        } else try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv);
        break :cc .o;
    } else cc: {
        // Unsigned: limit is the all-ones value of the type's bit width.
        try self.genSetReg(limit_reg, ty, .{
            .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - ty.bitSize(zcu)),
        }, .{});

        try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv);
        if (reg_extra_bits > 0) {
            // Narrow unsigned type: carry is unreliable, compare instead.
            try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, limit_mcv);
            break :cc .a;
        }
        break :cc .c;
    };

    // cmov has no 1-byte form; use at least 2 bytes.
    const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2);
    try self.asmCmovccRegisterRegister(
        cc,
        registerAlias(dst_reg, cmov_abi_size),
        registerAlias(limit_reg, cmov_abi_size),
    );

    // Undo the pre-shift applied in the signed narrow-type path.
    if (reg_extra_bits > 0 and ty.isSignedInt(zcu))
        try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits });

    return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
}
-
/// Lowers saturating subtraction (`sub_sat`) for scalar integers of ABI
/// size <= 8 bytes; vectors and wider integers are still TODO.
/// Mirrors `airAddSat`: compute the saturation limit, subtract, then
/// `cmov` the limit over the result on overflow.
fn airSubSat(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
    const ty = self.typeOf(bin_op.lhs);
    if (ty.zigTypeTag(zcu) == .vector or ty.abiSize(zcu) > 8) return self.fail(
        "TODO implement airSubSat for {f}",
        .{ty.fmt(pt)},
    );

    // Reuse lhs's register when possible, otherwise copy it in.
    const lhs_mcv = try self.resolveInst(bin_op.lhs);
    const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
        lhs_mcv
    else
        try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv);
    const dst_reg = dst_mcv.register;
    const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
    defer self.register_manager.unlockReg(dst_lock);

    const rhs_mcv = try self.resolveInst(bin_op.rhs);
    const rhs_lock = switch (rhs_mcv) {
        .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
        else => null,
    };
    defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

    const limit_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
    const limit_mcv = MCValue{ .register = limit_reg };
    const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
    defer self.register_manager.unlockReg(limit_lock);

    const reg_bits = self.regBitSize(ty);
    const reg_extra_bits = self.regExtraBits(ty);
    const cc: Condition = if (ty.isSignedInt(zcu)) cc: {
        // Signed: shift the value to the top of the register so the
        // hardware overflow flag reflects the narrow type's overflow.
        if (reg_extra_bits > 0) {
            try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits });
        }
        // limit = (lhs >> (bits-1)) ^ maxInt: max when lhs is non-negative,
        // min when lhs is negative.
        try self.genSetReg(limit_reg, ty, dst_mcv, .{});
        try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .u8, .{ .immediate = reg_bits - 1 });
        try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
            .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1,
        });
        if (reg_extra_bits > 0) {
            // rhs must be shifted by the same amount before subtracting.
            const shifted_rhs_reg = try self.copyToTmpRegister(ty, rhs_mcv);
            const shifted_rhs_mcv = MCValue{ .register = shifted_rhs_reg };
            const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg);
            defer self.register_manager.unlockReg(shifted_rhs_lock);

            try self.genShiftBinOpMir(.{ ._l, .sa }, ty, shifted_rhs_mcv, .u8, .{ .immediate = reg_extra_bits });
            try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, shifted_rhs_mcv);
        } else try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv);
        break :cc .o;
    } else cc: {
        // Unsigned: saturates toward zero on borrow.
        try self.genSetReg(limit_reg, ty, .{ .immediate = 0 }, .{});
        try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv);
        break :cc .c;
    };

    // cmov has no 1-byte form; use at least 2 bytes.
    const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2);
    try self.asmCmovccRegisterRegister(
        cc,
        registerAlias(dst_reg, cmov_abi_size),
        registerAlias(limit_reg, cmov_abi_size),
    );

    // Undo the pre-shift applied in the signed narrow-type path.
    if (reg_extra_bits > 0 and ty.isSignedInt(zcu))
        try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits });

    return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
}
-
/// Lowers saturating multiplication (`mul_sat`).
/// `i128` is handled via compiler-rt `__muloti4` plus cmov-based clamping;
/// other scalar integers of ABI size <= 8 multiply inline and cmov the
/// saturation limit on overflow; everything else is TODO.
fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
    const ty = self.typeOf(bin_op.lhs);

    const result = result: {
        if (ty.toIntern() == .i128_type) {
            // __muloti4(lhs, rhs, &overflow) returns the (wrapped) product
            // and sets *overflow to nonzero on overflow.
            const ptr_c_int = try pt.singleMutPtrType(.c_int);
            const overflow = try self.allocTempRegOrMem(.c_int, false);

            const dst_mcv = try self.genCall(.{ .extern_func = .{
                .return_type = .i128_type,
                .param_types = &.{ .i128_type, .i128_type, ptr_c_int.toIntern() },
                .sym = "__muloti4",
            } }, &.{ .i128, .i128, ptr_c_int }, &.{
                .{ .air_ref = bin_op.lhs },
                .{ .air_ref = bin_op.rhs },
                overflow.address(),
            }, .{});
            const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_mcv.register_pair);
            defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);

            const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
            defer self.register_manager.unlockReg(tmp_lock);

            // Load the high limb of lhs into tmp_reg.
            const lhs_mcv = try self.resolveInst(bin_op.lhs);
            const mat_lhs_mcv = switch (lhs_mcv) {
                .load_nav, .load_uav, .load_lazy_sym => mat_lhs_mcv: {
                    // TODO clean this up!
                    const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address());
                    break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
                },
                else => lhs_mcv,
            };
            const mat_lhs_lock = switch (mat_lhs_mcv) {
                .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
                else => null,
            };
            defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
            if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
                .{ ._, .mov },
                tmp_reg,
                try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
            ) else try self.asmRegisterRegister(
                .{ ._, .mov },
                tmp_reg,
                mat_lhs_mcv.register_pair[1],
            );

            // XOR with the high limb of rhs: sign bit of tmp_reg now holds
            // the sign of the mathematically exact product.
            const rhs_mcv = try self.resolveInst(bin_op.rhs);
            const mat_rhs_mcv = switch (rhs_mcv) {
                .load_nav, .load_uav, .load_lazy_sym => mat_rhs_mcv: {
                    // TODO clean this up!
                    const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address());
                    break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
                },
                else => rhs_mcv,
            };
            const mat_rhs_lock = switch (mat_rhs_mcv) {
                .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg),
                else => null,
            };
            defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
            if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
                .{ ._, .xor },
                tmp_reg,
                try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
            ) else try self.asmRegisterRegister(
                .{ ._, .xor },
                tmp_reg,
                mat_rhs_mcv.register_pair[1],
            );

            // tmp = sign-broadcast then invert: low limb of the saturation
            // value (all-ones for maxInt, zero for minInt pattern).
            try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, .u(63));
            try self.asmRegister(.{ ._, .not }, tmp_reg);
            // If overflow was reported, replace the product with the limit;
            // `btc` flips bit 63 to form the high limb of maxInt/minInt.
            try self.asmMemoryImmediate(.{ ._, .cmp }, try overflow.mem(self, .{ .size = .dword }), .s(0));
            try self.freeValue(overflow);
            try self.asmCmovccRegisterRegister(.ne, dst_mcv.register_pair[0], tmp_reg);
            try self.asmRegisterImmediate(.{ ._c, .bt }, tmp_reg, .u(63));
            try self.asmCmovccRegisterRegister(.ne, dst_mcv.register_pair[1], tmp_reg);
            break :result dst_mcv;
        }

        if (ty.zigTypeTag(zcu) == .vector or ty.abiSize(zcu) > 8) return self.fail(
            "TODO implement airMulSat for {f}",
            .{ty.fmt(pt)},
        );

        // Inline multiply clobbers rax/rcx/rdx.
        try self.spillRegisters(&.{ .rax, .rcx, .rdx });
        const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx });
        defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);

        const lhs_mcv = try self.resolveInst(bin_op.lhs);
        const lhs_lock = switch (lhs_mcv) {
            .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
            else => null,
        };
        defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);

        const rhs_mcv = try self.resolveInst(bin_op.rhs);
        const rhs_lock = switch (rhs_mcv) {
            .register => |reg| self.register_manager.lockReg(reg),
            else => null,
        };
        defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

        const limit_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
        const limit_mcv = MCValue{ .register = limit_reg };
        const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg);
        defer self.register_manager.unlockReg(limit_lock);

        const reg_bits = self.regBitSize(ty);
        const cc: Condition = if (ty.isSignedInt(zcu)) cc: {
            // Signed limit keyed off the product's sign:
            // ((lhs ^ rhs) >> (bits-1)) ^ maxInt = max or min.
            try self.genSetReg(limit_reg, ty, lhs_mcv, .{});
            try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, rhs_mcv);
            try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .u8, .{ .immediate = reg_bits - 1 });
            try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{
                .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1,
            });
            break :cc .o;
        } else cc: {
            // Unsigned limit: all-ones at the register's bit width.
            try self.genSetReg(limit_reg, ty, .{
                .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - reg_bits),
            }, .{});
            break :cc .c;
        };

        const dst_mcv = try self.genMulDivBinOp(.mul, inst, ty, ty, lhs_mcv, rhs_mcv);
        // cmov has no 1-byte form; use at least 2 bytes.
        const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2);
        try self.asmCmovccRegisterRegister(
            cc,
            registerAlias(dst_mcv.register, cmov_abi_size),
            registerAlias(limit_reg, cmov_abi_size),
        );
        break :result dst_mcv;
    };
    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
-
/// Lowers `add_with_overflow`/`sub_with_overflow` for integers.
/// Power-of-two widths >= 8 bits can keep the result in a register paired
/// with the carry/overflow eflag; other widths are spilled to a frame slot
/// with an explicit truncate-and-compare overflow check.
fn airAddSubWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
    const result: MCValue = result: {
        const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)];
        const ty = self.typeOf(bin_op.lhs);
        switch (ty.zigTypeTag(zcu)) {
            .vector => return self.fail("TODO implement add/sub with overflow for Vector type", .{}),
            .int => {
                try self.spillEflagsIfOccupied();
                try self.spillRegisters(&.{ .rcx, .rdi, .rsi });
                const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rcx, .rdi, .rsi });
                defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);

                // Plain add/sub; the flags it leaves determine overflow.
                const partial_mcv = try self.genBinOp(null, switch (tag) {
                    .add_with_overflow => .add,
                    .sub_with_overflow => .sub,
                    else => unreachable,
                }, bin_op.lhs, bin_op.rhs);
                const int_info = ty.intInfo(zcu);
                // Unsigned overflow sets carry; signed overflow sets OF.
                const cc: Condition = switch (int_info.signedness) {
                    .unsigned => .c,
                    .signed => .o,
                };

                const tuple_ty = self.typeOfIndex(inst);
                if (int_info.bits >= 8 and std.math.isPowerOfTwo(int_info.bits)) {
                    switch (partial_mcv) {
                        .register => |reg| {
                            // Keep result + overflow flag together; mark this
                            // instruction as the current eflags owner.
                            self.eflags_inst = inst;
                            break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
                        },
                        else => {},
                    }

                    // Result not in a register: spill the tuple to the frame.
                    const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
                    try self.genSetMem(
                        .{ .frame = frame_index },
                        @intCast(tuple_ty.structFieldOffset(1, zcu)),
                        .u1,
                        .{ .eflags = cc },
                        .{},
                    );
                    try self.genSetMem(
                        .{ .frame = frame_index },
                        @intCast(tuple_ty.structFieldOffset(0, zcu)),
                        ty,
                        partial_mcv,
                        .{},
                    );
                    break :result .{ .load_frame = .{ .index = frame_index } };
                }

                // Odd bit widths need truncation plus an explicit compare.
                const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
                try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
                break :result .{ .load_frame = .{ .index = frame_index } };
            },
            else => unreachable,
        }
    };
    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
-
/// Lowers `shl_with_overflow` for integers.
/// Overflow is detected by shifting back right and comparing with the
/// original lhs: if `(lhs << rhs) >> rhs != lhs`, bits were lost.
fn airShlWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
    const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
    const result: MCValue = result: {
        const lhs_ty = self.typeOf(bin_op.lhs);
        const rhs_ty = self.typeOf(bin_op.rhs);
        switch (lhs_ty.zigTypeTag(zcu)) {
            .vector => return self.fail("TODO implement shl with overflow for Vector type", .{}),
            .int => {
                try self.spillEflagsIfOccupied();
                try self.spillRegisters(&.{ .rcx, .rdi, .rsi });
                const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rcx, .rdi, .rsi });
                defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);

                const lhs = try self.resolveInst(bin_op.lhs);
                const rhs = try self.resolveInst(bin_op.rhs);

                const int_info = lhs_ty.intInfo(zcu);

                // partial = lhs << rhs
                const partial_mcv = try self.genShiftBinOp(.shl, null, lhs, rhs, lhs_ty, rhs_ty);
                const partial_lock = switch (partial_mcv) {
                    .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
                    else => null,
                };
                defer if (partial_lock) |lock| self.register_manager.unlockReg(lock);

                // tmp = partial >> rhs; differs from lhs iff bits were lost.
                const tmp_mcv = try self.genShiftBinOp(.shr, null, partial_mcv, rhs, lhs_ty, rhs_ty);
                const tmp_lock = switch (tmp_mcv) {
                    .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
                    else => null,
                };
                defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);

                try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, tmp_mcv, lhs);
                const cc = Condition.ne;

                const tuple_ty = self.typeOfIndex(inst);
                if (int_info.bits >= 8 and std.math.isPowerOfTwo(int_info.bits)) {
                    switch (partial_mcv) {
                        .register => |reg| {
                            // Keep result + overflow flag together; mark this
                            // instruction as the current eflags owner.
                            self.eflags_inst = inst;
                            break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
                        },
                        else => {},
                    }

                    // Result not in a register: spill the tuple to the frame.
                    const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
                    try self.genSetMem(
                        .{ .frame = frame_index },
                        @intCast(tuple_ty.structFieldOffset(1, zcu)),
                        tuple_ty.fieldType(1, zcu),
                        .{ .eflags = cc },
                        .{},
                    );
                    try self.genSetMem(
                        .{ .frame = frame_index },
                        @intCast(tuple_ty.structFieldOffset(0, zcu)),
                        tuple_ty.fieldType(0, zcu),
                        partial_mcv,
                        .{},
                    );
                    break :result .{ .load_frame = .{ .index = frame_index } };
                }

                // Odd bit widths need truncation plus an explicit compare.
                const frame_index =
                    try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
                try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
                break :result .{ .load_frame = .{ .index = frame_index } };
            },
            else => unreachable,
        }
    };
    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
-
/// Spills an overflow-op result tuple `{ value, overflow_bit }` of a
/// non-power-of-two integer type into `frame_index`.
/// The truncated top limb is compared against the untruncated one; any
/// difference means overflow, which is OR'ed into `overflow_cc` (if given)
/// before storing the flag field. Remaining limbs above the top limb are
/// sign/zero-extended to fill the payload's ABI size.
fn genSetFrameTruncatedOverflowCompare(
    self: *CodeGen,
    tuple_ty: Type,
    frame_index: FrameIndex,
    src_mcv: MCValue,
    overflow_cc: ?Condition,
) !void {
    const pt = self.pt;
    const zcu = pt.zcu;
    const src_lock = switch (src_mcv) {
        .register => |reg| self.register_manager.lockReg(reg),
        else => null,
    };
    defer if (src_lock) |lock| self.register_manager.unlockReg(lock);

    const ty = tuple_ty.fieldType(0, zcu);
    const ty_size = ty.abiSize(zcu);
    const int_info = ty.intInfo(zcu);

    // Bits occupied in the topmost (possibly partial) 64-bit limb: 1..64.
    const hi_bits = (int_info.bits - 1) % 64 + 1;
    const hi_ty = try pt.intType(int_info.signedness, hi_bits);

    const limb_bits: u16 = @intCast(if (int_info.bits <= 64) self.regBitSize(ty) else 64);
    const limb_ty = try pt.intType(int_info.signedness, limb_bits);

    // Type covering all limbs below the top one.
    const rest_ty = try pt.intType(.unsigned, int_info.bits - hi_bits);

    const temp_regs =
        try self.register_manager.allocRegs(3, @splat(null), abi.RegisterClass.gp);
    const temp_locks = self.register_manager.lockRegsAssumeUnused(3, temp_regs);
    defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);

    // Capture the arithmetic overflow condition before any flag-clobbering
    // instructions below.
    const overflow_reg = temp_regs[0];
    if (overflow_cc) |cc| try self.asmSetccRegister(cc, overflow_reg.to8());

    const scratch_reg = temp_regs[1];
    const hi_limb_off = if (int_info.bits <= 64) 0 else (int_info.bits - 1) / 64 * 8;
    // NOTE(review): when int_info.bits is an exact multiple of 64 (> 64),
    // `int_info.bits / 64 * 8` is one limb past `hi_limb_off` computed above
    // from `(int_info.bits - 1) / 64 * 8` — confirm such widths cannot reach
    // this function, or that this offset is intended.
    const hi_limb_mcv = if (hi_limb_off > 0)
        src_mcv.address().offset(int_info.bits / 64 * 8).deref()
    else
        src_mcv;
    // Truncate the top limb to the logical width, then compare against the
    // untruncated value: inequality means the result overflowed the type.
    try self.genSetReg(scratch_reg, limb_ty, hi_limb_mcv, .{});
    try self.truncateRegister(hi_ty, scratch_reg);
    try self.genBinOpMir(.{ ._, .cmp }, limb_ty, .{ .register = scratch_reg }, hi_limb_mcv);

    const eq_reg = temp_regs[2];
    if (overflow_cc) |_| {
        // Combine truncation overflow with the arithmetic overflow flag.
        try self.asmSetccRegister(.ne, eq_reg.to8());
        try self.genBinOpMir(.{ ._, .@"or" }, .u8, .{ .register = overflow_reg }, .{ .register = eq_reg });
    }
    try self.genSetMem(
        .{ .frame = frame_index },
        @intCast(tuple_ty.structFieldOffset(1, zcu)),
        tuple_ty.fieldType(1, zcu),
        if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne },
        .{},
    );

    // Store the payload: lower limbs verbatim, then the truncated top limb.
    const payload_off: i32 = @intCast(tuple_ty.structFieldOffset(0, zcu));
    if (hi_limb_off > 0) try self.genSetMem(
        .{ .frame = frame_index },
        payload_off,
        rest_ty,
        src_mcv,
        .{},
    );
    try self.genSetMem(
        .{ .frame = frame_index },
        payload_off + hi_limb_off,
        limb_ty,
        .{ .register = scratch_reg },
        .{},
    );
    // Sign- or zero-extend into any remaining ABI-size padding limbs.
    var ext_off: i32 = hi_limb_off + 8;
    if (ext_off < ty_size) {
        switch (int_info.signedness) {
            .signed => try self.asmRegisterImmediate(.{ ._r, .sa }, scratch_reg.to64(), .s(63)),
            .unsigned => try self.asmRegisterRegister(.{ ._, .xor }, scratch_reg.to32(), scratch_reg.to32()),
        }
        while (ext_off < ty_size) : (ext_off += 8) try self.genSetMem(
            .{ .frame = frame_index },
            payload_off + ext_off,
            limb_ty,
            .{ .register = scratch_reg },
            .{},
        );
    }
}
-
- /// Lowers AIR `mul_with_overflow`: produces the tuple `{ product, overflow_bit }`.
- /// Four strategies, chosen by operand width/signedness:
- ///   * unsigned > 128 bits: inline limb-by-limb (schoolbook) multiply loop;
- ///   * signed 65..128 bits: call compiler-rt `__muloti4`;
- ///   * unsigned 65..128 bits: manual 64x64->128 expansion with carry tracking;
- ///   * everything else: single hardware multiply via `genMulDivBinOp`, with a
- ///     truncation compare when the destination has unused register bits.
- fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
- const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
- const tuple_ty = self.typeOfIndex(inst);
- const dst_ty = self.typeOf(bin_op.lhs);
- const result: MCValue = switch (dst_ty.zigTypeTag(zcu)) {
- .vector => return self.fail("TODO implement airMulWithOverflow for {f}", .{dst_ty.fmt(pt)}),
- .int => result: {
- const dst_info = dst_ty.intInfo(zcu);
- // --- Big-int path: unsigned wider than 128 bits -------------------------
- // Nested loop over 64-bit limbs; `mul` produces rax:rdx partial products
- // that are accumulated into the destination frame with `adc`, carries kept
- // in cl/ch. Overflow is any nonzero carry-out or discarded high product.
- if (dst_info.bits > 128 and dst_info.signedness == .unsigned) {
- const slow_inc = self.hasFeature(.slow_incdec);
- const abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
- const limb_len = std.math.divCeil(u32, abi_size, 8) catch unreachable;
-
- try self.spillRegisters(&.{ .rax, .rcx, .rdx });
- const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx });
- defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
-
- // Zero the whole result tuple up front; the loops accumulate into it.
- const dst_mcv = try self.allocRegOrMem(inst, false);
- try self.genInlineMemset(
- dst_mcv.address(),
- .{ .immediate = 0 },
- .{ .immediate = tuple_ty.abiSize(zcu) },
- .{},
- );
- const lhs_mcv = try self.resolveInst(bin_op.lhs);
- const rhs_mcv = try self.resolveInst(bin_op.rhs);
-
- // temp_regs: [0] = rhs limb index (outer), [1] = current rhs limb,
- // [2] = lhs limb index (inner), [3] = destination limb index.
- const temp_regs =
- try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp);
- const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs);
- defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
-
- try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[0].to32(), temp_regs[0].to32());
-
- const outer_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
- try self.asmRegisterMemory(.{ ._, .mov }, temp_regs[1].to64(), .{
- .base = .{ .frame = rhs_mcv.load_frame.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = temp_regs[0].to64(),
- .scale = .@"8",
- .disp = rhs_mcv.load_frame.off,
- } },
- });
- // Skip the inner loop entirely when this rhs limb is zero.
- try self.asmRegisterRegister(.{ ._, .@"test" }, temp_regs[1].to64(), temp_regs[1].to64());
- const skip_inner = try self.asmJccReloc(.z, undefined);
-
- try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[2].to32(), temp_regs[2].to32());
- try self.asmRegisterRegister(.{ ._, .mov }, temp_regs[3].to32(), temp_regs[0].to32());
- // cl/ch hold the two carry chains; rdx holds the running high product.
- try self.asmRegisterRegister(.{ ._, .xor }, .ecx, .ecx);
- try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx);
-
- const inner_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
- // Restore carry from cl, add previous high product into dst limb, save carry.
- try self.asmRegisterImmediate(.{ ._r, .sh }, .cl, .u(1))
- try self.asmMemoryRegister(.{ ._, .adc }, .{
- .base = .{ .frame = dst_mcv.load_frame.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = temp_regs[3].to64(),
- .scale = .@"8",
- .disp = dst_mcv.load_frame.off +
- @as(i32, @intCast(tuple_ty.structFieldOffset(0, zcu))),
- } },
- }, .rdx);
- try self.asmSetccRegister(.c, .cl);
-
- // rax:rdx = lhs_limb * rhs_limb.
- try self.asmRegisterMemory(.{ ._, .mov }, .rax, .{
- .base = .{ .frame = lhs_mcv.load_frame.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = temp_regs[2].to64(),
- .scale = .@"8",
- .disp = lhs_mcv.load_frame.off,
- } },
- });
- try self.asmRegister(.{ ._, .mul }, temp_regs[1].to64());
-
- // Restore carry from ch, add low product into dst limb, save carry.
- try self.asmRegisterImmediate(.{ ._r, .sh }, .ch, .u(1));
- try self.asmMemoryRegister(.{ ._, .adc }, .{
- .base = .{ .frame = dst_mcv.load_frame.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = temp_regs[3].to64(),
- .scale = .@"8",
- .disp = dst_mcv.load_frame.off +
- @as(i32, @intCast(tuple_ty.structFieldOffset(0, zcu))),
- } },
- }, .rax);
- try self.asmSetccRegister(.c, .ch);
-
- if (slow_inc) {
- try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1));
- try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), .u(1));
- } else {
- try self.asmRegister(.{ ._c, .in }, temp_regs[2].to32());
- try self.asmRegister(.{ ._c, .in }, temp_regs[3].to32());
- }
- try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[3].to32(), .u(limb_len));
- _ = try self.asmJccReloc(.b, inner_loop);
-
- // After the inner loop: leftover carries or high product mean potential
- // overflow unless all remaining lhs limbs are zero.
- try self.asmRegisterRegister(.{ ._, .@"or" }, .rdx, .rcx);
- const overflow = try self.asmJccReloc(.nz, undefined);
- const overflow_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
- try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[2].to32(), .u(limb_len));
- const no_overflow = try self.asmJccReloc(.nb, undefined);
- if (slow_inc) {
- try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1));
- } else {
- try self.asmRegister(.{ ._c, .in }, temp_regs[2].to32());
- }
- try self.asmMemoryImmediate(.{ ._, .cmp }, .{
- .base = .{ .frame = lhs_mcv.load_frame.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = temp_regs[2].to64(),
- .scale = .@"8",
- .disp = lhs_mcv.load_frame.off - 8,
- } },
- }, .u(0));
- _ = try self.asmJccReloc(.z, overflow_loop);
- self.performReloc(overflow);
- // Set the tuple's overflow bit (field 1) to 1.
- try self.asmMemoryImmediate(.{ ._, .mov }, .{
- .base = .{ .frame = dst_mcv.load_frame.index },
- .mod = .{ .rm = .{
- .size = .byte,
- .disp = dst_mcv.load_frame.off +
- @as(i32, @intCast(tuple_ty.structFieldOffset(1, zcu))),
- } },
- }, .u(1));
- self.performReloc(no_overflow);
-
- self.performReloc(skip_inner);
- if (slow_inc) {
- try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1));
- } else {
- try self.asmRegister(.{ ._c, .in }, temp_regs[0].to32());
- }
- try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[0].to32(), .u(limb_len));
- _ = try self.asmJccReloc(.b, outer_loop);
-
- break :result dst_mcv;
- }
-
- const lhs_active_bits = self.activeIntBits(bin_op.lhs);
- const rhs_active_bits = self.activeIntBits(bin_op.rhs);
- const src_bits = @max(lhs_active_bits, rhs_active_bits, dst_info.bits / 2);
- const src_ty = try pt.intType(dst_info.signedness, src_bits);
- // --- 65..128-bit path ----------------------------------------------------
- if (src_bits > 64 and src_bits <= 128 and
- dst_info.bits > 64 and dst_info.bits <= 128) switch (dst_info.signedness) {
- .signed => {
- // Signed 128-bit: delegate to compiler-rt __muloti4, which reports
- // overflow through an out-parameter of C int type.
- const ptr_c_int = try pt.singleMutPtrType(.c_int);
- const overflow = try self.allocTempRegOrMem(.c_int, false);
- const result = try self.genCall(.{ .extern_func = .{
- .return_type = .i128_type,
- .param_types = &.{ .i128_type, .i128_type, ptr_c_int.toIntern() },
- .sym = "__muloti4",
- } }, &.{ .i128, .i128, ptr_c_int }, &.{
- .{ .air_ref = bin_op.lhs },
- .{ .air_ref = bin_op.rhs },
- overflow.address(),
- }, .{});
-
- const dst_mcv = try self.allocRegOrMem(inst, false);
- try self.genSetMem(
- .{ .frame = dst_mcv.load_frame.index },
- @intCast(tuple_ty.structFieldOffset(0, zcu)),
- tuple_ty.fieldType(0, zcu),
- result,
- .{},
- );
- // overflow bit = (*overflow != 0)
- try self.asmMemoryImmediate(
- .{ ._, .cmp },
- try overflow.mem(self, .{ .size = self.memSize(.c_int) }),
- .s(0),
- );
- try self.genSetMem(
- .{ .frame = dst_mcv.load_frame.index },
- @intCast(tuple_ty.structFieldOffset(1, zcu)),
- tuple_ty.fieldType(1, zcu),
- .{ .eflags = .ne },
- .{},
- );
- try self.freeValue(overflow);
- break :result dst_mcv;
- },
- .unsigned => {
- // Unsigned 128-bit: expand (aH:aL)*(bH:bL); overflow if aH*bH would be
- // nonzero (both highs nonzero), or any cross-product/carry overflows.
- try self.spillEflagsIfOccupied();
- try self.spillRegisters(&.{ .rax, .rdx });
- const reg_locks = self.register_manager.lockRegsAssumeUnused(2, .{ .rax, .rdx });
- defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
-
- const tmp_regs =
- try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp);
- const tmp_locks = self.register_manager.lockRegsAssumeUnused(4, tmp_regs);
- defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
-
- const lhs_mcv = try self.resolveInst(bin_op.lhs);
- const rhs_mcv = try self.resolveInst(bin_op.rhs);
- // Materialize operands into GP-addressable form (SSE registers and
- // symbol-addressed memory cannot feed `mul` directly).
- const mat_lhs_mcv = mat_lhs_mcv: switch (lhs_mcv) {
- .register => |lhs_reg| switch (lhs_reg.class()) {
- else => lhs_mcv,
- .sse => {
- const mat_lhs_mcv: MCValue = .{
- .register_pair = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp),
- };
- try self.genCopy(dst_ty, mat_lhs_mcv, lhs_mcv, .{});
- break :mat_lhs_mcv mat_lhs_mcv;
- },
- },
- .load_nav, .load_uav, .load_lazy_sym => {
- // TODO clean this up!
- const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address());
- break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
- },
- else => lhs_mcv,
- };
- const mat_lhs_locks: [2]?RegisterLock = switch (mat_lhs_mcv) {
- .register_pair => |mat_lhs_regs| self.register_manager.lockRegs(2, mat_lhs_regs),
- .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null },
- else => @splat(null),
- };
- defer for (mat_lhs_locks) |mat_lhs_lock| if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock);
- const mat_rhs_mcv = mat_rhs_mcv: switch (rhs_mcv) {
- .register => |rhs_reg| switch (rhs_reg.class()) {
- else => rhs_mcv,
- .sse => {
- const mat_rhs_mcv: MCValue = .{
- .register_pair = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp),
- };
- try self.genCopy(dst_ty, mat_rhs_mcv, rhs_mcv, .{});
- break :mat_rhs_mcv mat_rhs_mcv;
- },
- },
- .load_nav, .load_uav, .load_lazy_sym => {
- // TODO clean this up!
- const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address());
- break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } };
- },
- else => rhs_mcv,
- };
- const mat_rhs_locks: [2]?RegisterLock = switch (mat_rhs_mcv) {
- .register_pair => |mat_rhs_regs| self.register_manager.lockRegs(2, mat_rhs_regs),
- .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null },
- else => @splat(null),
- };
- defer for (mat_rhs_locks) |mat_rhs_lock| if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
-
- // rax = lhs.lo; tmp0 = rhs.hi.
- if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
- .{ ._, .mov },
- .rax,
- try mat_lhs_mcv.mem(self, .{ .size = .qword }),
- ) else try self.asmRegisterRegister(
- .{ ._, .mov },
- .rax,
- mat_lhs_mcv.register_pair[0],
- );
- if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
- .{ ._, .mov },
- tmp_regs[0],
- try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
- ) else try self.asmRegisterRegister(
- .{ ._, .mov },
- tmp_regs[0],
- mat_rhs_mcv.register_pair[1],
- );
- // tmp1 = (rhs.hi != 0); tmp0 = lhs.lo * rhs.hi, tmp2 = signed-overflow flag.
- try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_regs[0], tmp_regs[0]);
- try self.asmSetccRegister(.nz, tmp_regs[1].to8());
- try self.asmRegisterRegister(.{ .i_, .mul }, tmp_regs[0], .rax);
- try self.asmSetccRegister(.o, tmp_regs[2].to8());
- // rax:rdx = lhs.lo * rhs.lo; fold lhs.lo*rhs.hi into the high half.
- if (mat_rhs_mcv.isBase())
- try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .{ .size = .qword }))
- else
- try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]);
- try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]);
- try self.asmSetccRegister(.c, tmp_regs[3].to8());
- try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[2].to8(), tmp_regs[3].to8());
- // tmp0 = lhs.hi; tmp1 &= (lhs.hi != 0)  -- both highs nonzero => overflow.
- if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(
- .{ ._, .mov },
- tmp_regs[0],
- try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
- ) else try self.asmRegisterRegister(
- .{ ._, .mov },
- tmp_regs[0],
- mat_lhs_mcv.register_pair[1],
- );
- try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_regs[0], tmp_regs[0]);
- try self.asmSetccRegister(.nz, tmp_regs[3].to8());
- try self.asmRegisterRegister(
- .{ ._, .@"and" },
- tmp_regs[1].to8(),
- tmp_regs[3].to8(),
- );
- try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
- // Fold lhs.hi * rhs.lo into the high half, accumulating overflow flags.
- if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(
- .{ .i_, .mul },
- tmp_regs[0],
- try mat_rhs_mcv.mem(self, .{ .size = .qword }),
- ) else try self.asmRegisterRegister(
- .{ .i_, .mul },
- tmp_regs[0],
- mat_rhs_mcv.register_pair[0],
- );
- try self.asmSetccRegister(.o, tmp_regs[2].to8());
- try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
- try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]);
- try self.asmSetccRegister(.c, tmp_regs[2].to8());
- try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
-
- // Store { rax:rdx, tmp1 } into the result tuple.
- const dst_mcv = try self.allocRegOrMem(inst, false);
- try self.genSetMem(
- .{ .frame = dst_mcv.load_frame.index },
- @intCast(tuple_ty.structFieldOffset(0, zcu)),
- tuple_ty.fieldType(0, zcu),
- .{ .register_pair = .{ .rax, .rdx } },
- .{},
- );
- try self.genSetMem(
- .{ .frame = dst_mcv.load_frame.index },
- @intCast(tuple_ty.structFieldOffset(1, zcu)),
- tuple_ty.fieldType(1, zcu),
- .{ .register = tmp_regs[1] },
- .{},
- );
- break :result dst_mcv;
- },
- };
-
- // --- Generic path: single hardware multiply ------------------------------
- try self.spillEflagsIfOccupied();
- try self.spillRegisters(&.{ .rax, .rcx, .rdx, .rdi, .rsi });
- const reg_locks = self.register_manager.lockRegsAssumeUnused(5, .{ .rax, .rcx, .rdx, .rdi, .rsi });
- defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
-
- // Hardware overflow condition: carry for unsigned mul, overflow for imul.
- const cc: Condition = switch (dst_info.signedness) {
- .unsigned => .c,
- .signed => .o,
- };
-
- const lhs = try self.resolveInst(bin_op.lhs);
- const rhs = try self.resolveInst(bin_op.rhs);
-
- const extra_bits = if (dst_info.bits <= 64)
- self.regExtraBits(dst_ty)
- else
- dst_info.bits % 64;
- const partial_mcv = try self.genMulDivBinOp(.mul, null, dst_ty, src_ty, lhs, rhs);
-
- switch (partial_mcv) {
- .register => |reg| if (extra_bits == 0) {
- // Exact register width: the eflags condition is the overflow bit.
- self.eflags_inst = inst;
- break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
- } else {
- const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
- try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
- break :result .{ .load_frame = .{ .index = frame_index } };
- },
- else => {
- // For now, this is the only supported multiply that doesn't fit in a register.
- if (dst_info.bits > 128 or src_bits != 64)
- return self.fail("TODO implement airWithOverflow from {f} to {f}", .{
- src_ty.fmt(pt), dst_ty.fmt(pt),
- });
-
- const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu));
- if (dst_info.bits >= lhs_active_bits + rhs_active_bits) {
- try self.genSetMem(
- .{ .frame = frame_index },
- @intCast(tuple_ty.structFieldOffset(0, zcu)),
- tuple_ty.fieldType(0, zcu),
- partial_mcv,
- .{},
- );
- try self.genSetMem(
- .{ .frame = frame_index },
- @intCast(tuple_ty.structFieldOffset(1, zcu)),
- tuple_ty.fieldType(1, zcu),
- .{ .immediate = 0 }, // cc being set is impossible
- .{},
- );
- } else try self.genSetFrameTruncatedOverflowCompare(
- tuple_ty,
- frame_index,
- partial_mcv,
- null,
- );
- break :result .{ .load_frame = .{ .index = frame_index } };
- },
- }
- },
- else => unreachable,
- };
- return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
- }
-
- /// Generates signed or unsigned integer multiplication/division.
- /// Clobbers .rax and .rdx registers.
- /// Quotient is saved in .rax and remainder in .rdx.
- /// `tag` selects the instruction (`mul`/`div` with `._` unsigned or `.i_` signed
- /// prefix); `lhs` is loaded into .rax, `rhs` is the multiplier/divisor operand.
- /// Only supports types up to 8 bytes of ABI size.
- fn genIntMulDivOpMir(self: *CodeGen, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue, rhs: MCValue) !void {
- const pt = self.pt;
- const abi_size: u32 = @intCast(ty.abiSize(pt.zcu));
- const bit_size: u32 = @intCast(self.regBitSize(ty));
- if (abi_size > 8) {
- return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{});
- }
-
- try self.genSetReg(.rax, ty, lhs, .{});
- // Division needs the high half of the dividend prepared:
- // zeroed for unsigned, sign-extended from rax for signed.
- switch (tag[1]) {
- else => unreachable,
- .mul => {},
- .div => switch (tag[0]) {
- ._ => {
- // 8-bit div divides ax, so clear ah; wider divides clear (r)dx.
- const hi_reg: Register =
- switch (bit_size) {
- 8 => .ah,
- 16, 32, 64 => .edx,
- else => unreachable,
- };
- try self.asmRegisterRegister(.{ ._, .xor }, hi_reg, hi_reg);
- },
- .i_ => try self.asmOpOnly(.{ ._, switch (bit_size) {
- 8 => .cbw,
- 16 => .cwd,
- 32 => .cdq,
- 64 => .cqo,
- else => unreachable,
- } }),
- else => unreachable,
- },
- }
-
- // mul/div take a register or memory operand; materialize anything else.
- const mat_rhs: MCValue = switch (rhs) {
- .register, .indirect, .load_frame => rhs,
- else => .{ .register = try self.copyToTmpRegister(ty, rhs) },
- };
- switch (mat_rhs) {
- .register => |reg| try self.asmRegister(tag, registerAlias(reg, abi_size)),
- .memory, .indirect, .load_frame => try self.asmMemory(
- tag,
- try mat_rhs.mem(self, .{ .size = .fromSize(abi_size) }),
- ),
- else => unreachable,
- }
- // 8-bit div leaves the remainder in ah; move it to dl to honor the
- // "remainder in .rdx" contract.
- if (tag[1] == .div and bit_size == 8) try self.asmRegisterRegister(.{ ._, .mov }, .dl, .ah);
- }
-
- /// Always returns a register.
- /// Clobbers .rax and .rdx registers.
- /// Computes floored division (`@divFloor` semantics): performs a truncating
- /// divide via `genIntMulDivOpMir`, then subtracts one from the quotient when
- /// the remainder is nonzero and the operands' signs differ.
- fn genInlineIntDivFloor(self: *CodeGen, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue {
- const pt = self.pt;
- const zcu = pt.zcu;
- const abi_size: u32 = @intCast(ty.abiSize(zcu));
- const int_info = ty.intInfo(zcu);
- const dividend = switch (lhs) {
- .register => |reg| reg,
- else => try self.copyToTmpRegister(ty, lhs),
- };
- const dividend_lock = self.register_manager.lockReg(dividend);
- defer if (dividend_lock) |lock| self.register_manager.unlockReg(lock);
-
- const divisor = switch (rhs) {
- .register => |reg| reg,
- else => try self.copyToTmpRegister(ty, rhs),
- };
- const divisor_lock = self.register_manager.lockReg(divisor);
- defer if (divisor_lock) |lock| self.register_manager.unlockReg(lock);
-
- // Truncating divide: quotient in .rax, remainder in .rdx.
- try self.genIntMulDivOpMir(
- switch (int_info.signedness) {
- .signed => .{ .i_, .div },
- .unsigned => .{ ._, .div },
- },
- ty,
- .{ .register = dividend },
- .{ .register = divisor },
- );
-
- // divisor := (dividend ^ divisor) >> (bits-1): all-ones when signs differ,
- // zero otherwise (arithmetic shift replicates the sign bit).
- try self.asmRegisterRegister(
- .{ ._, .xor },
- registerAlias(divisor, abi_size),
- registerAlias(dividend, abi_size),
- );
- try self.asmRegisterImmediate(
- .{ ._r, .sa },
- registerAlias(divisor, abi_size),
- .u(int_info.bits - 1),
- );
- // If the remainder (.rdx) is zero, force the correction term to zero.
- try self.asmRegisterRegister(
- .{ ._, .@"test" },
- registerAlias(.rdx, abi_size),
- registerAlias(.rdx, abi_size),
- );
- try self.asmCmovccRegisterRegister(
- .z,
- registerAlias(divisor, @max(abi_size, 2)),
- registerAlias(.rdx, @max(abi_size, 2)),
- );
- // quotient + correction (0 or -1) = floored quotient, left in `divisor`.
- try self.genBinOpMir(.{ ._, .add }, ty, .{ .register = divisor }, .{ .register = .rax });
- return MCValue{ .register = divisor };
- }
-
- /// Lowers AIR shift instructions (`shl`, `shl_exact`, `shr`, `shr_exact`).
- /// Scalar integers go through `genShiftBinOp` (which uses .rcx as the count),
- /// with non-exact `shl` results truncated back to the type's bit width.
- /// Integer vectors of 16/32/64-bit lanes use packed SSE/AVX shifts when the
- /// shift amount is a (comptime-known or `splat`) scalar; anything else fails
- /// with a TODO.
- fn airShlShrBinOp(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-
- const air_tags = self.air.instructions.items(.tag);
- const tag = air_tags[@intFromEnum(inst)];
- const lhs_ty = self.typeOf(bin_op.lhs);
- const rhs_ty = self.typeOf(bin_op.rhs);
- const result: MCValue = result: {
- switch (lhs_ty.zigTypeTag(zcu)) {
- .int => {
- // .rcx is the hardware shift-count register; reserve it.
- try self.spillRegisters(&.{.rcx});
- try self.register_manager.getKnownReg(.rcx, null);
- const lhs_mcv = try self.resolveInst(bin_op.lhs);
- const rhs_mcv = try self.resolveInst(bin_op.rhs);
-
- const dst_mcv = try self.genShiftBinOp(tag, inst, lhs_mcv, rhs_mcv, lhs_ty, rhs_ty);
- switch (tag) {
- .shr, .shr_exact, .shl_exact => {},
- // Non-exact shl may carry bits past the type width; truncate the
- // highest limb of the result back to the declared bit count.
- .shl => switch (dst_mcv) {
- .register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg),
- .register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]),
- .load_frame => |frame_addr| {
- const tmp_reg =
- try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- const lhs_bits: u31 = @intCast(lhs_ty.bitSize(zcu));
- const tmp_ty: Type = if (lhs_bits > 64) .usize else lhs_ty;
- const off = frame_addr.off + (lhs_bits - 1) / 64 * 8;
- try self.genSetReg(
- tmp_reg,
- tmp_ty,
- .{ .load_frame = .{ .index = frame_addr.index, .off = off } },
- .{},
- );
- try self.truncateRegister(lhs_ty, tmp_reg);
- try self.genSetMem(
- .{ .frame = frame_addr.index },
- off,
- tmp_ty,
- .{ .register = tmp_reg },
- .{},
- );
- },
- else => {},
- },
- else => unreachable,
- }
- break :result dst_mcv;
- },
- // Select a packed-shift MIR tag by lane width, vector length, direction,
- // and signedness; `null` (unsupported combination) falls through to the
- // TODO failure below. AVX2 is required for 256-bit vectors.
- .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
- .int => if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.childType(zcu).intInfo(zcu).bits) {
- else => null,
- 16 => switch (lhs_ty.vectorLen(zcu)) {
- else => null,
- 1...8 => switch (tag) {
- else => unreachable,
- .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx))
- .{ .vp_w, .sra }
- else
- .{ .p_w, .sra },
- .unsigned => if (self.hasFeature(.avx))
- .{ .vp_w, .srl }
- else
- .{ .p_w, .srl },
- },
- .shl, .shl_exact => if (self.hasFeature(.avx))
- .{ .vp_w, .sll }
- else
- .{ .p_w, .sll },
- },
- 9...16 => switch (tag) {
- else => unreachable,
- .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .sra } else null,
- .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .srl } else null,
- },
- .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_w, .sll } else null,
- },
- },
- 32 => switch (lhs_ty.vectorLen(zcu)) {
- else => null,
- 1...4 => switch (tag) {
- else => unreachable,
- .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx))
- .{ .vp_d, .sra }
- else
- .{ .p_d, .sra },
- .unsigned => if (self.hasFeature(.avx))
- .{ .vp_d, .srl }
- else
- .{ .p_d, .srl },
- },
- .shl, .shl_exact => if (self.hasFeature(.avx))
- .{ .vp_d, .sll }
- else
- .{ .p_d, .sll },
- },
- 5...8 => switch (tag) {
- else => unreachable,
- .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .sra } else null,
- .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .srl } else null,
- },
- .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_d, .sll } else null,
- },
- },
- 64 => switch (lhs_ty.vectorLen(zcu)) {
- else => null,
- 1...2 => switch (tag) {
- else => unreachable,
- .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx))
- .{ .vp_q, .sra }
- else
- .{ .p_q, .sra },
- .unsigned => if (self.hasFeature(.avx))
- .{ .vp_q, .srl }
- else
- .{ .p_q, .srl },
- },
- .shl, .shl_exact => if (self.hasFeature(.avx))
- .{ .vp_q, .sll }
- else
- .{ .p_q, .sll },
- },
- 3...4 => switch (tag) {
- else => unreachable,
- .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx2)) .{ .vp_q, .sra } else null,
- .unsigned => if (self.hasFeature(.avx2)) .{ .vp_q, .srl } else null,
- },
- .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_q, .sll } else null,
- },
- },
- // Comptime-known splat shift amount: emit an immediate-form packed shift.
- })) |mir_tag| if (try self.air.value(bin_op.rhs, pt)) |rhs_val| {
- switch (zcu.intern_pool.indexToKey(rhs_val.toIntern())) {
- .aggregate => |rhs_aggregate| switch (rhs_aggregate.storage) {
- .repeated_elem => |rhs_elem| {
- const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu));
-
- // Reuse the lhs register when possible; AVX allows a distinct
- // destination, otherwise copy lhs so dst == lhs.
- const lhs_mcv = try self.resolveInst(bin_op.lhs);
- const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and
- self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
- .{lhs_mcv.getReg().?} ** 2
- else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{
- try self.register_manager.allocReg(inst, abi.RegisterClass.sse),
- lhs_mcv.getReg().?,
- } else .{(try self.copyToRegisterWithInstTracking(
- inst,
- lhs_ty,
- lhs_mcv,
- )).register} ** 2;
- const reg_locks =
- self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg });
- defer for (reg_locks) |reg_lock| if (reg_lock) |lock|
- self.register_manager.unlockReg(lock);
-
- const shift_imm: Immediate =
- .u(@intCast(Value.fromInterned(rhs_elem).toUnsignedInt(zcu)));
- if (self.hasFeature(.avx)) try self.asmRegisterRegisterImmediate(
- mir_tag,
- registerAlias(dst_reg, abi_size),
- registerAlias(lhs_reg, abi_size),
- shift_imm,
- ) else {
- assert(dst_reg.id() == lhs_reg.id());
- try self.asmRegisterImmediate(
- mir_tag,
- registerAlias(dst_reg, abi_size),
- shift_imm,
- );
- }
- break :result .{ .register = dst_reg };
- },
- else => {},
- },
- else => {},
- }
- // Runtime `splat` shift amount: packed shifts take the count from the
- // low 64 bits of an XMM register; mask it to the low byte first.
- } else if (bin_op.rhs.toIndex()) |rhs_inst| switch (air_tags[@intFromEnum(rhs_inst)]) {
- .splat => {
- const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu));
-
- const lhs_mcv = try self.resolveInst(bin_op.lhs);
- const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and
- self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
- .{lhs_mcv.getReg().?} ** 2
- else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{
- try self.register_manager.allocReg(inst, abi.RegisterClass.sse),
- lhs_mcv.getReg().?,
- } else .{(try self.copyToRegisterWithInstTracking(
- inst,
- lhs_ty,
- lhs_mcv,
- )).register} ** 2;
- const reg_locks = self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg });
- defer for (reg_locks) |reg_lock| if (reg_lock) |lock|
- self.register_manager.unlockReg(lock);
-
- const shift_reg =
- try self.copyToTmpRegister(rhs_ty, .{ .air_ref = bin_op.rhs });
- const shift_lock = self.register_manager.lockRegAssumeUnused(shift_reg);
- defer self.register_manager.unlockReg(shift_lock);
-
- // Mask vector: keeps only the first lane's low byte of the count.
- const mask_ty = try pt.vectorType(.{ .len = 16, .child = .u8_type });
- const mask_mcv = try self.lowerValue(try pt.aggregateValue(
- mask_ty,
- &([1]InternPool.Index{
- (try rhs_ty.childType(zcu).maxIntScalar(pt, .u8)).toIntern(),
- } ++ [1]InternPool.Index{.zero_u8} ** 15),
- ));
- const mask_addr_reg = try self.copyToTmpRegister(.usize, mask_mcv.address());
- const mask_addr_lock = self.register_manager.lockRegAssumeUnused(mask_addr_reg);
- defer self.register_manager.unlockReg(mask_addr_lock);
-
- if (self.hasFeature(.avx)) {
- try self.asmRegisterRegisterMemory(
- .{ .vp_, .@"and" },
- shift_reg.to128(),
- shift_reg.to128(),
- .{
- .base = .{ .reg = mask_addr_reg },
- .mod = .{ .rm = .{ .size = .xword } },
- },
- );
- try self.asmRegisterRegisterRegister(
- mir_tag,
- registerAlias(dst_reg, abi_size),
- registerAlias(lhs_reg, abi_size),
- shift_reg.to128(),
- );
- } else {
- try self.asmRegisterMemory(
- .{ .p_, .@"and" },
- shift_reg.to128(),
- .{
- .base = .{ .reg = mask_addr_reg },
- .mod = .{ .rm = .{ .size = .xword } },
- },
- );
- assert(dst_reg.id() == lhs_reg.id());
- try self.asmRegisterRegister(
- mir_tag,
- registerAlias(dst_reg, abi_size),
- shift_reg.to128(),
- );
- }
- break :result .{ .register = dst_reg };
- },
- else => {},
- },
- else => {},
- },
- else => {},
- }
- return self.fail("TODO implement airShlShrBinOp for {f}", .{lhs_ty.fmt(pt)});
- };
- return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
- }
-
- /// Lowers AIR `shl_sat` (saturating left shift) for scalar integers.
- /// Strategy: shift left, truncate to the type width, shift back right, and
- /// compare with the original lhs — a difference means bits were lost, so the
- /// result is replaced with the type's max (or min for negative signed lhs).
- fn airShlSat(self: *CodeGen, inst: Air.Inst.Index) !void {
- const zcu = self.pt.zcu;
- const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
- const lhs_ty = self.typeOf(bin_op.lhs);
- const rhs_ty = self.typeOf(bin_op.rhs);
-
- const result: MCValue = result: {
- switch (lhs_ty.zigTypeTag(zcu)) {
- .int => {
- const lhs_bits = lhs_ty.bitSize(zcu);
- const rhs_bits = rhs_ty.bitSize(zcu);
- if (!(lhs_bits <= 32 and rhs_bits <= 5) and !(lhs_bits > 32 and lhs_bits <= 64 and rhs_bits <= 6) and !(rhs_bits <= std.math.log2(lhs_bits))) {
- return self.fail("TODO implement shl_sat for {} with lhs bits {}, rhs bits {}", .{ self.target.cpu.arch, lhs_bits, rhs_bits });
- }
-
- // clobbered by genShiftBinOp
- try self.spillRegisters(&.{.rcx});
-
- const lhs_mcv = try self.resolveInst(bin_op.lhs);
- // lhs_temp1 preserves the pre-shift value for the later comparison.
- var lhs_temp1 = try self.tempInit(lhs_ty, lhs_mcv);
- const rhs_mcv = try self.resolveInst(bin_op.rhs);
-
- const lhs_lock = switch (lhs_mcv) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
-
- // shift left
- const dst_mcv = try self.genShiftBinOp(.shl, null, lhs_mcv, rhs_mcv, lhs_ty, rhs_ty);
- // Truncate the shifted value's highest limb back to the type width.
- switch (dst_mcv) {
- .register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg),
- .register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]),
- .load_frame => |frame_addr| {
- const tmp_reg =
- try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- const lhs_bits_u31: u31 = @intCast(lhs_bits);
- const tmp_ty: Type = if (lhs_bits_u31 > 64) .usize else lhs_ty;
- const off = frame_addr.off + (lhs_bits_u31 - 1) / 64 * 8;
- try self.genSetReg(
- tmp_reg,
- tmp_ty,
- .{ .load_frame = .{ .index = frame_addr.index, .off = off } },
- .{},
- );
- try self.truncateRegister(lhs_ty, tmp_reg);
- try self.genSetMem(
- .{ .frame = frame_addr.index },
- off,
- tmp_ty,
- .{ .register = tmp_reg },
- .{},
- );
- },
- else => {},
- }
- const dst_lock = switch (dst_mcv) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- // shift right
- const tmp_mcv = try self.genShiftBinOp(.shr, null, dst_mcv, rhs_mcv, lhs_ty, rhs_ty);
- var tmp_temp = try self.tempInit(lhs_ty, tmp_mcv);
-
- // check if overflow happens
- const cc_temp = lhs_temp1.cmpInts(.neq, &tmp_temp, self) catch |err| switch (err) {
- error.SelectFailed => unreachable,
- else => |e| return e,
- };
- try lhs_temp1.die(self);
- try tmp_temp.die(self);
- const overflow_reloc = try self.genCondBrMir(lhs_ty, cc_temp.tracking(self).short);
- try cc_temp.die(self);
-
- // if overflow,
- // for unsigned integers, the saturating result is just its max
- // for signed integers,
- // if lhs is positive, the result is its max
- // if lhs is negative, it is min
- switch (lhs_ty.intInfo(zcu).signedness) {
- .unsigned => {
- const bound_mcv = try self.lowerValue(try lhs_ty.maxIntScalar(self.pt, lhs_ty));
- try self.genCopy(lhs_ty, dst_mcv, bound_mcv, .{});
- },
- .signed => {
- // check the sign of lhs
- // TODO: optimize this.
- // we only need the highest bit so shifting the highest part of lhs_mcv
- // is enough to check the signedness. other parts can be skipped here.
- var lhs_temp2 = try self.tempInit(lhs_ty, lhs_mcv);
- var zero_temp = try self.tempInit(lhs_ty, try self.lowerValue(try self.pt.intValue(lhs_ty, 0)));
- const sign_cc_temp = lhs_temp2.cmpInts(.lt, &zero_temp, self) catch |err| switch (err) {
- error.SelectFailed => unreachable,
- else => |e| return e,
- };
- try lhs_temp2.die(self);
- try zero_temp.die(self);
- const sign_reloc_condbr = try self.genCondBrMir(lhs_ty, sign_cc_temp.tracking(self).short);
- try sign_cc_temp.die(self);
-
- // if it is negative
- const min_mcv = try self.lowerValue(try lhs_ty.minIntScalar(self.pt, lhs_ty));
- try self.genCopy(lhs_ty, dst_mcv, min_mcv, .{});
- const sign_reloc_br = try self.asmJmpReloc(undefined);
- self.performReloc(sign_reloc_condbr);
-
- // if it is positive
- const max_mcv = try self.lowerValue(try lhs_ty.maxIntScalar(self.pt, lhs_ty));
- try self.genCopy(lhs_ty, dst_mcv, max_mcv, .{});
- self.performReloc(sign_reloc_br);
- },
- }
-
- self.performReloc(overflow_reloc);
- break :result dst_mcv;
- },
- else => {
- return self.fail("TODO implement shl_sat for {} op type {}", .{ self.target.cpu.arch, lhs_ty.zigTypeTag(zcu) });
- },
- }
- };
- return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
- }
-
- /// Lowers AIR `optional_payload`: extracts the payload value of an optional.
- /// Zero-bit payloads produce `.none`. When the operand can be reused, the
- /// payload is taken in place; a `register_overflow` operand drops its eflags
- /// component (only the register holds the payload). Otherwise the payload is
- /// copied into newly allocated storage.
- fn airOptionalPayload(self: *CodeGen, inst: Air.Inst.Index) !void {
- const zcu = self.pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const result: MCValue = result: {
- const pl_ty = self.typeOfIndex(inst);
- if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
-
- const opt_mcv = try self.resolveInst(ty_op.operand);
- if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) {
- const pl_mcv: MCValue = switch (opt_mcv) {
- .register_overflow => |ro| pl: {
- self.eflags_inst = null; // actually stop tracking the overflow part
- break :pl .{ .register = ro.reg };
- },
- else => opt_mcv,
- };
- // The payload may be narrower than the register holding the optional.
- switch (pl_mcv) {
- .register => |pl_reg| try self.truncateRegister(pl_ty, pl_reg),
- else => {},
- }
- break :result pl_mcv;
- }
-
- const pl_mcv = try self.allocRegOrMem(inst, true);
- try self.genCopy(pl_ty, pl_mcv, switch (opt_mcv) {
- else => opt_mcv,
- .register_overflow => |ro| .{ .register = ro.reg },
- }, .{});
- break :result pl_mcv;
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
- }
-
-fn airOptionalPayloadPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-
- const dst_ty = self.typeOfIndex(inst);
- const opt_mcv = try self.resolveInst(ty_op.operand);
-
- const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv))
- opt_mcv
- else
- try self.copyToRegisterWithInstTracking(inst, dst_ty, opt_mcv);
- return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
-}
-
-fn airOptionalPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const result = result: {
- const dst_ty = self.typeOfIndex(inst);
- const src_ty = self.typeOf(ty_op.operand);
- const opt_ty = src_ty.childType(zcu);
- const src_mcv = try self.resolveInst(ty_op.operand);
-
- if (opt_ty.optionalReprIsPayload(zcu)) {
- break :result if (self.liveness.isUnused(inst))
- .unreach
- else if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_mcv
- else
- try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
- }
-
- const dst_mcv: MCValue = if (src_mcv.isRegister() and
- self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_mcv
- else if (self.liveness.isUnused(inst))
- .{ .register = try self.copyToTmpRegister(dst_ty, src_mcv) }
- else
- try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
-
- const pl_ty = dst_ty.childType(zcu);
- const pl_abi_size: i32 = @intCast(pl_ty.abiSize(zcu));
- try self.genSetMem(
- .{ .reg = dst_mcv.getReg().? },
- pl_abi_size,
- .bool,
- .{ .immediate = 1 },
- .{},
- );
- break :result if (self.liveness.isUnused(inst)) .unreach else dst_mcv;
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn airUnwrapErrUnionErr(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const err_union_ty = self.typeOf(ty_op.operand);
- const err_ty = err_union_ty.errorUnionSet(zcu);
- const payload_ty = err_union_ty.errorUnionPayload(zcu);
- const operand = try self.resolveInst(ty_op.operand);
-
- const result: MCValue = result: {
- if (err_ty.errorSetIsEmpty(zcu)) {
- break :result MCValue{ .immediate = 0 };
- }
-
- if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
- break :result try self.copyToRegisterWithInstTracking(inst, err_union_ty, operand);
- }
-
- const err_off = codegen.errUnionErrorOffset(payload_ty, zcu);
- switch (operand) {
- .register => |reg| {
- // TODO reuse operand
- const eu_lock = self.register_manager.lockReg(reg);
- defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
-
- const result = try self.copyToRegisterWithInstTracking(inst, err_union_ty, operand);
- if (err_off > 0) try self.genShiftBinOpMir(
- .{ ._r, .sh },
- err_union_ty,
- result,
- .u8,
- .{ .immediate = @as(u6, @intCast(err_off * 8)) },
- ) else try self.truncateRegister(.anyerror, result.register);
- break :result result;
- },
- .load_frame => |frame_addr| break :result .{ .load_frame = .{
- .index = frame_addr.index,
- .off = frame_addr.off + @as(i32, @intCast(err_off)),
- } },
- else => return self.fail("TODO implement unwrap_err_err for {f}", .{operand}),
- }
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn airUnwrapErrUnionPayload(self: *CodeGen, inst: Air.Inst.Index) !void {
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const operand_ty = self.typeOf(ty_op.operand);
- const operand = try self.resolveInst(ty_op.operand);
- const result = try self.genUnwrapErrUnionPayloadMir(inst, operand_ty, operand);
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-// *(E!T) -> E
-fn airUnwrapErrUnionErrPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-
- const src_ty = self.typeOf(ty_op.operand);
- const src_mcv = try self.resolveInst(ty_op.operand);
- const src_reg = switch (src_mcv) {
- .register => |reg| reg,
- else => try self.copyToTmpRegister(src_ty, src_mcv),
- };
- const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
- defer self.register_manager.unlockReg(src_lock);
-
- const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
- const dst_mcv = MCValue{ .register = dst_reg };
- const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
- defer self.register_manager.unlockReg(dst_lock);
-
- const eu_ty = src_ty.childType(zcu);
- const pl_ty = eu_ty.errorUnionPayload(zcu);
- const err_ty = eu_ty.errorUnionSet(zcu);
- const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu));
- const err_abi_size: u32 = @intCast(err_ty.abiSize(zcu));
- try self.asmRegisterMemory(
- .{ ._, .mov },
- registerAlias(dst_reg, err_abi_size),
- .{
- .base = .{ .reg = src_reg },
- .mod = .{ .rm = .{
- .size = .fromSize(err_abi_size),
- .disp = err_off,
- } },
- },
- );
-
- return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
-}
-
-// *(E!T) -> *T
-fn airUnwrapErrUnionPayloadPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const operand_ty = self.typeOf(ty_op.operand);
- const operand = try self.resolveInst(ty_op.operand);
- const result = try self.genUnwrapErrUnionPayloadPtrMir(inst, operand_ty, operand);
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn airErrUnionPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const result: MCValue = result: {
- const src_ty = self.typeOf(ty_op.operand);
- const src_mcv = try self.resolveInst(ty_op.operand);
- const src_reg = switch (src_mcv) {
- .register => |reg| reg,
- else => try self.copyToTmpRegister(src_ty, src_mcv),
- };
- const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
- defer self.register_manager.unlockReg(src_lock);
-
- const eu_ty = src_ty.childType(zcu);
- const pl_ty = eu_ty.errorUnionPayload(zcu);
- const err_ty = eu_ty.errorUnionSet(zcu);
- const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu));
- const err_abi_size: u32 = @intCast(err_ty.abiSize(zcu));
- try self.asmMemoryImmediate(
- .{ ._, .mov },
- .{
- .base = .{ .reg = src_reg },
- .mod = .{ .rm = .{
- .size = .fromSize(err_abi_size),
- .disp = err_off,
- } },
- },
- .u(0),
- );
-
- if (self.liveness.isUnused(inst)) break :result .unreach;
-
- const dst_ty = self.typeOfIndex(inst);
- const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_reg
- else
- try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
- const dst_lock = self.register_manager.lockReg(dst_reg);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu));
- const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
- try self.asmRegisterMemory(
- .{ ._, .lea },
- registerAlias(dst_reg, dst_abi_size),
- .{
- .base = .{ .reg = src_reg },
- .mod = .{ .rm = .{ .disp = pl_off } },
- },
- );
- break :result .{ .register = dst_reg };
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn genUnwrapErrUnionPayloadMir(
- self: *CodeGen,
- maybe_inst: ?Air.Inst.Index,
- err_union_ty: Type,
- err_union: MCValue,
-) !MCValue {
- const pt = self.pt;
- const zcu = pt.zcu;
- const payload_ty = err_union_ty.errorUnionPayload(zcu);
-
- const result: MCValue = result: {
- if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
-
- const payload_off: u31 = @intCast(codegen.errUnionPayloadOffset(payload_ty, zcu));
- switch (err_union) {
- .load_frame => |frame_addr| break :result .{ .load_frame = .{
- .index = frame_addr.index,
- .off = frame_addr.off + payload_off,
- } },
- .register => |reg| {
- // TODO reuse operand
- const eu_lock = self.register_manager.lockReg(reg);
- defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
-
- const payload_in_gp = self.regSetForType(payload_ty).supersetOf(abi.RegisterClass.gp);
- const result_mcv: MCValue = if (payload_in_gp and maybe_inst != null)
- try self.copyToRegisterWithInstTracking(maybe_inst.?, err_union_ty, err_union)
- else
- .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) };
- if (payload_off > 0) try self.genShiftBinOpMir(
- .{ ._r, .sh },
- err_union_ty,
- result_mcv,
- .u8,
- .{ .immediate = @as(u6, @intCast(payload_off * 8)) },
- ) else try self.truncateRegister(payload_ty, result_mcv.register);
- break :result if (payload_in_gp)
- result_mcv
- else if (maybe_inst) |inst|
- try self.copyToRegisterWithInstTracking(inst, payload_ty, result_mcv)
- else
- .{ .register = try self.copyToTmpRegister(payload_ty, result_mcv) };
- },
- else => return self.fail("TODO implement genUnwrapErrUnionPayloadMir for {f}", .{err_union}),
- }
- };
-
- return result;
-}
-
-fn genUnwrapErrUnionPayloadPtrMir(
- self: *CodeGen,
- maybe_inst: ?Air.Inst.Index,
- ptr_ty: Type,
- ptr_mcv: MCValue,
-) !MCValue {
- const pt = self.pt;
- const zcu = pt.zcu;
- const err_union_ty = ptr_ty.childType(zcu);
- const payload_ty = err_union_ty.errorUnionPayload(zcu);
-
- const result: MCValue = result: {
- const payload_off = codegen.errUnionPayloadOffset(payload_ty, zcu);
- const result_mcv: MCValue = if (maybe_inst) |inst|
- try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr_mcv)
- else
- .{ .register = try self.copyToTmpRegister(ptr_ty, ptr_mcv) };
- try self.genBinOpMir(.{ ._, .add }, ptr_ty, result_mcv, .{ .immediate = payload_off });
- break :result result_mcv;
- };
-
- return result;
-}
-
-fn airWrapOptional(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const result: MCValue = result: {
- const pl_ty = self.typeOf(ty_op.operand);
- if (!pl_ty.hasRuntimeBits(zcu)) break :result .{ .immediate = 1 };
-
- const opt_ty = self.typeOfIndex(inst);
- const pl_mcv = try self.resolveInst(ty_op.operand);
- const same_repr = opt_ty.optionalReprIsPayload(zcu);
- if (same_repr and self.reuseOperand(inst, ty_op.operand, 0, pl_mcv)) break :result pl_mcv;
-
- const pl_lock: ?RegisterLock = switch (pl_mcv) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (pl_lock) |lock| self.register_manager.unlockReg(lock);
-
- const opt_mcv = try self.allocRegOrMem(inst, true);
- try self.genCopy(pl_ty, opt_mcv, pl_mcv, .{});
-
- if (!same_repr) {
- const pl_abi_size: i32 = @intCast(pl_ty.abiSize(zcu));
- switch (opt_mcv) {
- else => unreachable,
-
- .register => |opt_reg| {
- try self.truncateRegister(pl_ty, opt_reg);
- try self.asmRegisterImmediate(
- .{ ._s, .bt },
- opt_reg,
- .u(@as(u6, @intCast(pl_abi_size * 8))),
- );
- },
-
- .load_frame => |frame_addr| try self.asmMemoryImmediate(
- .{ ._, .mov },
- .{
- .base = .{ .frame = frame_addr.index },
- .mod = .{ .rm = .{
- .size = .byte,
- .disp = frame_addr.off + pl_abi_size,
- } },
- },
- .u(1),
- ),
- }
- }
- break :result opt_mcv;
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-/// T to E!T
-fn airWrapErrUnionPayload(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-
- const eu_ty = ty_op.ty.toType();
- const pl_ty = eu_ty.errorUnionPayload(zcu);
- const err_ty = eu_ty.errorUnionSet(zcu);
- const operand = try self.resolveInst(ty_op.operand);
-
- const result: MCValue = result: {
- if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .{ .immediate = 0 };
-
- const frame_index = try self.allocFrameIndex(.initSpill(eu_ty, zcu));
- const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu));
- const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu));
- try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, operand, .{});
- try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, .{ .immediate = 0 }, .{});
- break :result .{ .load_frame = .{ .index = frame_index } };
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-/// E to E!T
-fn airWrapErrUnionErr(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-
- const eu_ty = ty_op.ty.toType();
- const pl_ty = eu_ty.errorUnionPayload(zcu);
- const err_ty = eu_ty.errorUnionSet(zcu);
-
- const result: MCValue = result: {
- if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result try self.resolveInst(ty_op.operand);
-
- const frame_index = try self.allocFrameIndex(.initSpill(eu_ty, zcu));
- const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu));
- const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu));
- try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, .undef, .{});
- const operand = try self.resolveInst(ty_op.operand);
- try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, operand, .{});
- break :result .{ .load_frame = .{ .index = frame_index } };
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn airSlicePtr(self: *CodeGen, inst: Air.Inst.Index) !void {
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const result = result: {
- const src_mcv = try self.resolveInst(ty_op.operand);
- const ptr_mcv: MCValue = switch (src_mcv) {
- .register_pair => |regs| .{ .register = regs[0] },
- else => src_mcv,
- };
- if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
- switch (src_mcv) {
- .register_pair => |regs| try self.freeValue(.{ .register = regs[1] }),
- else => {},
- }
- break :result ptr_mcv;
- }
-
- const dst_mcv = try self.allocRegOrMem(inst, true);
- try self.genCopy(self.typeOfIndex(inst), dst_mcv, ptr_mcv, .{});
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn airSliceLen(self: *CodeGen, inst: Air.Inst.Index) !void {
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const result = result: {
- const src_mcv = try self.resolveInst(ty_op.operand);
- const len_mcv: MCValue = switch (src_mcv) {
- .register_pair => |regs| .{ .register = regs[1] },
- .load_frame => |frame_addr| .{ .load_frame = .{
- .index = frame_addr.index,
- .off = frame_addr.off + 8,
- } },
- else => return self.fail("TODO implement slice_len for {f}", .{src_mcv}),
- };
- if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
- switch (src_mcv) {
- .register_pair => |regs| try self.freeValue(.{ .register = regs[0] }),
- .load_frame => {},
- else => unreachable,
- }
- break :result len_mcv;
- }
-
- const dst_mcv = try self.allocRegOrMem(inst, true);
- try self.genCopy(self.typeOfIndex(inst), dst_mcv, len_mcv, .{});
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn airPtrSliceLenPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-
- const src_ty = self.typeOf(ty_op.operand);
- const src_mcv = try self.resolveInst(ty_op.operand);
- const src_reg = switch (src_mcv) {
- .register => |reg| reg,
- else => try self.copyToTmpRegister(src_ty, src_mcv),
- };
- const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
- defer self.register_manager.unlockReg(src_lock);
-
- const dst_ty = self.typeOfIndex(inst);
- const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_reg
- else
- try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
- const dst_mcv = MCValue{ .register = dst_reg };
- const dst_lock = self.register_manager.lockReg(dst_reg);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- const dst_abi_size: u32 = @intCast(dst_ty.abiSize(pt.zcu));
- try self.asmRegisterMemory(
- .{ ._, .lea },
- registerAlias(dst_reg, dst_abi_size),
- .{
- .base = .{ .reg = src_reg },
- .mod = .{ .rm = .{ .disp = 8 } },
- },
- );
-
- return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
-}
-
-fn airPtrSlicePtrPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-
- const dst_ty = self.typeOfIndex(inst);
- const opt_mcv = try self.resolveInst(ty_op.operand);
-
- const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv))
- opt_mcv
- else
- try self.copyToRegisterWithInstTracking(inst, dst_ty, opt_mcv);
- return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
-}
-
-fn elemOffset(self: *CodeGen, index_ty: Type, index: MCValue, elem_size: u64) !Register {
- const reg: Register = blk: {
- switch (index) {
- .immediate => |imm| {
- // Optimisation: if index MCValue is an immediate, we can multiply in `comptime`
- // and set the register directly to the scaled offset as an immediate.
- const reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- try self.genSetReg(reg, index_ty, .{ .immediate = imm * elem_size }, .{});
- break :blk reg;
- },
- else => {
- const reg = try self.copyToTmpRegister(index_ty, index);
- try self.genIntMulComplexOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size });
- break :blk reg;
- },
- }
- };
- return reg;
-}
-
-fn genSliceElemPtr(self: *CodeGen, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue {
- const pt = self.pt;
- const zcu = pt.zcu;
- const slice_ty = self.typeOf(lhs);
- const slice_mcv = try self.resolveInst(lhs);
- const slice_mcv_lock: ?RegisterLock = switch (slice_mcv) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (slice_mcv_lock) |lock| self.register_manager.unlockReg(lock);
-
- const elem_ty = slice_ty.childType(zcu);
- const elem_size = elem_ty.abiSize(zcu);
- const slice_ptr_field_type = slice_ty.slicePtrFieldType(zcu);
-
- const index_ty = self.typeOf(rhs);
- const index_mcv = try self.resolveInst(rhs);
- const index_mcv_lock: ?RegisterLock = switch (index_mcv) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (index_mcv_lock) |lock| self.register_manager.unlockReg(lock);
-
- const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_size);
- const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
- defer self.register_manager.unlockReg(offset_reg_lock);
-
- const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- try self.genSetReg(addr_reg, .usize, slice_mcv, .{});
- // TODO we could allocate register here, but need to expect addr register and potentially
- // offset register.
- try self.genBinOpMir(.{ ._, .add }, slice_ptr_field_type, .{ .register = addr_reg }, .{
- .register = offset_reg,
- });
- return MCValue{ .register = addr_reg.to64() };
-}
-
-fn airSliceElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-
- const result: MCValue = result: {
- const elem_ty = self.typeOfIndex(inst);
- if (!elem_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
-
- const slice_ty = self.typeOf(bin_op.lhs);
- const slice_ptr_field_type = slice_ty.slicePtrFieldType(zcu);
- const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs);
- const dst_mcv = try self.allocRegOrMem(inst, false);
- try self.load(dst_mcv, slice_ptr_field_type, elem_ptr);
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
-}
-
-fn airSliceElemPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
- const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
- const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
- const dst_mcv = try self.genSliceElemPtr(extra.lhs, extra.rhs);
- return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none });
-}
-
-fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-
- const result: MCValue = result: {
- const array_ty = self.typeOf(bin_op.lhs);
- const elem_ty = array_ty.childType(zcu);
-
- const array_mcv = try self.resolveInst(bin_op.lhs);
- const array_lock: ?RegisterLock = switch (array_mcv) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (array_lock) |lock| self.register_manager.unlockReg(lock);
-
- const index_ty = self.typeOf(bin_op.rhs);
- const index_mcv = try self.resolveInst(bin_op.rhs);
- const index_lock = switch (index_mcv) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
-
- try self.spillEflagsIfOccupied();
- if (array_ty.isVector(zcu) and elem_ty.bitSize(zcu) == 1) {
- const array_mat_mcv: MCValue = switch (array_mcv) {
- else => array_mcv,
- .register_mask => .{ .register = try self.copyToTmpRegister(array_ty, array_mcv) },
- };
- const array_mat_lock = switch (array_mat_mcv) {
- .register => |reg| self.register_manager.lockReg(reg),
- else => null,
- };
- defer if (array_mat_lock) |lock| self.register_manager.unlockReg(lock);
-
- switch (array_mat_mcv) {
- .register => |array_reg| switch (array_reg.class()) {
- .general_purpose => switch (index_mcv) {
- .immediate => |index_imm| try self.asmRegisterImmediate(
- .{ ._, .bt },
- array_reg.to64(),
- .u(index_imm),
- ),
- else => try self.asmRegisterRegister(
- .{ ._, .bt },
- array_reg.to64(),
- switch (index_mcv) {
- .register => |index_reg| index_reg,
- else => try self.copyToTmpRegister(index_ty, index_mcv),
- }.to64(),
- ),
- },
- .sse => {
- const frame_index = try self.allocFrameIndex(.initType(array_ty, zcu));
- try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mat_mcv, .{});
- switch (index_mcv) {
- .immediate => |index_imm| try self.asmMemoryImmediate(
- .{ ._, .bt },
- .{
- .base = .{ .frame = frame_index },
- .mod = .{ .rm = .{
- .size = .qword,
- .disp = @intCast(index_imm / 64 * 8),
- } },
- },
- .u(index_imm % 64),
- ),
- else => try self.asmMemoryRegister(
- .{ ._, .bt },
- .{
- .base = .{ .frame = frame_index },
- .mod = .{ .rm = .{ .size = .qword } },
- },
- switch (index_mcv) {
- .register => |index_reg| index_reg,
- else => try self.copyToTmpRegister(index_ty, index_mcv),
- }.to64(),
- ),
- }
- },
- else => unreachable,
- },
- .load_frame => switch (index_mcv) {
- .immediate => |index_imm| try self.asmMemoryImmediate(
- .{ ._, .bt },
- try array_mat_mcv.mem(self, .{
- .size = .qword,
- .disp = @intCast(index_imm / 64 * 8),
- }),
- .u(index_imm % 64),
- ),
- else => try self.asmMemoryRegister(
- .{ ._, .bt },
- try array_mat_mcv.mem(self, .{ .size = .qword }),
- switch (index_mcv) {
- .register => |index_reg| index_reg,
- else => try self.copyToTmpRegister(index_ty, index_mcv),
- }.to64(),
- ),
- },
- .memory,
- .load_nav,
- .load_uav,
- .load_lazy_sym,
- .load_extern_func,
- => switch (index_mcv) {
- .immediate => |index_imm| try self.asmMemoryImmediate(
- .{ ._, .bt },
- .{
- .base = .{
- .reg = try self.copyToTmpRegister(.usize, array_mat_mcv.address()),
- },
- .mod = .{ .rm = .{
- .size = .qword,
- .disp = @intCast(index_imm / 64 * 8),
- } },
- },
- .u(index_imm % 64),
- ),
- else => try self.asmMemoryRegister(
- .{ ._, .bt },
- .{
- .base = .{
- .reg = try self.copyToTmpRegister(.usize, array_mat_mcv.address()),
- },
- .mod = .{ .rm = .{ .size = .qword } },
- },
- switch (index_mcv) {
- .register => |index_reg| index_reg,
- else => try self.copyToTmpRegister(index_ty, index_mcv),
- }.to64(),
- ),
- },
- else => return self.fail("TODO airArrayElemVal for {s} of {f}", .{
- @tagName(array_mat_mcv), array_ty.fmt(pt),
- }),
- }
-
- const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
- try self.asmSetccRegister(.c, dst_reg.to8());
- break :result .{ .register = dst_reg };
- }
-
- const elem_abi_size = elem_ty.abiSize(zcu);
- const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
- defer self.register_manager.unlockReg(addr_lock);
-
- switch (array_mcv) {
- .register => {
- const frame_index = try self.allocFrameIndex(.initType(array_ty, zcu));
- try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mcv, .{});
- try self.asmRegisterMemory(
- .{ ._, .lea },
- addr_reg,
- .{ .base = .{ .frame = frame_index } },
- );
- },
- .load_frame => |frame_addr| try self.asmRegisterMemory(
- .{ ._, .lea },
- addr_reg,
- .{
- .base = .{ .frame = frame_addr.index },
- .mod = .{ .rm = .{ .disp = frame_addr.off } },
- },
- ),
- .memory,
- .load_nav,
- .lea_nav,
- .load_uav,
- .lea_uav,
- .load_lazy_sym,
- .lea_lazy_sym,
- .load_extern_func,
- .lea_extern_func,
- => try self.genSetReg(addr_reg, .usize, array_mcv.address(), .{}),
- else => return self.fail("TODO airArrayElemVal_val for {s} of {f}", .{
- @tagName(array_mcv), array_ty.fmt(pt),
- }),
- }
-
- const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size);
- const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
- defer self.register_manager.unlockReg(offset_lock);
-
- // TODO we could allocate register here, but need to expect addr register and potentially
- // offset register.
- const dst_mcv = try self.allocRegOrMem(inst, false);
- try self.genBinOpMir(.{ ._, .add }, .usize, .{ .register = addr_reg }, .{ .register = offset_reg });
- try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }, .{});
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
-}
-
-fn airPtrElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
- const ptr_ty = self.typeOf(bin_op.lhs);
-
- // this is identical to the `airPtrElemPtr` codegen expect here an
- // additional `mov` is needed at the end to get the actual value
-
- const result = result: {
- const elem_ty = ptr_ty.elemType2(zcu);
- if (!elem_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
-
- const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu));
- const index_ty = self.typeOf(bin_op.rhs);
- const index_mcv = try self.resolveInst(bin_op.rhs);
- const index_lock = switch (index_mcv) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
-
- const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size);
- const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
- defer self.register_manager.unlockReg(offset_lock);
-
- const ptr_mcv = try self.resolveInst(bin_op.lhs);
- const elem_ptr_reg = if (ptr_mcv.isRegister() and self.liveness.operandDies(inst, 0))
- ptr_mcv.register
- else
- try self.copyToTmpRegister(ptr_ty, ptr_mcv);
- const elem_ptr_lock = self.register_manager.lockRegAssumeUnused(elem_ptr_reg);
- defer self.register_manager.unlockReg(elem_ptr_lock);
- try self.asmRegisterRegister(
- .{ ._, .add },
- elem_ptr_reg,
- offset_reg,
- );
-
- const dst_mcv = try self.allocRegOrMem(inst, true);
- const dst_lock = switch (dst_mcv) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
- try self.load(dst_mcv, ptr_ty, .{ .register = elem_ptr_reg });
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
-}
-
-fn airPtrElemPtr(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
- const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
-
- const result = result: {
- const elem_ptr_ty = self.typeOfIndex(inst);
- const base_ptr_ty = self.typeOf(extra.lhs);
-
- const base_ptr_mcv = try self.resolveInst(extra.lhs);
- const base_ptr_lock: ?RegisterLock = switch (base_ptr_mcv) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (base_ptr_lock) |lock| self.register_manager.unlockReg(lock);
-
- if (elem_ptr_ty.ptrInfo(zcu).flags.vector_index != .none) {
- break :result if (self.reuseOperand(inst, extra.lhs, 0, base_ptr_mcv))
- base_ptr_mcv
- else
- try self.copyToRegisterWithInstTracking(inst, elem_ptr_ty, base_ptr_mcv);
- }
-
- const elem_ty = base_ptr_ty.elemType2(zcu);
- const elem_abi_size = elem_ty.abiSize(zcu);
- const index_ty = self.typeOf(extra.rhs);
- const index_mcv = try self.resolveInst(extra.rhs);
- const index_lock: ?RegisterLock = switch (index_mcv) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
-
- const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size);
- const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
- defer self.register_manager.unlockReg(offset_reg_lock);
-
- const dst_mcv = try self.copyToRegisterWithInstTracking(inst, elem_ptr_ty, base_ptr_mcv);
- try self.genBinOpMir(.{ ._, .add }, elem_ptr_ty, dst_mcv, .{ .register = offset_reg });
-
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none });
-}
-
-fn airSetUnionTag(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
- const ptr_union_ty = self.typeOf(bin_op.lhs);
- const union_ty = ptr_union_ty.childType(zcu);
- const tag_ty = self.typeOf(bin_op.rhs);
- const layout = union_ty.unionGetLayout(zcu);
-
- if (layout.tag_size == 0) {
- return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
- }
-
- const ptr = try self.resolveInst(bin_op.lhs);
- const ptr_lock: ?RegisterLock = switch (ptr) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
-
- const tag = try self.resolveInst(bin_op.rhs);
- const tag_lock: ?RegisterLock = switch (tag) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (tag_lock) |lock| self.register_manager.unlockReg(lock);
-
- const adjusted_ptr: MCValue = if (layout.payload_size > 0 and layout.tag_align.compare(.lt, layout.payload_align)) blk: {
- // TODO reusing the operand
- const reg = try self.copyToTmpRegister(ptr_union_ty, ptr);
- try self.genBinOpMir(
- .{ ._, .add },
- ptr_union_ty,
- .{ .register = reg },
- .{ .immediate = layout.payload_size },
- );
- break :blk MCValue{ .register = reg };
- } else ptr;
-
- const ptr_tag_ty = try pt.adjustPtrTypeChild(ptr_union_ty, tag_ty);
- try self.store(ptr_tag_ty, adjusted_ptr, tag, .{});
-
- return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
-}
-
-fn airGetUnionTag(self: *CodeGen, inst: Air.Inst.Index) !void {
- const zcu = self.pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-
- const tag_ty = self.typeOfIndex(inst);
- const union_ty = self.typeOf(ty_op.operand);
- const layout = union_ty.unionGetLayout(zcu);
-
- if (layout.tag_size == 0) {
- return self.finishAir(inst, .none, .{ ty_op.operand, .none, .none });
- }
-
- // TODO reusing the operand
- const operand = try self.resolveInst(ty_op.operand);
- const operand_lock: ?RegisterLock = switch (operand) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (operand_lock) |lock| self.register_manager.unlockReg(lock);
-
- const tag_abi_size = tag_ty.abiSize(zcu);
- const dst_mcv: MCValue = blk: {
- switch (operand) {
- .load_frame => |frame_addr| {
- if (tag_abi_size <= 8) {
- const off: i32 = @intCast(layout.tagOffset());
- break :blk try self.copyToRegisterWithInstTracking(inst, tag_ty, .{
- .load_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off },
- });
- }
-
- return self.fail(
- "TODO implement get_union_tag for ABI larger than 8 bytes and operand {f}",
- .{operand},
- );
- },
- .register => {
- const shift: u6 = @intCast(layout.tagOffset() * 8);
- const result = try self.copyToRegisterWithInstTracking(inst, union_ty, operand);
- try self.genShiftBinOpMir(.{ ._r, .sh }, .usize, result, .u8, .{ .immediate = shift });
- break :blk MCValue{
- .register = registerAlias(result.register, @intCast(layout.tag_size)),
- };
- },
- else => return self.fail("TODO implement get_union_tag for {f}", .{operand}),
- }
- };
-
- return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
-}
-
-fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const result = result: {
- try self.spillEflagsIfOccupied();
-
- const dst_ty = self.typeOfIndex(inst);
- const src_ty = self.typeOf(ty_op.operand);
- if (src_ty.zigTypeTag(zcu) == .vector) return self.fail("TODO implement airClz for {f}", .{
- src_ty.fmt(pt),
- });
-
- const src_mcv = try self.resolveInst(ty_op.operand);
- const mat_src_mcv = switch (src_mcv) {
- .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
- else => src_mcv,
- };
- const mat_src_lock = switch (mat_src_mcv) {
- .register => |reg| self.register_manager.lockReg(reg),
- else => null,
- };
- defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
-
- const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
- const dst_mcv = MCValue{ .register = dst_reg };
- const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
- defer self.register_manager.unlockReg(dst_lock);
-
- const abi_size: u31 = @intCast(src_ty.abiSize(zcu));
- const src_bits: u31 = @intCast(src_ty.bitSize(zcu));
- const has_lzcnt = self.hasFeature(.lzcnt);
- if (src_bits > @as(u32, if (has_lzcnt) 128 else 64)) {
- const src_frame_addr: bits.FrameAddr = src_frame_addr: switch (src_mcv) {
- .load_frame => |src_frame_addr| src_frame_addr,
- else => {
- const src_frame_addr = try self.allocFrameIndex(.initSpill(src_ty, zcu));
- try self.genSetMem(.{ .frame = src_frame_addr }, 0, src_ty, src_mcv, .{});
- break :src_frame_addr .{ .index = src_frame_addr };
- },
- };
-
- const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable;
- const extra_bits = abi_size * 8 - src_bits;
-
- const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const index_lock = self.register_manager.lockRegAssumeUnused(index_reg);
- defer self.register_manager.unlockReg(index_lock);
-
- try self.asmRegisterImmediate(.{ ._, .mov }, index_reg.to32(), .u(limbs_len));
- switch (extra_bits) {
- 1 => try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()),
- else => try self.asmRegisterImmediate(
- .{ ._, .mov },
- dst_reg.to32(),
- .s(@as(i32, extra_bits) - 1),
- ),
- }
- const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
- try self.asmRegisterRegister(.{ ._, .@"test" }, index_reg.to32(), index_reg.to32());
- const zero = try self.asmJccReloc(.z, undefined);
- if (self.hasFeature(.slow_incdec)) {
- try self.asmRegisterImmediate(.{ ._, .sub }, index_reg.to32(), .u(1));
- } else {
- try self.asmRegister(.{ ._c, .de }, index_reg.to32());
- }
- try self.asmMemoryImmediate(.{ ._, .cmp }, .{
- .base = .{ .frame = src_frame_addr.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = index_reg.to64(),
- .scale = .@"8",
- .disp = src_frame_addr.off,
- } },
- }, .u(0));
- _ = try self.asmJccReloc(.e, loop);
- try self.asmRegisterMemory(.{ ._r, .bs }, dst_reg.to64(), .{
- .base = .{ .frame = src_frame_addr.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = index_reg.to64(),
- .scale = .@"8",
- .disp = src_frame_addr.off,
- } },
- });
- self.performReloc(zero);
- try self.asmRegisterImmediate(.{ ._l, .sh }, index_reg.to32(), .u(6));
- try self.asmRegisterRegister(.{ ._, .add }, index_reg.to32(), dst_reg.to32());
- try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), .u(src_bits - 1));
- try self.asmRegisterRegister(.{ ._, .sub }, dst_reg.to32(), index_reg.to32());
- break :result dst_mcv;
- }
-
- if (has_lzcnt) {
- if (src_bits <= 8) {
- const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
- try self.truncateRegister(src_ty, wide_reg);
- try self.genBinOpMir(.{ ._, .lzcnt }, .u32, dst_mcv, .{ .register = wide_reg });
- try self.genBinOpMir(
- .{ ._, .sub },
- dst_ty,
- dst_mcv,
- .{ .immediate = 32 - src_bits },
- );
- } else if (src_bits <= 64) {
- try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, dst_mcv, mat_src_mcv);
- const extra_bits = self.regExtraBits(src_ty);
- if (extra_bits > 0) {
- try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .immediate = extra_bits });
- }
- } else {
- assert(src_bits <= 128);
- const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const tmp_mcv = MCValue{ .register = tmp_reg };
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- try self.genBinOpMir(.{ ._, .lzcnt }, .u64, dst_mcv, if (mat_src_mcv.isBase())
- mat_src_mcv
- else
- .{ .register = mat_src_mcv.register_pair[0] });
- try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 });
- try self.genBinOpMir(.{ ._, .lzcnt }, .u64, tmp_mcv, if (mat_src_mcv.isBase())
- mat_src_mcv.address().offset(8).deref()
- else
- .{ .register = mat_src_mcv.register_pair[1] });
- try self.asmCmovccRegisterRegister(.nc, dst_reg.to32(), tmp_reg.to32());
-
- if (src_bits < 128) try self.genBinOpMir(
- .{ ._, .sub },
- dst_ty,
- dst_mcv,
- .{ .immediate = 128 - src_bits },
- );
- }
- break :result dst_mcv;
- }
-
- assert(src_bits <= 64);
- const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(zcu))), 2);
- if (std.math.isPowerOfTwo(src_bits)) {
- const imm_reg = try self.copyToTmpRegister(dst_ty, .{
- .immediate = src_bits ^ (src_bits - 1),
- });
- const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg);
- defer self.register_manager.unlockReg(imm_lock);
-
- if (src_bits <= 8) {
- const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
- const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
- defer self.register_manager.unlockReg(wide_lock);
-
- try self.truncateRegister(src_ty, wide_reg);
- try self.genBinOpMir(.{ ._r, .bs }, .u16, dst_mcv, .{ .register = wide_reg });
- } else try self.genBinOpMir(.{ ._r, .bs }, src_ty, dst_mcv, mat_src_mcv);
-
- try self.asmCmovccRegisterRegister(
- .z,
- registerAlias(dst_reg, cmov_abi_size),
- registerAlias(imm_reg, cmov_abi_size),
- );
-
- try self.genBinOpMir(.{ ._, .xor }, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 });
- } else {
- const imm_reg = try self.copyToTmpRegister(dst_ty, .{
- .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - self.regBitSize(dst_ty)),
- });
- const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg);
- defer self.register_manager.unlockReg(imm_lock);
-
- const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
- const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
- defer self.register_manager.unlockReg(wide_lock);
-
- try self.truncateRegister(src_ty, wide_reg);
- try self.genBinOpMir(
- .{ ._r, .bs },
- if (src_bits <= 8) .u16 else src_ty,
- dst_mcv,
- .{ .register = wide_reg },
- );
-
- try self.asmCmovccRegisterRegister(
- .nz,
- registerAlias(imm_reg, cmov_abi_size),
- registerAlias(dst_reg, cmov_abi_size),
- );
-
- try self.genSetReg(dst_reg, dst_ty, .{ .immediate = src_bits - 1 }, .{});
- try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .register = imm_reg });
- }
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const result = result: {
- try self.spillEflagsIfOccupied();
-
- const dst_ty = self.typeOfIndex(inst);
- const src_ty = self.typeOf(ty_op.operand);
- if (src_ty.zigTypeTag(zcu) == .vector) return self.fail("TODO implement airCtz for {f}", .{
- src_ty.fmt(pt),
- });
-
- const src_mcv = try self.resolveInst(ty_op.operand);
- const mat_src_mcv = switch (src_mcv) {
- .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
- else => src_mcv,
- };
- const mat_src_lock = switch (mat_src_mcv) {
- .register => |reg| self.register_manager.lockReg(reg),
- else => null,
- };
- defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
-
- const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
- const dst_mcv = MCValue{ .register = dst_reg };
- const dst_lock = self.register_manager.lockReg(dst_reg);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- const abi_size: u31 = @intCast(src_ty.abiSize(zcu));
- const src_bits: u31 = @intCast(src_ty.bitSize(zcu));
- const has_bmi = self.hasFeature(.bmi);
- if (src_bits > @as(u32, if (has_bmi) 128 else 64)) {
- const src_frame_addr: bits.FrameAddr = src_frame_addr: switch (src_mcv) {
- .load_frame => |src_frame_addr| src_frame_addr,
- else => {
- const src_frame_addr = try self.allocFrameIndex(.initSpill(src_ty, zcu));
- try self.genSetMem(.{ .frame = src_frame_addr }, 0, src_ty, src_mcv, .{});
- break :src_frame_addr .{ .index = src_frame_addr };
- },
- };
-
- const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable;
- const extra_bits = abi_size * 8 - src_bits;
-
- const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const index_lock = self.register_manager.lockRegAssumeUnused(index_reg);
- defer self.register_manager.unlockReg(index_lock);
-
- try self.asmRegisterImmediate(.{ ._, .mov }, index_reg.to32(), .s(-1));
- switch (extra_bits) {
- 0 => try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()),
- 1 => try self.asmRegisterRegister(.{ ._, .mov }, dst_reg.to32(), dst_reg.to32()),
- else => try self.asmRegisterImmediate(
- .{ ._, .mov },
- dst_reg.to32(),
- .s(-@as(i32, extra_bits)),
- ),
- }
- const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
- if (self.hasFeature(.slow_incdec)) {
- try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1));
- } else {
- try self.asmRegister(.{ ._c, .in }, index_reg.to32());
- }
- try self.asmRegisterImmediate(.{ ._, .cmp }, index_reg.to32(), .u(limbs_len));
- const zero = try self.asmJccReloc(.nb, undefined);
- try self.asmMemoryImmediate(.{ ._, .cmp }, .{
- .base = .{ .frame = src_frame_addr.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = index_reg.to64(),
- .scale = .@"8",
- .disp = src_frame_addr.off,
- } },
- }, .u(0));
- _ = try self.asmJccReloc(.e, loop);
- try self.asmRegisterMemory(.{ ._f, .bs }, dst_reg.to64(), .{
- .base = .{ .frame = src_frame_addr.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = index_reg.to64(),
- .scale = .@"8",
- .disp = src_frame_addr.off,
- } },
- });
- self.performReloc(zero);
- try self.asmRegisterImmediate(.{ ._l, .sh }, index_reg.to32(), .u(6));
- try self.asmRegisterRegister(.{ ._, .add }, dst_reg.to32(), index_reg.to32());
- break :result dst_mcv;
- }
-
- const wide_ty: Type = if (src_bits <= 8) .u16 else src_ty;
- if (has_bmi) {
- if (src_bits <= 64) {
- const extra_bits = self.regExtraBits(src_ty) + @as(u64, if (src_bits <= 8) 8 else 0);
- const masked_mcv = if (extra_bits > 0) masked: {
- const tmp_mcv = tmp: {
- if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0))
- break :tmp src_mcv;
- try self.genSetReg(dst_reg, wide_ty, src_mcv, .{});
- break :tmp dst_mcv;
- };
- try self.genBinOpMir(
- .{ ._, .@"or" },
- wide_ty,
- tmp_mcv,
- .{ .immediate = (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - extra_bits)) <<
- @intCast(src_bits) },
- );
- break :masked tmp_mcv;
- } else mat_src_mcv;
- try self.genBinOpMir(.{ ._, .tzcnt }, wide_ty, dst_mcv, masked_mcv);
- } else {
- assert(src_bits <= 128);
- const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const tmp_mcv = MCValue{ .register = tmp_reg };
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- const lo_mat_src_mcv: MCValue = if (mat_src_mcv.isBase())
- mat_src_mcv
- else
- .{ .register = mat_src_mcv.register_pair[0] };
- const hi_mat_src_mcv: MCValue = if (mat_src_mcv.isBase())
- mat_src_mcv.address().offset(8).deref()
- else
- .{ .register = mat_src_mcv.register_pair[1] };
- const masked_mcv = if (src_bits < 128) masked: {
- try self.genCopy(.u64, dst_mcv, hi_mat_src_mcv, .{});
- try self.genBinOpMir(
- .{ ._, .@"or" },
- .u64,
- dst_mcv,
- .{ .immediate = @as(u64, std.math.maxInt(u64)) << @intCast(src_bits - 64) },
- );
- break :masked dst_mcv;
- } else hi_mat_src_mcv;
- try self.genBinOpMir(.{ ._, .tzcnt }, .u64, dst_mcv, masked_mcv);
- try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 });
- try self.genBinOpMir(.{ ._, .tzcnt }, .u64, tmp_mcv, lo_mat_src_mcv);
- try self.asmCmovccRegisterRegister(.nc, dst_reg.to32(), tmp_reg.to32());
- }
- break :result dst_mcv;
- }
-
- assert(src_bits <= 64);
- const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits });
- const width_lock = self.register_manager.lockRegAssumeUnused(width_reg);
- defer self.register_manager.unlockReg(width_lock);
-
- if (src_bits <= 8 or !std.math.isPowerOfTwo(src_bits)) {
- const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
- const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
- defer self.register_manager.unlockReg(wide_lock);
-
- try self.truncateRegister(src_ty, wide_reg);
- try self.genBinOpMir(.{ ._f, .bs }, wide_ty, dst_mcv, .{ .register = wide_reg });
- } else try self.genBinOpMir(.{ ._f, .bs }, src_ty, dst_mcv, mat_src_mcv);
-
- const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(zcu))), 2);
- try self.asmCmovccRegisterRegister(
- .z,
- registerAlias(dst_reg, cmov_abi_size),
- registerAlias(width_reg, cmov_abi_size),
- );
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn airPopCount(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const result: MCValue = result: {
- try self.spillEflagsIfOccupied();
-
- const src_ty = self.typeOf(ty_op.operand);
- const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));
- if (src_ty.zigTypeTag(zcu) == .vector or src_abi_size > 16)
- return self.fail("TODO implement airPopCount for {f}", .{src_ty.fmt(pt)});
- const src_mcv = try self.resolveInst(ty_op.operand);
-
- const mat_src_mcv = switch (src_mcv) {
- .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
- else => src_mcv,
- };
- const mat_src_lock = switch (mat_src_mcv) {
- .register => |reg| self.register_manager.lockReg(reg),
- else => null,
- };
- defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
-
- if (src_abi_size <= 8) {
- const dst_contains_src =
- src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv);
- const dst_reg = if (dst_contains_src)
- src_mcv.getReg().?
- else
- try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
- const dst_lock = self.register_manager.lockReg(dst_reg);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- try self.genPopCount(dst_reg, src_ty, mat_src_mcv, dst_contains_src);
- break :result .{ .register = dst_reg };
- }
-
- assert(src_abi_size > 8 and src_abi_size <= 16);
- const tmp_regs = try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp);
- const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs);
- defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
-
- try self.genPopCount(tmp_regs[0], .usize, if (mat_src_mcv.isBase())
- mat_src_mcv
- else
- .{ .register = mat_src_mcv.register_pair[0] }, false);
- const src_info = src_ty.intInfo(zcu);
- const hi_ty = try pt.intType(src_info.signedness, (src_info.bits - 1) % 64 + 1);
- try self.genPopCount(tmp_regs[1], hi_ty, if (mat_src_mcv.isBase())
- mat_src_mcv.address().offset(8).deref()
- else
- .{ .register = mat_src_mcv.register_pair[1] }, false);
- try self.asmRegisterRegister(.{ ._, .add }, tmp_regs[0].to8(), tmp_regs[1].to8());
- break :result .{ .register = tmp_regs[0] };
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn genPopCount(
- self: *CodeGen,
- dst_reg: Register,
- src_ty: Type,
- src_mcv: MCValue,
- dst_contains_src: bool,
-) !void {
- const pt = self.pt;
-
- const src_abi_size: u32 = @intCast(src_ty.abiSize(pt.zcu));
- if (self.hasFeature(.popcnt)) return self.genBinOpMir(
- .{ ._, .popcnt },
- if (src_abi_size > 1) src_ty else .u32,
- .{ .register = dst_reg },
- if (src_abi_size > 1) src_mcv else src: {
- if (!dst_contains_src) try self.genSetReg(dst_reg, src_ty, src_mcv, .{});
- try self.truncateRegister(try src_ty.toUnsigned(pt), dst_reg);
- break :src .{ .register = dst_reg };
- },
- );
-
- const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - src_abi_size * 8);
- const imm_0_1: Immediate = .u(mask / 0b1_1);
- const imm_00_11: Immediate = .u(mask / 0b01_01);
- const imm_0000_1111: Immediate = .u(mask / 0b0001_0001);
- const imm_0000_0001: Immediate = .u(mask / 0b1111_1111);
-
- const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- const dst = registerAlias(dst_reg, src_abi_size);
- const tmp = registerAlias(tmp_reg, src_abi_size);
- const imm = if (src_abi_size > 4)
- try self.register_manager.allocReg(null, abi.RegisterClass.gp)
- else
- undefined;
-
- if (!dst_contains_src) try self.genSetReg(dst, src_ty, src_mcv, .{});
- // dst = operand
- try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
- // tmp = operand
- try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(1));
- // tmp = operand >> 1
- if (src_abi_size > 4) {
- try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1);
- try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
- } else try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1);
- // tmp = (operand >> 1) & 0x55...55
- try self.asmRegisterRegister(.{ ._, .sub }, dst, tmp);
- // dst = temp1 = operand - ((operand >> 1) & 0x55...55)
- try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
- // tmp = temp1
- try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(2));
- // dst = temp1 >> 2
- if (src_abi_size > 4) {
- try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11);
- try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
- try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
- } else {
- try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11);
- try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11);
- }
- // tmp = temp1 & 0x33...33
- // dst = (temp1 >> 2) & 0x33...33
- try self.asmRegisterRegister(.{ ._, .add }, tmp, dst);
- // tmp = temp2 = (temp1 & 0x33...33) + ((temp1 >> 2) & 0x33...33)
- try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp);
- // dst = temp2
- try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(4));
- // tmp = temp2 >> 4
- try self.asmRegisterRegister(.{ ._, .add }, dst, tmp);
- // dst = temp2 + (temp2 >> 4)
- if (src_abi_size > 4) {
- try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111);
- try self.asmRegisterImmediate(.{ ._, .mov }, tmp, imm_0000_0001);
- try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
- try self.asmRegisterRegister(.{ .i_, .mul }, dst, tmp);
- } else {
- try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111);
- if (src_abi_size > 1) {
- try self.asmRegisterRegisterImmediate(.{ .i_, .mul }, dst, dst, imm_0000_0001);
- }
- }
- // dst = temp3 = (temp2 + (temp2 >> 4)) & 0x0f...0f
- // dst = temp3 * 0x01...01
- if (src_abi_size > 1) {
- try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u((src_abi_size - 1) * 8));
- }
- // dst = (temp3 * 0x01...01) >> (bits - 8)
-}
-
-fn genByteSwap(
+fn genUnwrapErrUnionPayloadMir(
self: *CodeGen,
- inst: Air.Inst.Index,
- src_ty: Type,
- src_mcv: MCValue,
- mem_ok: bool,
+ maybe_inst: ?Air.Inst.Index,
+ err_union_ty: Type,
+ err_union: MCValue,
) !MCValue {
const pt = self.pt;
const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const has_movbe = self.hasFeature(.movbe);
-
- if (src_ty.zigTypeTag(zcu) == .vector) return self.fail(
- "TODO implement genByteSwap for {f}",
- .{src_ty.fmt(pt)},
- );
-
- const src_lock = switch (src_mcv) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
-
- const abi_size: u32 = @intCast(src_ty.abiSize(zcu));
- switch (abi_size) {
- 0 => unreachable,
- 1 => return if ((mem_ok or src_mcv.isRegister()) and
- self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_mcv
- else
- try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv),
- 2 => if ((mem_ok or src_mcv.isRegister()) and
- self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- {
- try self.genBinOpMir(.{ ._l, .ro }, src_ty, src_mcv, .{ .immediate = 8 });
- return src_mcv;
- },
- 3...8 => if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
- try self.genUnOpMir(.{ .b_, .swap }, src_ty, src_mcv);
- return src_mcv;
- },
- 9...16 => {
- const mat_src_mcv: MCValue = mat_src_mcv: switch (src_mcv) {
- .register => {
- const frame_index = try self.allocFrameIndex(.initSpill(src_ty, zcu));
- try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, src_mcv, .{});
- break :mat_src_mcv .{ .load_frame = .{ .index = frame_index } };
- },
- .register_pair => |src_regs| if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
- for (src_regs) |src_reg| try self.asmRegister(.{ .b_, .swap }, src_reg.to64());
- return .{ .register_pair = .{ src_regs[1], src_regs[0] } };
- } else src_mcv,
- else => src_mcv,
- };
-
- const dst_regs =
- try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp);
- const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
- defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
-
- for (dst_regs, 0..) |dst_reg, limb_index| {
- if (mat_src_mcv.isBase()) {
- try self.asmRegisterMemory(
- .{ if (has_movbe) ._be else ._, .mov },
- dst_reg.to64(),
- try mat_src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .{ .size = .qword }),
- );
- if (!has_movbe) try self.asmRegister(.{ .b_, .swap }, dst_reg.to64());
- } else {
- try self.asmRegisterRegister(
- .{ ._, .mov },
- dst_reg.to64(),
- mat_src_mcv.register_pair[limb_index].to64(),
- );
- try self.asmRegister(.{ .b_, .swap }, dst_reg.to64());
- }
- }
- return .{ .register_pair = .{ dst_regs[1], dst_regs[0] } };
- },
- else => {
- const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable;
-
- const temp_regs =
- try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp);
- const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs);
- defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
-
- const dst_mcv = try self.allocRegOrMem(inst, false);
- try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[0].to32(), temp_regs[0].to32());
- try self.asmRegisterImmediate(.{ ._, .mov }, temp_regs[1].to32(), .u(limbs_len - 1));
-
- const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
- try self.asmRegisterMemory(
- .{ if (has_movbe) ._be else ._, .mov },
- temp_regs[2].to64(),
- .{
- .base = .{ .frame = dst_mcv.load_frame.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = temp_regs[0].to64(),
- .scale = .@"8",
- .disp = dst_mcv.load_frame.off,
- } },
- },
- );
- try self.asmRegisterMemory(
- .{ if (has_movbe) ._be else ._, .mov },
- temp_regs[3].to64(),
- .{
- .base = .{ .frame = dst_mcv.load_frame.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = temp_regs[1].to64(),
- .scale = .@"8",
- .disp = dst_mcv.load_frame.off,
- } },
- },
- );
- if (!has_movbe) {
- try self.asmRegister(.{ .b_, .swap }, temp_regs[2].to64());
- try self.asmRegister(.{ .b_, .swap }, temp_regs[3].to64());
- }
- try self.asmMemoryRegister(.{ ._, .mov }, .{
- .base = .{ .frame = dst_mcv.load_frame.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = temp_regs[0].to64(),
- .scale = .@"8",
- .disp = dst_mcv.load_frame.off,
- } },
- }, temp_regs[3].to64());
- try self.asmMemoryRegister(.{ ._, .mov }, .{
- .base = .{ .frame = dst_mcv.load_frame.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = temp_regs[1].to64(),
- .scale = .@"8",
- .disp = dst_mcv.load_frame.off,
- } },
- }, temp_regs[2].to64());
- if (self.hasFeature(.slow_incdec)) {
- try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1));
- try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1));
- } else {
- try self.asmRegister(.{ ._c, .in }, temp_regs[0].to32());
- try self.asmRegister(.{ ._c, .de }, temp_regs[1].to32());
- }
- try self.asmRegisterRegister(.{ ._, .cmp }, temp_regs[0].to32(), temp_regs[1].to32());
- _ = try self.asmJccReloc(.be, loop);
- return dst_mcv;
- },
- }
-
- const dst_mcv: MCValue = if (mem_ok and has_movbe and src_mcv.isRegister())
- try self.allocRegOrMem(inst, true)
- else
- .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.gp) };
- if (dst_mcv.getReg()) |dst_reg| {
- const dst_lock = self.register_manager.lockRegAssumeUnused(dst_mcv.register);
- defer self.register_manager.unlockReg(dst_lock);
-
- try self.genSetReg(dst_reg, src_ty, src_mcv, .{});
- switch (abi_size) {
- else => unreachable,
- 2 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }),
- 3...8 => try self.genUnOpMir(.{ .b_, .swap }, src_ty, dst_mcv),
- }
- } else try self.genBinOpMir(.{ ._be, .mov }, src_ty, dst_mcv, src_mcv);
- return dst_mcv;
-}
-
-fn airByteSwap(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-
- const src_ty = self.typeOf(ty_op.operand);
- const src_bits: u32 = @intCast(src_ty.bitSize(zcu));
- const src_mcv = try self.resolveInst(ty_op.operand);
-
- const dst_mcv = try self.genByteSwap(inst, src_ty, src_mcv, true);
- try self.genShiftBinOpMir(
- .{ ._r, switch (if (src_ty.isAbiInt(zcu)) src_ty.intInfo(zcu).signedness else .unsigned) {
- .signed => .sa,
- .unsigned => .sh,
- } },
- src_ty,
- dst_mcv,
- if (src_bits > 256) .u16 else .u8,
- .{ .immediate = src_ty.abiSize(zcu) * 8 - src_bits },
- );
- return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
-}
-
-fn airBitReverse(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-
- const src_ty = self.typeOf(ty_op.operand);
- const abi_size: u32 = @intCast(src_ty.abiSize(zcu));
- const bit_size: u32 = @intCast(src_ty.bitSize(zcu));
- const src_mcv = try self.resolveInst(ty_op.operand);
-
- const dst_mcv = try self.genByteSwap(inst, src_ty, src_mcv, false);
- const dst_locks: [2]?RegisterLock = switch (dst_mcv) {
- .register => |dst_reg| .{ self.register_manager.lockReg(dst_reg), null },
- .register_pair => |dst_regs| self.register_manager.lockRegs(2, dst_regs),
- else => unreachable,
- };
- defer for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- const limb_abi_size: u32 = @min(abi_size, 8);
- const tmp = registerAlias(tmp_reg, limb_abi_size);
- const imm = if (limb_abi_size > 4)
- try self.register_manager.allocReg(null, abi.RegisterClass.gp)
- else
- undefined;
-
- const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_abi_size * 8);
- const imm_0000_1111: Immediate = .u(mask / 0b0001_0001);
- const imm_00_11: Immediate = .u(mask / 0b01_01);
- const imm_0_1: Immediate = .u(mask / 0b1_1);
-
- for (dst_mcv.getRegs()) |dst_reg| {
- const dst = registerAlias(dst_reg, limb_abi_size);
-
- // dst = temp1 = bswap(operand)
- try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
- // tmp = temp1
- try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(4));
- // dst = temp1 >> 4
- if (limb_abi_size > 4) {
- try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111);
- try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
- try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
- } else {
- try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0000_1111);
- try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111);
- }
- // tmp = temp1 & 0x0f...0f
- // dst = (temp1 >> 4) & 0x0f...0f
- try self.asmRegisterImmediate(.{ ._l, .sh }, tmp, .u(4));
- // tmp = (temp1 & 0x0f...0f) << 4
- try self.asmRegisterRegister(.{ ._, .@"or" }, dst, tmp);
- // dst = temp2 = ((temp1 >> 4) & 0x0f...0f) | ((temp1 & 0x0f...0f) << 4)
- try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
- // tmp = temp2
- try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(2));
- // dst = temp2 >> 2
- if (limb_abi_size > 4) {
- try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11);
- try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
- try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
- } else {
- try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11);
- try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11);
- }
- // tmp = temp2 & 0x33...33
- // dst = (temp2 >> 2) & 0x33...33
- try self.asmRegisterMemory(
- .{ ._, .lea },
- if (limb_abi_size > 4) tmp.to64() else tmp.to32(),
- .{
- .base = .{ .reg = dst.to64() },
- .mod = .{ .rm = .{
- .index = tmp.to64(),
- .scale = .@"4",
- } },
- },
- );
- // tmp = temp3 = ((temp2 >> 2) & 0x33...33) + ((temp2 & 0x33...33) << 2)
- try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp);
- // dst = temp3
- try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(1));
- // tmp = temp3 >> 1
- if (limb_abi_size > 4) {
- try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1);
- try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm);
- try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm);
- } else {
- try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0_1);
- try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1);
- }
- // dst = temp3 & 0x55...55
- // tmp = (temp3 >> 1) & 0x55...55
- try self.asmRegisterMemory(
- .{ ._, .lea },
- if (limb_abi_size > 4) dst.to64() else dst.to32(),
- .{
- .base = .{ .reg = tmp.to64() },
- .mod = .{ .rm = .{
- .index = dst.to64(),
- .scale = .@"2",
- } },
- },
- );
- // dst = ((temp3 >> 1) & 0x55...55) + ((temp3 & 0x55...55) << 1)
- }
-
- const extra_bits = abi_size * 8 - bit_size;
- const signedness: std.builtin.Signedness =
- if (src_ty.isAbiInt(zcu)) src_ty.intInfo(zcu).signedness else .unsigned;
- if (extra_bits > 0) try self.genShiftBinOpMir(switch (signedness) {
- .signed => .{ ._r, .sa },
- .unsigned => .{ ._r, .sh },
- }, src_ty, dst_mcv, .u8, .{ .immediate = extra_bits });
-
- return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
-}
-
-fn floatSign(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag, operand: Air.Inst.Ref, ty: Type) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
-
- const result = result: {
- const scalar_bits = ty.scalarType(zcu).floatBits(self.target);
- if (scalar_bits == 80) {
- if (ty.zigTypeTag(zcu) != .float) return self.fail("TODO implement floatSign for {f}", .{
- ty.fmt(pt),
- });
-
- const src_mcv = try self.resolveInst(operand);
- const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
- defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
-
- const dst_mcv: MCValue = .{ .register = .st0 };
- if (!std.meta.eql(src_mcv, dst_mcv) or !self.reuseOperand(inst, operand, 0, src_mcv))
- try self.register_manager.getKnownReg(.st0, inst);
-
- try self.genCopy(ty, dst_mcv, src_mcv, .{});
- switch (tag) {
- .neg => try self.asmOpOnly(.{ .f_, .chs }),
- .abs => try self.asmOpOnly(.{ .f_, .abs }),
- else => unreachable,
- }
- break :result dst_mcv;
- }
-
- const abi_size: u32 = switch (ty.abiSize(zcu)) {
- 1...16 => 16,
- 17...32 => 32,
- else => return self.fail("TODO implement floatSign for {f}", .{
- ty.fmt(pt),
- }),
- };
-
- const src_mcv = try self.resolveInst(operand);
- const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
- defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
-
- const dst_mcv: MCValue = if (src_mcv.isRegister() and
- self.reuseOperand(inst, operand, 0, src_mcv))
- src_mcv
- else if (self.hasFeature(.avx))
- .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
- else
- try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
- const dst_reg = dst_mcv.getReg().?;
- const dst_lock = self.register_manager.lockReg(dst_reg);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+ const payload_ty = err_union_ty.errorUnionPayload(zcu);
- const vec_ty = try pt.vectorType(.{
- .len = @divExact(abi_size * 8, scalar_bits),
- .child = (try pt.intType(.signed, scalar_bits)).ip_index,
- });
+ const result: MCValue = result: {
+ if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
- const sign_mcv = try self.lowerValue(switch (tag) {
- .neg => try vec_ty.minInt(pt, vec_ty),
- .abs => try vec_ty.maxInt(pt, vec_ty),
- else => unreachable,
- });
- const sign_mem: Memory = if (sign_mcv.isBase())
- try sign_mcv.mem(self, .{ .size = .fromSize(abi_size) })
- else
- .{
- .base = .{ .reg = try self.copyToTmpRegister(.usize, sign_mcv.address()) },
- .mod = .{ .rm = .{ .size = .fromSize(abi_size) } },
- };
+ const payload_off: u31 = @intCast(codegen.errUnionPayloadOffset(payload_ty, zcu));
+ switch (err_union) {
+ .load_frame => |frame_addr| break :result .{ .load_frame = .{
+ .index = frame_addr.index,
+ .off = frame_addr.off + payload_off,
+ } },
+ .register => |reg| {
+ // TODO reuse operand
+ const eu_lock = self.register_manager.lockReg(reg);
+ defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
- if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory(
- switch (scalar_bits) {
- 16, 128 => if (abi_size <= 16 or self.hasFeature(.avx2)) switch (tag) {
- .neg => .{ .vp_, .xor },
- .abs => .{ .vp_, .@"and" },
- else => unreachable,
- } else switch (tag) {
- .neg => .{ .v_ps, .xor },
- .abs => .{ .v_ps, .@"and" },
- else => unreachable,
- },
- 32 => switch (tag) {
- .neg => .{ .v_ps, .xor },
- .abs => .{ .v_ps, .@"and" },
- else => unreachable,
- },
- 64 => switch (tag) {
- .neg => .{ .v_pd, .xor },
- .abs => .{ .v_pd, .@"and" },
- else => unreachable,
- },
- 80 => return self.fail("TODO implement floatSign for {f}", .{ty.fmt(pt)}),
- else => unreachable,
- },
- registerAlias(dst_reg, abi_size),
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(ty, src_mcv), abi_size),
- sign_mem,
- ) else try self.asmRegisterMemory(
- switch (scalar_bits) {
- 16, 128 => switch (tag) {
- .neg => .{ .p_, .xor },
- .abs => .{ .p_, .@"and" },
- else => unreachable,
- },
- 32 => switch (tag) {
- .neg => .{ ._ps, .xor },
- .abs => .{ ._ps, .@"and" },
- else => unreachable,
- },
- 64 => switch (tag) {
- .neg => .{ ._pd, .xor },
- .abs => .{ ._pd, .@"and" },
- else => unreachable,
- },
- 80 => return self.fail("TODO implement floatSign for {f}", .{ty.fmt(pt)}),
- else => unreachable,
+ const payload_in_gp = self.regSetForType(payload_ty).supersetOf(abi.RegisterClass.gp);
+ const result_mcv: MCValue = if (payload_in_gp and maybe_inst != null)
+ try self.copyToRegisterWithInstTracking(maybe_inst.?, err_union_ty, err_union)
+ else
+ .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) };
+ if (payload_off > 0) try self.genShiftBinOpMir(
+ .{ ._r, .sh },
+ err_union_ty,
+ result_mcv,
+ .u8,
+ .{ .immediate = @as(u6, @intCast(payload_off * 8)) },
+ ) else try self.truncateRegister(payload_ty, result_mcv.register);
+ break :result if (payload_in_gp)
+ result_mcv
+ else if (maybe_inst) |inst|
+ try self.copyToRegisterWithInstTracking(inst, payload_ty, result_mcv)
+ else
+ .{ .register = try self.copyToTmpRegister(payload_ty, result_mcv) };
},
- registerAlias(dst_reg, abi_size),
- sign_mem,
- );
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ operand, .none, .none });
-}
-
-fn airFloatSign(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
- const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
- const ty = self.typeOf(un_op);
- return self.floatSign(inst, tag, un_op, ty);
-}
-
-fn airRound(self: *CodeGen, inst: Air.Inst.Index, mode: bits.RoundMode) !void {
- const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
- const ty = self.typeOf(un_op);
-
- const result = result: {
- switch (try self.genRoundLibcall(ty, .{ .air_ref = un_op }, mode)) {
- .none => {},
- else => |dst_mcv| break :result dst_mcv,
+ else => return self.fail("TODO implement genUnwrapErrUnionPayloadMir for {f}", .{err_union}),
}
-
- const src_mcv = try self.resolveInst(un_op);
- const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
- src_mcv
- else
- try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
- const dst_reg = dst_mcv.getReg().?;
- const dst_lock = self.register_manager.lockReg(dst_reg);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
- try self.genRound(ty, dst_reg, src_mcv, mode);
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ un_op, .none, .none });
-}
-
-fn getRoundTag(self: *CodeGen, ty: Type) ?Mir.Inst.FixedTag {
- const pt = self.pt;
- const zcu = pt.zcu;
- return if (self.hasFeature(.sse4_1)) switch (ty.zigTypeTag(zcu)) {
- .float => switch (ty.floatBits(self.target)) {
- 32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
- 64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
- 16, 80, 128 => null,
- else => unreachable,
- },
- .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
- .float => switch (ty.childType(zcu).floatBits(self.target)) {
- 32 => switch (ty.vectorLen(zcu)) {
- 1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
- 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round },
- 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null,
- else => null,
- },
- 64 => switch (ty.vectorLen(zcu)) {
- 1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
- 2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round },
- 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null,
- else => null,
- },
- 16, 80, 128 => null,
- else => unreachable,
- },
- else => null,
- },
- else => unreachable,
- } else null;
-}
-
-fn genRoundLibcall(self: *CodeGen, ty: Type, src_mcv: MCValue, mode: bits.RoundMode) !MCValue {
- const pt = self.pt;
- const zcu = pt.zcu;
- if (self.getRoundTag(ty)) |_| return .none;
-
- if (ty.zigTypeTag(zcu) != .float)
- return self.fail("TODO implement genRound for {f}", .{ty.fmt(pt)});
-
- var sym_buf: ["__trunc?".len]u8 = undefined;
- return try self.genCall(.{ .extern_func = .{
- .return_type = ty.toIntern(),
- .param_types = &.{ty.toIntern()},
- .sym = std.fmt.bufPrint(&sym_buf, "{s}{s}{s}", .{
- floatLibcAbiPrefix(ty),
- switch (mode.direction) {
- .down => "floor",
- .up => "ceil",
- .zero => "trunc",
- else => unreachable,
- },
- floatLibcAbiSuffix(ty),
- }) catch unreachable,
- } }, &.{ty}, &.{src_mcv}, .{});
-}
-
-fn genRound(self: *CodeGen, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: bits.RoundMode) !void {
- const pt = self.pt;
- const mir_tag = self.getRoundTag(ty) orelse {
- const result = try self.genRoundLibcall(ty, src_mcv, mode);
- return self.genSetReg(dst_reg, ty, result, .{});
};
- const abi_size: u32 = @intCast(ty.abiSize(pt.zcu));
- const dst_alias = registerAlias(dst_reg, abi_size);
- switch (mir_tag[0]) {
- .v_ss, .v_sd => if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
- mir_tag,
- dst_alias,
- dst_alias,
- try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
- mode.imm(),
- ) else try self.asmRegisterRegisterRegisterImmediate(
- mir_tag,
- dst_alias,
- dst_alias,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(ty, src_mcv), abi_size),
- mode.imm(),
- ),
- else => if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate(
- mir_tag,
- dst_alias,
- try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
- mode.imm(),
- ) else try self.asmRegisterRegisterImmediate(
- mir_tag,
- dst_alias,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(ty, src_mcv), abi_size),
- mode.imm(),
- ),
- }
-}
-
-fn airAbs(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const ty = self.typeOf(ty_op.operand);
-
- const result: MCValue = result: {
- const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(zcu)) {
- else => null,
- .int => switch (ty.abiSize(zcu)) {
- 0 => unreachable,
- 1...8 => {
- try self.spillEflagsIfOccupied();
- const src_mcv = try self.resolveInst(ty_op.operand);
- const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
-
- try self.genUnOpMir(.{ ._, .neg }, ty, dst_mcv);
-
- const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2);
- switch (src_mcv) {
- .register => |val_reg| try self.asmCmovccRegisterRegister(
- .l,
- registerAlias(dst_mcv.register, cmov_abi_size),
- registerAlias(val_reg, cmov_abi_size),
- ),
- .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
- .l,
- registerAlias(dst_mcv.register, cmov_abi_size),
- try src_mcv.mem(self, .{ .size = .fromSize(cmov_abi_size) }),
- ),
- else => {
- const val_reg = try self.copyToTmpRegister(ty, src_mcv);
- try self.asmCmovccRegisterRegister(
- .l,
- registerAlias(dst_mcv.register, cmov_abi_size),
- registerAlias(val_reg, cmov_abi_size),
- );
- },
- }
- break :result dst_mcv;
- },
- 9...16 => {
- try self.spillEflagsIfOccupied();
- const src_mcv = try self.resolveInst(ty_op.operand);
- const dst_mcv = if (src_mcv == .register_pair and
- self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
- const dst_regs = try self.register_manager.allocRegs(
- 2,
- .{ inst, inst },
- abi.RegisterClass.gp,
- );
- const dst_mcv: MCValue = .{ .register_pair = dst_regs };
- const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
- defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
-
- try self.genCopy(ty, dst_mcv, src_mcv, .{});
- break :dst dst_mcv;
- };
- const dst_regs = dst_mcv.register_pair;
- const dst_locks = self.register_manager.lockRegs(2, dst_regs);
- defer for (dst_locks) |dst_lock| if (dst_lock) |lock|
- self.register_manager.unlockReg(lock);
-
- const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]);
- try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, .u(63));
- try self.asmRegisterRegister(.{ ._, .xor }, dst_regs[0], tmp_reg);
- try self.asmRegisterRegister(.{ ._, .xor }, dst_regs[1], tmp_reg);
- try self.asmRegisterRegister(.{ ._, .sub }, dst_regs[0], tmp_reg);
- try self.asmRegisterRegister(.{ ._, .sbb }, dst_regs[1], tmp_reg);
-
- break :result dst_mcv;
- },
- else => {
- const abi_size: u31 = @intCast(ty.abiSize(zcu));
- const limb_len = std.math.divCeil(u31, abi_size, 8) catch unreachable;
-
- const tmp_regs =
- try self.register_manager.allocRegs(3, @splat(null), abi.RegisterClass.gp);
- const tmp_locks = self.register_manager.lockRegsAssumeUnused(3, tmp_regs);
- defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
-
- try self.spillEflagsIfOccupied();
- const src_mcv = try self.resolveInst(ty_op.operand);
- const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_mcv
- else
- try self.allocRegOrMem(inst, false);
-
- try self.asmMemoryImmediate(
- .{ ._, .cmp },
- try dst_mcv.address().offset((limb_len - 1) * 8).deref().mem(self, .{ .size = .qword }),
- .u(0),
- );
- const positive = try self.asmJccReloc(.ns, undefined);
-
- try self.asmRegisterRegister(.{ ._, .xor }, tmp_regs[0].to32(), tmp_regs[0].to32());
- try self.asmRegisterRegister(.{ ._, .xor }, tmp_regs[1].to8(), tmp_regs[1].to8());
-
- const neg_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len);
- try self.asmRegisterRegister(.{ ._, .xor }, tmp_regs[2].to32(), tmp_regs[2].to32());
- try self.asmRegisterImmediate(.{ ._r, .sh }, tmp_regs[1].to8(), .u(1));
- try self.asmRegisterMemory(.{ ._, .sbb }, tmp_regs[2].to64(), .{
- .base = .{ .frame = dst_mcv.load_frame.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = tmp_regs[0].to64(),
- .scale = .@"8",
- .disp = dst_mcv.load_frame.off,
- } },
- });
- try self.asmSetccRegister(.c, tmp_regs[1].to8());
- try self.asmMemoryRegister(.{ ._, .mov }, .{
- .base = .{ .frame = dst_mcv.load_frame.index },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = tmp_regs[0].to64(),
- .scale = .@"8",
- .disp = dst_mcv.load_frame.off,
- } },
- }, tmp_regs[2].to64());
-
- if (self.hasFeature(.slow_incdec)) {
- try self.asmRegisterImmediate(.{ ._, .add }, tmp_regs[0].to32(), .u(1));
- } else {
- try self.asmRegister(.{ ._c, .in }, tmp_regs[0].to32());
- }
- try self.asmRegisterImmediate(.{ ._, .cmp }, tmp_regs[0].to32(), .u(limb_len));
- _ = try self.asmJccReloc(.b, neg_loop);
-
- self.performReloc(positive);
- break :result dst_mcv;
- },
- },
- .float => return self.floatSign(inst, .abs, ty_op.operand, ty),
- .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
- else => null,
- .int => switch (ty.childType(zcu).intInfo(zcu).bits) {
- else => null,
- 8 => switch (ty.vectorLen(zcu)) {
- else => null,
- 1...16 => if (self.hasFeature(.avx))
- .{ .vp_b, .abs }
- else if (self.hasFeature(.ssse3))
- .{ .p_b, .abs }
- else
- null,
- 17...32 => if (self.hasFeature(.avx2)) .{ .vp_b, .abs } else null,
- },
- 16 => switch (ty.vectorLen(zcu)) {
- else => null,
- 1...8 => if (self.hasFeature(.avx))
- .{ .vp_w, .abs }
- else if (self.hasFeature(.ssse3))
- .{ .p_w, .abs }
- else
- null,
- 9...16 => if (self.hasFeature(.avx2)) .{ .vp_w, .abs } else null,
- },
- 32 => switch (ty.vectorLen(zcu)) {
- else => null,
- 1...4 => if (self.hasFeature(.avx))
- .{ .vp_d, .abs }
- else if (self.hasFeature(.ssse3))
- .{ .p_d, .abs }
- else
- null,
- 5...8 => if (self.hasFeature(.avx2)) .{ .vp_d, .abs } else null,
- },
- },
- .float => return self.floatSign(inst, .abs, ty_op.operand, ty),
- },
- }) orelse return self.fail("TODO implement airAbs for {f}", .{ty.fmt(pt)});
- const abi_size: u32 = @intCast(ty.abiSize(zcu));
- const src_mcv = try self.resolveInst(ty_op.operand);
- const dst_reg = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_mcv.getReg().?
- else
- try self.register_manager.allocReg(inst, self.regSetForType(ty));
- const dst_alias = registerAlias(dst_reg, abi_size);
- if (src_mcv.isBase()) try self.asmRegisterMemory(
- mir_tag,
- dst_alias,
- try src_mcv.mem(self, .{ .size = self.memSize(ty) }),
- ) else try self.asmRegisterRegister(
- mir_tag,
- dst_alias,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(ty, src_mcv), abi_size),
- );
- break :result .{ .register = dst_reg };
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+ return result;
}
-fn airSqrt(self: *CodeGen, inst: Air.Inst.Index) !void {
+fn genUnwrapErrUnionPayloadPtrMir(
+ self: *CodeGen,
+ maybe_inst: ?Air.Inst.Index,
+ ptr_ty: Type,
+ ptr_mcv: MCValue,
+) !MCValue {
const pt = self.pt;
const zcu = pt.zcu;
- const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
- const ty = self.typeOf(un_op);
- const abi_size: u32 = @intCast(ty.abiSize(zcu));
+ const err_union_ty = ptr_ty.childType(zcu);
+ const payload_ty = err_union_ty.errorUnionPayload(zcu);
const result: MCValue = result: {
- switch (ty.zigTypeTag(zcu)) {
- .float => {
- const float_bits = ty.floatBits(self.target);
- if (switch (float_bits) {
- 16 => !self.hasFeature(.f16c),
- 32, 64 => false,
- 80, 128 => true,
- else => unreachable,
- }) {
- var sym_buf: ["__sqrt?".len]u8 = undefined;
- break :result try self.genCall(.{ .extern_func = .{
- .return_type = ty.toIntern(),
- .param_types = &.{ty.toIntern()},
- .sym = std.fmt.bufPrint(&sym_buf, "{s}sqrt{s}", .{
- floatLibcAbiPrefix(ty),
- floatLibcAbiSuffix(ty),
- }) catch unreachable,
- } }, &.{ty}, &.{.{ .air_ref = un_op }}, .{});
- }
- },
- else => {},
- }
-
- const src_mcv = try self.resolveInst(un_op);
- const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
- src_mcv
+ const payload_off = codegen.errUnionPayloadOffset(payload_ty, zcu);
+ const result_mcv: MCValue = if (maybe_inst) |inst|
+ try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr_mcv)
else
- try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
- const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
- const dst_lock = self.register_manager.lockReg(dst_reg);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(zcu)) {
- .float => switch (ty.floatBits(self.target)) {
- 16 => {
- assert(self.hasFeature(.f16c));
- const mat_src_reg = if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(ty, src_mcv);
- try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128());
- try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg);
- try self.asmRegisterRegisterImmediate(
- .{ .v_, .cvtps2ph },
- dst_reg,
- dst_reg,
- bits.RoundMode.imm(.{}),
- );
- break :result dst_mcv;
- },
- 32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
- 64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
- else => unreachable,
- },
- .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
- .float => switch (ty.childType(zcu).floatBits(self.target)) {
- 16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen(zcu)) {
- 1 => {
- try self.asmRegisterRegister(
- .{ .v_ps, .cvtph2 },
- dst_reg,
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(ty, src_mcv)).to128(),
- );
- try self.asmRegisterRegisterRegister(
- .{ .v_ss, .sqrt },
- dst_reg,
- dst_reg,
- dst_reg,
- );
- try self.asmRegisterRegisterImmediate(
- .{ .v_, .cvtps2ph },
- dst_reg,
- dst_reg,
- bits.RoundMode.imm(.{}),
- );
- break :result dst_mcv;
- },
- 2...8 => {
- const wide_reg = registerAlias(dst_reg, abi_size * 2);
- if (src_mcv.isBase()) try self.asmRegisterMemory(
- .{ .v_ps, .cvtph2 },
- wide_reg,
- try src_mcv.mem(self, .{ .size = .fromSize(
- @intCast(@divExact(wide_reg.bitSize(), 16)),
- ) }),
- ) else try self.asmRegisterRegister(
- .{ .v_ps, .cvtph2 },
- wide_reg,
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(ty, src_mcv)).to128(),
- );
- try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg);
- try self.asmRegisterRegisterImmediate(
- .{ .v_, .cvtps2ph },
- dst_reg,
- wide_reg,
- bits.RoundMode.imm(.{}),
- );
- break :result dst_mcv;
- },
- else => null,
- } else null,
- 32 => switch (ty.vectorLen(zcu)) {
- 1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
- 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt },
- 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null,
- else => null,
- },
- 64 => switch (ty.vectorLen(zcu)) {
- 1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
- 2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt },
- 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null,
- else => null,
- },
- 80, 128 => null,
- else => unreachable,
- },
- else => unreachable,
- },
- else => unreachable,
- }) orelse return self.fail("TODO implement airSqrt for {f}", .{ty.fmt(pt)});
- switch (mir_tag[0]) {
- .v_ss, .v_sd => if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
- mir_tag,
- dst_reg,
- dst_reg,
- try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
- ) else try self.asmRegisterRegisterRegister(
- mir_tag,
- dst_reg,
- dst_reg,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(ty, src_mcv), abi_size),
- ),
- else => if (src_mcv.isBase()) try self.asmRegisterMemory(
- mir_tag,
- dst_reg,
- try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }),
- ) else try self.asmRegisterRegister(
- mir_tag,
- dst_reg,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(ty, src_mcv), abi_size),
- ),
- }
- break :result dst_mcv;
+ .{ .register = try self.copyToTmpRegister(ptr_ty, ptr_mcv) };
+ try self.genBinOpMir(.{ ._, .add }, ptr_ty, result_mcv, .{ .immediate = payload_off });
+ break :result result_mcv;
};
- return self.finishAir(inst, result, .{ un_op, .none, .none });
-}
-fn airUnaryMath(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
- const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
- const ty = self.typeOf(un_op);
- var sym_buf: ["__round?".len]u8 = undefined;
- const result = try self.genCall(.{ .extern_func = .{
- .return_type = ty.toIntern(),
- .param_types = &.{ty.toIntern()},
- .sym = std.fmt.bufPrint(&sym_buf, "{s}{s}{s}", .{
- floatLibcAbiPrefix(ty),
- switch (tag) {
- .sin,
- .cos,
- .tan,
- .exp,
- .exp2,
- .log,
- .log2,
- .log10,
- .round,
- => @tagName(tag),
- else => unreachable,
- },
- floatLibcAbiSuffix(ty),
- }) catch unreachable,
- } }, &.{ty}, &.{.{ .air_ref = un_op }}, .{});
- return self.finishAir(inst, result, .{ un_op, .none, .none });
+ return result;
}
fn reuseOperand(
@@ -175573,95 +174524,6 @@ fn store(
}
}
-fn genUnOp(self: *CodeGen, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue {
- const pt = self.pt;
- const zcu = pt.zcu;
- const src_ty = self.typeOf(src_air);
- if (src_ty.zigTypeTag(zcu) == .vector)
- return self.fail("TODO implement genUnOp for {f}", .{src_ty.fmt(pt)});
-
- var src_mcv = try self.resolveInst(src_air);
- switch (src_mcv) {
- .eflags => |cc| switch (tag) {
- .not => {
- if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv))
- return .{ .eflags = cc.negate() };
- try self.spillEflagsIfOccupied();
- src_mcv = try self.resolveInst(src_air);
- },
- else => {},
- },
- else => {},
- }
-
- const src_lock = switch (src_mcv) {
- .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
- else => null,
- };
- defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
-
- const dst_mcv: MCValue = dst: {
- if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv)) break :dst src_mcv;
-
- const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, maybe_inst, true);
- try self.genCopy(src_ty, dst_mcv, src_mcv, .{});
- break :dst dst_mcv;
- };
- const dst_lock = switch (dst_mcv) {
- .register => |reg| self.register_manager.lockReg(reg),
- else => null,
- };
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- const abi_size: u16 = @intCast(src_ty.abiSize(zcu));
- switch (tag) {
- .not => {
- const limb_abi_size: u16 = @min(abi_size, 8);
- const int_info: InternPool.Key.IntType = if (src_ty.ip_index == .bool_type)
- .{ .signedness = .unsigned, .bits = 1 }
- else
- src_ty.intInfo(zcu);
- var byte_off: i32 = 0;
- while (byte_off * 8 < int_info.bits) : (byte_off += limb_abi_size) {
- const limb_bits: u16 = @intCast(@min(switch (int_info.signedness) {
- .signed => abi_size * 8,
- .unsigned => int_info.bits,
- } - byte_off * 8, limb_abi_size * 8));
- const limb_ty = try pt.intType(int_info.signedness, limb_bits);
- const limb_mcv = switch (byte_off) {
- 0 => dst_mcv,
- else => dst_mcv.address().offset(byte_off).deref(),
- };
-
- if (int_info.signedness == .unsigned and self.regExtraBits(limb_ty) > 0) {
- const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_bits);
- try self.genBinOpMir(.{ ._, .xor }, limb_ty, limb_mcv, .{ .immediate = mask });
- } else try self.genUnOpMir(.{ ._, .not }, limb_ty, limb_mcv);
- }
- },
- .neg => {
- try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv);
- const bit_size = src_ty.intInfo(zcu).bits;
- if (abi_size * 8 > bit_size) {
- if (dst_mcv.isRegister()) {
- try self.truncateRegister(src_ty, dst_mcv.getReg().?);
- } else {
- const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref();
- try self.genSetReg(tmp_reg, .usize, hi_mcv, .{});
- try self.truncateRegister(src_ty, tmp_reg);
- try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{});
- }
- }
- },
- else => unreachable,
- }
- return dst_mcv;
-}
-
fn genUnOpMir(self: *CodeGen, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MCValue) !void {
const pt = self.pt;
const abi_size: u32 = @intCast(dst_ty.abiSize(pt.zcu));
@@ -176346,1679 +175208,6 @@ fn genShiftBinOpMir(
});
}
-fn genBinOp(
- self: *CodeGen,
- maybe_inst: ?Air.Inst.Index,
- air_tag: Air.Inst.Tag,
- lhs_air: Air.Inst.Ref,
- rhs_air: Air.Inst.Ref,
-) !MCValue {
- const pt = self.pt;
- const zcu = pt.zcu;
- const lhs_ty = self.typeOf(lhs_air);
- const rhs_ty = self.typeOf(rhs_air);
- const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu));
-
- if (lhs_ty.isRuntimeFloat()) libcall: {
- const float_bits = lhs_ty.floatBits(self.target);
- const type_needs_libcall = switch (float_bits) {
- 16 => !self.hasFeature(.f16c),
- 32, 64 => false,
- 80, 128 => true,
- else => unreachable,
- };
- switch (air_tag) {
- .rem, .mod => {},
- else => if (!type_needs_libcall) break :libcall,
- }
- var sym_buf: ["__mod?f3".len]u8 = undefined;
- const sym = switch (air_tag) {
- .add,
- .sub,
- .mul,
- .div_float,
- .div_trunc,
- .div_floor,
- .div_exact,
- => std.fmt.bufPrint(&sym_buf, "__{s}{c}f3", .{
- @tagName(air_tag)[0..3],
- floatCompilerRtAbiName(float_bits),
- }),
- .rem, .mod, .min, .max => std.fmt.bufPrint(&sym_buf, "{s}f{s}{s}", .{
- floatLibcAbiPrefix(lhs_ty),
- switch (air_tag) {
- .rem, .mod => "mod",
- .min => "min",
- .max => "max",
- else => unreachable,
- },
- floatLibcAbiSuffix(lhs_ty),
- }),
- else => return self.fail("TODO implement genBinOp for {s} {f}", .{
- @tagName(air_tag), lhs_ty.fmt(pt),
- }),
- } catch unreachable;
- const result = try self.genCall(.{ .extern_func = .{
- .return_type = lhs_ty.toIntern(),
- .param_types = &.{ lhs_ty.toIntern(), rhs_ty.toIntern() },
- .sym = sym,
- } }, &.{ lhs_ty, rhs_ty }, &.{ .{ .air_ref = lhs_air }, .{ .air_ref = rhs_air } }, .{});
- return switch (air_tag) {
- .mod => result: {
- const adjusted: MCValue = if (type_needs_libcall) adjusted: {
- var add_sym_buf: ["__add?f3".len]u8 = undefined;
- break :adjusted try self.genCall(.{ .extern_func = .{
- .return_type = lhs_ty.toIntern(),
- .param_types = &.{
- lhs_ty.toIntern(),
- rhs_ty.toIntern(),
- },
- .sym = std.fmt.bufPrint(&add_sym_buf, "__add{c}f3", .{
- floatCompilerRtAbiName(float_bits),
- }) catch unreachable,
- } }, &.{ lhs_ty, rhs_ty }, &.{ result, .{ .air_ref = rhs_air } }, .{});
- } else switch (float_bits) {
- 16, 32, 64 => adjusted: {
- const dst_reg = switch (result) {
- .register => |reg| reg,
- else => if (maybe_inst) |inst|
- (try self.copyToRegisterWithInstTracking(inst, lhs_ty, result)).register
- else
- try self.copyToTmpRegister(lhs_ty, result),
- };
- const dst_lock = self.register_manager.lockReg(dst_reg);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- const rhs_mcv = try self.resolveInst(rhs_air);
- const src_mcv: MCValue = if (float_bits == 16) src: {
- assert(self.hasFeature(.f16c));
- const tmp_reg = (try self.register_manager.allocReg(
- null,
- abi.RegisterClass.sse,
- )).to128();
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
- .{ .vp_w, .insr },
- dst_reg,
- dst_reg,
- try rhs_mcv.mem(self, .{ .size = .word }),
- .u(1),
- ) else try self.asmRegisterRegisterRegister(
- .{ .vp_, .unpcklwd },
- dst_reg,
- dst_reg,
- (if (rhs_mcv.isRegister())
- rhs_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, rhs_mcv)).to128(),
- );
- try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
- try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
- break :src .{ .register = tmp_reg };
- } else rhs_mcv;
-
- if (self.hasFeature(.avx)) {
- const mir_tag: Mir.Inst.FixedTag = switch (float_bits) {
- 16, 32 => .{ .v_ss, .add },
- 64 => .{ .v_sd, .add },
- else => unreachable,
- };
- if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
- mir_tag,
- dst_reg,
- dst_reg,
- try src_mcv.mem(self, .{ .size = .fromBitSize(float_bits) }),
- ) else try self.asmRegisterRegisterRegister(
- mir_tag,
- dst_reg,
- dst_reg,
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
- );
- } else {
- const mir_tag: Mir.Inst.FixedTag = switch (float_bits) {
- 32 => .{ ._ss, .add },
- 64 => .{ ._sd, .add },
- else => unreachable,
- };
- if (src_mcv.isBase()) try self.asmRegisterMemory(
- mir_tag,
- dst_reg,
- try src_mcv.mem(self, .{ .size = .fromBitSize(float_bits) }),
- ) else try self.asmRegisterRegister(
- mir_tag,
- dst_reg,
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
- );
- }
-
- if (float_bits == 16) try self.asmRegisterRegisterImmediate(
- .{ .v_, .cvtps2ph },
- dst_reg,
- dst_reg,
- bits.RoundMode.imm(.{}),
- );
- break :adjusted .{ .register = dst_reg };
- },
- 80, 128 => return self.fail("TODO implement genBinOp for {s} of {f}", .{
- @tagName(air_tag), lhs_ty.fmt(pt),
- }),
- else => unreachable,
- };
- break :result try self.genCall(.{ .extern_func = .{
- .return_type = lhs_ty.toIntern(),
- .param_types = &.{ lhs_ty.toIntern(), rhs_ty.toIntern() },
- .sym = sym,
- } }, &.{ lhs_ty, rhs_ty }, &.{ adjusted, .{ .air_ref = rhs_air } }, .{});
- },
- .div_trunc, .div_floor => try self.genRoundLibcall(lhs_ty, result, .{
- .direction = switch (air_tag) {
- .div_trunc => .zero,
- .div_floor => .down,
- else => unreachable,
- },
- .precision = .inexact,
- }),
- else => result,
- };
- }
-
- const sse_op = switch (lhs_ty.zigTypeTag(zcu)) {
- else => false,
- .float => true,
- .vector => switch (lhs_ty.childType(zcu).toIntern()) {
- .bool_type, .u1_type => false,
- else => true,
- },
- };
- if (sse_op and ((lhs_ty.scalarType(zcu).isRuntimeFloat() and
- lhs_ty.scalarType(zcu).floatBits(self.target) == 80) or
- lhs_ty.abiSize(zcu) > self.vectorSize(.float)))
- return self.fail("TODO implement genBinOp for {s} {f}", .{ @tagName(air_tag), lhs_ty.fmt(pt) });
-
- const maybe_mask_reg = switch (air_tag) {
- else => null,
- .rem, .mod => unreachable,
- .max, .min => if (lhs_ty.scalarType(zcu).isRuntimeFloat()) registerAlias(
- if (!self.hasFeature(.avx) and self.hasFeature(.sse4_1)) mask: {
- try self.register_manager.getKnownReg(.xmm0, null);
- break :mask .xmm0;
- } else try self.register_manager.allocReg(null, abi.RegisterClass.sse),
- abi_size,
- ) else null,
- };
- const mask_lock =
- if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null;
- defer if (mask_lock) |lock| self.register_manager.unlockReg(lock);
-
- const ordered_air: [2]Air.Inst.Ref = if (lhs_ty.isVector(zcu) and
- switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
- .bool => false,
- .int => switch (air_tag) {
- .cmp_lt, .cmp_gte => true,
- else => false,
- },
- .float => switch (air_tag) {
- .cmp_gte, .cmp_gt => true,
- else => false,
- },
- else => unreachable,
- }) .{ rhs_air, lhs_air } else .{ lhs_air, rhs_air };
-
- if (lhs_ty.isAbiInt(zcu)) for (ordered_air) |op_air| {
- switch (try self.resolveInst(op_air)) {
- .register => |op_reg| switch (op_reg.class()) {
- .sse => try self.register_manager.getReg(op_reg, null),
- else => {},
- },
- else => {},
- }
- };
-
- const lhs_mcv = try self.resolveInst(ordered_air[0]);
- var rhs_mcv = try self.resolveInst(ordered_air[1]);
- switch (lhs_mcv) {
- .immediate => |imm| switch (imm) {
- 0 => switch (air_tag) {
- .sub, .sub_wrap => return self.genUnOp(maybe_inst, .neg, ordered_air[1]),
- else => {},
- },
- else => {},
- },
- else => {},
- }
-
- const is_commutative = switch (air_tag) {
- .add,
- .add_wrap,
- .mul,
- .bool_or,
- .bit_or,
- .bool_and,
- .bit_and,
- .xor,
- .min,
- .max,
- .cmp_eq,
- .cmp_neq,
- => true,
-
- else => false,
- };
-
- const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) {
- .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null },
- .register_pair => |lhs_regs| locks: {
- const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs);
- break :locks .{ locks[0], locks[1] };
- },
- else => @splat(null),
- };
- defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
-
- const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) {
- .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null },
- .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs),
- else => @splat(null),
- };
- defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
-
- var flipped = false;
- var copied_to_dst = true;
- const dst_mcv: MCValue = dst: {
- const tracked_inst = switch (air_tag) {
- else => maybe_inst,
- .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => null,
- };
- if (maybe_inst) |inst| {
- if ((!sse_op or lhs_mcv.isRegister()) and
- self.reuseOperandAdvanced(inst, ordered_air[0], 0, lhs_mcv, tracked_inst))
- break :dst lhs_mcv;
- if (is_commutative and (!sse_op or rhs_mcv.isRegister()) and
- self.reuseOperandAdvanced(inst, ordered_air[1], 1, rhs_mcv, tracked_inst))
- {
- flipped = true;
- break :dst rhs_mcv;
- }
- }
- const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, tracked_inst, true);
- if (sse_op and lhs_mcv.isRegister() and self.hasFeature(.avx))
- copied_to_dst = false
- else
- try self.genCopy(lhs_ty, dst_mcv, lhs_mcv, .{});
- rhs_mcv = try self.resolveInst(ordered_air[1]);
- break :dst dst_mcv;
- };
- const dst_locks: [2]?RegisterLock = switch (dst_mcv) {
- .register => |dst_reg| .{ self.register_manager.lockReg(dst_reg), null },
- .register_pair => |dst_regs| self.register_manager.lockRegs(2, dst_regs),
- else => @splat(null),
- };
- defer for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- const unmat_src_mcv = if (flipped) lhs_mcv else rhs_mcv;
- const src_mcv: MCValue = if (maybe_mask_reg) |mask_reg|
- if (self.hasFeature(.avx) and unmat_src_mcv.isRegister() and maybe_inst != null and
- self.liveness.operandDies(maybe_inst.?, if (flipped) 0 else 1)) unmat_src_mcv else src: {
- try self.genSetReg(mask_reg, rhs_ty, unmat_src_mcv, .{});
- break :src .{ .register = mask_reg };
- }
- else
- unmat_src_mcv;
- const src_locks: [2]?RegisterLock = switch (src_mcv) {
- .register => |src_reg| .{ self.register_manager.lockReg(src_reg), null },
- .register_pair => |src_regs| self.register_manager.lockRegs(2, src_regs),
- else => @splat(null),
- };
- defer for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock);
-
- if (!sse_op) {
- switch (air_tag) {
- .add,
- .add_wrap,
- => try self.genBinOpMir(.{ ._, .add }, lhs_ty, dst_mcv, src_mcv),
-
- .sub,
- .sub_wrap,
- => try self.genBinOpMir(.{ ._, .sub }, lhs_ty, dst_mcv, src_mcv),
-
- .ptr_add,
- .ptr_sub,
- => {
- const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv);
- const tmp_mcv = MCValue{ .register = tmp_reg };
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- const elem_size = lhs_ty.elemType2(zcu).abiSize(zcu);
- try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size });
- try self.genBinOpMir(
- switch (air_tag) {
- .ptr_add => .{ ._, .add },
- .ptr_sub => .{ ._, .sub },
- else => unreachable,
- },
- lhs_ty,
- dst_mcv,
- tmp_mcv,
- );
- },
-
- .bool_or,
- .bit_or,
- => try self.genBinOpMir(.{ ._, .@"or" }, lhs_ty, dst_mcv, src_mcv),
-
- .bool_and,
- .bit_and,
- => try self.genBinOpMir(.{ ._, .@"and" }, lhs_ty, dst_mcv, src_mcv),
-
- .xor => try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv),
-
- .min,
- .max,
- => {
- const resolved_src_mcv = switch (src_mcv) {
- else => src_mcv,
- .air_ref => |src_ref| try self.resolveInst(src_ref),
- };
-
- if (abi_size > 8) {
- const dst_regs = switch (dst_mcv) {
- .register_pair => |dst_regs| dst_regs,
- else => dst: {
- const dst_regs = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp);
- const dst_regs_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
- defer for (dst_regs_locks) |lock| self.register_manager.unlockReg(lock);
-
- try self.genCopy(lhs_ty, .{ .register_pair = dst_regs }, dst_mcv, .{});
- break :dst dst_regs;
- },
- };
- const dst_regs_locks = self.register_manager.lockRegs(2, dst_regs);
- defer for (dst_regs_locks) |dst_lock| if (dst_lock) |lock|
- self.register_manager.unlockReg(lock);
-
- const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- const signed = lhs_ty.isSignedInt(zcu);
- const cc: Condition = switch (air_tag) {
- .min => if (signed) .nl else .nb,
- .max => if (signed) .nge else .nae,
- else => unreachable,
- };
-
- try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]);
- if (src_mcv.isBase()) {
- try self.asmRegisterMemory(
- .{ ._, .cmp },
- dst_regs[0],
- try src_mcv.mem(self, .{ .size = .qword }),
- );
- try self.asmRegisterMemory(
- .{ ._, .sbb },
- tmp_reg,
- try src_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
- );
- try self.asmCmovccRegisterMemory(
- cc,
- dst_regs[0],
- try src_mcv.mem(self, .{ .size = .qword }),
- );
- try self.asmCmovccRegisterMemory(
- cc,
- dst_regs[1],
- try src_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }),
- );
- } else {
- try self.asmRegisterRegister(
- .{ ._, .cmp },
- dst_regs[0],
- src_mcv.register_pair[0],
- );
- try self.asmRegisterRegister(
- .{ ._, .sbb },
- tmp_reg,
- src_mcv.register_pair[1],
- );
- try self.asmCmovccRegisterRegister(cc, dst_regs[0], src_mcv.register_pair[0]);
- try self.asmCmovccRegisterRegister(cc, dst_regs[1], src_mcv.register_pair[1]);
- }
- try self.genCopy(lhs_ty, dst_mcv, .{ .register_pair = dst_regs }, .{});
- } else {
- const mat_src_mcv: MCValue = if (switch (resolved_src_mcv) {
- .immediate,
- .eflags,
- .register_offset,
- .lea_frame,
- .load_nav,
- .lea_nav,
- .load_uav,
- .lea_uav,
- .load_lazy_sym,
- .lea_lazy_sym,
- .load_extern_func,
- .lea_extern_func,
- => true,
- .memory => |addr| std.math.cast(i32, @as(i64, @bitCast(addr))) == null,
- else => false,
- .register_pair,
- .register_overflow,
- => unreachable,
- })
- .{ .register = try self.copyToTmpRegister(rhs_ty, resolved_src_mcv) }
- else
- resolved_src_mcv;
- const mat_mcv_lock = switch (mat_src_mcv) {
- .register => |reg| self.register_manager.lockReg(reg),
- else => null,
- };
- defer if (mat_mcv_lock) |lock| self.register_manager.unlockReg(lock);
-
- try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, dst_mcv, mat_src_mcv);
-
- const int_info = lhs_ty.intInfo(zcu);
- const cc: Condition = switch (int_info.signedness) {
- .unsigned => switch (air_tag) {
- .min => .a,
- .max => .b,
- else => unreachable,
- },
- .signed => switch (air_tag) {
- .min => .g,
- .max => .l,
- else => unreachable,
- },
- };
-
- const cmov_abi_size = @max(@as(u32, @intCast(lhs_ty.abiSize(zcu))), 2);
- const tmp_reg = switch (dst_mcv) {
- .register => |reg| reg,
- else => try self.copyToTmpRegister(lhs_ty, dst_mcv),
- };
- const tmp_lock = self.register_manager.lockReg(tmp_reg);
- defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);
- switch (mat_src_mcv) {
- .none,
- .unreach,
- .dead,
- .undef,
- .immediate,
- .eflags,
- .register_pair,
- .register_triple,
- .register_quadruple,
- .register_offset,
- .register_overflow,
- .register_mask,
- .indirect_load_frame,
- .lea_frame,
- .load_nav,
- .lea_nav,
- .load_uav,
- .lea_uav,
- .load_lazy_sym,
- .lea_lazy_sym,
- .load_extern_func,
- .lea_extern_func,
- .elementwise_args,
- .reserved_frame,
- .air_ref,
- => unreachable,
- .register => |src_reg| try self.asmCmovccRegisterRegister(
- cc,
- registerAlias(tmp_reg, cmov_abi_size),
- registerAlias(src_reg, cmov_abi_size),
- ),
- .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
- cc,
- registerAlias(tmp_reg, cmov_abi_size),
- switch (mat_src_mcv) {
- .memory => |addr| .{
- .base = .{ .reg = .ds },
- .mod = .{ .rm = .{
- .size = .fromSize(cmov_abi_size),
- .disp = @intCast(@as(i64, @bitCast(addr))),
- } },
- },
- .indirect => |reg_off| .{
- .base = .{ .reg = reg_off.reg },
- .mod = .{ .rm = .{
- .size = .fromSize(cmov_abi_size),
- .disp = reg_off.off,
- } },
- },
- .load_frame => |frame_addr| .{
- .base = .{ .frame = frame_addr.index },
- .mod = .{ .rm = .{
- .size = .fromSize(cmov_abi_size),
- .disp = frame_addr.off,
- } },
- },
- else => unreachable,
- },
- ),
- }
- try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg }, .{});
- }
- },
-
- .cmp_eq, .cmp_neq => {
- assert(lhs_ty.isVector(zcu) and lhs_ty.childType(zcu).toIntern() == .bool_type);
- try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv);
- switch (air_tag) {
- .cmp_eq => try self.genUnOpMir(.{ ._, .not }, lhs_ty, dst_mcv),
- .cmp_neq => {},
- else => unreachable,
- }
- },
-
- else => return self.fail("TODO implement genBinOp for {s} {f}", .{
- @tagName(air_tag), lhs_ty.fmt(pt),
- }),
- }
- return dst_mcv;
- }
-
- const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
- const mir_tag = @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) {
- else => unreachable,
- .float => switch (lhs_ty.floatBits(self.target)) {
- 16 => {
- assert(self.hasFeature(.f16c));
- const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
-
- const tmp_reg = (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128();
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
- .{ .vp_w, .insr },
- dst_reg,
- lhs_reg,
- try src_mcv.mem(self, .{ .size = .word }),
- .u(1),
- ) else try self.asmRegisterRegisterRegister(
- .{ .vp_, .unpcklwd },
- dst_reg,
- lhs_reg,
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
- );
- try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
- try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
- try self.asmRegisterRegisterRegister(
- switch (air_tag) {
- .add => .{ .v_ss, .add },
- .sub => .{ .v_ss, .sub },
- .mul => .{ .v_ss, .mul },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
- .max => .{ .v_ss, .max },
- .min => .{ .v_ss, .min },
- else => unreachable,
- },
- dst_reg,
- dst_reg,
- tmp_reg,
- );
- switch (air_tag) {
- .div_trunc, .div_floor => try self.asmRegisterRegisterRegisterImmediate(
- .{ .v_ss, .round },
- dst_reg,
- dst_reg,
- dst_reg,
- bits.RoundMode.imm(.{
- .direction = switch (air_tag) {
- .div_trunc => .zero,
- .div_floor => .down,
- else => unreachable,
- },
- .precision = .inexact,
- }),
- ),
- else => {},
- }
- try self.asmRegisterRegisterImmediate(
- .{ .v_, .cvtps2ph },
- dst_reg,
- dst_reg,
- bits.RoundMode.imm(.{}),
- );
- return dst_mcv;
- },
- 32 => switch (air_tag) {
- .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
- .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
- .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
- .div_float,
- .div_trunc,
- .div_floor,
- .div_exact,
- => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
- .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
- .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
- else => unreachable,
- },
- 64 => switch (air_tag) {
- .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
- .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
- .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
- .div_float,
- .div_trunc,
- .div_floor,
- .div_exact,
- => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
- .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
- .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
- else => unreachable,
- },
- 80, 128 => null,
- else => unreachable,
- },
- .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
- else => null,
- .int => switch (lhs_ty.childType(zcu).intInfo(zcu).bits) {
- 8 => switch (lhs_ty.vectorLen(zcu)) {
- 1...16 => switch (air_tag) {
- .add,
- .add_wrap,
- => if (self.hasFeature(.avx)) .{ .vp_b, .add } else .{ .p_b, .add },
- .sub,
- .sub_wrap,
- => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub },
- .bit_and => if (self.hasFeature(.avx))
- .{ .vp_, .@"and" }
- else
- .{ .p_, .@"and" },
- .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
- .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
- .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx))
- .{ .vp_b, .mins }
- else if (self.hasFeature(.sse4_1))
- .{ .p_b, .mins }
- else
- null,
- .unsigned => if (self.hasFeature(.avx))
- .{ .vp_b, .minu }
- else if (self.hasFeature(.sse4_1))
- .{ .p_b, .minu }
- else
- null,
- },
- .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx))
- .{ .vp_b, .maxs }
- else if (self.hasFeature(.sse4_1))
- .{ .p_b, .maxs }
- else
- null,
- .unsigned => if (self.hasFeature(.avx))
- .{ .vp_b, .maxu }
- else if (self.hasFeature(.sse4_1))
- .{ .p_b, .maxu }
- else
- null,
- },
- .cmp_lt,
- .cmp_lte,
- .cmp_gte,
- .cmp_gt,
- => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx))
- .{ .vp_b, .cmpgt }
- else
- .{ .p_b, .cmpgt },
- .unsigned => null,
- },
- .cmp_eq,
- .cmp_neq,
- => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else .{ .p_b, .cmpeq },
- else => null,
- },
- 17...32 => switch (air_tag) {
- .add,
- .add_wrap,
- => if (self.hasFeature(.avx2)) .{ .vp_b, .add } else null,
- .sub,
- .sub_wrap,
- => if (self.hasFeature(.avx2)) .{ .vp_b, .sub } else null,
- .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
- .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
- .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
- .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .mins } else null,
- .unsigned => if (self.hasFeature(.avx)) .{ .vp_b, .minu } else null,
- },
- .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null,
- .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null,
- },
- .cmp_lt,
- .cmp_lte,
- .cmp_gte,
- .cmp_gt,
- => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx)) .{ .vp_b, .cmpgt } else null,
- .unsigned => null,
- },
- .cmp_eq,
- .cmp_neq,
- => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else null,
- else => null,
- },
- else => null,
- },
- 16 => switch (lhs_ty.vectorLen(zcu)) {
- 1...8 => switch (air_tag) {
- .add,
- .add_wrap,
- => if (self.hasFeature(.avx)) .{ .vp_w, .add } else .{ .p_w, .add },
- .sub,
- .sub_wrap,
- => if (self.hasFeature(.avx)) .{ .vp_w, .sub } else .{ .p_w, .sub },
- .mul,
- .mul_wrap,
- => if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_d, .mull },
- .bit_and => if (self.hasFeature(.avx))
- .{ .vp_, .@"and" }
- else
- .{ .p_, .@"and" },
- .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
- .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
- .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx))
- .{ .vp_w, .mins }
- else
- .{ .p_w, .mins },
- .unsigned => if (self.hasFeature(.avx))
- .{ .vp_w, .minu }
- else
- .{ .p_w, .minu },
- },
- .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx))
- .{ .vp_w, .maxs }
- else
- .{ .p_w, .maxs },
- .unsigned => if (self.hasFeature(.avx))
- .{ .vp_w, .maxu }
- else
- .{ .p_w, .maxu },
- },
- .cmp_lt,
- .cmp_lte,
- .cmp_gte,
- .cmp_gt,
- => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx))
- .{ .vp_w, .cmpgt }
- else
- .{ .p_w, .cmpgt },
- .unsigned => null,
- },
- .cmp_eq,
- .cmp_neq,
- => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else .{ .p_w, .cmpeq },
- else => null,
- },
- 9...16 => switch (air_tag) {
- .add,
- .add_wrap,
- => if (self.hasFeature(.avx2)) .{ .vp_w, .add } else null,
- .sub,
- .sub_wrap,
- => if (self.hasFeature(.avx2)) .{ .vp_w, .sub } else null,
- .mul,
- .mul_wrap,
- => if (self.hasFeature(.avx2)) .{ .vp_w, .mull } else null,
- .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
- .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
- .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
- .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .mins } else null,
- .unsigned => if (self.hasFeature(.avx)) .{ .vp_w, .minu } else null,
- },
- .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null,
- .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null,
- },
- .cmp_lt,
- .cmp_lte,
- .cmp_gte,
- .cmp_gt,
- => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx)) .{ .vp_w, .cmpgt } else null,
- .unsigned => null,
- },
- .cmp_eq,
- .cmp_neq,
- => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else null,
- else => null,
- },
- else => null,
- },
- 32 => switch (lhs_ty.vectorLen(zcu)) {
- 1...4 => switch (air_tag) {
- .add,
- .add_wrap,
- => if (self.hasFeature(.avx)) .{ .vp_d, .add } else .{ .p_d, .add },
- .sub,
- .sub_wrap,
- => if (self.hasFeature(.avx)) .{ .vp_d, .sub } else .{ .p_d, .sub },
- .mul,
- .mul_wrap,
- => if (self.hasFeature(.avx))
- .{ .vp_d, .mull }
- else if (self.hasFeature(.sse4_1))
- .{ .p_d, .mull }
- else
- null,
- .bit_and => if (self.hasFeature(.avx))
- .{ .vp_, .@"and" }
- else
- .{ .p_, .@"and" },
- .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
- .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
- .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx))
- .{ .vp_d, .mins }
- else if (self.hasFeature(.sse4_1))
- .{ .p_d, .mins }
- else
- null,
- .unsigned => if (self.hasFeature(.avx))
- .{ .vp_d, .minu }
- else if (self.hasFeature(.sse4_1))
- .{ .p_d, .minu }
- else
- null,
- },
- .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx))
- .{ .vp_d, .maxs }
- else if (self.hasFeature(.sse4_1))
- .{ .p_d, .maxs }
- else
- null,
- .unsigned => if (self.hasFeature(.avx))
- .{ .vp_d, .maxu }
- else if (self.hasFeature(.sse4_1))
- .{ .p_d, .maxu }
- else
- null,
- },
- .cmp_lt,
- .cmp_lte,
- .cmp_gte,
- .cmp_gt,
- => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx))
- .{ .vp_d, .cmpgt }
- else
- .{ .p_d, .cmpgt },
- .unsigned => null,
- },
- .cmp_eq,
- .cmp_neq,
- => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else .{ .p_d, .cmpeq },
- else => null,
- },
- 5...8 => switch (air_tag) {
- .add,
- .add_wrap,
- => if (self.hasFeature(.avx2)) .{ .vp_d, .add } else null,
- .sub,
- .sub_wrap,
- => if (self.hasFeature(.avx2)) .{ .vp_d, .sub } else null,
- .mul,
- .mul_wrap,
- => if (self.hasFeature(.avx2)) .{ .vp_d, .mull } else null,
- .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
- .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
- .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
- .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .mins } else null,
- .unsigned => if (self.hasFeature(.avx)) .{ .vp_d, .minu } else null,
- },
- .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null,
- .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null,
- },
- .cmp_lt,
- .cmp_lte,
- .cmp_gte,
- .cmp_gt,
- => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null,
- .unsigned => null,
- },
- .cmp_eq,
- .cmp_neq,
- => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else null,
- else => null,
- },
- else => null,
- },
- 64 => switch (lhs_ty.vectorLen(zcu)) {
- 1...2 => switch (air_tag) {
- .add,
- .add_wrap,
- => if (self.hasFeature(.avx)) .{ .vp_q, .add } else .{ .p_q, .add },
- .sub,
- .sub_wrap,
- => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub },
- .bit_and => if (self.hasFeature(.avx))
- .{ .vp_, .@"and" }
- else
- .{ .p_, .@"and" },
- .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
- .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
- .cmp_lt,
- .cmp_lte,
- .cmp_gte,
- .cmp_gt,
- => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx))
- .{ .vp_q, .cmpgt }
- else if (self.hasFeature(.sse4_2))
- .{ .p_q, .cmpgt }
- else
- null,
- .unsigned => null,
- },
- .cmp_eq,
- .cmp_neq,
- => if (self.hasFeature(.avx))
- .{ .vp_q, .cmpeq }
- else if (self.hasFeature(.sse4_1))
- .{ .p_q, .cmpeq }
- else
- null,
- else => null,
- },
- 3...4 => switch (air_tag) {
- .add,
- .add_wrap,
- => if (self.hasFeature(.avx2)) .{ .vp_q, .add } else null,
- .sub,
- .sub_wrap,
- => if (self.hasFeature(.avx2)) .{ .vp_q, .sub } else null,
- .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
- .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
- .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
- .cmp_eq,
- .cmp_neq,
- => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else null,
- .cmp_lt,
- .cmp_lte,
- .cmp_gt,
- .cmp_gte,
- => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) {
- .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null,
- .unsigned => null,
- },
- else => null,
- },
- else => null,
- },
- else => null,
- },
- .float => switch (lhs_ty.childType(zcu).floatBits(self.target)) {
- 16 => tag: {
- assert(self.hasFeature(.f16c));
- const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
- switch (lhs_ty.vectorLen(zcu)) {
- 1 => {
- const tmp_reg =
- (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128();
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
- .{ .vp_w, .insr },
- dst_reg,
- lhs_reg,
- try src_mcv.mem(self, .{ .size = .word }),
- .u(1),
- ) else try self.asmRegisterRegisterRegister(
- .{ .vp_, .unpcklwd },
- dst_reg,
- lhs_reg,
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
- );
- try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
- try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
- try self.asmRegisterRegisterRegister(
- switch (air_tag) {
- .add => .{ .v_ss, .add },
- .sub => .{ .v_ss, .sub },
- .mul => .{ .v_ss, .mul },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
- .max => .{ .v_ss, .max },
- .min => .{ .v_ss, .max },
- else => unreachable,
- },
- dst_reg,
- dst_reg,
- tmp_reg,
- );
- try self.asmRegisterRegisterImmediate(
- .{ .v_, .cvtps2ph },
- dst_reg,
- dst_reg,
- bits.RoundMode.imm(.{}),
- );
- return dst_mcv;
- },
- 2 => {
- const tmp_reg = (try self.register_manager.allocReg(
- null,
- abi.RegisterClass.sse,
- )).to128();
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
- .{ .vp_d, .insr },
- dst_reg,
- lhs_reg,
- try src_mcv.mem(self, .{ .size = .dword }),
- .u(1),
- ) else try self.asmRegisterRegisterRegister(
- .{ .v_ps, .unpckl },
- dst_reg,
- lhs_reg,
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
- );
- try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
- try self.asmRegisterRegisterRegister(
- .{ .v_ps, .movhl },
- tmp_reg,
- dst_reg,
- dst_reg,
- );
- try self.asmRegisterRegisterRegister(
- switch (air_tag) {
- .add => .{ .v_ps, .add },
- .sub => .{ .v_ps, .sub },
- .mul => .{ .v_ps, .mul },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
- .max => .{ .v_ps, .max },
- .min => .{ .v_ps, .max },
- else => unreachable,
- },
- dst_reg,
- dst_reg,
- tmp_reg,
- );
- try self.asmRegisterRegisterImmediate(
- .{ .v_, .cvtps2ph },
- dst_reg,
- dst_reg,
- bits.RoundMode.imm(.{}),
- );
- return dst_mcv;
- },
- 3...4 => {
- const tmp_reg = (try self.register_manager.allocReg(
- null,
- abi.RegisterClass.sse,
- )).to128();
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, lhs_reg);
- if (src_mcv.isBase()) try self.asmRegisterMemory(
- .{ .v_ps, .cvtph2 },
- tmp_reg,
- try src_mcv.mem(self, .{ .size = .qword }),
- ) else try self.asmRegisterRegister(
- .{ .v_ps, .cvtph2 },
- tmp_reg,
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
- );
- try self.asmRegisterRegisterRegister(
- switch (air_tag) {
- .add => .{ .v_ps, .add },
- .sub => .{ .v_ps, .sub },
- .mul => .{ .v_ps, .mul },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
- .max => .{ .v_ps, .max },
- .min => .{ .v_ps, .max },
- else => unreachable,
- },
- dst_reg,
- dst_reg,
- tmp_reg,
- );
- try self.asmRegisterRegisterImmediate(
- .{ .v_, .cvtps2ph },
- dst_reg,
- dst_reg,
- bits.RoundMode.imm(.{}),
- );
- return dst_mcv;
- },
- 5...8 => {
- const tmp_reg = (try self.register_manager.allocReg(
- null,
- abi.RegisterClass.sse,
- )).to256();
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), lhs_reg);
- if (src_mcv.isBase()) try self.asmRegisterMemory(
- .{ .v_ps, .cvtph2 },
- tmp_reg,
- try src_mcv.mem(self, .{ .size = .xword }),
- ) else try self.asmRegisterRegister(
- .{ .v_ps, .cvtph2 },
- tmp_reg,
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
- );
- try self.asmRegisterRegisterRegister(
- switch (air_tag) {
- .add => .{ .v_ps, .add },
- .sub => .{ .v_ps, .sub },
- .mul => .{ .v_ps, .mul },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
- .max => .{ .v_ps, .max },
- .min => .{ .v_ps, .max },
- else => unreachable,
- },
- dst_reg.to256(),
- dst_reg.to256(),
- tmp_reg,
- );
- try self.asmRegisterRegisterImmediate(
- .{ .v_, .cvtps2ph },
- dst_reg,
- dst_reg.to256(),
- bits.RoundMode.imm(.{}),
- );
- return dst_mcv;
- },
- else => break :tag null,
- }
- },
- 32 => switch (lhs_ty.vectorLen(zcu)) {
- 1 => switch (air_tag) {
- .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
- .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
- .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
- .div_float,
- .div_trunc,
- .div_floor,
- .div_exact,
- => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
- .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
- .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
- .cmp_lt,
- .cmp_lte,
- .cmp_eq,
- .cmp_gte,
- .cmp_gt,
- .cmp_neq,
- => if (self.hasFeature(.avx)) .{ .v_ss, .cmp } else .{ ._ss, .cmp },
- else => unreachable,
- },
- 2...4 => switch (air_tag) {
- .add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add },
- .sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub },
- .mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul },
- .div_float,
- .div_trunc,
- .div_floor,
- .div_exact,
- => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div },
- .max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max },
- .min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min },
- .cmp_lt,
- .cmp_lte,
- .cmp_eq,
- .cmp_gte,
- .cmp_gt,
- .cmp_neq,
- => if (self.hasFeature(.avx)) .{ .v_ps, .cmp } else .{ ._ps, .cmp },
- else => unreachable,
- },
- 5...8 => if (self.hasFeature(.avx)) switch (air_tag) {
- .add => .{ .v_ps, .add },
- .sub => .{ .v_ps, .sub },
- .mul => .{ .v_ps, .mul },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
- .max => .{ .v_ps, .max },
- .min => .{ .v_ps, .min },
- .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_ps, .cmp },
- else => unreachable,
- } else null,
- else => null,
- },
- 64 => switch (lhs_ty.vectorLen(zcu)) {
- 1 => switch (air_tag) {
- .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
- .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
- .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
- .div_float,
- .div_trunc,
- .div_floor,
- .div_exact,
- => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
- .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
- .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
- .cmp_lt,
- .cmp_lte,
- .cmp_eq,
- .cmp_gte,
- .cmp_gt,
- .cmp_neq,
- => if (self.hasFeature(.avx)) .{ .v_sd, .cmp } else .{ ._sd, .cmp },
- else => unreachable,
- },
- 2 => switch (air_tag) {
- .add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add },
- .sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub },
- .mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul },
- .div_float,
- .div_trunc,
- .div_floor,
- .div_exact,
- => if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div },
- .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max },
- .min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min },
- .cmp_lt,
- .cmp_lte,
- .cmp_eq,
- .cmp_gte,
- .cmp_gt,
- .cmp_neq,
- => if (self.hasFeature(.avx)) .{ .v_pd, .cmp } else .{ ._pd, .cmp },
- else => unreachable,
- },
- 3...4 => if (self.hasFeature(.avx)) switch (air_tag) {
- .add => .{ .v_pd, .add },
- .sub => .{ .v_pd, .sub },
- .mul => .{ .v_pd, .mul },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div },
- .max => .{ .v_pd, .max },
- .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_pd, .cmp },
- .min => .{ .v_pd, .min },
- else => unreachable,
- } else null,
- else => null,
- },
- 80, 128 => null,
- else => unreachable,
- },
- },
- }) orelse return self.fail("TODO implement genBinOp for {s} {f}", .{
- @tagName(air_tag), lhs_ty.fmt(pt),
- });
-
- const lhs_copy_reg = if (maybe_mask_reg) |_| registerAlias(
- if (copied_to_dst) try self.copyToTmpRegister(lhs_ty, dst_mcv) else lhs_mcv.getReg().?,
- abi_size,
- ) else null;
- const lhs_copy_lock = if (lhs_copy_reg) |reg| self.register_manager.lockReg(reg) else null;
- defer if (lhs_copy_lock) |lock| self.register_manager.unlockReg(lock);
-
- switch (mir_tag[1]) {
- else => if (self.hasFeature(.avx)) {
- const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
- if (src_mcv.isBase()) try self.asmRegisterRegisterMemory(
- mir_tag,
- dst_reg,
- lhs_reg,
- try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) {
- else => .fromSize(abi_size),
- .vector => dst_reg.size(),
- } }),
- ) else try self.asmRegisterRegisterRegister(
- mir_tag,
- dst_reg,
- lhs_reg,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
- );
- } else {
- assert(copied_to_dst);
- if (src_mcv.isBase()) try self.asmRegisterMemory(
- mir_tag,
- dst_reg,
- try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) {
- else => .fromSize(abi_size),
- .vector => dst_reg.size(),
- } }),
- ) else try self.asmRegisterRegister(
- mir_tag,
- dst_reg,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
- );
- },
- .cmp => {
- const imm: Immediate = .u(switch (air_tag) {
- .cmp_eq => 0,
- .cmp_lt, .cmp_gt => 1,
- .cmp_lte, .cmp_gte => 2,
- .cmp_neq => 4,
- else => unreachable,
- });
- if (self.hasFeature(.avx)) {
- const lhs_reg =
- if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
- if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
- mir_tag,
- dst_reg,
- lhs_reg,
- try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) {
- else => .fromSize(abi_size),
- .vector => dst_reg.size(),
- } }),
- imm,
- ) else try self.asmRegisterRegisterRegisterImmediate(
- mir_tag,
- dst_reg,
- lhs_reg,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
- imm,
- );
- } else {
- assert(copied_to_dst);
- if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate(
- mir_tag,
- dst_reg,
- try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) {
- else => .fromSize(abi_size),
- .vector => dst_reg.size(),
- } }),
- imm,
- ) else try self.asmRegisterRegisterImmediate(
- mir_tag,
- dst_reg,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
- imm,
- );
- }
- },
- }
-
- switch (air_tag) {
- .bit_and, .bit_or, .xor => {},
- .max, .min => if (maybe_mask_reg) |mask_reg| if (self.hasFeature(.avx)) {
- const rhs_copy_reg = registerAlias(src_mcv.getReg().?, abi_size);
-
- try self.asmRegisterRegisterRegisterImmediate(
- @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) {
- .float => switch (lhs_ty.floatBits(self.target)) {
- 32 => .{ .v_ss, .cmp },
- 64 => .{ .v_sd, .cmp },
- 16, 80, 128 => null,
- else => unreachable,
- },
- .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
- .float => switch (lhs_ty.childType(zcu).floatBits(self.target)) {
- 32 => switch (lhs_ty.vectorLen(zcu)) {
- 1 => .{ .v_ss, .cmp },
- 2...8 => .{ .v_ps, .cmp },
- else => null,
- },
- 64 => switch (lhs_ty.vectorLen(zcu)) {
- 1 => .{ .v_sd, .cmp },
- 2...4 => .{ .v_pd, .cmp },
- else => null,
- },
- 16, 80, 128 => null,
- else => unreachable,
- },
- else => unreachable,
- },
- else => unreachable,
- }) orelse return self.fail("TODO implement genBinOp for {s} {f}", .{
- @tagName(air_tag), lhs_ty.fmt(pt),
- }),
- mask_reg,
- rhs_copy_reg,
- rhs_copy_reg,
- bits.VexFloatPredicate.imm(.unord),
- );
- try self.asmRegisterRegisterRegisterRegister(
- @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) {
- .float => switch (lhs_ty.floatBits(self.target)) {
- 32 => .{ .v_ps, .blendv },
- 64 => .{ .v_pd, .blendv },
- 16, 80, 128 => null,
- else => unreachable,
- },
- .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
- .float => switch (lhs_ty.childType(zcu).floatBits(self.target)) {
- 32 => switch (lhs_ty.vectorLen(zcu)) {
- 1...8 => .{ .v_ps, .blendv },
- else => null,
- },
- 64 => switch (lhs_ty.vectorLen(zcu)) {
- 1...4 => .{ .v_pd, .blendv },
- else => null,
- },
- 16, 80, 128 => null,
- else => unreachable,
- },
- else => unreachable,
- },
- else => unreachable,
- }) orelse return self.fail("TODO implement genBinOp for {s} {f}", .{
- @tagName(air_tag), lhs_ty.fmt(pt),
- }),
- dst_reg,
- dst_reg,
- lhs_copy_reg.?,
- mask_reg,
- );
- } else {
- const has_blend = self.hasFeature(.sse4_1);
- try self.asmRegisterRegisterImmediate(
- @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) {
- .float => switch (lhs_ty.floatBits(self.target)) {
- 32 => .{ ._ss, .cmp },
- 64 => .{ ._sd, .cmp },
- 16, 80, 128 => null,
- else => unreachable,
- },
- .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
- .float => switch (lhs_ty.childType(zcu).floatBits(self.target)) {
- 32 => switch (lhs_ty.vectorLen(zcu)) {
- 1 => .{ ._ss, .cmp },
- 2...4 => .{ ._ps, .cmp },
- else => null,
- },
- 64 => switch (lhs_ty.vectorLen(zcu)) {
- 1 => .{ ._sd, .cmp },
- 2 => .{ ._pd, .cmp },
- else => null,
- },
- 16, 80, 128 => null,
- else => unreachable,
- },
- else => unreachable,
- },
- else => unreachable,
- }) orelse return self.fail("TODO implement genBinOp for {s} {f}", .{
- @tagName(air_tag), lhs_ty.fmt(pt),
- }),
- mask_reg,
- mask_reg,
- bits.SseFloatPredicate.imm(if (has_blend) .unord else .ord),
- );
- if (has_blend) try self.asmRegisterRegisterRegister(
- @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) {
- .float => switch (lhs_ty.floatBits(self.target)) {
- 32 => .{ ._ps, .blendv },
- 64 => .{ ._pd, .blendv },
- 16, 80, 128 => null,
- else => unreachable,
- },
- .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
- .float => switch (lhs_ty.childType(zcu).floatBits(self.target)) {
- 32 => switch (lhs_ty.vectorLen(zcu)) {
- 1...4 => .{ ._ps, .blendv },
- else => null,
- },
- 64 => switch (lhs_ty.vectorLen(zcu)) {
- 1...2 => .{ ._pd, .blendv },
- else => null,
- },
- 16, 80, 128 => null,
- else => unreachable,
- },
- else => unreachable,
- },
- else => unreachable,
- }) orelse return self.fail("TODO implement genBinOp for {s} {f}", .{
- @tagName(air_tag), lhs_ty.fmt(pt),
- }),
- dst_reg,
- lhs_copy_reg.?,
- mask_reg,
- ) else {
- const mir_fixes = @as(?Mir.Inst.Fixes, switch (lhs_ty.zigTypeTag(zcu)) {
- .float => switch (lhs_ty.floatBits(self.target)) {
- 32 => ._ps,
- 64 => ._pd,
- 16, 80, 128 => null,
- else => unreachable,
- },
- .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
- .float => switch (lhs_ty.childType(zcu).floatBits(self.target)) {
- 32 => switch (lhs_ty.vectorLen(zcu)) {
- 1...4 => ._ps,
- else => null,
- },
- 64 => switch (lhs_ty.vectorLen(zcu)) {
- 1...2 => ._pd,
- else => null,
- },
- 16, 80, 128 => null,
- else => unreachable,
- },
- else => unreachable,
- },
- else => unreachable,
- }) orelse return self.fail("TODO implement genBinOp for {s} {f}", .{
- @tagName(air_tag), lhs_ty.fmt(pt),
- });
- try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_reg, mask_reg);
- try self.asmRegisterRegister(.{ mir_fixes, .andn }, mask_reg, lhs_copy_reg.?);
- try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_reg, mask_reg);
- }
- },
- .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => {
- switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) {
- .int => switch (air_tag) {
- .cmp_lt,
- .cmp_eq,
- .cmp_gt,
- => {},
- .cmp_lte,
- .cmp_gte,
- .cmp_neq,
- => {
- const unsigned_ty = try lhs_ty.toUnsigned(pt);
- const not_mcv = try self.lowerValue(try unsigned_ty.maxInt(pt, unsigned_ty));
- const not_mem: Memory = if (not_mcv.isBase())
- try not_mcv.mem(self, .{ .size = .fromSize(abi_size) })
- else
- .{ .base = .{
- .reg = try self.copyToTmpRegister(.usize, not_mcv.address()),
- }, .mod = .{ .rm = .{ .size = .fromSize(abi_size) } } };
- switch (mir_tag[0]) {
- .vp_b, .vp_d, .vp_q, .vp_w => try self.asmRegisterRegisterMemory(
- .{ .vp_, .xor },
- dst_reg,
- dst_reg,
- not_mem,
- ),
- .p_b, .p_d, .p_q, .p_w => try self.asmRegisterMemory(
- .{ .p_, .xor },
- dst_reg,
- not_mem,
- ),
- else => unreachable,
- }
- },
- else => unreachable,
- },
- .float => {},
- else => unreachable,
- }
-
- const gp_reg = try self.register_manager.allocReg(maybe_inst, abi.RegisterClass.gp);
- const gp_lock = self.register_manager.lockRegAssumeUnused(gp_reg);
- defer self.register_manager.unlockReg(gp_lock);
-
- try self.asmRegisterRegister(switch (mir_tag[0]) {
- ._pd, ._sd, .p_q => .{ ._pd, .movmsk },
- ._ps, ._ss, .p_d => .{ ._ps, .movmsk },
- .p_b => .{ .p_b, .movmsk },
- .p_w => movmsk: {
- try self.asmRegisterRegister(.{ .p_b, .ackssw }, dst_reg, dst_reg);
- break :movmsk .{ .p_b, .movmsk };
- },
- .v_pd, .v_sd, .vp_q => .{ .v_pd, .movmsk },
- .v_ps, .v_ss, .vp_d => .{ .v_ps, .movmsk },
- .vp_b => .{ .vp_b, .movmsk },
- .vp_w => movmsk: {
- try self.asmRegisterRegisterRegister(
- .{ .vp_b, .ackssw },
- dst_reg,
- dst_reg,
- dst_reg,
- );
- break :movmsk .{ .vp_b, .movmsk };
- },
- else => unreachable,
- }, gp_reg.to32(), dst_reg);
- return .{ .register = gp_reg };
- },
- else => unreachable,
- }
-
- return dst_mcv;
-}
-
fn genBinOpMir(
self: *CodeGen,
mir_tag: Mir.Inst.FixedTag,
@@ -178472,168 +175661,6 @@ fn genBinOpMir(
}
}
-/// Performs multi-operand integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
-/// Does not support byte-size operands.
-fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void {
- const pt = self.pt;
- const abi_size: u32 = @intCast(dst_ty.abiSize(pt.zcu));
- try self.spillEflagsIfOccupied();
- switch (dst_mcv) {
- .none,
- .unreach,
- .dead,
- .undef,
- .immediate,
- .eflags,
- .register_offset,
- .register_overflow,
- .register_mask,
- .indirect_load_frame,
- .lea_frame,
- .lea_nav,
- .lea_uav,
- .lea_lazy_sym,
- .lea_extern_func,
- .elementwise_args,
- .reserved_frame,
- .air_ref,
- => unreachable, // unmodifiable destination
- .register => |dst_reg| {
- const alias_size = switch (abi_size) {
- 1 => 4,
- else => abi_size,
- };
- const dst_alias = registerAlias(dst_reg, alias_size);
- const dst_lock = self.register_manager.lockReg(dst_reg);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- switch (abi_size) {
- 1 => try self.asmRegisterRegister(.{ ._, .movzx }, dst_reg.to32(), dst_reg.to8()),
- else => {},
- }
-
- const resolved_src_mcv = switch (src_mcv) {
- else => src_mcv,
- .air_ref => |src_ref| try self.resolveInst(src_ref),
- };
- switch (resolved_src_mcv) {
- .none,
- .unreach,
- .dead,
- .undef,
- .register_pair,
- .register_triple,
- .register_quadruple,
- .register_overflow,
- .register_mask,
- .indirect_load_frame,
- .elementwise_args,
- .reserved_frame,
- .air_ref,
- => unreachable,
- .register => |src_reg| {
- switch (abi_size) {
- 1 => try self.asmRegisterRegister(.{ ._, .movzx }, src_reg.to32(), src_reg.to8()),
- else => {},
- }
- try self.asmRegisterRegister(
- .{ .i_, .mul },
- dst_alias,
- registerAlias(src_reg, alias_size),
- );
- },
- .immediate => |imm| {
- if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small| {
- try self.asmRegisterRegisterImmediate(.{ .i_, .mul }, dst_alias, dst_alias, .s(small));
- } else {
- const src_reg = try self.copyToTmpRegister(dst_ty, resolved_src_mcv);
- return self.genIntMulComplexOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg });
- }
- },
- .register_offset,
- .eflags,
- .lea_frame,
- .load_nav,
- .lea_nav,
- .load_uav,
- .lea_uav,
- .load_lazy_sym,
- .lea_lazy_sym,
- .load_extern_func,
- .lea_extern_func,
- => {
- const src_reg = try self.copyToTmpRegister(dst_ty, resolved_src_mcv);
- switch (abi_size) {
- 1 => try self.asmRegisterRegister(.{ ._, .movzx }, src_reg.to32(), src_reg.to8()),
- else => {},
- }
- try self.asmRegisterRegister(.{ .i_, .mul }, dst_alias, registerAlias(src_reg, alias_size));
- },
- .memory, .indirect, .load_frame => switch (abi_size) {
- 1 => {
- const src_reg = try self.copyToTmpRegister(dst_ty, resolved_src_mcv);
- try self.asmRegisterRegister(.{ ._, .movzx }, src_reg.to32(), src_reg.to8());
- try self.asmRegisterRegister(.{ .i_, .mul }, dst_alias, registerAlias(src_reg, alias_size));
- },
- else => try self.asmRegisterMemory(
- .{ .i_, .mul },
- dst_alias,
- switch (resolved_src_mcv) {
- .memory => |addr| .{
- .base = .{ .reg = .ds },
- .mod = .{ .rm = .{
- .size = .fromSize(abi_size),
- .disp = std.math.cast(i32, @as(i64, @bitCast(addr))) orelse
- return self.asmRegisterRegister(
- .{ .i_, .mul },
- dst_alias,
- registerAlias(
- try self.copyToTmpRegister(dst_ty, resolved_src_mcv),
- abi_size,
- ),
- ),
- } },
- },
- .indirect => |reg_off| .{
- .base = .{ .reg = reg_off.reg },
- .mod = .{ .rm = .{
- .size = .fromSize(abi_size),
- .disp = reg_off.off,
- } },
- },
- .load_frame => |frame_addr| .{
- .base = .{ .frame = frame_addr.index },
- .mod = .{ .rm = .{
- .size = .fromSize(abi_size),
- .disp = frame_addr.off,
- } },
- },
- else => unreachable,
- },
- ),
- },
- }
- },
- .register_pair, .register_triple, .register_quadruple => unreachable, // unimplemented
- .memory,
- .indirect,
- .load_frame,
- .load_nav,
- .load_uav,
- .load_lazy_sym,
- .load_extern_func,
- => {
- const tmp_reg = try self.copyToTmpRegister(dst_ty, dst_mcv);
- const tmp_mcv = MCValue{ .register = tmp_reg };
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
-
- try self.genIntMulComplexOpMir(dst_ty, tmp_mcv, src_mcv);
- try self.genCopy(dst_ty, dst_mcv, tmp_mcv, .{});
- },
- }
-}
-
fn airArg(self: *CodeGen, inst: Air.Inst.Index) !void {
const zcu = self.pt.zcu;
const arg_index = for (self.args, 0..) |arg, arg_index| {
@@ -179247,475 +176274,6 @@ fn airRetLoad(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.epilogue_relocs.append(self.gpa, jmp_reloc);
}
-fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
- var ty = self.typeOf(bin_op.lhs);
- var null_compare: ?Mir.Inst.Index = null;
-
- const result: Condition = result: {
- try self.spillEflagsIfOccupied();
-
- const lhs_mcv = try self.resolveInst(bin_op.lhs);
- const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) {
- .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null },
- .register_pair => |lhs_regs| locks: {
- const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs);
- break :locks .{ locks[0], locks[1] };
- },
- .register_offset => |lhs_ro| .{
- self.register_manager.lockRegAssumeUnused(lhs_ro.reg),
- null,
- },
- else => @splat(null),
- };
- defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
-
- const rhs_mcv = try self.resolveInst(bin_op.rhs);
- const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) {
- .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null },
- .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs),
- .register_offset => |rhs_ro| .{ self.register_manager.lockReg(rhs_ro.reg), null },
- else => @splat(null),
- };
- defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
-
- switch (ty.zigTypeTag(zcu)) {
- .float => {
- const float_bits = ty.floatBits(self.target);
- if (!switch (float_bits) {
- 16 => self.hasFeature(.f16c),
- 32 => self.hasFeature(.sse),
- 64 => self.hasFeature(.sse2),
- 80, 128 => false,
- else => unreachable,
- }) {
- var sym_buf: ["__???f2".len]u8 = undefined;
- const ret = try self.genCall(.{ .extern_func = .{
- .return_type = .i32_type,
- .param_types = &.{ ty.toIntern(), ty.toIntern() },
- .sym = std.fmt.bufPrint(&sym_buf, "__{s}{c}f2", .{
- switch (op) {
- .eq => "eq",
- .neq => "ne",
- .lt => "lt",
- .lte => "le",
- .gt => "gt",
- .gte => "ge",
- },
- floatCompilerRtAbiName(float_bits),
- }) catch unreachable,
- } }, &.{ ty, ty }, &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } }, .{});
- try self.genBinOpMir(.{ ._, .@"test" }, .i32, ret, ret);
- break :result switch (op) {
- .eq => .e,
- .neq => .ne,
- .lt => .l,
- .lte => .le,
- .gt => .g,
- .gte => .ge,
- };
- }
- },
- .optional => if (!ty.optionalReprIsPayload(zcu)) {
- const opt_ty = ty;
- const opt_abi_size: u31 = @intCast(opt_ty.abiSize(zcu));
- ty = opt_ty.optionalChild(zcu);
- const payload_abi_size: u31 = @intCast(ty.abiSize(zcu));
-
- const temp_lhs_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const temp_lhs_lock = self.register_manager.lockRegAssumeUnused(temp_lhs_reg);
- defer self.register_manager.unlockReg(temp_lhs_lock);
-
- if (lhs_mcv.isBase()) try self.asmRegisterMemory(
- .{ ._, .mov },
- temp_lhs_reg.to8(),
- try lhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte }),
- ) else {
- try self.genSetReg(temp_lhs_reg, opt_ty, lhs_mcv, .{});
- try self.asmRegisterImmediate(
- .{ ._r, .sh },
- registerAlias(temp_lhs_reg, opt_abi_size),
- .u(payload_abi_size * 8),
- );
- }
-
- const payload_compare = payload_compare: {
- if (rhs_mcv.isBase()) {
- const rhs_mem =
- try rhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte });
- try self.asmMemoryRegister(.{ ._, .@"test" }, rhs_mem, temp_lhs_reg.to8());
- const payload_compare = try self.asmJccReloc(.nz, undefined);
- try self.asmRegisterMemory(.{ ._, .cmp }, temp_lhs_reg.to8(), rhs_mem);
- break :payload_compare payload_compare;
- }
-
- const temp_rhs_reg = try self.copyToTmpRegister(opt_ty, rhs_mcv);
- const temp_rhs_lock = self.register_manager.lockRegAssumeUnused(temp_rhs_reg);
- defer self.register_manager.unlockReg(temp_rhs_lock);
-
- try self.asmRegisterImmediate(
- .{ ._r, .sh },
- registerAlias(temp_rhs_reg, opt_abi_size),
- .u(payload_abi_size * 8),
- );
- try self.asmRegisterRegister(
- .{ ._, .@"test" },
- temp_lhs_reg.to8(),
- temp_rhs_reg.to8(),
- );
- const payload_compare = try self.asmJccReloc(.nz, undefined);
- try self.asmRegisterRegister(
- .{ ._, .cmp },
- temp_lhs_reg.to8(),
- temp_rhs_reg.to8(),
- );
- break :payload_compare payload_compare;
- };
- null_compare = try self.asmJmpReloc(undefined);
- self.performReloc(payload_compare);
- },
- else => {},
- }
-
- switch (ty.zigTypeTag(zcu)) {
- else => {
- const abi_size: u16 = @intCast(ty.abiSize(zcu));
- const may_flip: enum {
- may_flip,
- must_flip,
- must_not_flip,
- } = if (abi_size > 8) switch (op) {
- .lt, .gte => .must_not_flip,
- .lte, .gt => .must_flip,
- .eq, .neq => .may_flip,
- } else .may_flip;
-
- const flipped = switch (may_flip) {
- .may_flip => !lhs_mcv.isRegister() and !lhs_mcv.isBase(),
- .must_flip => true,
- .must_not_flip => false,
- };
- const unmat_dst_mcv = if (flipped) rhs_mcv else lhs_mcv;
- const dst_mcv = if (unmat_dst_mcv.isRegister() or
- (abi_size <= 8 and unmat_dst_mcv.isBase())) unmat_dst_mcv else dst: {
- const dst_mcv = try self.allocTempRegOrMem(ty, true);
- try self.genCopy(ty, dst_mcv, unmat_dst_mcv, .{});
- break :dst dst_mcv;
- };
- const dst_lock =
- if (dst_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
- const src_mcv = try self.resolveInst(if (flipped) bin_op.lhs else bin_op.rhs);
- const src_lock =
- if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
- defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
-
- break :result .fromCompareOperator(
- if (ty.isAbiInt(zcu)) ty.intInfo(zcu).signedness else .unsigned,
- result_op: {
- const flipped_op = if (flipped) op.reverse() else op;
- if (abi_size > 8) switch (flipped_op) {
- .lt, .gte => {},
- .lte, .gt => unreachable,
- .eq, .neq => {
- const OpInfo = ?struct { addr_reg: Register, addr_lock: RegisterLock };
-
- const resolved_dst_mcv = switch (dst_mcv) {
- else => dst_mcv,
- .air_ref => |dst_ref| try self.resolveInst(dst_ref),
- };
- const dst_info: OpInfo = switch (resolved_dst_mcv) {
- .none,
- .unreach,
- .dead,
- .undef,
- .immediate,
- .eflags,
- .register_offset,
- .register_overflow,
- .register_mask,
- .indirect,
- .lea_frame,
- .lea_nav,
- .lea_uav,
- .lea_lazy_sym,
- .lea_extern_func,
- .elementwise_args,
- .reserved_frame,
- .air_ref,
- => unreachable,
- .register,
- .register_pair,
- .register_triple,
- .register_quadruple,
- .load_frame,
- => null,
- .memory,
- .load_nav,
- .load_uav,
- .load_lazy_sym,
- .load_extern_func,
- => dst: {
- switch (resolved_dst_mcv) {
- .memory => |addr| if (std.math.cast(
- i32,
- @as(i64, @bitCast(addr)),
- ) != null and std.math.cast(
- i32,
- @as(i64, @bitCast(addr)) + abi_size - 8,
- ) != null) break :dst null,
- .load_nav, .load_uav, .load_lazy_sym, .load_extern_func => {},
- else => unreachable,
- }
-
- const dst_addr_reg = (try self.register_manager.allocReg(
- null,
- abi.RegisterClass.gp,
- )).to64();
- const dst_addr_lock =
- self.register_manager.lockRegAssumeUnused(dst_addr_reg);
- errdefer self.register_manager.unlockReg(dst_addr_lock);
-
- try self.genSetReg(dst_addr_reg, .usize, resolved_dst_mcv.address(), .{});
- break :dst .{
- .addr_reg = dst_addr_reg,
- .addr_lock = dst_addr_lock,
- };
- },
- };
- defer if (dst_info) |info| self.register_manager.unlockReg(info.addr_lock);
-
- const resolved_src_mcv = switch (src_mcv) {
- else => src_mcv,
- .air_ref => |src_ref| try self.resolveInst(src_ref),
- };
- const src_info: OpInfo = switch (resolved_src_mcv) {
- .none,
- .unreach,
- .dead,
- .undef,
- .immediate,
- .eflags,
- .register,
- .register_offset,
- .register_overflow,
- .register_mask,
- .indirect,
- .lea_frame,
- .lea_nav,
- .lea_uav,
- .lea_lazy_sym,
- .lea_extern_func,
- .elementwise_args,
- .reserved_frame,
- .air_ref,
- => unreachable,
- .register_pair,
- .register_triple,
- .register_quadruple,
- .load_frame,
- => null,
- .memory,
- .load_nav,
- .load_uav,
- .load_lazy_sym,
- .load_extern_func,
- => src: {
- switch (resolved_src_mcv) {
- .memory => |addr| if (std.math.cast(
- i32,
- @as(i64, @bitCast(addr)),
- ) != null and std.math.cast(
- i32,
- @as(i64, @bitCast(addr)) + abi_size - 8,
- ) != null) break :src null,
- .load_nav, .load_uav, .load_lazy_sym, .load_extern_func => {},
- else => unreachable,
- }
-
- const src_addr_reg = (try self.register_manager.allocReg(
- null,
- abi.RegisterClass.gp,
- )).to64();
- const src_addr_lock =
- self.register_manager.lockRegAssumeUnused(src_addr_reg);
- errdefer self.register_manager.unlockReg(src_addr_lock);
-
- try self.genSetReg(src_addr_reg, .usize, resolved_src_mcv.address(), .{});
- break :src .{
- .addr_reg = src_addr_reg,
- .addr_lock = src_addr_lock,
- };
- },
- };
- defer if (src_info) |info|
- self.register_manager.unlockReg(info.addr_lock);
-
- const regs = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp);
- const acc_reg = regs[0].to64();
- const locks = self.register_manager.lockRegsAssumeUnused(2, regs);
- defer for (locks) |lock| self.register_manager.unlockReg(lock);
-
- const limbs_len = std.math.divCeil(u16, abi_size, 8) catch unreachable;
- var limb_i: u16 = 0;
- while (limb_i < limbs_len) : (limb_i += 1) {
- const off = limb_i * 8;
- const tmp_reg = regs[@min(limb_i, 1)].to64();
-
- try self.genSetReg(tmp_reg, .usize, if (dst_info) |info| .{
- .indirect = .{ .reg = info.addr_reg, .off = off },
- } else switch (resolved_dst_mcv) {
- inline .register_pair,
- .register_triple,
- .register_quadruple,
- => |dst_regs| .{ .register = dst_regs[limb_i] },
- .memory => |dst_addr| .{
- .memory = @bitCast(@as(i64, @bitCast(dst_addr)) + off),
- },
- .indirect => |reg_off| .{ .indirect = .{
- .reg = reg_off.reg,
- .off = reg_off.off + off,
- } },
- .load_frame => |frame_addr| .{ .load_frame = .{
- .index = frame_addr.index,
- .off = frame_addr.off + off,
- } },
- else => unreachable,
- }, .{});
-
- try self.genBinOpMir(
- .{ ._, .xor },
- .usize,
- .{ .register = tmp_reg },
- if (src_info) |info| .{
- .indirect = .{ .reg = info.addr_reg, .off = off },
- } else switch (resolved_src_mcv) {
- inline .register_pair,
- .register_triple,
- .register_quadruple,
- => |src_regs| .{ .register = src_regs[limb_i] },
- .memory => |src_addr| .{
- .memory = @bitCast(@as(i64, @bitCast(src_addr)) + off),
- },
- .indirect => |reg_off| .{ .indirect = .{
- .reg = reg_off.reg,
- .off = reg_off.off + off,
- } },
- .load_frame => |frame_addr| .{ .load_frame = .{
- .index = frame_addr.index,
- .off = frame_addr.off + off,
- } },
- else => unreachable,
- },
- );
-
- if (limb_i > 0)
- try self.asmRegisterRegister(.{ ._, .@"or" }, acc_reg, tmp_reg);
- }
- assert(limbs_len >= 2); // use flags from or
- break :result_op flipped_op;
- },
- };
- try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, src_mcv);
- break :result_op flipped_op;
- },
- );
- },
- .float => {
- const flipped = switch (op) {
- .lt, .lte => true,
- .eq, .gte, .gt, .neq => false,
- };
-
- const dst_mcv = if (flipped) rhs_mcv else lhs_mcv;
- const dst_reg = if (dst_mcv.isRegister())
- dst_mcv.getReg().?
- else
- try self.copyToTmpRegister(ty, dst_mcv);
- const dst_lock = self.register_manager.lockReg(dst_reg);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
- const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
-
- switch (ty.floatBits(self.target)) {
- 16 => {
- assert(self.hasFeature(.f16c));
- const tmp1_reg =
- (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128();
- const tmp1_mcv = MCValue{ .register = tmp1_reg };
- const tmp1_lock = self.register_manager.lockRegAssumeUnused(tmp1_reg);
- defer self.register_manager.unlockReg(tmp1_lock);
-
- const tmp2_reg =
- (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128();
- const tmp2_mcv = MCValue{ .register = tmp2_reg };
- const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg);
- defer self.register_manager.unlockReg(tmp2_lock);
-
- if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
- .{ .vp_w, .insr },
- tmp1_reg,
- dst_reg.to128(),
- try src_mcv.mem(self, .{ .size = .word }),
- .u(1),
- ) else try self.asmRegisterRegisterRegister(
- .{ .vp_, .unpcklwd },
- tmp1_reg,
- dst_reg.to128(),
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(ty, src_mcv)).to128(),
- );
- try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, tmp1_reg, tmp1_reg);
- try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg);
- try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv);
- },
- 32 => try self.genBinOpMir(
- .{ ._ss, .ucomi },
- ty,
- .{ .register = dst_reg },
- src_mcv,
- ),
- 64 => try self.genBinOpMir(
- .{ ._sd, .ucomi },
- ty,
- .{ .register = dst_reg },
- src_mcv,
- ),
- else => unreachable,
- }
-
- break :result switch (if (flipped) op.reverse() else op) {
- .lt, .lte => unreachable, // required to have been canonicalized to gt(e)
- .gt => .a,
- .gte => .ae,
- .eq => .z_and_np,
- .neq => .nz_or_p,
- };
- },
- }
- };
-
- if (null_compare) |reloc| self.performReloc(reloc);
- self.eflags_inst = inst;
- return self.finishAir(inst, .{ .eflags = result }, .{ bin_op.lhs, bin_op.rhs, .none });
-}
-
-fn airCmpVector(self: *CodeGen, inst: Air.Inst.Index) !void {
- const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
- const extra = self.air.extraData(Air.VectorCmp, ty_pl.payload).data;
- const dst_mcv = try self.genBinOp(
- inst,
- .fromCmpOp(extra.compareOperator(), false),
- extra.lhs,
- extra.rhs,
- );
- return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none });
-}
-
fn airTry(self: *CodeGen, inst: Air.Inst.Index) !void {
const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
const extra = self.air.extraData(Air.Try, pl_op.payload);
@@ -181223,16 +177781,13 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
.@".cfi_escape" => error.InvalidInstruction,
else => unreachable,
} else self.asmOps(mnem_fixed_tag, ops)) catch |err| switch (err) {
- error.InvalidInstruction => return self.fail(
- "invalid instruction: '{s} {s} {s} {s} {s}'",
- .{
- mnem_str,
- @tagName(ops[0]),
- @tagName(ops[1]),
- @tagName(ops[2]),
- @tagName(ops[3]),
- },
- ),
+ error.InvalidInstruction => return self.fail("invalid instruction: '{s} {s} {s} {s} {s}'", .{
+ mnem_str,
+ @tagName(ops[0]),
+ @tagName(ops[1]),
+ @tagName(ops[2]),
+ @tagName(ops[3]),
+ }),
else => |e| return e,
};
}
@@ -182904,183 +179459,6 @@ fn airBitCast(self: *CodeGen, inst: Air.Inst.Index) !void {
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
-fn airArrayToSlice(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-
- const slice_ty = self.typeOfIndex(inst);
- const ptr_ty = self.typeOf(ty_op.operand);
- const ptr = try self.resolveInst(ty_op.operand);
- const array_ty = ptr_ty.childType(zcu);
- const array_len = array_ty.arrayLen(zcu);
-
- const frame_index = try self.allocFrameIndex(.initSpill(slice_ty, zcu));
- try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr, .{});
- try self.genSetMem(
- .{ .frame = frame_index },
- @intCast(ptr_ty.abiSize(zcu)),
- .usize,
- .{ .immediate = array_len },
- .{},
- );
-
- const result = MCValue{ .load_frame = .{ .index = frame_index } };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn airFloatFromInt(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-
- const dst_ty = self.typeOfIndex(inst);
- const dst_bits = dst_ty.floatBits(self.target);
-
- const src_ty = self.typeOf(ty_op.operand);
- const src_bits: u32 = @intCast(src_ty.bitSize(zcu));
- const src_signedness =
- if (src_ty.isAbiInt(zcu)) src_ty.intInfo(zcu).signedness else .unsigned;
- const src_size = std.math.divCeil(u32, @max(switch (src_signedness) {
- .signed => src_bits,
- .unsigned => src_bits + 1,
- }, 32), 8) catch unreachable;
-
- const result = result: {
- if (switch (dst_bits) {
- 16, 80, 128 => true,
- 32, 64 => src_size > 8,
- else => unreachable,
- }) {
- if (src_bits > 128) return self.fail("TODO implement airFloatFromInt from {f} to {f}", .{
- src_ty.fmt(pt), dst_ty.fmt(pt),
- });
-
- var sym_buf: ["__floatun?i?f".len]u8 = undefined;
- break :result try self.genCall(.{ .extern_func = .{
- .return_type = dst_ty.toIntern(),
- .param_types = &.{src_ty.toIntern()},
- .sym = std.fmt.bufPrint(&sym_buf, "__float{s}{c}i{c}f", .{
- switch (src_signedness) {
- .signed => "",
- .unsigned => "un",
- },
- intCompilerRtAbiName(src_bits),
- floatCompilerRtAbiName(dst_bits),
- }) catch unreachable,
- } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{});
- }
-
- const src_mcv = try self.resolveInst(ty_op.operand);
- const src_reg = if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(src_ty, src_mcv);
- const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
- defer self.register_manager.unlockReg(src_lock);
-
- if (src_bits < src_size * 8) try self.truncateRegister(src_ty, src_reg);
-
- const dst_reg = try self.register_manager.allocReg(inst, self.regSetForType(dst_ty));
- const dst_mcv = MCValue{ .register = dst_reg };
- const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
- defer self.register_manager.unlockReg(dst_lock);
-
- const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_ty.zigTypeTag(zcu)) {
- .float => switch (dst_ty.floatBits(self.target)) {
- 32 => if (self.hasFeature(.avx)) .{ .v_ss, .cvtsi2 } else .{ ._ss, .cvtsi2 },
- 64 => if (self.hasFeature(.avx)) .{ .v_sd, .cvtsi2 } else .{ ._sd, .cvtsi2 },
- 16, 80, 128 => null,
- else => unreachable,
- },
- else => null,
- }) orelse return self.fail("TODO implement airFloatFromInt from {f} to {f}", .{
- src_ty.fmt(pt), dst_ty.fmt(pt),
- });
- const dst_alias = dst_reg.to128();
- const src_alias = registerAlias(src_reg, src_size);
- switch (mir_tag[0]) {
- .v_ss, .v_sd => try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, src_alias),
- else => try self.asmRegisterRegister(mir_tag, dst_alias, src_alias),
- }
-
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn airIntFromFloat(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-
- const dst_ty = self.typeOfIndex(inst);
- const dst_bits: u32 = @intCast(dst_ty.bitSize(zcu));
- const dst_signedness =
- if (dst_ty.isAbiInt(zcu)) dst_ty.intInfo(zcu).signedness else .unsigned;
- const dst_size = std.math.divCeil(u32, @max(switch (dst_signedness) {
- .signed => dst_bits,
- .unsigned => dst_bits + 1,
- }, 32), 8) catch unreachable;
-
- const src_ty = self.typeOf(ty_op.operand);
- const src_bits = src_ty.floatBits(self.target);
-
- const result = result: {
- if (switch (src_bits) {
- 16, 80, 128 => true,
- 32, 64 => dst_size > 8,
- else => unreachable,
- }) {
- if (dst_bits > 128) return self.fail("TODO implement airIntFromFloat from {f} to {f}", .{
- src_ty.fmt(pt), dst_ty.fmt(pt),
- });
-
- var sym_buf: ["__fixuns?f?i".len]u8 = undefined;
- break :result try self.genCall(.{ .extern_func = .{
- .return_type = dst_ty.toIntern(),
- .param_types = &.{src_ty.toIntern()},
- .sym = std.fmt.bufPrint(&sym_buf, "__fix{s}{c}f{c}i", .{
- switch (dst_signedness) {
- .signed => "",
- .unsigned => "uns",
- },
- floatCompilerRtAbiName(src_bits),
- intCompilerRtAbiName(dst_bits),
- }) catch unreachable,
- } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{});
- }
-
- const src_mcv = try self.resolveInst(ty_op.operand);
- const src_reg = if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(src_ty, src_mcv);
- const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
- defer self.register_manager.unlockReg(src_lock);
-
- const dst_reg = try self.register_manager.allocReg(inst, self.regSetForType(dst_ty));
- const dst_mcv = MCValue{ .register = dst_reg };
- const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
- defer self.register_manager.unlockReg(dst_lock);
-
- try self.asmRegisterRegister(
- switch (src_bits) {
- 32 => if (self.hasFeature(.avx)) .{ .v_, .cvttss2si } else .{ ._, .cvttss2si },
- 64 => if (self.hasFeature(.avx)) .{ .v_, .cvttsd2si } else .{ ._, .cvttsd2si },
- else => unreachable,
- },
- registerAlias(dst_reg, dst_size),
- src_reg.to128(),
- );
-
- if (dst_bits < dst_size * 8) try self.truncateRegister(dst_ty, dst_reg);
-
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
fn airCmpxchg(self: *CodeGen, inst: Air.Inst.Index) !void {
const pt = self.pt;
const zcu = pt.zcu;
@@ -183747,331 +180125,46 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void {
const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
const vector_ty = self.typeOfIndex(inst);
const vector_len = vector_ty.vectorLen(zcu);
- const dst_rc = self.regSetForType(vector_ty);
const scalar_ty = self.typeOf(ty_op.operand);
const result: MCValue = result: {
- switch (scalar_ty.zigTypeTag(zcu)) {
- else => {},
- .bool => {
- const regs =
- try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp);
- const reg_locks = self.register_manager.lockRegsAssumeUnused(2, regs);
- defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
-
- try self.genSetReg(regs[1], vector_ty, .{ .immediate = 0 }, .{});
- try self.genSetReg(
- regs[1],
- vector_ty,
- .{ .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - vector_len) },
- .{},
- );
- const src_mcv = try self.resolveInst(ty_op.operand);
- const abi_size = @max(std.math.divCeil(u32, vector_len, 8) catch unreachable, 4);
- try self.asmCmovccRegisterRegister(
- switch (src_mcv) {
- .eflags => |cc| cc,
- .register => |src_reg| cc: {
- try self.asmRegisterImmediate(.{ ._, .@"test" }, src_reg.to8(), .u(1));
- break :cc .nz;
- },
- else => cc: {
- try self.asmMemoryImmediate(
- .{ ._, .@"test" },
- try src_mcv.mem(self, .{ .size = .byte }),
- .u(1),
- );
- break :cc .nz;
- },
- },
- registerAlias(regs[0], abi_size),
- registerAlias(regs[1], abi_size),
- );
- break :result .{ .register = regs[0] };
- },
- .int => if (self.hasFeature(.avx2)) avx2: {
- const mir_tag = @as(?Mir.Inst.FixedTag, switch (scalar_ty.intInfo(zcu).bits) {
- else => null,
- 1...8 => switch (vector_len) {
- else => null,
- 1...32 => .{ .vp_b, .broadcast },
- },
- 9...16 => switch (vector_len) {
- else => null,
- 1...16 => .{ .vp_w, .broadcast },
- },
- 17...32 => switch (vector_len) {
- else => null,
- 1...8 => .{ .vp_d, .broadcast },
- },
- 33...64 => switch (vector_len) {
- else => null,
- 1...4 => .{ .vp_q, .broadcast },
- },
- 65...128 => switch (vector_len) {
- else => null,
- 1...2 => .{ .v_i128, .broadcast },
- },
- }) orelse break :avx2;
-
- const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
- const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
- defer self.register_manager.unlockReg(dst_lock);
-
- const src_mcv = try self.resolveInst(ty_op.operand);
- if (src_mcv.isBase()) try self.asmRegisterMemory(
- mir_tag,
- registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu))),
- try src_mcv.mem(self, .{ .size = self.memSize(scalar_ty) }),
- ) else {
- if (mir_tag[0] == .v_i128) break :avx2;
- try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
- try self.asmRegisterRegister(
- mir_tag,
- registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu))),
- registerAlias(dst_reg, @intCast(scalar_ty.abiSize(zcu))),
- );
- }
- break :result .{ .register = dst_reg };
- } else {
- const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
- const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
- defer self.register_manager.unlockReg(dst_lock);
-
- try self.genSetReg(dst_reg, scalar_ty, .{ .air_ref = ty_op.operand }, .{});
- if (vector_len == 1) break :result .{ .register = dst_reg };
+ if (scalar_ty.toIntern() != .bool_type) return self.fail("TODO implement airSplat for {f}", .{
+ vector_ty.fmt(pt),
+ });
+ const regs =
+ try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp);
+ const reg_locks = self.register_manager.lockRegsAssumeUnused(2, regs);
+ defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
- const dst_alias = registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu)));
- const scalar_bits = scalar_ty.intInfo(zcu).bits;
- if (switch (scalar_bits) {
- 1...8 => true,
- 9...128 => false,
- else => unreachable,
- }) if (self.hasFeature(.avx)) try self.asmRegisterRegisterRegister(
- .{ .vp_, .unpcklbw },
- dst_alias,
- dst_alias,
- dst_alias,
- ) else try self.asmRegisterRegister(
- .{ .p_, .unpcklbw },
- dst_alias,
- dst_alias,
- );
- if (switch (scalar_bits) {
- 1...8 => vector_len > 2,
- 9...16 => true,
- 17...128 => false,
- else => unreachable,
- }) try self.asmRegisterRegisterImmediate(
- .{ if (self.hasFeature(.avx)) .vp_w else .p_w, .shufl },
- dst_alias,
- dst_alias,
- .u(0b00_00_00_00),
- );
- if (switch (scalar_bits) {
- 1...8 => vector_len > 4,
- 9...16 => vector_len > 2,
- 17...64 => true,
- 65...128 => false,
- else => unreachable,
- }) try self.asmRegisterRegisterImmediate(
- .{ if (self.hasFeature(.avx)) .vp_d else .p_d, .shuf },
- dst_alias,
- dst_alias,
- .u(if (scalar_bits <= 64) 0b00_00_00_00 else 0b01_00_01_00),
- );
- break :result .{ .register = dst_reg };
- },
- .float => switch (scalar_ty.floatBits(self.target)) {
- 32 => switch (vector_len) {
- 1 => {
- const src_mcv = try self.resolveInst(ty_op.operand);
- if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
- const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
- try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
- break :result .{ .register = dst_reg };
- },
- 2...4 => {
- const src_mcv = try self.resolveInst(ty_op.operand);
- if (self.hasFeature(.avx)) {
- const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
- if (src_mcv.isBase()) try self.asmRegisterMemory(
- .{ .v_ss, .broadcast },
- dst_reg.to128(),
- try src_mcv.mem(self, .{ .size = .dword }),
- ) else {
- const src_reg = if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(scalar_ty, src_mcv);
- try self.asmRegisterRegisterRegisterImmediate(
- .{ .v_ps, .shuf },
- dst_reg.to128(),
- src_reg.to128(),
- src_reg.to128(),
- .u(0),
- );
- }
- break :result .{ .register = dst_reg };
- } else {
- const dst_mcv = if (src_mcv.isRegister() and
- self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_mcv
- else
- try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv);
- const dst_reg = dst_mcv.getReg().?;
- try self.asmRegisterRegisterImmediate(
- .{ ._ps, .shuf },
- dst_reg.to128(),
- dst_reg.to128(),
- .u(0),
- );
- break :result dst_mcv;
- }
- },
- 5...8 => if (self.hasFeature(.avx)) {
- const src_mcv = try self.resolveInst(ty_op.operand);
- const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
- if (src_mcv.isBase()) try self.asmRegisterMemory(
- .{ .v_ss, .broadcast },
- dst_reg.to256(),
- try src_mcv.mem(self, .{ .size = .dword }),
- ) else {
- const src_reg = if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(scalar_ty, src_mcv);
- if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
- .{ .v_ss, .broadcast },
- dst_reg.to256(),
- src_reg.to128(),
- ) else {
- try self.asmRegisterRegisterRegisterImmediate(
- .{ .v_ps, .shuf },
- dst_reg.to128(),
- src_reg.to128(),
- src_reg.to128(),
- .u(0),
- );
- try self.asmRegisterRegisterRegisterImmediate(
- .{ .v_f128, .insert },
- dst_reg.to256(),
- dst_reg.to256(),
- dst_reg.to128(),
- .u(1),
- );
- }
- }
- break :result .{ .register = dst_reg };
- },
- else => {},
- },
- 64 => switch (vector_len) {
- 1 => {
- const src_mcv = try self.resolveInst(ty_op.operand);
- if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
- const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
- try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
- break :result .{ .register = dst_reg };
- },
- 2 => {
- const src_mcv = try self.resolveInst(ty_op.operand);
- const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
- if (self.hasFeature(.sse3)) {
- if (src_mcv.isBase()) try self.asmRegisterMemory(
- if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
- dst_reg.to128(),
- try src_mcv.mem(self, .{ .size = .qword }),
- ) else try self.asmRegisterRegister(
- if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
- dst_reg.to128(),
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(),
- );
- break :result .{ .register = dst_reg };
- } else try self.asmRegisterRegister(
- .{ ._ps, .movlh },
- dst_reg.to128(),
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(),
- );
- },
- 3...4 => if (self.hasFeature(.avx)) {
- const src_mcv = try self.resolveInst(ty_op.operand);
- const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
- if (src_mcv.isBase()) try self.asmRegisterMemory(
- .{ .v_sd, .broadcast },
- dst_reg.to256(),
- try src_mcv.mem(self, .{ .size = .qword }),
- ) else {
- const src_reg = if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(scalar_ty, src_mcv);
- if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
- .{ .v_sd, .broadcast },
- dst_reg.to256(),
- src_reg.to128(),
- ) else {
- try self.asmRegisterRegister(
- .{ .v_, .movddup },
- dst_reg.to128(),
- src_reg.to128(),
- );
- try self.asmRegisterRegisterRegisterImmediate(
- .{ .v_f128, .insert },
- dst_reg.to256(),
- dst_reg.to256(),
- dst_reg.to128(),
- .u(1),
- );
- }
- }
- break :result .{ .register = dst_reg };
- },
- else => {},
+ try self.genSetReg(regs[1], vector_ty, .{ .immediate = 0 }, .{});
+ try self.genSetReg(
+ regs[1],
+ vector_ty,
+ .{ .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - vector_len) },
+ .{},
+ );
+ const src_mcv = try self.resolveInst(ty_op.operand);
+ const abi_size = @max(std.math.divCeil(u32, vector_len, 8) catch unreachable, 4);
+ try self.asmCmovccRegisterRegister(
+ switch (src_mcv) {
+ .eflags => |cc| cc,
+ .register => |src_reg| cc: {
+ try self.asmRegisterImmediate(.{ ._, .@"test" }, src_reg.to8(), .u(1));
+ break :cc .nz;
},
- 128 => switch (vector_len) {
- 1 => {
- const src_mcv = try self.resolveInst(ty_op.operand);
- if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
- const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
- try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
- break :result .{ .register = dst_reg };
- },
- 2 => if (self.hasFeature(.avx)) {
- const src_mcv = try self.resolveInst(ty_op.operand);
- const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
- if (src_mcv.isBase()) try self.asmRegisterMemory(
- .{ .v_f128, .broadcast },
- dst_reg.to256(),
- try src_mcv.mem(self, .{ .size = .xword }),
- ) else {
- const src_reg = if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(scalar_ty, src_mcv);
- try self.asmRegisterRegisterRegisterImmediate(
- .{ .v_f128, .insert },
- dst_reg.to256(),
- src_reg.to256(),
- src_reg.to128(),
- .u(1),
- );
- }
- break :result .{ .register = dst_reg };
- },
- else => {},
+ else => cc: {
+ try self.asmMemoryImmediate(
+ .{ ._, .@"test" },
+ try src_mcv.mem(self, .{ .size = .byte }),
+ .u(1),
+ );
+ break :cc .nz;
},
- 16, 80 => {},
- else => unreachable,
},
- }
- return self.fail("TODO implement airSplat for {f}", .{vector_ty.fmt(pt)});
+ registerAlias(regs[0], abi_size),
+ registerAlias(regs[1], abi_size),
+ );
+ break :result .{ .register = regs[0] };
};
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
@@ -185349,161 +181442,135 @@ fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void {
const result: MCValue = result: {
switch (result_ty.zigTypeTag(zcu)) {
.@"struct" => {
+ if (result_ty.containerLayout(zcu) == .@"packed") return self.fail(
+ "TODO implement airAggregateInit for {f}",
+ .{result_ty.fmt(pt)},
+ );
const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu));
- if (result_ty.containerLayout(zcu) == .@"packed") {
- const loaded_struct = zcu.intern_pool.loadStructType(result_ty.toIntern());
- try self.genInlineMemset(
- .{ .lea_frame = .{ .index = frame_index } },
- .{ .immediate = 0 },
- .{ .immediate = result_ty.abiSize(zcu) },
- .{},
- );
- for (elements, 0..) |elem, elem_i_usize| {
- const elem_i: u32 = @intCast(elem_i_usize);
- if ((try result_ty.structFieldValueComptime(pt, elem_i)) != null) continue;
-
- const elem_ty = result_ty.fieldType(elem_i, zcu);
- const elem_bit_size: u32 = @intCast(elem_ty.bitSize(zcu));
- if (elem_bit_size > 64) {
- return self.fail(
- "TODO airAggregateInit implement packed structs with large fields",
- .{},
- );
- }
- const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu));
- const elem_abi_bits = elem_abi_size * 8;
- const elem_off = zcu.structPackedFieldBitOffset(loaded_struct, elem_i);
- const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size);
- const elem_bit_off = elem_off % elem_abi_bits;
- const elem_mcv = try self.resolveInst(elem);
- const elem_lock = switch (elem_mcv) {
- .register => |reg| self.register_manager.lockReg(reg),
- .immediate => |imm| lock: {
- if (imm == 0) continue;
- break :lock null;
- },
- else => null,
- };
- defer if (elem_lock) |lock| self.register_manager.unlockReg(lock);
-
- const elem_extra_bits = self.regExtraBits(elem_ty);
- {
- const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv);
- const temp_alias = registerAlias(temp_reg, elem_abi_size);
- const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
- defer self.register_manager.unlockReg(temp_lock);
-
- if (elem_bit_off < elem_extra_bits) {
- try self.truncateRegister(elem_ty, temp_alias);
- }
- if (elem_bit_off > 0) try self.genShiftBinOpMir(
- .{ ._l, .sh },
- elem_ty,
- .{ .register = temp_alias },
- .u8,
- .{ .immediate = elem_bit_off },
- );
- try self.genBinOpMir(
- .{ ._, .@"or" },
- elem_ty,
- .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } },
- .{ .register = temp_alias },
- );
- }
- if (elem_bit_off > elem_extra_bits) {
- const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv);
- const temp_alias = registerAlias(temp_reg, elem_abi_size);
- const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
- defer self.register_manager.unlockReg(temp_lock);
-
- if (elem_extra_bits > 0) {
- try self.truncateRegister(elem_ty, temp_alias);
- }
- try self.genShiftBinOpMir(
- .{ ._r, .sh },
- elem_ty,
- .{ .register = temp_reg },
- .u8,
- .{ .immediate = elem_abi_bits - elem_bit_off },
- );
- try self.genBinOpMir(
- .{ ._, .@"or" },
- elem_ty,
- .{ .load_frame = .{
- .index = frame_index,
- .off = elem_byte_off + @as(i32, @intCast(elem_abi_size)),
- } },
- .{ .register = temp_alias },
- );
- }
- }
- } else for (elements, 0..) |elem, elem_i| {
+ const loaded_struct = zcu.intern_pool.loadStructType(result_ty.toIntern());
+ try self.genInlineMemset(
+ .{ .lea_frame = .{ .index = frame_index } },
+ .{ .immediate = 0 },
+ .{ .immediate = result_ty.abiSize(zcu) },
+ .{},
+ );
+ for (elements, 0..) |elem, elem_i_usize| {
+ const elem_i: u32 = @intCast(elem_i_usize);
if ((try result_ty.structFieldValueComptime(pt, elem_i)) != null) continue;
const elem_ty = result_ty.fieldType(elem_i, zcu);
- const elem_off: i32 = @intCast(result_ty.structFieldOffset(elem_i, zcu));
+ const elem_bit_size: u32 = @intCast(elem_ty.bitSize(zcu));
+ if (elem_bit_size > 64) {
+ return self.fail(
+ "TODO airAggregateInit implement packed structs with large fields",
+ .{},
+ );
+ }
+ const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu));
+ const elem_abi_bits = elem_abi_size * 8;
+ const elem_off = zcu.structPackedFieldBitOffset(loaded_struct, elem_i);
+ const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size);
+ const elem_bit_off = elem_off % elem_abi_bits;
const elem_mcv = try self.resolveInst(elem);
- try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, elem_mcv, .{});
- }
- break :result .{ .load_frame = .{ .index = frame_index } };
- },
- .array, .vector => {
- const elem_ty = result_ty.childType(zcu);
- if (result_ty.isVector(zcu) and elem_ty.toIntern() == .bool_type) {
- const result_size: u32 = @intCast(result_ty.abiSize(zcu));
- const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
- try self.asmRegisterRegister(
- .{ ._, .xor },
- registerAlias(dst_reg, @min(result_size, 4)),
- registerAlias(dst_reg, @min(result_size, 4)),
- );
+ const elem_lock = switch (elem_mcv) {
+ .register => |reg| self.register_manager.lockReg(reg),
+ .immediate => |imm| lock: {
+ if (imm == 0) continue;
+ break :lock null;
+ },
+ else => null,
+ };
+ defer if (elem_lock) |lock| self.register_manager.unlockReg(lock);
- for (elements, 0..) |elem, elem_i| {
- const elem_reg = try self.copyToTmpRegister(elem_ty, .{ .air_ref = elem });
- const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg);
- defer self.register_manager.unlockReg(elem_lock);
+ const elem_extra_bits = self.regExtraBits(elem_ty);
+ {
+ const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv);
+ const temp_alias = registerAlias(temp_reg, elem_abi_size);
+ const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
+ defer self.register_manager.unlockReg(temp_lock);
- try self.asmRegisterImmediate(
- .{ ._, .@"and" },
- registerAlias(elem_reg, @min(result_size, 4)),
- .u(1),
- );
- if (elem_i > 0) try self.asmRegisterImmediate(
+ if (elem_bit_off < elem_extra_bits) {
+ try self.truncateRegister(elem_ty, temp_alias);
+ }
+ if (elem_bit_off > 0) try self.genShiftBinOpMir(
.{ ._l, .sh },
- registerAlias(elem_reg, result_size),
- .u(@intCast(elem_i)),
+ elem_ty,
+ .{ .register = temp_alias },
+ .u8,
+ .{ .immediate = elem_bit_off },
);
- try self.asmRegisterRegister(
+ try self.genBinOpMir(
.{ ._, .@"or" },
- registerAlias(dst_reg, result_size),
- registerAlias(elem_reg, result_size),
+ elem_ty,
+ .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } },
+ .{ .register = temp_alias },
);
}
- break :result .{ .register = dst_reg };
- } else {
- const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu));
- const elem_size: u32 = @intCast(elem_ty.abiSize(zcu));
-
- for (elements, 0..) |elem, elem_i| {
- const elem_mcv = try self.resolveInst(elem);
- const elem_off: i32 = @intCast(elem_size * elem_i);
- try self.genSetMem(
- .{ .frame = frame_index },
- elem_off,
+ if (elem_bit_off > elem_extra_bits) {
+ const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv);
+ const temp_alias = registerAlias(temp_reg, elem_abi_size);
+ const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
+ defer self.register_manager.unlockReg(temp_lock);
+
+ if (elem_extra_bits > 0) {
+ try self.truncateRegister(elem_ty, temp_alias);
+ }
+ try self.genShiftBinOpMir(
+ .{ ._r, .sh },
elem_ty,
- elem_mcv,
- .{},
+ .{ .register = temp_reg },
+ .u8,
+ .{ .immediate = elem_abi_bits - elem_bit_off },
+ );
+ try self.genBinOpMir(
+ .{ ._, .@"or" },
+ elem_ty,
+ .{ .load_frame = .{
+ .index = frame_index,
+ .off = elem_byte_off + @as(i32, @intCast(elem_abi_size)),
+ } },
+ .{ .register = temp_alias },
);
}
- if (result_ty.sentinel(zcu)) |sentinel| try self.genSetMem(
- .{ .frame = frame_index },
- @intCast(elem_size * elements.len),
- elem_ty,
- try self.lowerValue(sentinel),
- .{},
+ }
+ break :result .{ .load_frame = .{ .index = frame_index } };
+ },
+ .vector => {
+ const elem_ty = result_ty.childType(zcu);
+ if (elem_ty.toIntern() != .bool_type) return self.fail(
+ "TODO implement airAggregateInit for {f}",
+ .{result_ty.fmt(pt)},
+ );
+ const result_size: u32 = @intCast(result_ty.abiSize(zcu));
+ const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
+ try self.asmRegisterRegister(
+ .{ ._, .xor },
+ registerAlias(dst_reg, @min(result_size, 4)),
+ registerAlias(dst_reg, @min(result_size, 4)),
+ );
+
+ for (elements, 0..) |elem, elem_i| {
+ const elem_reg = try self.copyToTmpRegister(elem_ty, .{ .air_ref = elem });
+ const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg);
+ defer self.register_manager.unlockReg(elem_lock);
+
+ try self.asmRegisterImmediate(
+ .{ ._, .@"and" },
+ registerAlias(elem_reg, @min(result_size, 4)),
+ .u(1),
+ );
+ if (elem_i > 0) try self.asmRegisterImmediate(
+ .{ ._l, .sh },
+ registerAlias(elem_reg, result_size),
+ .u(@intCast(elem_i)),
+ );
+ try self.asmRegisterRegister(
+ .{ ._, .@"or" },
+ registerAlias(dst_reg, result_size),
+ registerAlias(elem_reg, result_size),
);
- break :result .{ .load_frame = .{ .index = frame_index } };
}
+ break :result .{ .register = dst_reg };
},
else => unreachable,
}
@@ -185519,220 +181586,6 @@ fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void {
return self.finishAirResult(inst, result);
}
-fn airUnionInit(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const ip = &zcu.intern_pool;
- const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
- const extra = self.air.extraData(Air.UnionInit, ty_pl.payload).data;
- const result: MCValue = result: {
- const union_ty = self.typeOfIndex(inst);
- const layout = union_ty.unionGetLayout(zcu);
-
- const src_ty = self.typeOf(extra.init);
- const src_mcv = try self.resolveInst(extra.init);
- if (layout.tag_size == 0) {
- if (layout.abi_size <= src_ty.abiSize(zcu) and
- self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv;
-
- const dst_mcv = try self.allocRegOrMem(inst, true);
- try self.genCopy(src_ty, dst_mcv, src_mcv, .{});
- break :result dst_mcv;
- }
-
- const dst_mcv = try self.allocRegOrMem(inst, false);
-
- const loaded_union = zcu.typeToUnion(union_ty).?;
- const field_name = loaded_union.loadTagType(ip).names.get(ip)[extra.field_index];
- const tag_ty: Type = .fromInterned(loaded_union.enum_tag_ty);
- const field_index = tag_ty.enumFieldIndex(field_name, zcu).?;
- const tag_val = try pt.enumValueFieldIndex(tag_ty, field_index);
- const tag_int_val = try tag_val.intFromEnum(tag_ty, pt);
- const tag_int = tag_int_val.toUnsignedInt(zcu);
- const tag_off: i32 = @intCast(layout.tagOffset());
- try self.genCopy(
- tag_ty,
- dst_mcv.address().offset(tag_off).deref(),
- .{ .immediate = tag_int },
- .{},
- );
-
- const pl_off: i32 = @intCast(layout.payloadOffset());
- try self.genCopy(src_ty, dst_mcv.address().offset(pl_off).deref(), src_mcv, .{});
-
- break :result dst_mcv;
- };
- return self.finishAir(inst, result, .{ extra.init, .none, .none });
-}
-
-fn airMulAdd(self: *CodeGen, inst: Air.Inst.Index) !void {
- const pt = self.pt;
- const zcu = pt.zcu;
- const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
- const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
- const ty = self.typeOfIndex(inst);
-
- const ops = [3]Air.Inst.Ref{ extra.lhs, extra.rhs, pl_op.operand };
- const result = result: {
- if (switch (ty.scalarType(zcu).floatBits(self.target)) {
- 16, 80, 128 => true,
- 32, 64 => !self.hasFeature(.fma),
- else => unreachable,
- }) {
- if (ty.zigTypeTag(zcu) != .float) return self.fail("TODO implement airMulAdd for {f}", .{
- ty.fmt(pt),
- });
-
- var sym_buf: ["__fma?".len]u8 = undefined;
- break :result try self.genCall(.{ .extern_func = .{
- .return_type = ty.toIntern(),
- .param_types = &.{ ty.toIntern(), ty.toIntern(), ty.toIntern() },
- .sym = std.fmt.bufPrint(&sym_buf, "{s}fma{s}", .{
- floatLibcAbiPrefix(ty),
- floatLibcAbiSuffix(ty),
- }) catch unreachable,
- } }, &.{ ty, ty, ty }, &.{
- .{ .air_ref = extra.lhs }, .{ .air_ref = extra.rhs }, .{ .air_ref = pl_op.operand },
- }, .{});
- }
-
- var mcvs: [3]MCValue = undefined;
- var locks: [3]?RegisterManager.RegisterLock = @splat(null);
- defer for (locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
- var order: [3]u2 = @splat(0);
- var unused: std.StaticBitSet(3) = .initFull();
- for (ops, &mcvs, &locks, 0..) |op, *mcv, *lock, op_i| {
- const op_index: u2 = @intCast(op_i);
- mcv.* = try self.resolveInst(op);
- if (unused.isSet(0) and mcv.isRegister() and self.reuseOperand(inst, op, op_index, mcv.*)) {
- order[op_index] = 1;
- unused.unset(0);
- } else if (unused.isSet(2) and mcv.isBase()) {
- order[op_index] = 3;
- unused.unset(2);
- }
- switch (mcv.*) {
- .register => |reg| lock.* = self.register_manager.lockReg(reg),
- else => {},
- }
- }
- for (&order, &mcvs, &locks) |*mop_index, *mcv, *lock| {
- if (mop_index.* != 0) continue;
- mop_index.* = 1 + @as(u2, @intCast(unused.toggleFirstSet().?));
- if (mop_index.* > 1 and mcv.isRegister()) continue;
- const reg = try self.copyToTmpRegister(ty, mcv.*);
- mcv.* = .{ .register = reg };
- if (lock.*) |old_lock| self.register_manager.unlockReg(old_lock);
- lock.* = self.register_manager.lockRegAssumeUnused(reg);
- }
-
- const mir_tag = @as(?Mir.Inst.FixedTag, if (std.mem.eql(u2, &order, &.{ 1, 3, 2 }) or
- std.mem.eql(u2, &order, &.{ 3, 1, 2 }))
- switch (ty.zigTypeTag(zcu)) {
- .float => switch (ty.floatBits(self.target)) {
- 32 => .{ .v_ss, .fmadd132 },
- 64 => .{ .v_sd, .fmadd132 },
- 16, 80, 128 => null,
- else => unreachable,
- },
- .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
- .float => switch (ty.childType(zcu).floatBits(self.target)) {
- 32 => switch (ty.vectorLen(zcu)) {
- 1 => .{ .v_ss, .fmadd132 },
- 2...8 => .{ .v_ps, .fmadd132 },
- else => null,
- },
- 64 => switch (ty.vectorLen(zcu)) {
- 1 => .{ .v_sd, .fmadd132 },
- 2...4 => .{ .v_pd, .fmadd132 },
- else => null,
- },
- 16, 80, 128 => null,
- else => unreachable,
- },
- else => unreachable,
- },
- else => unreachable,
- }
- else if (std.mem.eql(u2, &order, &.{ 2, 1, 3 }) or std.mem.eql(u2, &order, &.{ 1, 2, 3 }))
- switch (ty.zigTypeTag(zcu)) {
- .float => switch (ty.floatBits(self.target)) {
- 32 => .{ .v_ss, .fmadd213 },
- 64 => .{ .v_sd, .fmadd213 },
- 16, 80, 128 => null,
- else => unreachable,
- },
- .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
- .float => switch (ty.childType(zcu).floatBits(self.target)) {
- 32 => switch (ty.vectorLen(zcu)) {
- 1 => .{ .v_ss, .fmadd213 },
- 2...8 => .{ .v_ps, .fmadd213 },
- else => null,
- },
- 64 => switch (ty.vectorLen(zcu)) {
- 1 => .{ .v_sd, .fmadd213 },
- 2...4 => .{ .v_pd, .fmadd213 },
- else => null,
- },
- 16, 80, 128 => null,
- else => unreachable,
- },
- else => unreachable,
- },
- else => unreachable,
- }
- else if (std.mem.eql(u2, &order, &.{ 2, 3, 1 }) or std.mem.eql(u2, &order, &.{ 3, 2, 1 }))
- switch (ty.zigTypeTag(zcu)) {
- .float => switch (ty.floatBits(self.target)) {
- 32 => .{ .v_ss, .fmadd231 },
- 64 => .{ .v_sd, .fmadd231 },
- 16, 80, 128 => null,
- else => unreachable,
- },
- .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) {
- .float => switch (ty.childType(zcu).floatBits(self.target)) {
- 32 => switch (ty.vectorLen(zcu)) {
- 1 => .{ .v_ss, .fmadd231 },
- 2...8 => .{ .v_ps, .fmadd231 },
- else => null,
- },
- 64 => switch (ty.vectorLen(zcu)) {
- 1 => .{ .v_sd, .fmadd231 },
- 2...4 => .{ .v_pd, .fmadd231 },
- else => null,
- },
- 16, 80, 128 => null,
- else => unreachable,
- },
- else => unreachable,
- },
- else => unreachable,
- }
- else
- unreachable) orelse return self.fail("TODO implement airMulAdd for {f}", .{ty.fmt(pt)});
-
- var mops: [3]MCValue = undefined;
- for (order, mcvs) |mop_index, mcv| mops[mop_index - 1] = mcv;
-
- const abi_size: u32 = @intCast(ty.abiSize(zcu));
- const mop1_reg = registerAlias(mops[0].getReg().?, abi_size);
- const mop2_reg = registerAlias(mops[1].getReg().?, abi_size);
- if (mops[2].isRegister()) try self.asmRegisterRegisterRegister(
- mir_tag,
- mop1_reg,
- mop2_reg,
- registerAlias(mops[2].getReg().?, abi_size),
- ) else try self.asmRegisterRegisterMemory(
- mir_tag,
- mop1_reg,
- mop2_reg,
- try mops[2].mem(self, .{ .size = .fromSize(abi_size) }),
- );
- break :result mops[0];
- };
- return self.finishAir(inst, result, ops);
-}
-
fn airVaStart(self: *CodeGen, inst: Air.Inst.Index) !void {
const pt = self.pt;
const zcu = pt.zcu;
@@ -186004,27 +181857,6 @@ fn getResolvedInstValue(self: *CodeGen, inst: Air.Inst.Index) *InstTracking {
};
}
-/// If the MCValue is an immediate, and it does not fit within this type,
-/// we put it in a register.
-/// A potential opportunity for future optimization here would be keeping track
-/// of the fact that the instruction is available both as an immediate
-/// and as a register.
-fn limitImmediateType(self: *CodeGen, operand: Air.Inst.Ref, comptime T: type) !MCValue {
- const mcv = try self.resolveInst(operand);
- const ti = @typeInfo(T).int;
- switch (mcv) {
- .immediate => |imm| {
- // This immediate is unsigned.
- const U = std.meta.Int(.unsigned, ti.bits - @intFromBool(ti.signedness == .signed));
- if (imm >= std.math.maxInt(U)) {
- return MCValue{ .register = try self.copyToTmpRegister(.usize, mcv) };
- }
- },
- else => {},
- }
- return mcv;
-}
-
fn lowerValue(cg: *CodeGen, val: Value) Allocator.Error!MCValue {
return switch (try codegen.lowerValue(cg.pt, val, cg.target)) {
.none => .none,
@@ -186134,7 +181966,7 @@ fn resolveCallingConventionValues(
const classes = switch (cc) {
.x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ret_ty, zcu, cg.target, .ret), .none),
- .x86_64_win => &.{abi.classifyWindows(ret_ty, zcu, cg.target)},
+ .x86_64_win => &.{abi.classifyWindows(ret_ty, zcu, cg.target, .ret)},
else => unreachable,
};
for (classes) |class| switch (class) {
@@ -186215,7 +182047,7 @@ fn resolveCallingConventionValues(
const classes = switch (cc) {
.x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ty, zcu, cg.target, .arg), .none),
- .x86_64_win => &.{abi.classifyWindows(ty, zcu, cg.target)},
+ .x86_64_win => &.{abi.classifyWindows(ty, zcu, cg.target, .arg)},
else => unreachable,
};
classes: for (classes) |class| switch (class) {
@@ -186678,53 +182510,6 @@ fn typeOfIndex(self: *CodeGen, inst: Air.Inst.Index) Type {
return Temp.typeOf(.{ .index = inst }, self);
}
-fn intCompilerRtAbiName(int_bits: u32) u8 {
- return switch (int_bits) {
- 1...32 => 's',
- 33...64 => 'd',
- 65...128 => 't',
- else => unreachable,
- };
-}
-
-fn floatCompilerRtAbiName(float_bits: u32) u8 {
- return switch (float_bits) {
- 16 => 'h',
- 32 => 's',
- 64 => 'd',
- 80 => 'x',
- 128 => 't',
- else => unreachable,
- };
-}
-
-fn floatCompilerRtAbiType(self: *CodeGen, ty: Type, other_ty: Type) Type {
- if (ty.toIntern() == .f16_type and
- (other_ty.toIntern() == .f32_type or other_ty.toIntern() == .f64_type) and
- self.target.os.tag.isDarwin()) return .u16;
- return ty;
-}
-
-fn floatLibcAbiPrefix(ty: Type) []const u8 {
- return switch (ty.toIntern()) {
- .f16_type, .f80_type => "__",
- .f32_type, .f64_type, .f128_type, .c_longdouble_type => "",
- else => unreachable,
- };
-}
-
-fn floatLibcAbiSuffix(ty: Type) []const u8 {
- return switch (ty.toIntern()) {
- .f16_type => "h",
- .f32_type => "f",
- .f64_type => "",
- .f80_type => "x",
- .f128_type => "q",
- .c_longdouble_type => "l",
- else => unreachable,
- };
-}
-
fn promoteInt(self: *CodeGen, ty: Type) Type {
const pt = self.pt;
const zcu = pt.zcu;
src/codegen/llvm.zig
@@ -12103,7 +12103,7 @@ fn firstParamSRet(fn_info: InternPool.Key.FuncType, zcu: *Zcu, target: *const st
return switch (fn_info.cc) {
.auto => returnTypeByRef(zcu, target, return_type),
.x86_64_sysv => firstParamSRetSystemV(return_type, zcu, target),
- .x86_64_win => x86_64_abi.classifyWindows(return_type, zcu, target) == .memory,
+ .x86_64_win => x86_64_abi.classifyWindows(return_type, zcu, target, .ret) == .memory,
.x86_sysv, .x86_win => isByRef(return_type, zcu),
.x86_stdcall => !isScalar(zcu, return_type),
.wasm_mvp => wasm_c_abi.classifyType(return_type, zcu) == .indirect,
@@ -12205,7 +12205,7 @@ fn lowerFnRetTy(o: *Object, pt: Zcu.PerThread, fn_info: InternPool.Key.FuncType)
fn lowerWin64FnRetTy(o: *Object, pt: Zcu.PerThread, fn_info: InternPool.Key.FuncType) Allocator.Error!Builder.Type {
const zcu = pt.zcu;
const return_type = Type.fromInterned(fn_info.return_type);
- switch (x86_64_abi.classifyWindows(return_type, zcu, zcu.getTarget())) {
+ switch (x86_64_abi.classifyWindows(return_type, zcu, zcu.getTarget(), .ret)) {
.integer => {
if (isScalar(zcu, return_type)) {
return o.lowerType(pt, return_type);
@@ -12476,7 +12476,7 @@ const ParamTypeIterator = struct {
fn nextWin64(it: *ParamTypeIterator, ty: Type) ?Lowering {
const zcu = it.pt.zcu;
- switch (x86_64_abi.classifyWindows(ty, zcu, zcu.getTarget())) {
+ switch (x86_64_abi.classifyWindows(ty, zcu, zcu.getTarget(), .arg)) {
.integer => {
if (isScalar(zcu, ty)) {
it.zig_index += 1;
test/behavior/x86_64/binary.zig
@@ -5172,15 +5172,6 @@ test mulSaturate {
try test_mul_saturate.testIntVectors();
}
-inline fn multiply(comptime Type: type, lhs: Type, rhs: Type) Type {
- return lhs * rhs;
-}
-test multiply {
- const test_multiply = binary(multiply, .{});
- try test_multiply.testFloats();
- try test_multiply.testFloatVectors();
-}
-
inline fn divide(comptime Type: type, lhs: Type, rhs: Type) Type {
return lhs / rhs;
}
@@ -5264,7 +5255,7 @@ inline fn mod(comptime Type: type, lhs: Type, rhs: Type) Type {
return @mod(lhs, rhs);
}
test mod {
- if (@import("builtin").object_format == .coff) return error.SkipZigTest;
+ if (@import("builtin").object_format == .coff and @import("builtin").target.abi != .gnu) return error.SkipZigTest;
const test_mod = binary(mod, .{});
try test_mod.testInts();
try test_mod.testIntVectors();