Commit dcc9fe322e
Changed files (8)
src
src/arch/x86_64/CodeGen.zig
@@ -6036,10 +6036,377 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.sub_safe => unreachable,
.mul, .mul_optimized => |air_tag| if (use_old) try cg.airMulDivBinOp(inst, .mul) else fallback: {
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
- if (cg.floatBits(cg.typeOf(bin_op.lhs).scalarType(zcu)) == null) break :fallback try cg.airMulDivBinOp(inst, .mul);
+ const ty = cg.typeOf(bin_op.lhs);
+ if (ty.isVector(zcu) and cg.floatBits(ty.childType(zcu)) == null) break :fallback try cg.airMulDivBinOp(inst, .mul);
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
var res: [1]Temp = undefined;
- cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{
+ cg.select(&res, &.{ty}, &ops, comptime &.{ .{
+ .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, .i_, .mul, .src1b, ._, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any },
+ .patterns = &.{
+ .{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
+ .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mul, .src1b, ._, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .imm16, .none } },
+ .{ .src = .{ .imm16, .mem, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_gpr, .imm16, .none } },
+ .{ .src = .{ .imm16, .to_gpr, .none }, .commute = .{ 0, 1 } },
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, .i_, .mul, .dst0w, .src0w, .src1w, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .int = .word }, .{ .int = .word }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .mem, .none } },
+ .{ .src = .{ .mem, .to_mut_gpr, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .to_gpr, .none } },
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, .i_, .mul, .dst0w, .src1w, ._, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .imm32, .none } },
+ .{ .src = .{ .imm32, .mem, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_gpr, .imm32, .none } },
+ .{ .src = .{ .imm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, .i_, .mul, .dst0d, .src0d, .src1d, ._ },
+ } },
+ }, .{
+ .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .mem, .none } },
+ .{ .src = .{ .mem, .to_mut_gpr, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .to_gpr, .none } },
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, .i_, .mul, .dst0d, .src1d, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .mem, .simm32, .none } },
+ .{ .src = .{ .simm32, .mem, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_gpr, .simm32, .none } },
+ .{ .src = .{ .simm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
+ },
+ .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, .i_, .mul, .dst0q, .src0q, .src1q, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword }, .any },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_gpr, .mem, .none } },
+ .{ .src = .{ .mem, .to_mut_gpr, .none }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_gpr, .to_gpr, .none } },
+ },
+ .dst_temps = .{ .{ .ref = .src0 }, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, .i_, .mul, .dst0q, .src1q, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword }, .any }, // integer operands handled as two qwords (offsets 0 and 8)
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .u64, .kind = .{ .reg = .rax } }, // referenced below as tmp0
+ .{ .type = .u64, .kind = .{ .reg = .rdx } }, // referenced below as tmp1
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ }, // rax = low qword of src0
+ .{ ._, ._, .mul, .src1q, ._, ._, ._ }, // one-operand mul: rdx:rax = rax * low qword of src1
+ .{ ._, ._, .mov, .dst0q, .tmp0q, ._, ._ }, // store low qword of the result
+ .{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ }, // reload low qword of src0 (rax was overwritten by mul)
+ .{ ._, .i_, .mul, .tmp0q, .memd(.src1q, 8), ._, ._ }, // rax = src0.lo * src1.hi, kept to 64 bits
+ .{ ._, ._, .add, .tmp1q, .tmp0q, ._, ._ }, // fold this cross term into the high qword held in rdx
+ .{ ._, ._, .mov, .tmp0q, .src1q, ._, ._ }, // rax = low qword of src1
+ .{ ._, .i_, .mul, .tmp0q, .memd(.src0q, 8), ._, ._ }, // rax = src1.lo * src0.hi, kept to 64 bits
+ .{ ._, ._, .add, .tmp1q, .tmp0q, ._, ._ }, // fold the other cross term into the high qword
+ .{ ._, ._, .mov, .memd(.dst0q, 8), .tmp1q, ._, ._ }, // store high qword of the result
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .bmi2, .adx, null },
+ .src_constraints = .{
+ .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+ .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .reg = .rdx } },
+ .{ .type = .isize, .kind = .{ .reg = .rcx } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+ .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+ .{ ._, ._, .@"or", .tmp2q, .memi(.src0q, .tmp0), ._, ._ },
+ .{ ._, ._z, .j, .@"2f", ._, ._, ._ },
+ .{ ._, ._, .lea, .tmp3p, .leaad(.tmp0, .sub_src0_size, 8), ._, ._ },
+ .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
+ .{ .@"1:", ._x, .mul, .tmp6q, .tmp5q, .leai(.tmp1q, .tmp3), ._ },
+ .{ ._, ._x, .adc, .tmp5q, .tmp4q, ._, ._ },
+ .{ ._, ._, .mov, .memiad(.dst0q, .tmp3, .add_size, -8), .tmp5q, ._, ._ },
+ .{ ._, ._rcxz, .j, .@"1f", ._, ._, ._ },
+ .{ ._, ._x, .ado, .tmp6q, .memia(.dst0q, .tmp3, .add_size), ._, ._ },
+ .{ ._, ._, .mov, .tmp4q, .tmp6q, ._, ._ },
+ .{ ._, ._, .lea, .tmp3p, .lead(.tmp3, 8), ._, ._ },
+ .{ ._, ._mp, .j, .@"1b", ._, ._, ._ },
+ .{ .@"2:", ._, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ },
+ .{ .@"1:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .bmi2, .slow_incdec, null },
+ .src_constraints = .{
+ .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+ .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .reg = .rdx } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ },
+ .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+ .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
+ .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
+ .{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ },
+ .{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
+ .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
+ .{ ._, ._mp, .j, .@"3f", ._, ._, ._ },
+ .{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ },
+ .{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ },
+ .{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ },
+ .{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ },
+ .{ .@"2:", ._x, .mul, .tmp7q, .tmp6q, .leasi(.tmp1q, .@"8", .tmp2), ._ },
+ .{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ },
+ .{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ },
+ .{ ._, ._c, .in, .tmp2p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+ .{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .bmi2, null, null },
+ .src_constraints = .{
+ .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+ .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .reg = .rdx } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ },
+ .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+ .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
+ .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
+ .{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ },
+ .{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
+ .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
+ .{ ._, ._mp, .j, .@"3f", ._, ._, ._ },
+ .{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ },
+ .{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ },
+ .{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ },
+ .{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ },
+ .{ .@"2:", ._x, .mul, .tmp7q, .tmp6q, .leasi(.tmp1q, .@"8", .tmp2), ._ },
+ .{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ },
+ .{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ },
+ .{ ._, ._c, .in, .tmp2p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+ .{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
+ .{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
+ .{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", .slow_incdec, null, null },
+ .src_constraints = .{
+ .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+ .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
+ .{ .type = .u64, .kind = .{ .reg = .rdx } },
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ },
+ .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+ .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
+ .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
+ .{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ },
+ .{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
+ .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
+ .{ ._, ._mp, .j, .@"3f", ._, ._, ._ },
+ .{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ },
+ .{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ },
+ .{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ },
+ .{ .@"2:", ._, .mov, .tmp6q, .tmp3q, ._, ._ },
+ .{ ._, ._, .mul, .leasi(.tmp1q, .@"8", .tmp2), ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ },
+ .{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ },
+ .{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ },
+ .{ ._, ._c, .in, .tmp2p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+ .{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
+ .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .@"64bit", null, null, null },
+ .src_constraints = .{
+ .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+ .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+ .any,
+ },
+ .patterns = &.{
+ .{ .src = .{ .to_mem, .to_mem, .none } },
+ },
+ .extra_temps = .{
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+ .{ .type = .u64, .kind = .{ .reg = .rax } },
+ .{ .type = .u64, .kind = .{ .reg = .rdx } },
+ .unused,
+ },
+ .dst_temps = .{ .mem, .unused },
+ .clobbers = .{ .eflags = true },
+ .each = .{ .once = &.{
+ .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
+ .{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ },
+ .{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ },
+ .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+ .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
+ .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
+ .{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ },
+ .{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
+ .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
+ .{ ._, ._mp, .j, .@"3f", ._, ._, ._ },
+ .{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ },
+ .{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ },
+ .{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ },
+ .{ .@"2:", ._, .mov, .tmp6q, .tmp3q, ._, ._ },
+ .{ ._, ._, .mul, .leasi(.tmp1q, .@"8", .tmp2), ._, ._, ._ },
+ .{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ },
+ .{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ },
+ .{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ },
+ .{ ._, ._c, .in, .tmp2p, ._, ._, ._ },
+ .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+ .{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
+ .{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
+ .{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
+ } },
+ }, .{
.required_features = .{ .f16c, null, null, null },
.src_constraints = .{
.{ .scalar_float = .{ .of = .word, .is = .word } },
@@ -6890,7 +7257,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} }) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
@tagName(air_tag),
- cg.typeOf(bin_op.lhs).fmt(pt),
+ ty.fmt(pt),
ops[0].tracking(cg),
ops[1].tracking(cg),
}),
@@ -92700,7 +93067,7 @@ const Select = struct {
const mir_tag: Mir.Inst.FixedTag = .{ inst[1], inst[2] };
pseudo: {
switch (inst[0]) {
- .@"0:", .@"1:", .@"2:" => |label| s.emitLabel(label),
+ .@"0:", .@"1:", .@"2:", .@"3:" => |label| s.emitLabel(label),
._ => {},
.pseudo => break :pseudo,
}
@@ -93578,7 +93945,7 @@ const Select = struct {
Select.Operand,
Select.Operand,
};
- const Label = enum { @"0:", @"1:", @"2:", @"_", pseudo };
+ const Label = enum { @"0:", @"1:", @"2:", @"3:", @"_", pseudo };
const Operand = struct {
flags: packed struct(u16) {
tag: Tag,
@@ -93609,6 +93976,7 @@ const Select = struct {
ptr_size,
ptr_bit_size,
size,
+ src0_size,
delta_size,
delta_elem_size,
size_add_elem_size,
@@ -93641,6 +94009,8 @@ const Select = struct {
const sub_size_div_8: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"8" };
const sub_size_div_4: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"4" };
const sub_size: Adjust = .{ .sign = .neg, .lhs = .size, .op = .mul, .rhs = .@"1" };
+ const sub_src0_size_div_8: Adjust = .{ .sign = .neg, .lhs = .src0_size, .op = .div, .rhs = .@"8" };
+ const sub_src0_size: Adjust = .{ .sign = .neg, .lhs = .src0_size, .op = .mul, .rhs = .@"1" };
const add_delta_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_size, .op = .div, .rhs = .@"8" };
const add_delta_elem_size: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .mul, .rhs = .@"1" };
const add_delta_elem_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .div, .rhs = .@"8" };
@@ -93882,6 +94252,8 @@ const Select = struct {
const @"1f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp1, .size = .none } };
const @"2b": Select.Operand = .{ .flags = .{ .tag = .backward_label }, .base = .{ .ref = .tmp2, .size = .none } };
const @"2f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp2, .size = .none } };
+ const @"3b": Select.Operand = .{ .flags = .{ .tag = .backward_label }, .base = .{ .ref = .tmp3, .size = .none } };
+ const @"3f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp3, .size = .none } };
const tmp0b: Select.Operand = .{ .flags = .{ .tag = .ref }, .base = .tmp0b };
const tmp0w: Select.Operand = .{ .flags = .{ .tag = .ref }, .base = .tmp0w };
@@ -94070,6 +94442,13 @@ const Select = struct {
.base = base,
};
}
+ fn leaad(base: Ref.Sized, adjust: Adjust, disp: i32) Select.Operand { // builds a `.lea`-tagged operand on `base` carrying both an `adjust` and a fixed displacement
+ return .{
+ .flags = .{ .tag = .lea, .adjust = adjust },
+ .base = base,
+ .imm = disp, // the displacement is stored in the operand's `imm` field
+ };
+ }
fn lead(base: Ref.Sized, disp: i32) Select.Operand {
return .{
.flags = .{ .tag = .lea },
@@ -94226,6 +94605,7 @@ const Select = struct {
.ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8),
.ptr_bit_size => s.cg.target.ptrBitWidth(),
.size => @intCast(op.base.ref.typeOf(s).abiSize(s.cg.pt.zcu)),
+ .src0_size => @intCast(Select.Operand.Ref.src0.typeOf(s).abiSize(s.cg.pt.zcu)),
.delta_size => @intCast(@as(SignedImm, @intCast(op.base.ref.typeOf(s).abiSize(s.cg.pt.zcu))) -
@as(SignedImm, @intCast(op.index.ref.typeOf(s).abiSize(s.cg.pt.zcu)))),
.delta_elem_size => @intCast(@as(SignedImm, @intCast(op.base.ref.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu))) -
src/arch/x86_64/Emit.zig
@@ -88,13 +88,32 @@ pub fn emitMir(emit: *Emit) Error!void {
lowered_relocs[0].lowered_inst_index == lowered_index) : ({
lowered_relocs = lowered_relocs[1..];
}) switch (lowered_relocs[0].target) {
- .inst => |target| try relocs.append(emit.lower.allocator, .{
- .source = start_offset,
- .source_offset = end_offset - 4,
- .target = target,
- .target_offset = lowered_relocs[0].off,
- .length = @intCast(end_offset - start_offset),
- }),
+ .inst => |target| { // relocation whose target is another MIR instruction
+ const inst_length: u4 = @intCast(end_offset - start_offset);
+ const reloc_offset, const reloc_length = reloc_offset_length: {
+ var reloc_offset = inst_length; // start at the end of the encoded instruction and walk backwards
+ var op_index: usize = lowered_inst.ops.len;
+ while (true) {
+ op_index -= 1; // NOTE(review): no lower-bound check; relies on the relocated operand index being reached
+ const op = lowered_inst.encoding.data.ops[op_index];
+ if (op == .none) continue;
+ const enc_length: u4 = @intCast(
+ std.math.divCeil(u7, @intCast(op.immBitSize()), 8) catch unreachable,
+ ); // immediate size of this operand, rounded up to whole bytes
+ reloc_offset -= enc_length;
+ if (op_index == lowered_relocs[0].op_index)
+ break :reloc_offset_length .{ reloc_offset, enc_length }; // offset within the instruction and byte width of the field to patch
+ }
+ };
+ try relocs.append(emit.lower.allocator, .{
+ .inst_offset = start_offset,
+ .inst_length = inst_length,
+ .source_offset = reloc_offset,
+ .source_length = reloc_length,
+ .target = target,
+ .target_offset = lowered_relocs[0].off,
+ });
+ },
.table => try table_relocs.append(emit.lower.allocator, .{
.source_offset = end_offset - 4,
.target_offset = lowered_relocs[0].off,
@@ -409,7 +428,7 @@ pub fn emitMir(emit: *Emit) Error!void {
} } };
},
.pseudo_dbg_local_am => loc: {
- const mem = emit.lower.mem(mir_inst.data.ax.payload);
+ const mem = emit.lower.mem(undefined, mir_inst.data.ax.payload);
break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{
base: {
loc_buf[0] = switch (mem.base()) {
@@ -466,15 +485,18 @@ pub fn emitMir(emit: *Emit) Error!void {
}
}
}
- {
- // TODO this function currently assumes all relocs via JMP/CALL instructions are 32bit in size.
- // This should be reversed like it is done in aarch64 MIR emit code: start with the smallest
- // possible resolution, i.e., 8bit, and iteratively converge on the minimum required resolution
- // until the entire decl is correctly emitted with all JMP/CALL instructions within range.
- for (relocs.items) |reloc| {
- const target = code_offset_mapping[reloc.target];
- const disp = @as(i64, @intCast(target)) - @as(i64, @intCast(reloc.source + reloc.length)) + reloc.target_offset;
- std.mem.writeInt(i32, emit.code.items[reloc.source_offset..][0..4], @intCast(disp), .little);
+ for (relocs.items) |reloc| {
+ const target = code_offset_mapping[reloc.target];
+ const disp = @as(i64, @intCast(target)) - @as(i64, @intCast(reloc.inst_offset + reloc.inst_length)) + reloc.target_offset; // measured from the end of the referencing instruction
+ const inst_bytes = emit.code.items[reloc.inst_offset..][0..reloc.inst_length];
+ switch (reloc.source_length) {
+ else => unreachable, // only 1- and 4-byte relocation fields are handled here
+ inline 1, 4 => |source_length| std.mem.writeInt( // comptime-known width selects a signed 8- or 32-bit write
+ @Type(.{ .int = .{ .signedness = .signed, .bits = @as(u16, 8) * source_length } }),
+ inst_bytes[reloc.source_offset..][0..source_length],
+ @intCast(disp), // narrowing cast: assumes the displacement fits the field width
+ .little,
+ ),
}
}
if (emit.lower.mir.table.len > 0) {
@@ -511,15 +533,17 @@ fn fail(emit: *Emit, comptime format: []const u8, args: anytype) Error {
const Reloc = struct { // one pending fixup: recorded in emitMir for `.inst` targets, patched once code offsets are known
/// Offset of the instruction.
- source: u32,
+ inst_offset: u32,
+ /// Length of the instruction.
+ inst_length: u4, // emitMir narrows `end_offset - start_offset` into this width
/// Offset of the relocation within the instruction.
- source_offset: u32,
+ source_offset: u4,
+ /// Length of the relocation.
+ source_length: u4, // in bytes; the patch loop in emitMir handles only 1 and 4
/// Target of the relocation.
target: Mir.Inst.Index,
- /// Offset from the target instruction.
+ /// Offset from the target.
target_offset: i32,
- /// Length of the instruction.
- length: u5,
};
const TableReloc = struct {
src/arch/x86_64/Encoding.zig
@@ -304,20 +304,20 @@ pub const Mnemonic = enum {
jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, jrcxz, js, jz,
lahf, lar, lea, leave, lfence, lgdt, lidt, lldt, lmsw, loop, loope, loopne,
lods, lodsb, lodsd, lodsq, lodsw,
- lsl, ltr, lzcnt,
+ lsl, ltr,
mfence, mov, movbe,
movs, movsb, movsd, movsq, movsw,
movsx, movsxd, movzx, mul,
neg, nop, not,
@"or", out, outs, outsb, outsd, outsw,
- pause, pop, popcnt, popf, popfd, popfq, push, pushfq,
+ pause, pop, popf, popfd, popfq, push, pushfq,
rcl, rcr,
rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdssd, rdssq, rdtsc, rdtscp,
- ret, rol, ror, rorx, rsm,
- sahf, sal, sar, sarx, sbb,
+ ret, rol, ror, rsm,
+ sahf, sal, sar, sbb,
scas, scasb, scasd, scasq, scasw,
senduipi, serialize,
- shl, shld, shlx, shr, shrd, shrx,
+ shl, shld, shr, shrd,
stac, stc, std, sti, str, stui,
sub, swapgs, syscall, sysenter, sysexit, sysret,
seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae,
@@ -433,6 +433,8 @@ pub const Mnemonic = enum {
roundpd, roundps, roundsd, roundss,
// SSE4.2
crc32, pcmpgtq,
+ // ABM
+ lzcnt, popcnt,
// PCLMUL
pclmulqdq,
// AES
@@ -440,7 +442,6 @@ pub const Mnemonic = enum {
// SHA
sha1rnds4, sha1nexte, sha1msg1, sha1msg2, sha256msg1, sha256msg2, sha256rnds2,
// AVX
- andn, bextr, blsi, blsmsk, blsr, bzhi, tzcnt,
vaddpd, vaddps, vaddsd, vaddss, vaddsubpd, vaddsubps,
vaesdec, vaesdeclast, vaesenc, vaesenclast, vaesimc, vaeskeygenassist,
vandnpd, vandnps, vandpd, vandps,
@@ -506,6 +507,10 @@ pub const Mnemonic = enum {
vtestpd, vtestps,
vucomisd, vucomiss, vunpckhpd, vunpckhps, vunpcklpd, vunpcklps,
vxorpd, vxorps,
+ // BMI
+ andn, bextr, blsi, blsmsk, blsr, tzcnt,
+ // BMI2
+ bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx,
// F16C
vcvtph2ps, vcvtps2ph,
// FMA
src/arch/x86_64/encodings.zig
@@ -405,9 +405,9 @@ pub const table = [_]Entry{
.{ .jb, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none },
.{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none },
.{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none },
- .{ .jcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .short, .@"32bit" },
- .{ .jecxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .@"32bit" },
- .{ .jrcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .@"64bit" },
+ .{ .jcxz, .d, &.{ .rel8 }, &.{ 0xe3 }, 0, .short, .@"32bit" },
+ .{ .jecxz, .d, &.{ .rel8 }, &.{ 0xe3 }, 0, .none, .@"32bit" },
+ .{ .jrcxz, .d, &.{ .rel8 }, &.{ 0xe3 }, 0, .none, .@"64bit" },
.{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none },
.{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none },
.{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none },
@@ -477,10 +477,6 @@ pub const table = [_]Entry{
.{ .ltr, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 3, .none, .none },
- .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt },
- .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .lzcnt },
- .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .lzcnt },
-
.{ .mfence, .z, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none },
.{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none, .none },
@@ -630,10 +626,6 @@ pub const table = [_]Entry{
.{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .short, .none },
.{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none, .none },
- .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .popcnt },
- .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .popcnt },
- .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .popcnt },
-
.{ .popf, .z, &.{}, &.{ 0x9d }, 0, .short, .none },
.{ .popfd, .z, &.{}, &.{ 0x9d }, 0, .none, .@"32bit" },
.{ .popfq, .z, &.{}, &.{ 0x9d }, 0, .none, .@"64bit" },
@@ -1738,6 +1730,15 @@ pub const table = [_]Entry{
.{ .pcmpgtq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .none, .sse4_2 },
+ // ABM
+ .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt },
+ .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .lzcnt },
+ .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .lzcnt },
+
+ .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .popcnt },
+ .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .popcnt },
+ .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .popcnt },
+
// PCLMUL
.{ .pclmulqdq, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x44 }, 0, .none, .pclmul },
@@ -1771,38 +1772,6 @@ pub const table = [_]Entry{
.{ .sha256msg2, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcd }, 0, .none, .sha },
// AVX
- .{ .andn, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w0, .bmi },
- .{ .andn, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w1, .bmi },
-
- .{ .bextr, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi },
- .{ .bextr, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi },
-
- .{ .blsi, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w0, .bmi },
- .{ .blsi, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w1, .bmi },
-
- .{ .blsmsk, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w0, .bmi },
- .{ .blsmsk, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w1, .bmi },
-
- .{ .blsr, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w0, .bmi },
- .{ .blsr, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w1, .bmi },
-
- .{ .bzhi, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 },
- .{ .bzhi, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 },
-
- .{ .rorx, .rmi, &.{ .r32, .rm32, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w0, .bmi2 },
- .{ .rorx, .rmi, &.{ .r64, .rm64, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w1, .bmi2 },
-
- .{ .sarx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
- .{ .shlx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
- .{ .shrx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
- .{ .sarx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
- .{ .shlx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
- .{ .shrx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
-
- .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi },
- .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi },
- .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi },
-
.{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx },
.{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx },
@@ -2307,6 +2276,49 @@ pub const table = [_]Entry{
.{ .vxorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
.{ .vxorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
+ // BMI
+ .{ .andn, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w0, .bmi },
+ .{ .andn, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w1, .bmi },
+
+ .{ .bextr, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi },
+ .{ .bextr, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi },
+
+ .{ .blsi, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w0, .bmi },
+ .{ .blsi, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w1, .bmi },
+
+ .{ .blsmsk, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w0, .bmi },
+ .{ .blsmsk, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w1, .bmi },
+
+ .{ .blsr, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w0, .bmi },
+ .{ .blsr, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w1, .bmi },
+
+ .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi },
+ .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi },
+ .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi },
+
+ // BMI2
+ .{ .bzhi, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 },
+ .{ .bzhi, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 },
+
+ .{ .mulx, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf2, 0x0f, 0x38, 0xf6 }, 0, .vex_lz_w0, .bmi2 },
+ .{ .mulx, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf2, 0x0f, 0x38, 0xf6 }, 0, .vex_lz_w1, .bmi2 },
+
+ .{ .pdep, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf2, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 },
+ .{ .pdep, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf2, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 },
+
+ .{ .pext, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf3, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 },
+ .{ .pext, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf3, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 },
+
+ .{ .rorx, .rmi, &.{ .r32, .rm32, .imm8 }, &.{ 0xf2, 0x0f, 0x3a, 0xf0 }, 0, .vex_lz_w0, .bmi2 }, // VEX.LZ.F2.0F3A.W0 F0 /r ib: the trailing 0xf0 is the opcode byte
+ .{ .rorx, .rmi, &.{ .r64, .rm64, .imm8 }, &.{ 0xf2, 0x0f, 0x3a, 0xf0 }, 0, .vex_lz_w1, .bmi2 }, // VEX.LZ.F2.0F3A.W1 F0 /r ib
+
+ .{ .sarx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
+ .{ .shlx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
+ .{ .shrx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
+ .{ .sarx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
+ .{ .shlx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
+ .{ .shrx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
+
// F16C
.{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c },
.{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c },
src/arch/x86_64/Lower.zig
@@ -10,32 +10,38 @@ mir: Mir,
cc: std.builtin.CallingConvention,
err_msg: ?*Zcu.ErrorMsg = null,
src_loc: Zcu.LazySrcLoc,
-result_insts_len: u8 = undefined,
-result_relocs_len: u8 = undefined,
-result_insts: [
- @max(
- 1, // non-pseudo instructions
- 3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode
- 2, // cmovcc: cmovcc \ cmovcc
- 3, // setcc: setcc \ setcc \ logicop
- 2, // jcc: jcc \ jcc
- pseudo_probe_align_insts,
- pseudo_probe_adjust_unrolled_max_insts,
- pseudo_probe_adjust_setup_insts,
- pseudo_probe_adjust_loop_insts,
- abi.Win64.callee_preserved_regs.len * 2, // push_regs/pop_regs
- abi.SysV.callee_preserved_regs.len * 2, // push_regs/pop_regs
- )
-]Instruction = undefined,
-result_relocs: [
- @max(
- 1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea
- 2, // jcc: jcc \ jcc
- 2, // test \ jcc \ probe \ sub \ jmp
- 1, // probe \ sub \ jcc
- 3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode
- )
-]Reloc = undefined,
+result_insts_len: ResultInstIndex = undefined,
+result_insts: [max_result_insts]Instruction = undefined,
+result_relocs_len: ResultRelocIndex = undefined,
+result_relocs: [max_result_relocs]Reloc = undefined,
+
+const max_result_insts = @max(
+ 1, // non-pseudo instructions
+ 3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode
+ 2, // cmovcc: cmovcc \ cmovcc
+ 3, // setcc: setcc \ setcc \ logicop
+ 2, // jcc: jcc \ jcc
+ pseudo_probe_align_insts,
+ pseudo_probe_adjust_unrolled_max_insts,
+ pseudo_probe_adjust_setup_insts,
+ pseudo_probe_adjust_loop_insts,
+ abi.Win64.callee_preserved_regs.len * 2, // push_regs/pop_regs
+ abi.SysV.callee_preserved_regs.len * 2, // push_regs/pop_regs
+);
+const max_result_relocs = @max(
+ 1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea
+ 2, // jcc: jcc \ jcc
+ 2, // test \ jcc \ probe \ sub \ jmp
+ 1, // probe \ sub \ jcc
+ 3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode
+);
+
+const ResultInstIndex = std.math.IntFittingRange(0, max_result_insts); // also the type of result_insts_len, a count that can reach max_result_insts
+const ResultRelocIndex = std.math.IntFittingRange(0, max_result_relocs); // also the type of result_relocs_len, a count that can reach max_result_relocs
+const InstOpIndex = std.math.IntFittingRange(
+ 0,
+ @typeInfo(@FieldType(Instruction, "ops")).array.len - 1,
+);
pub const pseudo_probe_align_insts = 5; // test \ jcc \ probe \ sub \ jmp
pub const pseudo_probe_adjust_unrolled_max_insts =
@@ -51,7 +57,8 @@ pub const Error = error{
};
pub const Reloc = struct {
- lowered_inst_index: u8,
+ lowered_inst_index: ResultInstIndex,
+ op_index: InstOpIndex,
target: Target,
off: i32,
@@ -114,11 +121,11 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .cmovnz, &.{
.{ .reg = inst.data.rx.r1 },
- .{ .mem = lower.mem(inst.data.rx.payload) },
+ .{ .mem = lower.mem(1, inst.data.rx.payload) },
});
try lower.emit(.none, .cmovp, &.{
.{ .reg = inst.data.rx.r1 },
- .{ .mem = lower.mem(inst.data.rx.payload) },
+ .{ .mem = lower.mem(1, inst.data.rx.payload) },
});
},
.pseudo_set_z_and_np_r => {
@@ -137,13 +144,13 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.pseudo_set_z_and_np_m => {
assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .setz, &.{
- .{ .mem = lower.mem(inst.data.rx.payload) },
+ .{ .mem = lower.mem(0, inst.data.rx.payload) },
});
try lower.emit(.none, .setnp, &.{
.{ .reg = inst.data.rx.r1 },
});
try lower.emit(.none, .@"and", &.{
- .{ .mem = lower.mem(inst.data.rx.payload) },
+ .{ .mem = lower.mem(0, inst.data.rx.payload) },
.{ .reg = inst.data.rx.r1 },
});
},
@@ -163,32 +170,32 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.pseudo_set_nz_or_p_m => {
assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .setnz, &.{
- .{ .mem = lower.mem(inst.data.rx.payload) },
+ .{ .mem = lower.mem(0, inst.data.rx.payload) },
});
try lower.emit(.none, .setp, &.{
.{ .reg = inst.data.rx.r1 },
});
try lower.emit(.none, .@"or", &.{
- .{ .mem = lower.mem(inst.data.rx.payload) },
+ .{ .mem = lower.mem(0, inst.data.rx.payload) },
.{ .reg = inst.data.rx.r1 },
});
},
.pseudo_j_z_and_np_inst => {
assert(inst.data.inst.fixes == ._);
try lower.emit(.none, .jnz, &.{
- .{ .imm = lower.reloc(.{ .inst = index + 1 }, 0) },
+ .{ .imm = lower.reloc(0, .{ .inst = index + 1 }, 0) },
});
try lower.emit(.none, .jnp, &.{
- .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) },
+ .{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) },
});
},
.pseudo_j_nz_or_p_inst => {
assert(inst.data.inst.fixes == ._);
try lower.emit(.none, .jnz, &.{
- .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) },
+ .{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) },
});
try lower.emit(.none, .jp, &.{
- .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) },
+ .{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) },
});
},
@@ -198,7 +205,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.{ .imm = .s(@bitCast(inst.data.ri.i)) },
});
try lower.emit(.none, .jz, &.{
- .{ .imm = lower.reloc(.{ .inst = index + 1 }, 0) },
+ .{ .imm = lower.reloc(0, .{ .inst = index + 1 }, 0) },
});
try lower.emit(.none, .lea, &.{
.{ .reg = inst.data.ri.r1 },
@@ -214,7 +221,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.{ .reg = inst.data.ri.r1.to32() },
});
try lower.emit(.none, .jmp, &.{
- .{ .imm = lower.reloc(.{ .inst = index }, 0) },
+ .{ .imm = lower.reloc(0, .{ .inst = index }, 0) },
});
assert(lower.result_insts_len == pseudo_probe_align_insts);
},
@@ -260,7 +267,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.{ .imm = .s(page_size) },
});
try lower.emit(.none, .jae, &.{
- .{ .imm = lower.reloc(.{ .inst = index }, 0) },
+ .{ .imm = lower.reloc(0, .{ .inst = index }, 0) },
});
assert(lower.result_insts_len == pseudo_probe_adjust_loop_insts);
},
@@ -382,21 +389,22 @@ pub fn imm(lower: *const Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
};
}
-pub fn mem(lower: *Lower, payload: u32) Memory {
+pub fn mem(lower: *Lower, op_index: InstOpIndex, payload: u32) Memory { // decodes the Mir.Memory stored at `payload`; `op_index` is handed to any reloc created for it
var m = lower.mir.resolveFrameLoc(lower.mir.extraData(Mir.Memory, payload).data).decode();
switch (m) {
.sib => |*sib| switch (sib.base) {
else => {},
- .table => sib.disp = lower.reloc(.table, sib.disp).signed,
+ .table => sib.disp = lower.reloc(op_index, .table, sib.disp).signed, // table-based operands: register a `.table` reloc and replace the displacement with its immediate
},
else => {},
}
return m;
}
-fn reloc(lower: *Lower, target: Reloc.Target, off: i32) Immediate {
+fn reloc(lower: *Lower, op_index: InstOpIndex, target: Reloc.Target, off: i32) Immediate {
lower.result_relocs[lower.result_relocs_len] = .{
.lowered_inst_index = lower.result_insts_len,
+ .op_index = op_index,
.target = target,
.off = off,
};
@@ -409,7 +417,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
var emit_mnemonic = mnemonic;
var emit_ops_storage: [4]Operand = undefined;
const emit_ops = emit_ops_storage[0..ops.len];
- for (emit_ops, ops) |*emit_op, op| {
+ for (emit_ops, ops, 0..) |*emit_op, op, op_index| {
emit_op.* = switch (op) {
else => op,
.mem => |mem_op| switch (mem_op.base()) {
@@ -428,20 +436,20 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
if (lower.pic) {
// Here, we currently assume local dynamic TLS vars, and so
// we emit LD model.
- _ = lower.reloc(.{ .linker_tlsld = sym_index }, 0);
+ _ = lower.reloc(1, .{ .linker_tlsld = sym_index }, 0);
lower.result_insts[lower.result_insts_len] = try .new(.none, .lea, &.{
.{ .reg = .rdi },
.{ .mem = Memory.initRip(.none, 0) },
}, lower.target);
lower.result_insts_len += 1;
- _ = lower.reloc(.{
+ _ = lower.reloc(0, .{
.linker_extern_fn = try elf_file.getGlobalSymbol("__tls_get_addr", null),
}, 0);
lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{
.{ .imm = .s(0) },
}, lower.target);
lower.result_insts_len += 1;
- _ = lower.reloc(.{ .linker_dtpoff = sym_index }, 0);
+ _ = lower.reloc(@intCast(op_index), .{ .linker_dtpoff = sym_index }, 0);
emit_mnemonic = .lea;
break :op .{ .mem = Memory.initSib(.none, .{
.base = .{ .reg = .rax },
@@ -454,7 +462,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
.{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .fs } }) },
}, lower.target);
lower.result_insts_len += 1;
- _ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
+ _ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
emit_mnemonic = .lea;
break :op .{ .mem = Memory.initSib(.none, .{
.base = .{ .reg = .rax },
@@ -463,15 +471,17 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
}
}
- _ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
if (lower.pic) switch (mnemonic) {
- .lea => if (elf_sym.flags.is_extern_ptr) {
+ .lea => {
+ _ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
+ if (!elf_sym.flags.is_extern_ptr) break :op .{ .mem = Memory.initRip(.none, 0) };
emit_mnemonic = .mov;
break :op .{ .mem = Memory.initRip(.ptr, 0) };
- } else break :op .{ .mem = Memory.initRip(.none, 0) },
+ },
.mov => {
if (elf_sym.flags.is_extern_ptr) {
const reg = ops[0].reg;
+ _ = lower.reloc(1, .{ .linker_reloc = sym_index }, 0);
lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
.{ .reg = reg.to64() },
.{ .mem = Memory.initRip(.qword, 0) },
@@ -481,10 +491,13 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
.reg = reg.to64(),
} }) };
}
+ _ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) };
},
else => unreachable,
- } else switch (mnemonic) {
+ };
+ _ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
+ switch (mnemonic) {
.call => break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{
.base = .{ .reg = .ds },
}) },
@@ -502,7 +515,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
const macho_sym = zo.symbols.items[sym_index];
if (macho_sym.flags.tlv) {
- _ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
+ _ = lower.reloc(1, .{ .linker_reloc = sym_index }, 0);
lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
.{ .reg = .rdi },
.{ .mem = Memory.initRip(.ptr, 0) },
@@ -516,15 +529,17 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
break :op .{ .reg = .rax };
}
- _ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
break :op switch (mnemonic) {
- .lea => if (macho_sym.flags.is_extern_ptr) {
+ .lea => {
+ _ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
+ if (!macho_sym.flags.is_extern_ptr) break :op .{ .mem = Memory.initRip(.none, 0) };
emit_mnemonic = .mov;
break :op .{ .mem = Memory.initRip(.ptr, 0) };
- } else break :op .{ .mem = Memory.initRip(.none, 0) },
+ },
.mov => {
if (macho_sym.flags.is_extern_ptr) {
const reg = ops[0].reg;
+ _ = lower.reloc(1, .{ .linker_reloc = sym_index }, 0);
lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
.{ .reg = reg.to64() },
.{ .mem = Memory.initRip(.qword, 0) },
@@ -534,6 +549,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
.reg = reg.to64(),
} }) };
}
+ _ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) };
},
else => unreachable,
@@ -550,7 +566,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
}
fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
- @setEvalBranchQuota(2_400);
+ @setEvalBranchQuota(2_500);
const fixes = switch (inst.ops) {
.none => inst.data.none.fixes,
.inst => inst.data.inst.fixes,
@@ -595,7 +611,7 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
}, switch (inst.ops) {
.none => &.{},
.inst => &.{
- .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) },
+ .{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) },
},
.i_s, .i_u => &.{
.{ .imm = lower.imm(inst.ops, inst.data.i.i) },
@@ -642,10 +658,10 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
.{ .imm = lower.imm(inst.ops, inst.data.rri.i) },
},
.m => &.{
- .{ .mem = lower.mem(inst.data.x.payload) },
+ .{ .mem = lower.mem(0, inst.data.x.payload) },
},
.mi_s, .mi_u => &.{
- .{ .mem = lower.mem(inst.data.x.payload + 1) },
+ .{ .mem = lower.mem(0, inst.data.x.payload + 1) },
.{ .imm = lower.imm(
inst.ops,
lower.mir.extraData(Mir.Imm32, inst.data.x.payload).data.imm,
@@ -653,64 +669,64 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
},
.rm => &.{
.{ .reg = inst.data.rx.r1 },
- .{ .mem = lower.mem(inst.data.rx.payload) },
+ .{ .mem = lower.mem(1, inst.data.rx.payload) },
},
.rmr => &.{
.{ .reg = inst.data.rrx.r1 },
- .{ .mem = lower.mem(inst.data.rrx.payload) },
+ .{ .mem = lower.mem(1, inst.data.rrx.payload) },
.{ .reg = inst.data.rrx.r2 },
},
.rmi => &.{
.{ .reg = inst.data.rix.r1 },
- .{ .mem = lower.mem(inst.data.rix.payload) },
+ .{ .mem = lower.mem(1, inst.data.rix.payload) },
.{ .imm = lower.imm(inst.ops, inst.data.rix.i) },
},
.rmi_s, .rmi_u => &.{
.{ .reg = inst.data.rx.r1 },
- .{ .mem = lower.mem(inst.data.rx.payload + 1) },
+ .{ .mem = lower.mem(1, inst.data.rx.payload + 1) },
.{ .imm = lower.imm(
inst.ops,
lower.mir.extraData(Mir.Imm32, inst.data.rx.payload).data.imm,
) },
},
.mr => &.{
- .{ .mem = lower.mem(inst.data.rx.payload) },
+ .{ .mem = lower.mem(0, inst.data.rx.payload) },
.{ .reg = inst.data.rx.r1 },
},
.mrr => &.{
- .{ .mem = lower.mem(inst.data.rrx.payload) },
+ .{ .mem = lower.mem(0, inst.data.rrx.payload) },
.{ .reg = inst.data.rrx.r1 },
.{ .reg = inst.data.rrx.r2 },
},
.mri => &.{
- .{ .mem = lower.mem(inst.data.rix.payload) },
+ .{ .mem = lower.mem(0, inst.data.rix.payload) },
.{ .reg = inst.data.rix.r1 },
.{ .imm = lower.imm(inst.ops, inst.data.rix.i) },
},
.rrm => &.{
.{ .reg = inst.data.rrx.r1 },
.{ .reg = inst.data.rrx.r2 },
- .{ .mem = lower.mem(inst.data.rrx.payload) },
+ .{ .mem = lower.mem(2, inst.data.rrx.payload) },
},
.rrmr => &.{
.{ .reg = inst.data.rrrx.r1 },
.{ .reg = inst.data.rrrx.r2 },
- .{ .mem = lower.mem(inst.data.rrrx.payload) },
+ .{ .mem = lower.mem(2, inst.data.rrrx.payload) },
.{ .reg = inst.data.rrrx.r3 },
},
.rrmi => &.{
.{ .reg = inst.data.rrix.r1 },
.{ .reg = inst.data.rrix.r2 },
- .{ .mem = lower.mem(inst.data.rrix.payload) },
+ .{ .mem = lower.mem(2, inst.data.rrix.payload) },
.{ .imm = lower.imm(inst.ops, inst.data.rrix.i) },
},
.extern_fn_reloc, .rel => &.{
- .{ .imm = lower.reloc(.{ .linker_extern_fn = inst.data.reloc.sym_index }, inst.data.reloc.off) },
+ .{ .imm = lower.reloc(0, .{ .linker_extern_fn = inst.data.reloc.sym_index }, inst.data.reloc.off) },
},
.got_reloc, .direct_reloc, .import_reloc => ops: {
const reg = inst.data.rx.r1;
const extra = lower.mir.extraData(bits.SymbolOffset, inst.data.rx.payload).data;
- _ = lower.reloc(switch (inst.ops) {
+ _ = lower.reloc(1, switch (inst.ops) {
.got_reloc => .{ .linker_got = extra.sym_index },
.direct_reloc => .{ .linker_direct = extra.sym_index },
.import_reloc => .{ .linker_import = extra.sym_index },
src/arch/x86_64/Mir.zig
@@ -100,6 +100,8 @@ pub const Inst = struct {
/// ___ Division
_d,
+ /// ___ Without Affecting Flags
+ _x,
/// ___ Left
_l,
/// ___ Left Double
@@ -483,6 +485,7 @@ pub const Inst = struct {
/// ASCII adjust al after subtraction
aa,
/// Add with carry
+ /// Unsigned integer addition of two operands with carry flag
adc,
/// Add
/// Add packed integers
@@ -1162,10 +1165,8 @@ pub const Inst = struct {
fmadd231,
// ADX
- /// Unsigned integer addition of two operands with carry flag
- adcx,
/// Unsigned integer addition of two operands with overflow flag
- adox,
+ ado,
// AESKLE
/// Encode 128-bit key with key locker
test/behavior/x86_64/build.zig
@@ -93,6 +93,11 @@ pub fn build(b: *std.Build) void {
.cpu_arch = .x86_64,
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 },
},
+ .{
+ .cpu_arch = .x86_64,
+ .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 },
+ .cpu_features_add = std.Target.x86.featureSet(&.{.adx}),
+ },
.{
.cpu_arch = .x86_64,
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v4 },
test/behavior/x86_64/math.zig
@@ -44,6 +44,17 @@ fn AddOneBit(comptime Type: type) type {
.vector => |vector| @Vector(vector.len, ResultScalar),
};
}
+fn DoubleBits(comptime Type: type) type { // maps Type to a result type whose integer scalars are twice as wide
+ const ResultScalar = switch (@typeInfo(Scalar(Type))) {
+ .int => |int| @Type(.{ .int = .{ .signedness = int.signedness, .bits = int.bits * 2 } }), // same signedness, double the bit count
+ .float => Scalar(Type), // float scalars keep their original type
+ else => @compileError(@typeName(Type)), // any other scalar kind is a compile error naming the type
+ };
+ return switch (@typeInfo(Type)) {
+ else => ResultScalar, // non-vector input yields the scalar itself
+ .vector => |vector| @Vector(vector.len, ResultScalar), // vector input keeps its length with the new element type
+ };
+}
// inline to avoid a runtime `@splat`
inline fn splat(comptime Type: type, scalar: Scalar(Type)) Type {
return switch (@typeInfo(Type)) {
@@ -16216,6 +16227,8 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
);
}
fn testInts() !void {
+ try testArgs(i4, 0x3, 0x2);
+ try testArgs(u4, 0xe, 0x6);
try testArgs(i8, 0x48, 0x6c);
try testArgs(u8, 0xbb, 0x43);
try testArgs(i16, -0x0fdf, 0x302e);
@@ -18993,6 +19006,15 @@ test subUnsafe {
try test_sub_unsafe.testFloatVectors();
}
+inline fn mulUnsafe(comptime Type: type, lhs: Type, rhs: Type) DoubleBits(Type) { // multiplies lhs by rhs in the DoubleBits(Type) result type
+ @setRuntimeSafety(false); // runtime safety checks are disabled for this scope
+ return @as(DoubleBits(Type), lhs) * rhs; // lhs is converted to the result type before the multiply
+}
+test mulUnsafe {
+ const test_mul_unsafe = binary(mulUnsafe, .{}); // wrap mulUnsafe with the default `binary` options
+ try test_mul_unsafe.testInts(); // only the integer cases are run for this op
+}
+
inline fn multiply(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs * rhs) {
if (@inComptime() and @typeInfo(Type) == .vector) {
// workaround https://github.com/ziglang/zig/issues/22743