Commit b6d6102850
Changed files (7)
src/arch/x86_64/bits.zig
@@ -175,15 +175,21 @@ pub const Register = enum(u7) {
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
+ mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7,
+
+ st0, st1, st2, st3, st4, st5, st6, st7,
+
es, cs, ss, ds, fs, gs,
none,
// zig fmt: on
- pub const Class = enum(u2) {
+ pub const Class = enum {
general_purpose,
- floating_point,
segment,
+ x87,
+ mmx,
+ sse,
};
pub fn class(reg: Register) Class {
@@ -195,8 +201,10 @@ pub const Register = enum(u7) {
@enumToInt(Register.al) ... @enumToInt(Register.r15b) => .general_purpose,
@enumToInt(Register.ah) ... @enumToInt(Register.bh) => .general_purpose,
- @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => .floating_point,
- @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => .floating_point,
+ @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => .sse,
+ @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => .sse,
+ @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => .mmx,
+ @enumToInt(Register.st0) ... @enumToInt(Register.st7) => .x87,
@enumToInt(Register.es) ... @enumToInt(Register.gs) => .segment,
@@ -216,8 +224,10 @@ pub const Register = enum(u7) {
@enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => @enumToInt(Register.ymm0) - 16,
@enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => @enumToInt(Register.xmm0) - 16,
+ @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => @enumToInt(Register.mm0) - 32,
+ @enumToInt(Register.st0) ... @enumToInt(Register.st7) => @enumToInt(Register.st0) - 40,
- @enumToInt(Register.es) ... @enumToInt(Register.gs) => @enumToInt(Register.es) - 32,
+ @enumToInt(Register.es) ... @enumToInt(Register.gs) => @enumToInt(Register.es) - 48,
else => unreachable,
// zig fmt: on
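The new bases keep each register class in its own contiguous id range: subtracting `@enumToInt(Register.mm0) - 32` puts the MMX registers at 32..39 and `@enumToInt(Register.st0) - 40` puts the x87 stack at 40..47, right after the existing 16..31 SSE range. A minimal sketch of what that implies, assuming id() subtracts the base selected here (as the existing `ymm0.id() == 0b10000` expectation suggests) and that the test file sits next to `src/arch/x86_64/bits.zig`:

```zig
const std = @import("std");
const expect = std.testing.expect;
const Register = @import("bits.zig").Register; // illustrative import path

test "mmx and x87 ids get their own contiguous ranges" {
    // Assumes id() = @enumToInt(reg) - base, so each class lands in a
    // disjoint range: sse 16..31, mmx 32..39, x87 40..47.
    try expect(Register.mm0.id() == 32);
    try expect(Register.mm7.id() == 39);
    try expect(Register.st0.id() == 40);
    try expect(Register.st7.id() == 47);
    try expect(Register.xmm7.id() != Register.mm7.id());
}
```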
@@ -236,6 +246,8 @@ pub const Register = enum(u7) {
@enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => 256,
@enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => 128,
+ @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => 64,
+ @enumToInt(Register.st0) ... @enumToInt(Register.st7) => 80,
@enumToInt(Register.es) ... @enumToInt(Register.gs) => 16,
@@ -271,6 +283,8 @@ pub const Register = enum(u7) {
@enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => @enumToInt(Register.ymm0),
@enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => @enumToInt(Register.xmm0),
+ @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => @enumToInt(Register.mm0),
+ @enumToInt(Register.st0) ... @enumToInt(Register.st7) => @enumToInt(Register.st0),
@enumToInt(Register.es) ... @enumToInt(Register.gs) => @enumToInt(Register.es),
@@ -326,8 +340,8 @@ pub const Register = enum(u7) {
return @intToEnum(Register, @enumToInt(reg) - reg.gpBase() + @enumToInt(Register.al));
}
- fn fpBase(reg: Register) u7 {
- assert(reg.class() == .floating_point);
+ fn sseBase(reg: Register) u7 {
+ assert(reg.class() == .sse);
return switch (@enumToInt(reg)) {
@enumToInt(Register.ymm0)...@enumToInt(Register.ymm15) => @enumToInt(Register.ymm0),
@enumToInt(Register.xmm0)...@enumToInt(Register.xmm15) => @enumToInt(Register.xmm0),
@@ -336,49 +350,24 @@ pub const Register = enum(u7) {
}
pub fn to256(reg: Register) Register {
- return @intToEnum(Register, @enumToInt(reg) - reg.fpBase() + @enumToInt(Register.ymm0));
+ return @intToEnum(Register, @enumToInt(reg) - reg.sseBase() + @enumToInt(Register.ymm0));
}
pub fn to128(reg: Register) Register {
- return @intToEnum(Register, @enumToInt(reg) - reg.fpBase() + @enumToInt(Register.xmm0));
- }
-
- pub fn dwarfLocOp(reg: Register) u8 {
- return switch (reg.class()) {
- .general_purpose => switch (reg.to64()) {
- .rax => DW.OP.reg0,
- .rdx => DW.OP.reg1,
- .rcx => DW.OP.reg2,
- .rbx => DW.OP.reg3,
- .rsi => DW.OP.reg4,
- .rdi => DW.OP.reg5,
- .rbp => DW.OP.reg6,
- .rsp => DW.OP.reg7,
- else => @intCast(u8, @enumToInt(reg) - reg.gpBase()) + DW.OP.reg0,
- },
- .floating_point => @intCast(u8, @enumToInt(reg) - reg.fpBase()) + DW.OP.reg17,
- else => unreachable,
- };
+ return @intToEnum(Register, @enumToInt(reg) - reg.sseBase() + @enumToInt(Register.xmm0));
}
- /// DWARF encodings that push a value onto the DWARF stack that is either
- /// the contents of a register or the result of adding the contents a given
- /// register to a given signed offset.
- pub fn dwarfLocOpDeref(reg: Register) u8 {
+ /// DWARF register encoding
+ pub fn dwarfNum(reg: Register) u6 {
return switch (reg.class()) {
- .general_purpose => switch (reg.to64()) {
- .rax => DW.OP.breg0,
- .rdx => DW.OP.breg1,
- .rcx => DW.OP.breg2,
- .rbx => DW.OP.breg3,
- .rsi => DW.OP.breg4,
- .rdi => DW.OP.breg5,
- .rbp => DW.OP.breg6,
- .rsp => DW.OP.breg7,
- else => @intCast(u8, @enumToInt(reg) - reg.gpBase()) + DW.OP.breg0,
- },
- .floating_point => @intCast(u8, @enumToInt(reg) - reg.fpBase()) + DW.OP.breg17,
- else => unreachable,
+ .general_purpose => if (reg.isExtended())
+ reg.enc()
+ else
+ @truncate(u3, @as(u24, 0o54673120) >> @as(u5, reg.enc()) * 3),
+ .sse => 17 + @as(u6, reg.enc()),
+ .x87 => 33 + @as(u6, reg.enc()),
+ .mmx => 41 + @as(u6, reg.enc()),
+ .segment => 50 + @as(u6, reg.enc()),
};
}
};
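`dwarfNum` maps registers straight onto the System V AMD64 DWARF numbering: the octal literal packs the eight non-extended general-purpose mappings into 3-bit fields (machine encoding order rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi versus DWARF numbers 0, 2, 1, 3, 7, 6, 4, 5), the extended registers already agree (r8..r15 are DWARF 8..15), and the 17/33/41/50 bases cover xmm, st, mm, and the segment registers. A standalone sketch checking the packed table against the ABI numbers (the expected values come from the System V spec, not from this file):

```zig
const std = @import("std");

test "packed octal lookup reproduces the SysV DWARF numbers" {
    // Machine encodings 0..7 name rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi;
    // the System V AMD64 ABI numbers them 0, 2, 1, 3, 7, 6, 4, 5.
    const expected = [8]u3{ 0, 2, 1, 3, 7, 6, 4, 5 };
    var enc: u5 = 0;
    while (enc < 8) : (enc += 1) {
        const dwarf_num = @truncate(u3, @as(u24, 0o54673120) >> enc * 3);
        try std.testing.expectEqual(expected[enc], dwarf_num);
    }
}
```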
@@ -392,6 +381,8 @@ test "Register id - different classes" {
try expect(Register.ymm0.id() == 0b10000);
try expect(Register.ymm0.id() != Register.rax.id());
try expect(Register.xmm0.id() == Register.ymm0.id());
+ try expect(Register.xmm0.id() != Register.mm0.id());
+ try expect(Register.mm0.id() != Register.st0.id());
try expect(Register.es.id() == 0b100000);
}
@@ -407,7 +398,9 @@ test "Register enc - different classes" {
test "Register classes" {
try expect(Register.r11.class() == .general_purpose);
- try expect(Register.ymm11.class() == .floating_point);
+ try expect(Register.ymm11.class() == .sse);
+ try expect(Register.mm3.class() == .mmx);
+ try expect(Register.st3.class() == .x87);
try expect(Register.fs.class() == .segment);
}
src/arch/x86_64/CodeGen.zig
@@ -2501,12 +2501,12 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
}
} else if (src_bits == 64 and dst_bits == 32) {
if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
- .{ .v_, .cvtsd2ss },
+ .{ .v_ss, .cvtsd2 },
dst_reg,
dst_reg,
src_mcv.mem(.qword),
) else try self.asmRegisterRegisterRegister(
- .{ .v_, .cvtsd2ss },
+ .{ .v_ss, .cvtsd2 },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
@@ -2514,11 +2514,11 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
else
try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ ._, .cvtsd2ss },
+ .{ ._ss, .cvtsd2 },
dst_reg,
src_mcv.mem(.qword),
) else try self.asmRegisterRegister(
- .{ ._, .cvtsd2ss },
+ .{ ._ss, .cvtsd2 },
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -2552,22 +2552,22 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
src_mcv.getReg().?
else
try self.copyToTmpRegister(src_ty, src_mcv);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128());
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128());
switch (dst_bits) {
32 => {},
- 64 => try self.asmRegisterRegisterRegister(.{ .v_, .cvtss2sd }, dst_reg, dst_reg, dst_reg),
+ 64 => try self.asmRegisterRegisterRegister(.{ .v_sd, .cvtss2 }, dst_reg, dst_reg, dst_reg),
else => return self.fail("TODO implement airFpext from {} to {}", .{
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
}),
}
} else if (src_bits == 32 and dst_bits == 64) {
if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
- .{ .v_, .cvtss2sd },
+ .{ .v_sd, .cvtss2 },
dst_reg,
dst_reg,
src_mcv.mem(.dword),
) else try self.asmRegisterRegisterRegister(
- .{ .v_, .cvtss2sd },
+ .{ .v_sd, .cvtss2 },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
@@ -2575,11 +2575,11 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
else
try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ ._, .cvtss2sd },
+ .{ ._sd, .cvtss2 },
dst_reg,
src_mcv.mem(.dword),
) else try self.asmRegisterRegister(
- .{ ._, .cvtss2sd },
+ .{ ._sd, .cvtss2 },
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -4789,7 +4789,6 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4
})) |tag| tag else return self.fail("TODO implement genRound for {}", .{
ty.fmt(self.bin_file.options.module.?),
});
-
const abi_size = @intCast(u32, ty.abiSize(self.target.*));
const dst_alias = registerAlias(dst_reg, abi_size);
switch (mir_tag[0]) {
@@ -4848,7 +4847,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
src_mcv.getReg().?
else
try self.copyToTmpRegister(ty, src_mcv);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128());
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128());
try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg);
try self.asmRegisterRegisterImmediate(
.{ .v_, .cvtps2ph },
@@ -4868,7 +4867,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) {
1 => {
try self.asmRegisterRegister(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -4892,13 +4891,13 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
2...8 => {
const wide_reg = registerAlias(dst_reg, abi_size * 2);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
wide_reg,
src_mcv.mem(Memory.PtrSize.fromSize(
@intCast(u32, @divExact(wide_reg.bitSize(), 16)),
)),
) else try self.asmRegisterRegister(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
wide_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -6347,7 +6346,7 @@ fn genBinOp(
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
@@ -6424,7 +6423,7 @@ fn genBinOp(
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
@@ -6467,7 +6466,7 @@ fn genBinOp(
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
try self.asmRegisterRegisterRegister(
.{ .v_ps, .movhl },
tmp_reg,
@@ -6501,13 +6500,13 @@ fn genBinOp(
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
tmp_reg,
src_mcv.mem(.qword),
) else try self.asmRegisterRegister(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -6541,13 +6540,13 @@ fn genBinOp(
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg.to256(), dst_reg);
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
tmp_reg,
src_mcv.mem(.xword),
) else try self.asmRegisterRegister(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -7199,13 +7198,13 @@ fn genArgDbgInfo(self: Self, ty: Type, name: [:0]const u8, mcv: MCValue) !void {
switch (self.debug_output) {
.dwarf => |dw| {
const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) {
- .register => |reg| .{ .register = reg.dwarfLocOp() },
+ .register => |reg| .{ .register = reg.dwarfNum() },
// TODO use a frame index
.load_frame => return,
//.stack_offset => |off| .{
// .stack = .{
// // TODO handle -fomit-frame-pointer
- // .fp_register = Register.rbp.dwarfLocOpDeref(),
+ // .fp_register = Register.rbp.dwarfNum(),
// .offset = -off,
// },
//},
@@ -7237,11 +7236,11 @@ fn genVarDbgInfo(
switch (self.debug_output) {
.dwarf => |dw| {
const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) {
- .register => |reg| .{ .register = reg.dwarfLocOp() },
+ .register => |reg| .{ .register = reg.dwarfNum() },
// TODO use a frame index
.load_frame, .lea_frame => return,
//=> |off| .{ .stack = .{
- // .fp_register = Register.rbp.dwarfLocOpDeref(),
+ // .fp_register = Register.rbp.dwarfNum(),
// .offset = -off,
//} },
.memory => |address| .{ .memory = address },
@@ -7595,7 +7594,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
else
try self.copyToTmpRegister(ty, src_mcv)).to128(),
);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, tmp1_reg, tmp1_reg);
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, tmp1_reg, tmp1_reg);
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg);
try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv);
} else return self.fail("TODO implement airCmp for {}", .{
@@ -8862,14 +8861,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
}
},
.register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister(
- if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point))
+ if ((dst_reg.class() == .sse) == (src_reg.class() == .sse))
switch (ty.zigTypeTag()) {
else => .{ ._, .mov },
.Float, .Vector => .{ ._ps, .mova },
}
else switch (abi_size) {
2 => return try self.asmRegisterRegisterImmediate(
- if (dst_reg.class() == .floating_point) .{ .p_w, .insr } else .{ .p_w, .extr },
+ if (dst_reg.class() == .sse) .{ .p_w, .insr } else .{ .p_w, .extr },
registerAlias(dst_reg, 4),
registerAlias(src_reg, 4),
Immediate.u(0),
@@ -9222,7 +9221,7 @@ fn genInlineMemcpyRegisterRegister(
try self.asmMemoryRegister(
switch (src_reg.class()) {
.general_purpose, .segment => .{ ._, .mov },
- .floating_point => .{ ._ss, .mov },
+ .sse => .{ ._ss, .mov },
},
Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }),
registerAlias(src_reg, abi_size),
@@ -9388,10 +9387,10 @@ fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void {
});
const src_mcv = try self.resolveInst(ty_op.operand);
- const src_reg = switch (src_mcv) {
- .register => |reg| reg,
- else => try self.copyToTmpRegister(src_ty, src_mcv),
- };
+ const src_reg = if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(src_ty, src_mcv);
const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
defer self.register_manager.unlockReg(src_lock);
@@ -9402,23 +9401,23 @@ fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void {
const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
defer self.register_manager.unlockReg(dst_lock);
- try self.asmRegisterRegister(switch (dst_ty.floatBits(self.target.*)) {
- 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
- .{ ._, .cvtsi2ss }
- else
- return self.fail("TODO implement airIntToFloat from {} to {} without sse", .{
- src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
- }),
- 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
- .{ ._, .cvtsi2sd }
- else
- return self.fail("TODO implement airIntToFloat from {} to {} without sse2", .{
- src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
- }),
- else => return self.fail("TODO implement airIntToFloat from {} to {}", .{
- src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
- }),
- }, dst_reg.to128(), registerAlias(src_reg, src_size));
+ const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (dst_ty.zigTypeTag()) {
+ .Float => switch (dst_ty.floatBits(self.target.*)) {
+ 32 => if (self.hasFeature(.avx)) .{ .v_ss, .cvtsi2 } else .{ ._ss, .cvtsi2 },
+ 64 => if (self.hasFeature(.avx)) .{ .v_sd, .cvtsi2 } else .{ ._sd, .cvtsi2 },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ else => null,
+ })) |tag| tag else return self.fail("TODO implement airIntToFloat from {} to {}", .{
+ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
+ });
+ const dst_alias = dst_reg.to128();
+ const src_alias = registerAlias(src_reg, src_size);
+ switch (mir_tag[0]) {
+ .v_ss, .v_sd => try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, src_alias),
+ else => try self.asmRegisterRegister(mir_tag, dst_alias, src_alias),
+ }
return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
}
@@ -9428,46 +9427,50 @@ fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void {
const src_ty = self.air.typeOf(ty_op.operand);
const dst_ty = self.air.typeOfIndex(inst);
- const operand = try self.resolveInst(ty_op.operand);
- const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*));
- const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*));
+ const dst_bits = @intCast(u32, dst_ty.bitSize(self.target.*));
+ const dst_signedness =
+ if (dst_ty.isAbiInt()) dst_ty.intInfo(self.target.*).signedness else .unsigned;
- switch (src_abi_size) {
- 4, 8 => {},
- else => |size| return self.fail("TODO load ST(0) with abiSize={}", .{size}),
- }
- if (dst_abi_size > 8) {
- return self.fail("TODO convert float with abiSize={}", .{dst_abi_size});
- }
+ const dst_size = std.math.divCeil(u32, @max(switch (dst_signedness) {
+ .signed => dst_bits,
+ .unsigned => dst_bits + 1,
+ }, 32), 8) catch unreachable;
+ if (dst_size > 8) return self.fail("TODO implement airFloatToInt from {} to {}", .{
+ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
+ });
- // move float src to ST(0)
- const frame_addr: FrameAddr = switch (operand) {
- .load_frame => |frame_addr| frame_addr,
- else => frame_addr: {
- const frame_index = try self.allocFrameIndex(FrameAlloc.initType(src_ty, self.target.*));
- try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, operand);
- break :frame_addr .{ .index = frame_index };
- },
- };
- try self.asmMemory(
- .{ .f_, .ld },
- Memory.sib(Memory.PtrSize.fromSize(src_abi_size), .{
- .base = .{ .frame = frame_addr.index },
- .disp = frame_addr.off,
- }),
- );
+ const src_mcv = try self.resolveInst(ty_op.operand);
+ const src_reg = if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(src_ty, src_mcv);
+ const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
+ defer self.register_manager.unlockReg(src_lock);
- // convert
- const stack_dst = try self.allocRegOrMem(inst, false);
- try self.asmMemory(
- .{ .f_p, .istt },
- Memory.sib(Memory.PtrSize.fromSize(dst_abi_size), .{
- .base = .{ .frame = stack_dst.load_frame.index },
- .disp = stack_dst.load_frame.off,
+ const dst_reg = try self.register_manager.allocReg(inst, regClassForType(dst_ty));
+ const dst_mcv = MCValue{ .register = dst_reg };
+ const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
+ defer self.register_manager.unlockReg(dst_lock);
+
+ try self.asmRegisterRegister(
+ if (@as(?Mir.Inst.FixedTag, switch (src_ty.zigTypeTag()) {
+ .Float => switch (src_ty.floatBits(self.target.*)) {
+ 32 => if (self.hasFeature(.avx)) .{ .v_, .cvttss2si } else .{ ._, .cvttss2si },
+ 64 => if (self.hasFeature(.avx)) .{ .v_, .cvttsd2si } else .{ ._, .cvttsd2si },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ else => null,
+ })) |tag| tag else return self.fail("TODO implement airFloatToInt from {} to {}", .{
+ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
}),
+ registerAlias(dst_reg, dst_size),
+ src_reg.to128(),
);
- return self.finishAir(inst, stack_dst, .{ ty_op.operand, .none, .none });
+ if (dst_bits < dst_size * 8) try self.truncateRegister(dst_ty, dst_reg);
+
+ return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
}
fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
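The `dst_size` computation is the crux of the new `airFloatToInt` path: `cvttss2si`/`cvttsd2si` always produce a signed result, so an unsigned destination needs one extra bit, and the integer operand of those instructions is at least 32 bits wide. A small sketch of that rule with a few worked values (the helper name is made up for illustration):

```zig
const std = @import("std");

/// Mirrors the dst_size computation in airFloatToInt: unsigned destinations
/// need one extra bit because cvttss2si/cvttsd2si produce a signed result,
/// and the integer operand of those instructions is at least 32 bits wide.
fn floatToIntDstSize(dst_bits: u32, signedness: std.builtin.Signedness) u32 {
    const needed_bits = switch (signedness) {
        .signed => dst_bits,
        .unsigned => dst_bits + 1,
    };
    return std.math.divCeil(u32, @max(needed_bits, 32), 8) catch unreachable;
}

test "floatToIntDstSize worked examples" {
    try std.testing.expectEqual(@as(u32, 4), floatToIntDstSize(8, .unsigned)); // u8  -> 32-bit alias
    try std.testing.expectEqual(@as(u32, 4), floatToIntDstSize(32, .signed)); // i32 -> 32-bit alias
    try std.testing.expectEqual(@as(u32, 5), floatToIntDstSize(32, .unsigned)); // u32 -> 64-bit alias
    try std.testing.expectEqual(@as(u32, 8), floatToIntDstSize(64, .signed)); // i64 -> 64-bit alias
    // u64 would need 9 bytes, which is why dst_size > 8 falls back to the TODO error above.
    try std.testing.expectEqual(@as(u32, 9), floatToIntDstSize(64, .unsigned));
}
```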
@@ -10997,13 +11000,13 @@ fn registerAlias(reg: Register, size_bytes: u32) Register {
reg.to64()
else
unreachable,
- .floating_point => if (size_bytes <= 16)
+ .segment, .x87, .mmx => unreachable,
+ .sse => if (size_bytes <= 16)
reg.to128()
else if (size_bytes <= 32)
reg.to256()
else
unreachable,
- .segment => unreachable,
};
}
src/arch/x86_64/Encoding.zig
@@ -233,7 +233,6 @@ pub const Mnemonic = enum {
cmpxchg, cmpxchg8b, cmpxchg16b,
cqo, cwd, cwde,
div,
- fisttp, fld,
idiv, imul, int3,
ja, jae, jb, jbe, jc, jrcxz, je, jg, jge, jl, jle, jna, jnae, jnb, jnbe,
jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, js, jz,
@@ -259,6 +258,8 @@ pub const Mnemonic = enum {
@"test", tzcnt,
ud2,
xadd, xchg, xor,
+ // X87
+ fisttp, fld,
// MMX
movd,
// SSE
@@ -266,7 +267,7 @@ pub const Mnemonic = enum {
andps,
andnps,
cmpss,
- cvtsi2ss,
+ cvtpi2ps, cvtps2pi, cvtsi2ss, cvtss2si, cvttps2pi, cvttss2si,
divps, divss,
maxps, maxss,
minps, minss,
@@ -285,7 +286,9 @@ pub const Mnemonic = enum {
andpd,
andnpd,
//cmpsd,
- cvtsd2ss, cvtsi2sd, cvtss2sd,
+ cvtdq2pd, cvtdq2ps, cvtpd2dq, cvtpd2pi, cvtpd2ps, cvtpi2pd,
+ cvtps2dq, cvtps2pd, cvtsd2si, cvtsd2ss, cvtsi2sd, cvtss2sd,
+ cvttpd2dq, cvttpd2pi, cvttps2dq, cvttsd2si,
divpd, divsd,
maxpd, maxsd,
minpd, minsd,
@@ -314,7 +317,10 @@ pub const Mnemonic = enum {
// AVX
vaddpd, vaddps, vaddsd, vaddss,
vbroadcastf128, vbroadcastsd, vbroadcastss,
- vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd,
+ vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps,
+ vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss,
+ vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vcvtss2si,
+ vcvttpd2dq, vcvttps2dq, vcvttsd2si, vcvttss2si,
vdivpd, vdivps, vdivsd, vdivss,
vextractf128, vextractps,
vinsertf128, vinsertps,
@@ -377,80 +383,84 @@ pub const Op = enum {
m,
moffs,
sreg,
+ st, mm, mm_m64,
xmm, xmm_m32, xmm_m64, xmm_m128,
ymm, ymm_m256,
// zig fmt: on
pub fn fromOperand(operand: Instruction.Operand) Op {
- switch (operand) {
- .none => return .none,
-
- .reg => |reg| {
- switch (reg.class()) {
- .segment => return .sreg,
- .floating_point => return switch (reg.bitSize()) {
- 128 => .xmm,
- 256 => .ymm,
+ return switch (operand) {
+ .none => .none,
+
+ .reg => |reg| switch (reg.class()) {
+ .general_purpose => if (reg.to64() == .rax)
+ switch (reg) {
+ .al => .al,
+ .ax => .ax,
+ .eax => .eax,
+ .rax => .rax,
else => unreachable,
- },
- .general_purpose => {
- if (reg.to64() == .rax) return switch (reg) {
- .al => .al,
- .ax => .ax,
- .eax => .eax,
- .rax => .rax,
- else => unreachable,
- };
- if (reg == .cl) return .cl;
- return switch (reg.bitSize()) {
- 8 => .r8,
- 16 => .r16,
- 32 => .r32,
- 64 => .r64,
- else => unreachable,
- };
- },
- }
+ }
+ else if (reg == .cl)
+ .cl
+ else switch (reg.bitSize()) {
+ 8 => .r8,
+ 16 => .r16,
+ 32 => .r32,
+ 64 => .r64,
+ else => unreachable,
+ },
+ .segment => .sreg,
+ .x87 => .st,
+ .mmx => .mm,
+ .sse => switch (reg.bitSize()) {
+ 128 => .xmm,
+ 256 => .ymm,
+ else => unreachable,
+ },
},
.mem => |mem| switch (mem) {
- .moffs => return .moffs,
- .sib, .rip => {
- const bit_size = mem.bitSize();
- return switch (bit_size) {
- 8 => .m8,
- 16 => .m16,
- 32 => .m32,
- 64 => .m64,
- 80 => .m80,
- 128 => .m128,
- 256 => .m256,
- else => unreachable,
- };
+ .moffs => .moffs,
+ .sib, .rip => switch (mem.bitSize()) {
+ 8 => .m8,
+ 16 => .m16,
+ 32 => .m32,
+ 64 => .m64,
+ 80 => .m80,
+ 128 => .m128,
+ 256 => .m256,
+ else => unreachable,
},
},
- .imm => |imm| {
- switch (imm) {
- .signed => |x| {
- if (x == 1) return .unity;
- if (math.cast(i8, x)) |_| return .imm8s;
- if (math.cast(i16, x)) |_| return .imm16s;
- return .imm32s;
- },
- .unsigned => |x| {
- if (x == 1) return .unity;
- if (math.cast(i8, x)) |_| return .imm8s;
- if (math.cast(u8, x)) |_| return .imm8;
- if (math.cast(i16, x)) |_| return .imm16s;
- if (math.cast(u16, x)) |_| return .imm16;
- if (math.cast(i32, x)) |_| return .imm32s;
- if (math.cast(u32, x)) |_| return .imm32;
- return .imm64;
- },
- }
+ .imm => |imm| switch (imm) {
+ .signed => |x| if (x == 1)
+ .unity
+ else if (math.cast(i8, x)) |_|
+ .imm8s
+ else if (math.cast(i16, x)) |_|
+ .imm16s
+ else
+ .imm32s,
+ .unsigned => |x| if (x == 1)
+ .unity
+ else if (math.cast(i8, x)) |_|
+ .imm8s
+ else if (math.cast(u8, x)) |_|
+ .imm8
+ else if (math.cast(i16, x)) |_|
+ .imm16s
+ else if (math.cast(u16, x)) |_|
+ .imm16
+ else if (math.cast(i32, x)) |_|
+ .imm32s
+ else if (math.cast(u32, x)) |_|
+ .imm32
+ else
+ .imm64,
},
- }
+ };
}
pub fn immBitSize(op: Op) u64 {
@@ -460,6 +470,7 @@ pub const Op = enum {
.ax, .r16, .rm16 => unreachable,
.eax, .r32, .rm32, .r32_m16 => unreachable,
.rax, .r64, .rm64, .r64_m16 => unreachable,
+ .st, .mm, .mm_m64 => unreachable,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable,
.ymm, .ymm_m256 => unreachable,
.m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable,
@@ -480,7 +491,8 @@ pub const Op = enum {
.al, .cl, .r8, .rm8 => 8,
.ax, .r16, .rm16 => 16,
.eax, .r32, .rm32, .r32_m8, .r32_m16 => 32,
- .rax, .r64, .rm64, .r64_m16 => 64,
+ .rax, .r64, .rm64, .r64_m16, .mm, .mm_m64 => 64,
+ .st => 80,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
.ymm, .ymm_m256 => 256,
};
@@ -491,11 +503,11 @@ pub const Op = enum {
.none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable,
.unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
.rel8, .rel16, .rel32 => unreachable,
- .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .xmm, .ymm => unreachable,
+ .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .st, .mm, .xmm, .ymm => unreachable,
.m8, .rm8, .r32_m8 => 8,
.m16, .rm16, .r32_m16, .r64_m16 => 16,
.m32, .rm32, .xmm_m32 => 32,
- .m64, .rm64, .xmm_m64 => 64,
+ .m64, .rm64, .mm_m64, .xmm_m64 => 64,
.m80 => 80,
.m128, .xmm_m128 => 128,
.m256, .ymm_m256 => 256,
@@ -522,6 +534,7 @@ pub const Op = enum {
.r8, .r16, .r32, .r64,
.rm8, .rm16, .rm32, .rm64,
.r32_m8, .r32_m16, .r64_m16,
+ .st, .mm, .mm_m64,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128,
.ymm, .ymm_m256,
=> true,
@@ -550,6 +563,7 @@ pub const Op = enum {
.r32_m8, .r32_m16, .r64_m16,
.m8, .m16, .m32, .m64, .m80, .m128, .m256,
.m,
+ .mm_m64,
.xmm_m32, .xmm_m64, .xmm_m128,
.ymm_m256,
=> true,
@@ -573,8 +587,10 @@ pub const Op = enum {
.rm8, .rm16, .rm32, .rm64 => .general_purpose,
.r32_m8, .r32_m16, .r64_m16 => .general_purpose,
.sreg => .segment,
- .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point,
- .ymm, .ymm_m256 => .floating_point,
+ .st => .x87,
+ .mm, .mm_m64 => .mmx,
+ .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse,
+ .ymm, .ymm_m256 => .sse,
};
}
@@ -695,6 +711,7 @@ pub const Feature = enum {
f16c,
fma,
lzcnt,
+ movbe,
popcnt,
sse,
sse2,
@@ -717,7 +734,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op
}
const mnemonic_to_encodings_map = init: {
- @setEvalBranchQuota(20_000);
+ @setEvalBranchQuota(25_000);
const encodings = @import("encodings.zig");
var entries = encodings.table;
std.sort.sort(encodings.Entry, &entries, {}, struct {
src/arch/x86_64/encodings.zig
@@ -272,14 +272,6 @@ pub const table = [_]Entry{
.{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none, .none },
.{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long, .none },
- .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 },
- .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 },
- .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .none, .x87 },
-
- .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .none, .x87 },
- .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 },
- .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 },
-
.{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .none, .none },
.{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .rex, .none },
.{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .short, .none },
@@ -395,12 +387,12 @@ pub const table = [_]Entry{
.{ .mov, .mi, &.{ .rm32, .imm32 }, &.{ 0xc7 }, 0, .none, .none },
.{ .mov, .mi, &.{ .rm64, .imm32s }, &.{ 0xc7 }, 0, .long, .none },
- .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .short, .none },
- .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .none },
- .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .none },
- .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .short, .none },
- .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .none },
- .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .none },
+ .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .short, .movbe },
+ .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .movbe },
+ .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .movbe },
+ .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .short, .movbe },
+ .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .movbe },
+ .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .movbe },
.{ .movs, .np, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none, .none },
.{ .movs, .np, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .short, .none },
@@ -836,6 +828,15 @@ pub const table = [_]Entry{
.{ .xor, .rm, &.{ .r32, .rm32 }, &.{ 0x33 }, 0, .none, .none },
.{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none },
+ // X87
+ .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 },
+ .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 },
+ .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .none, .x87 },
+
+ .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .none, .x87 },
+ .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 },
+ .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 },
+
// SSE
.{ .addps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .none, .sse },
@@ -847,9 +848,21 @@ pub const table = [_]Entry{
.{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .none, .sse },
+ .{ .cvtpi2ps, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x0f, 0x2a }, 0, .none, .sse },
+
+ .{ .cvtps2pi, .rm, &.{ .mm, .xmm_m64 }, &.{ 0x0f, 0x2d }, 0, .none, .sse },
+
.{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .none, .sse },
.{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .long, .sse },
+ .{ .cvtss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .none, .sse },
+ .{ .cvtss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .long, .sse },
+
+ .{ .cvttps2pi, .rm, &.{ .mm, .xmm_m64 }, &.{ 0x0f, 0x2c }, 0, .none, .sse },
+
+ .{ .cvttss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .none, .sse },
+ .{ .cvttss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .long, .sse },
+
.{ .divps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .none, .sse },
.{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .none, .sse },
@@ -906,6 +919,25 @@ pub const table = [_]Entry{
.{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .none, .sse2 },
+ .{ .cvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .none, .sse2 },
+
+ .{ .cvtdq2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5b }, 0, .none, .sse2 },
+
+ .{ .cvtpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .none, .sse2 },
+
+ .{ .cvtpd2pi, .rm, &.{ .mm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x2d }, 0, .none, .sse2 },
+
+ .{ .cvtpd2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5a }, 0, .none, .sse2 },
+
+ .{ .cvtpi2pd, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x66, 0x0f, 0x2a }, 0, .none, .sse2 },
+
+ .{ .cvtps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5b }, 0, .none, .sse2 },
+
+ .{ .cvtps2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x0f, 0x5a }, 0, .none, .sse2 },
+
+ .{ .cvtsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .none, .sse2 },
+ .{ .cvtsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .long, .sse2 },
+
.{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .none, .sse2 },
.{ .cvtsi2sd, .rm, &.{ .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .none, .sse2 },
@@ -913,6 +945,15 @@ pub const table = [_]Entry{
.{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .none, .sse2 },
+ .{ .cvttpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .none, .sse2 },
+
+ .{ .cvttpd2pi, .rm, &.{ .mm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x2c }, 0, .none, .sse2 },
+
+ .{ .cvttps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .none, .sse2 },
+
+ .{ .cvttsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .none, .sse2 },
+ .{ .cvttsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .long, .sse2 },
+
.{ .divpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .none, .sse2 },
.{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .none, .sse2 },
@@ -1034,15 +1075,51 @@ pub const table = [_]Entry{
.{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx },
.{ .vbroadcastf128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x1a }, 0, .vex_256_w0, .avx },
+ .{ .vcvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx },
+ .{ .vcvtdq2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvtdq2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5b }, 0, .vex_128_wig, .avx },
+ .{ .vcvtdq2ps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x5b }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvtpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx },
+ .{ .vcvtpd2dq, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvtpd2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5a }, 0, .vex_128_wig, .avx },
+ .{ .vcvtpd2ps, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5a }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvtps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5b }, 0, .vex_128_wig, .avx },
+ .{ .vcvtps2dq, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5b }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvtps2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x0f, 0x5a }, 0, .vex_128_wig, .avx },
+ .{ .vcvtps2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x0f, 0x5a }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvtsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .vex_lig_w0, .avx },
+ .{ .vcvtsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .vex_lig_w1, .avx },
+
.{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
.{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx },
.{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx },
- .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx },
- .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx },
+ .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx },
+ .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx },
+
+ .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
+
+ .{ .vcvtss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .vex_lig_w0, .avx },
+ .{ .vcvtss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .vex_lig_w1, .avx },
+
+ .{ .vcvttpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx },
+ .{ .vcvttpd2dq, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvttps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .vex_128_wig, .avx },
+ .{ .vcvttps2dq, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvttsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .vex_lig_w0, .avx },
+ .{ .vcvttsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .vex_lig_w1, .avx },
- .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
+ .{ .vcvttss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w0, .avx },
+ .{ .vcvttss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w1, .avx },
.{ .vdivpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_128_wig, .avx },
.{ .vdivpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_256_wig, .avx },
src/arch/x86_64/Mir.zig
@@ -439,8 +439,21 @@ pub const Inst = struct {
/// Bitwise logical and not of packed single-precision floating-point values
/// Bitwise logical and not of packed double-precision floating-point values
andn,
+ /// Convert packed doubleword integers to packed single-precision floating-point values
+ /// Convert packed doubleword integers to packed double-precision floating-point values
+ cvtpi2,
+ /// Convert packed single-precision floating-point values to packed doubleword integers in an MMX register
+ cvtps2pi,
/// Convert doubleword integer to scalar single-precision floating-point value
- cvtsi2ss,
+ /// Convert doubleword integer to scalar double-precision floating-point value
+ cvtsi2,
+ /// Convert scalar single-precision floating-point value to doubleword integer
+ cvtss2si,
+ /// Convert with truncation packed single-precision floating-point values to packed doubleword integers in an MMX register
+ cvttps2pi,
+ /// Convert with truncation scalar single-precision floating-point value to doubleword integer
+ cvttss2si,
+
/// Maximum of packed single-precision floating-point values
/// Maximum of scalar single-precision floating-point values
/// Maximum of packed double-precision floating-point values
@@ -486,12 +499,33 @@ pub const Inst = struct {
/// Unpack and interleave low packed double-precision floating-point values
unpckl,
+ /// Convert packed doubleword integers to packed single-precision floating-point values
+ /// Convert packed doubleword integers to packed double-precision floating-point values
+ cvtdq2,
+ /// Convert packed double-precision floating-point values to packed doubleword integers
+ cvtpd2dq,
+ /// Convert packed double-precision floating-point values to packed doubleword integers in an MMX register
+ cvtpd2pi,
+ /// Convert packed double-precision floating-point values to packed single-precision floating-point values
+ cvtpd2,
+ /// Convert packed single-precision floating-point values to packed doubleword integers
+ cvtps2dq,
+ /// Convert packed single-precision floating-point values to packed double-precision floating-point values
+ cvtps2,
+ /// Convert scalar double-precision floating-point value to doubleword integer
+ cvtsd2si,
/// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
- cvtsd2ss,
- /// Convert doubleword integer to scalar double-precision floating-point value
- cvtsi2sd,
+ cvtsd2,
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
- cvtss2sd,
+ cvtss2,
+ /// Convert with truncation packed double-precision floating-point values to packed doubleword integers
+ cvttpd2dq,
+ /// Convert with truncation packed double-precision floating-point values to packed doubleword integers in an MMX register
+ cvttpd2pi,
+ /// Convert with truncation packed single-precision floating-point values to packed doubleword integers
+ cvttps2dq,
+ /// Convert with truncation scalar double-precision floating-point value to doubleword integer
+ cvttsd2si,
/// Packed interleave shuffle of quadruplets of single-precision floating-point values
/// Packed interleave shuffle of pairs of double-precision floating-point values
shuf,
@@ -542,7 +576,7 @@ pub const Inst = struct {
broadcast,
/// Convert 16-bit floating-point values to single-precision floating-point values
- cvtph2ps,
+ cvtph2,
/// Convert single-precision floating-point values to 16-bit floating-point values
cvtps2ph,
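These renames reflect the Mir convention this commit leans on: an instruction is a fixes/tag pair, and the printable mnemonic comes from splitting the fixes name at its `_` and wrapping it around the tag, so `.{ .v_ss, .cvtsd2 }` reads as `vcvtsd2ss` and `.{ .v_ps, .cvtph2 }` as `vcvtph2ps`. A hedged sketch of that composition rule (the helper is illustrative, not the actual lowering code):

```zig
const std = @import("std");

/// Illustration only: rebuilds the full mnemonic from a fixes/tag pair by
/// splitting the fixes name at '_' and wrapping it around the tag.
fn composeMnemonic(comptime fixes: []const u8, comptime tag: []const u8) []const u8 {
    const split = comptime std.mem.indexOfScalar(u8, fixes, '_').?;
    return fixes[0..split] ++ tag ++ fixes[split + 1 ..];
}

test "fixes wrap around the base mnemonic" {
    try std.testing.expectEqualStrings("vcvtsd2ss", comptime composeMnemonic("v_ss", "cvtsd2"));
    try std.testing.expectEqualStrings("vcvtph2ps", comptime composeMnemonic("v_ps", "cvtph2"));
    try std.testing.expectEqualStrings("cvttsd2si", comptime composeMnemonic("_", "cvttsd2si"));
    try std.testing.expectEqualStrings("fisttp", comptime composeMnemonic("f_p", "istt"));
}
```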
src/link/Dwarf.zig
@@ -608,23 +608,44 @@ pub const DeclState = struct {
switch (loc) {
.register => |reg| {
- try dbg_info.ensureUnusedCapacity(3);
+ try dbg_info.ensureUnusedCapacity(4);
dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter));
- dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc
- 1, // ULEB128 dwarf expression length
- reg,
- });
+ // DW.AT.location, DW.FORM.exprloc
+ var expr_len = std.io.countingWriter(std.io.null_writer);
+ if (reg < 32) {
+ expr_len.writer().writeByte(DW.OP.reg0 + reg) catch unreachable;
+ } else {
+ expr_len.writer().writeByte(DW.OP.regx) catch unreachable;
+ leb128.writeULEB128(expr_len.writer(), reg) catch unreachable;
+ }
+ leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable;
+ if (reg < 32) {
+ dbg_info.appendAssumeCapacity(DW.OP.reg0 + reg);
+ } else {
+ dbg_info.appendAssumeCapacity(DW.OP.regx);
+ leb128.writeULEB128(dbg_info.writer(), reg) catch unreachable;
+ }
},
.stack => |info| {
- try dbg_info.ensureUnusedCapacity(8);
+ try dbg_info.ensureUnusedCapacity(9);
dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter));
- const fixup = dbg_info.items.len;
- dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc
- 1, // we will backpatch it after we encode the displacement in LEB128
- info.fp_register, // frame pointer
- });
+ // DW.AT.location, DW.FORM.exprloc
+ var expr_len = std.io.countingWriter(std.io.null_writer);
+ if (info.fp_register < 32) {
+ expr_len.writer().writeByte(DW.OP.breg0 + info.fp_register) catch unreachable;
+ } else {
+ expr_len.writer().writeByte(DW.OP.bregx) catch unreachable;
+ leb128.writeULEB128(expr_len.writer(), info.fp_register) catch unreachable;
+ }
+ leb128.writeILEB128(expr_len.writer(), info.offset) catch unreachable;
+ leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable;
+ if (info.fp_register < 32) {
+ dbg_info.appendAssumeCapacity(DW.OP.breg0 + info.fp_register);
+ } else {
+ dbg_info.appendAssumeCapacity(DW.OP.bregx);
+ leb128.writeULEB128(dbg_info.writer(), info.fp_register) catch unreachable;
+ }
leb128.writeILEB128(dbg_info.writer(), info.offset) catch unreachable;
- dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2);
},
.wasm_local => |value| {
const leb_size = link.File.Wasm.getULEB128Size(value);
@@ -670,22 +691,45 @@ pub const DeclState = struct {
switch (loc) {
.register => |reg| {
- try dbg_info.ensureUnusedCapacity(2);
- dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc
- 1, // ULEB128 dwarf expression length
- reg,
- });
+ try dbg_info.ensureUnusedCapacity(4);
+ dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter));
+ // DW.AT.location, DW.FORM.exprloc
+ var expr_len = std.io.countingWriter(std.io.null_writer);
+ if (reg < 32) {
+ expr_len.writer().writeByte(DW.OP.reg0 + reg) catch unreachable;
+ } else {
+ expr_len.writer().writeByte(DW.OP.regx) catch unreachable;
+ leb128.writeULEB128(expr_len.writer(), reg) catch unreachable;
+ }
+ leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable;
+ if (reg < 32) {
+ dbg_info.appendAssumeCapacity(DW.OP.reg0 + reg);
+ } else {
+ dbg_info.appendAssumeCapacity(DW.OP.regx);
+ leb128.writeULEB128(dbg_info.writer(), reg) catch unreachable;
+ }
},
.stack => |info| {
- try dbg_info.ensureUnusedCapacity(7);
- const fixup = dbg_info.items.len;
- dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc
- 1, // we will backpatch it after we encode the displacement in LEB128
- info.fp_register,
- });
+ try dbg_info.ensureUnusedCapacity(9);
+ dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter));
+ // DW.AT.location, DW.FORM.exprloc
+ var expr_len = std.io.countingWriter(std.io.null_writer);
+ if (info.fp_register < 32) {
+ expr_len.writer().writeByte(DW.OP.breg0 + info.fp_register) catch unreachable;
+ } else {
+ expr_len.writer().writeByte(DW.OP.bregx) catch unreachable;
+ leb128.writeULEB128(expr_len.writer(), info.fp_register) catch unreachable;
+ }
+ leb128.writeILEB128(expr_len.writer(), info.offset) catch unreachable;
+ leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable;
+ if (info.fp_register < 32) {
+ dbg_info.appendAssumeCapacity(DW.OP.breg0 + info.fp_register);
+ } else {
+ dbg_info.appendAssumeCapacity(DW.OP.bregx);
+ leb128.writeULEB128(dbg_info.writer(), info.fp_register) catch unreachable;
+ }
leb128.writeILEB128(dbg_info.writer(), info.offset) catch unreachable;
- dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2);
},
.wasm_local => |value| {
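Both Dwarf.zig hunks follow the same DWARF rule: register numbers below 32 fit the one-byte `DW_OP_reg0+n` / `DW_OP_breg0+n` forms, while anything higher (the new x87/MMX/segment numbers reach 55) needs `DW_OP_regx` / `DW_OP_bregx` plus a ULEB128 operand, which is why the expression length is now measured with a counting writer instead of the old one-byte backpatch. A minimal standalone sketch of the register case (the opcode values are the standard DWARF constants, not taken from this file):

```zig
const std = @import("std");
const leb128 = std.leb;

const DW_OP_reg0: u8 = 0x50; // DW_OP_reg0..reg31 are 0x50..0x6f
const DW_OP_regx: u8 = 0x90; // register number follows as ULEB128

/// Writes just the expression body for a "value lives in register `reg`"
/// location, using the short form when it fits.
fn writeRegisterLoc(writer: anytype, reg: u6) !void {
    if (reg < 32) {
        try writer.writeByte(DW_OP_reg0 + reg);
    } else {
        try writer.writeByte(DW_OP_regx);
        try leb128.writeULEB128(writer, reg);
    }
}

test "short vs. extended register location forms" {
    var buf: [8]u8 = undefined;

    var short = std.io.fixedBufferStream(&buf);
    try writeRegisterLoc(short.writer(), 6); // rbp -> single byte DW_OP_reg6
    try std.testing.expectEqualSlices(u8, &.{0x56}, short.getWritten());

    var extended = std.io.fixedBufferStream(&buf);
    try writeRegisterLoc(extended.writer(), 41); // mm0 -> DW_OP_regx, ULEB128(41)
    try std.testing.expectEqualSlices(u8, &.{ 0x90, 41 }, extended.getWritten());
}
```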
test/behavior/cast.zig
@@ -153,7 +153,6 @@ test "@intToFloat(f80)" {
test "@floatToInt" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;