Commit 057139fda5
Changed files (5)
src
src/arch/x86_64/CodeGen.zig
@@ -1176,6 +1176,21 @@ fn asmRegisterRegisterRegister(
});
}
+fn asmRegisterRegisterRegisterImmediate(
+ self: *Self,
+ tag: Mir.Inst.Tag,
+ reg1: Register,
+ reg2: Register,
+ reg3: Register,
+ imm: Immediate,
+) !void {
+ _ = try self.addInst(.{
+ .tag = tag,
+ .ops = .rrri,
+ .data = .{ .rrri = .{ .r1 = reg1, .r2 = reg2, .r3 = reg3, .i = @intCast(u8, imm.unsigned) } },
+ });
+}
+
fn asmRegisterRegisterImmediate(
self: *Self,
tag: Mir.Inst.Tag,
@@ -2310,20 +2325,31 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
}),
}
} else if (src_bits == 64 and dst_bits == 32) {
- if (self.hasFeature(.avx)) if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister(
+ if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
.vcvtsd2ss,
dst_reg,
dst_reg,
- src_mcv.getReg().?.to128(),
- ) else try self.asmRegisterRegisterMemory(
+ src_mcv.mem(.qword),
+ ) else try self.asmRegisterRegisterRegister(
.vcvtsd2ss,
dst_reg,
dst_reg,
+ (if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
+ ) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ .cvtsd2ss,
+ dst_reg,
src_mcv.mem(.qword),
- ) else if (src_mcv.isRegister())
- try self.asmRegisterRegister(.cvtsd2ss, dst_reg, src_mcv.getReg().?.to128())
- else
- try self.asmRegisterMemory(.cvtsd2ss, dst_reg, src_mcv.mem(.qword));
+ ) else try self.asmRegisterRegister(
+ .cvtsd2ss,
+ dst_reg,
+ (if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
+ );
} else return self.fail("TODO implement airFptrunc from {} to {}", .{
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
});
@@ -2360,20 +2386,31 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
}),
}
} else if (src_bits == 32 and dst_bits == 64) {
- if (self.hasFeature(.avx)) if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister(
+ if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
.vcvtss2sd,
dst_reg,
dst_reg,
- src_mcv.getReg().?.to128(),
- ) else try self.asmRegisterRegisterMemory(
+ src_mcv.mem(.dword),
+ ) else try self.asmRegisterRegisterRegister(
.vcvtss2sd,
dst_reg,
dst_reg,
+ (if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
+ ) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ .cvtss2sd,
+ dst_reg,
src_mcv.mem(.dword),
- ) else if (src_mcv.isRegister())
- try self.asmRegisterRegister(.cvtss2sd, dst_reg, src_mcv.getReg().?.to128())
- else
- try self.asmRegisterMemory(.cvtss2sd, dst_reg, src_mcv.mem(.dword));
+ ) else try self.asmRegisterRegister(
+ .cvtss2sd,
+ dst_reg,
+ (if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
+ );
} else return self.fail("TODO implement airFpext from {} to {}", .{
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
});
@@ -4532,7 +4569,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
const result: MCValue = result: {
- const tag = if (@as(?Mir.Inst.Tag, switch (ty.zigTypeTag()) {
+ const mir_tag = if (@as(?Mir.Inst.Tag, switch (ty.zigTypeTag()) {
.Float => switch (ty.floatBits(self.target.*)) {
16 => if (self.hasFeature(.f16c)) {
const mat_src_reg = if (src_mcv.isRegister())
@@ -4558,11 +4595,14 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
.Float => switch (ty.childType().floatBits(self.target.*)) {
16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) {
1 => {
- const mat_src_reg = if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(ty, src_mcv);
- try self.asmRegisterRegister(.vcvtph2ps, dst_reg, mat_src_reg.to128());
+ try self.asmRegisterRegister(
+ .vcvtph2ps,
+ dst_reg,
+ (if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(ty, src_mcv)).to128(),
+ );
try self.asmRegisterRegisterRegister(.vsqrtss, dst_reg, dst_reg, dst_reg);
try self.asmRegisterRegisterImmediate(
.vcvtps2ph,
@@ -4574,16 +4614,19 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
},
2...8 => {
const wide_reg = registerAlias(dst_reg, abi_size * 2);
- if (src_mcv.isRegister()) try self.asmRegisterRegister(
- .vcvtph2ps,
- wide_reg,
- src_mcv.getReg().?.to128(),
- ) else try self.asmRegisterMemory(
+ if (src_mcv.isMemory()) try self.asmRegisterMemory(
.vcvtph2ps,
wide_reg,
src_mcv.mem(Memory.PtrSize.fromSize(
@intCast(u32, @divExact(wide_reg.bitSize(), 16)),
)),
+ ) else try self.asmRegisterRegister(
+ .vcvtph2ps,
+ wide_reg,
+ (if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(ty, src_mcv)).to128(),
);
try self.asmRegisterRegister(.vsqrtps, wide_reg, wide_reg);
try self.asmRegisterRegisterImmediate(
@@ -4617,26 +4660,32 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
})) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{
ty.fmt(self.bin_file.options.module.?),
});
- switch (tag) {
- .vsqrtss, .vsqrtsd => if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister(
- tag,
+ switch (mir_tag) {
+ .vsqrtss, .vsqrtsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ mir_tag,
dst_reg,
dst_reg,
- registerAlias(src_mcv.getReg().?, abi_size),
- ) else try self.asmRegisterRegisterMemory(
- tag,
+ src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
+ ) else try self.asmRegisterRegisterRegister(
+ mir_tag,
dst_reg,
dst_reg,
- src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(ty, src_mcv), abi_size),
),
- else => if (src_mcv.isRegister()) try self.asmRegisterRegister(
- tag,
- dst_reg,
- registerAlias(src_mcv.getReg().?, abi_size),
- ) else try self.asmRegisterMemory(
- tag,
+ else => if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ mir_tag,
dst_reg,
src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
+ ) else try self.asmRegisterRegister(
+ mir_tag,
+ dst_reg,
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(ty, src_mcv), abi_size),
),
}
break :result dst_mcv;
@@ -5800,25 +5849,22 @@ fn genMulDivBinOp(
}
}
-/// Result is always a register.
fn genBinOp(
self: *Self,
maybe_inst: ?Air.Inst.Index,
- tag: Air.Inst.Tag,
+ air_tag: Air.Inst.Tag,
lhs_air: Air.Inst.Ref,
rhs_air: Air.Inst.Ref,
) !MCValue {
- const lhs = try self.resolveInst(lhs_air);
- const rhs = try self.resolveInst(rhs_air);
+ const lhs_mcv = try self.resolveInst(lhs_air);
+ const rhs_mcv = try self.resolveInst(rhs_air);
const lhs_ty = self.air.typeOf(lhs_air);
const rhs_ty = self.air.typeOf(rhs_air);
- if (lhs_ty.zigTypeTag() == .Vector) {
- return self.fail("TODO implement genBinOp for {}", .{lhs_ty.fmt(self.bin_file.options.module.?)});
- }
+ const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*));
- switch (lhs) {
+ switch (lhs_mcv) {
.immediate => |imm| switch (imm) {
- 0 => switch (tag) {
+ 0 => switch (air_tag) {
.sub, .subwrap => return self.genUnOp(maybe_inst, .neg, rhs_air),
else => {},
},
@@ -5827,9 +5873,10 @@ fn genBinOp(
else => {},
}
- const is_commutative = switch (tag) {
+ const is_commutative = switch (air_tag) {
.add,
.addwrap,
+ .mul,
.bool_or,
.bit_or,
.bool_and,
@@ -5841,48 +5888,42 @@ fn genBinOp(
else => false,
};
- const dst_mem_ok = switch (tag) {
- .add,
- .addwrap,
- .sub,
- .subwrap,
- .mul,
- .div_float,
- .div_exact,
- .div_trunc,
- .div_floor,
- => !lhs_ty.isRuntimeFloat(),
-
- else => true,
+ const vec_op = switch (lhs_ty.zigTypeTag()) {
+ else => false,
+ .Float, .Vector => true,
};
- const lhs_lock: ?RegisterLock = switch (lhs) {
+ const lhs_lock: ?RegisterLock = switch (lhs_mcv) {
.register => |reg| self.register_manager.lockRegAssumeUnused(reg),
else => null,
};
defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
- const rhs_lock: ?RegisterLock = switch (rhs) {
+ const rhs_lock: ?RegisterLock = switch (rhs_mcv) {
.register => |reg| self.register_manager.lockReg(reg),
else => null,
};
defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
- var flipped: bool = false;
+ var flipped = false;
+ var copied_to_dst = true;
const dst_mcv: MCValue = dst: {
if (maybe_inst) |inst| {
- if ((dst_mem_ok or lhs.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs)) {
- break :dst lhs;
+ if ((!vec_op or lhs_mcv.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs_mcv)) {
+ break :dst lhs_mcv;
}
- if (is_commutative and (dst_mem_ok or rhs.isRegister()) and
- self.reuseOperand(inst, rhs_air, 1, rhs))
+ if (is_commutative and (!vec_op or rhs_mcv.isRegister()) and
+ self.reuseOperand(inst, rhs_air, 1, rhs_mcv))
{
flipped = true;
- break :dst rhs;
+ break :dst rhs_mcv;
}
}
const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
- try self.genCopy(lhs_ty, dst_mcv, lhs);
+ if (vec_op and lhs_mcv.isRegister() and self.hasFeature(.avx))
+ copied_to_dst = false
+ else
+ try self.genCopy(lhs_ty, dst_mcv, lhs_mcv);
break :dst dst_mcv;
};
const dst_lock: ?RegisterLock = switch (dst_mcv) {
@@ -5891,160 +5932,47 @@ fn genBinOp(
};
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
- const src_mcv = if (flipped) lhs else rhs;
- switch (tag) {
- .add,
- .addwrap,
- => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
- else => .add,
- .Float => switch (lhs_ty.floatBits(self.target.*)) {
- 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
- .addss
- else
- return self.fail("TODO implement genBinOp for {s} {} without sse", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
- .addsd
- else
- return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- else => return self.fail("TODO implement genBinOp for {s} {}", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- },
- }, lhs_ty, dst_mcv, src_mcv),
-
- .sub,
- .subwrap,
- => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
- else => .sub,
- .Float => switch (lhs_ty.floatBits(self.target.*)) {
- 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
- .subss
- else
- return self.fail("TODO implement genBinOp for {s} {} without sse", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
- .subsd
- else
- return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- else => return self.fail("TODO implement genBinOp for {s} {}", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- },
- }, lhs_ty, dst_mcv, src_mcv),
-
- .mul => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
- else => return self.fail("TODO implement genBinOp for {s} {}", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- .Float => switch (lhs_ty.floatBits(self.target.*)) {
- 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
- .mulss
- else
- return self.fail("TODO implement genBinOp for {s} {} without sse", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
- .mulsd
- else
- return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- else => return self.fail("TODO implement genBinOp for {s} {}", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- },
- }, lhs_ty, dst_mcv, src_mcv),
+ const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
+ if (!vec_op) {
+ switch (air_tag) {
+ .add,
+ .addwrap,
+ => try self.genBinOpMir(.add, lhs_ty, dst_mcv, src_mcv),
- .div_float,
- .div_exact,
- .div_trunc,
- .div_floor,
- => {
- try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
- else => return self.fail("TODO implement genBinOp for {s} {}", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- .Float => switch (lhs_ty.floatBits(self.target.*)) {
- 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
- .divss
- else
- return self.fail("TODO implement genBinOp for {s} {} without sse", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
- .divsd
- else
- return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- else => return self.fail("TODO implement genBinOp for {s} {}", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- },
- }, lhs_ty, dst_mcv, src_mcv);
- switch (tag) {
- .div_float,
- .div_exact,
- => {},
- .div_trunc,
- .div_floor,
- => if (self.hasFeature(.sse4_1)) {
- const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*));
- const dst_alias = registerAlias(dst_mcv.register, abi_size);
- try self.asmRegisterRegisterImmediate(switch (lhs_ty.floatBits(self.target.*)) {
- 32 => .roundss,
- 64 => .roundsd,
- else => unreachable,
- }, dst_alias, dst_alias, Immediate.u(switch (tag) {
- .div_trunc => 0b1_0_11,
- .div_floor => 0b1_0_01,
- else => unreachable,
- }));
- } else return self.fail("TODO implement genBinOp for {s} {} without sse4_1", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- else => unreachable,
- }
- },
+ .sub,
+ .subwrap,
+ => try self.genBinOpMir(.sub, lhs_ty, dst_mcv, src_mcv),
- .ptr_add,
- .ptr_sub,
- => {
- const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv);
- const tmp_mcv = MCValue{ .register = tmp_reg };
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
+ .ptr_add,
+ .ptr_sub,
+ => {
+ const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv);
+ const tmp_mcv = MCValue{ .register = tmp_reg };
+ const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+ defer self.register_manager.unlockReg(tmp_lock);
- const elem_size = lhs_ty.elemType2().abiSize(self.target.*);
- try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size });
- try self.genBinOpMir(switch (tag) {
- .ptr_add => .add,
- .ptr_sub => .sub,
- else => unreachable,
- }, lhs_ty, dst_mcv, tmp_mcv);
- },
+ const elem_size = lhs_ty.elemType2().abiSize(self.target.*);
+ try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size });
+ try self.genBinOpMir(switch (air_tag) {
+ .ptr_add => .add,
+ .ptr_sub => .sub,
+ else => unreachable,
+ }, lhs_ty, dst_mcv, tmp_mcv);
+ },
- .bool_or,
- .bit_or,
- => try self.genBinOpMir(.@"or", lhs_ty, dst_mcv, src_mcv),
+ .bool_or,
+ .bit_or,
+ => try self.genBinOpMir(.@"or", lhs_ty, dst_mcv, src_mcv),
- .bool_and,
- .bit_and,
- => try self.genBinOpMir(.@"and", lhs_ty, dst_mcv, src_mcv),
+ .bool_and,
+ .bit_and,
+ => try self.genBinOpMir(.@"and", lhs_ty, dst_mcv, src_mcv),
- .xor => try self.genBinOpMir(.xor, lhs_ty, dst_mcv, src_mcv),
+ .xor => try self.genBinOpMir(.xor, lhs_ty, dst_mcv, src_mcv),
- .min,
- .max,
- => switch (lhs_ty.zigTypeTag()) {
- .Int => {
+ .min,
+ .max,
+ => {
const mat_src_mcv: MCValue = if (switch (src_mcv) {
.immediate,
.eflags,
@@ -6070,12 +5998,12 @@ fn genBinOp(
const int_info = lhs_ty.intInfo(self.target.*);
const cc: Condition = switch (int_info.signedness) {
- .unsigned => switch (tag) {
+ .unsigned => switch (air_tag) {
.min => .a,
.max => .b,
else => unreachable,
},
- .signed => switch (tag) {
+ .signed => switch (air_tag) {
.min => .g,
.max => .l,
else => unreachable,
@@ -6134,26 +6062,222 @@ fn genBinOp(
}
try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg });
},
- .Float => try self.genBinOpMir(switch (lhs_ty.floatBits(self.target.*)) {
- 32 => switch (tag) {
- .min => .minss,
- .max => .maxss,
- else => unreachable,
- },
- 64 => switch (tag) {
- .min => .minsd,
- .max => .maxsd,
- else => unreachable,
- },
- else => return self.fail("TODO implement genBinOp for {s} {}", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
- }),
- }, lhs_ty, dst_mcv, src_mcv),
+
else => return self.fail("TODO implement genBinOp for {s} {}", .{
- @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
- },
+ }
+ return dst_mcv;
+ }
+ const mir_tag = if (@as(?Mir.Inst.Tag, switch (lhs_ty.zigTypeTag()) {
+ else => unreachable,
+ .Float => switch (lhs_ty.floatBits(self.target.*)) {
+ 32 => switch (air_tag) {
+ .add => if (self.hasFeature(.avx)) .vaddss else .addss,
+ .sub => if (self.hasFeature(.avx)) .vsubss else .subss,
+ .mul => if (self.hasFeature(.avx)) .vmulss else .mulss,
+ .div_float,
+ .div_trunc,
+ .div_floor,
+ .div_exact,
+ => if (self.hasFeature(.avx)) .vdivss else .divss,
+ .max => if (self.hasFeature(.avx)) .vmaxss else .maxss,
+ .min => if (self.hasFeature(.avx)) .vminss else .minss,
+ else => unreachable,
+ },
+ 64 => switch (air_tag) {
+ .add => if (self.hasFeature(.avx)) .vaddsd else .addsd,
+ .sub => if (self.hasFeature(.avx)) .vsubsd else .subsd,
+ .mul => if (self.hasFeature(.avx)) .vmulsd else .mulsd,
+ .div_float,
+ .div_trunc,
+ .div_floor,
+ .div_exact,
+ => if (self.hasFeature(.avx)) .vdivsd else .divsd,
+ .max => if (self.hasFeature(.avx)) .vmaxsd else .maxsd,
+ .min => if (self.hasFeature(.avx)) .vminsd else .minsd,
+ else => unreachable,
+ },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ .Vector => switch (lhs_ty.childType().zigTypeTag()) {
+ else => null,
+ .Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
+ 32 => switch (lhs_ty.vectorLen()) {
+ 1 => switch (air_tag) {
+ .add => if (self.hasFeature(.avx)) .vaddss else .addss,
+ .sub => if (self.hasFeature(.avx)) .vsubss else .subss,
+ .mul => if (self.hasFeature(.avx)) .vmulss else .mulss,
+ .div_float,
+ .div_trunc,
+ .div_floor,
+ .div_exact,
+ => if (self.hasFeature(.avx)) .vdivss else .divss,
+ .max => if (self.hasFeature(.avx)) .vmaxss else .maxss,
+ .min => if (self.hasFeature(.avx)) .vminss else .minss,
+ else => unreachable,
+ },
+ 2...4 => switch (air_tag) {
+ .add => if (self.hasFeature(.avx)) .vaddps else .addps,
+ .sub => if (self.hasFeature(.avx)) .vsubps else .subps,
+ .mul => if (self.hasFeature(.avx)) .vmulps else .mulps,
+ .div_float,
+ .div_trunc,
+ .div_floor,
+ .div_exact,
+ => if (self.hasFeature(.avx)) .vdivps else .divps,
+ .max => if (self.hasFeature(.avx)) .vmaxps else .maxps,
+ .min => if (self.hasFeature(.avx)) .vminps else .minps,
+ else => unreachable,
+ },
+ 5...8 => if (self.hasFeature(.avx)) switch (air_tag) {
+ .add => .vaddps,
+ .sub => .vsubps,
+ .mul => .vmulps,
+ .div_float, .div_trunc, .div_floor, .div_exact => .vdivps,
+ .max => .vmaxps,
+ .min => .vminps,
+ else => unreachable,
+ } else null,
+ else => null,
+ },
+ 64 => switch (lhs_ty.vectorLen()) {
+ 1 => switch (air_tag) {
+ .add => if (self.hasFeature(.avx)) .vaddsd else .addsd,
+ .sub => if (self.hasFeature(.avx)) .vsubsd else .subsd,
+ .mul => if (self.hasFeature(.avx)) .vmulsd else .mulsd,
+ .div_float,
+ .div_trunc,
+ .div_floor,
+ .div_exact,
+ => if (self.hasFeature(.avx)) .vdivsd else .divsd,
+ .max => if (self.hasFeature(.avx)) .vmaxsd else .maxsd,
+ .min => if (self.hasFeature(.avx)) .vminsd else .minsd,
+ else => unreachable,
+ },
+ 2 => switch (air_tag) {
+ .add => if (self.hasFeature(.avx)) .vaddpd else .addpd,
+ .sub => if (self.hasFeature(.avx)) .vsubpd else .subpd,
+ .mul => if (self.hasFeature(.avx)) .vmulpd else .mulpd,
+ .div_float,
+ .div_trunc,
+ .div_floor,
+ .div_exact,
+ => if (self.hasFeature(.avx)) .vdivpd else .divpd,
+ .max => if (self.hasFeature(.avx)) .vmaxpd else .maxpd,
+ .min => if (self.hasFeature(.avx)) .vminpd else .minpd,
+ else => unreachable,
+ },
+ 3...4 => if (self.hasFeature(.avx)) switch (air_tag) {
+ .add => .vaddpd,
+ .sub => .vsubpd,
+ .mul => .vmulpd,
+ .div_float, .div_trunc, .div_floor, .div_exact => .vdivpd,
+ .max => .vmaxpd,
+ .min => .vminpd,
+ else => unreachable,
+ } else null,
+ else => null,
+ },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ },
+ })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
+ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
+ });
+ const dst_alias = registerAlias(dst_mcv.getReg().?, abi_size);
+ if (self.hasFeature(.avx)) {
+ const src1_alias =
+ if (copied_to_dst) dst_alias else registerAlias(lhs_mcv.getReg().?, abi_size);
+ if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ mir_tag,
+ dst_alias,
+ src1_alias,
+ src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
+ ) else try self.asmRegisterRegisterRegister(
+ mir_tag,
+ dst_alias,
+ src1_alias,
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
+ );
+ } else {
+ assert(copied_to_dst);
+ if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ mir_tag,
+ dst_alias,
+ src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
+ ) else try self.asmRegisterRegister(
+ mir_tag,
+ dst_alias,
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
+ );
+ }
+ switch (air_tag) {
+ .add, .sub, .mul, .div_float, .div_exact => {},
+ .div_trunc, .div_floor => if (self.hasFeature(.sse4_1)) {
+ const round_tag = if (@as(?Mir.Inst.Tag, switch (lhs_ty.zigTypeTag()) {
+ .Float => switch (lhs_ty.floatBits(self.target.*)) {
+ 32 => if (self.hasFeature(.avx)) .vroundss else .roundss,
+ 64 => if (self.hasFeature(.avx)) .vroundsd else .roundsd,
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ .Vector => switch (lhs_ty.childType().zigTypeTag()) {
+ .Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
+ 32 => switch (lhs_ty.vectorLen()) {
+ 1 => if (self.hasFeature(.avx)) .vroundss else .roundss,
+ 2...4 => if (self.hasFeature(.avx)) .vroundps else .roundps,
+ 5...8 => if (self.hasFeature(.avx)) .vroundps else null,
+ else => null,
+ },
+ 64 => switch (lhs_ty.vectorLen()) {
+ 1 => if (self.hasFeature(.avx)) .vroundsd else .roundsd,
+ 2 => if (self.hasFeature(.avx)) .vroundpd else .roundpd,
+ 3...4 => if (self.hasFeature(.avx)) .vroundpd else null,
+ else => null,
+ },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ else => null,
+ },
+ else => unreachable,
+ })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
+ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
+ });
+ const round_mode = Immediate.u(switch (air_tag) {
+ .div_trunc => 0b1_0_11,
+ .div_floor => 0b1_0_01,
+ else => unreachable,
+ });
+ switch (round_tag) {
+ .vroundss, .vroundsd => try self.asmRegisterRegisterRegisterImmediate(
+ round_tag,
+ dst_alias,
+ dst_alias,
+ dst_alias,
+ round_mode,
+ ),
+ else => try self.asmRegisterRegisterImmediate(
+ round_tag,
+ dst_alias,
+ dst_alias,
+ round_mode,
+ ),
+ }
+ } else return self.fail("TODO implement genBinOp for {s} {} without sse4_1", .{
+ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
+ }),
+ .max, .min => {}, // TODO: unordered select
else => unreachable,
}
return dst_mcv;
@@ -6186,20 +6310,11 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s
.register_overflow,
.reserved_frame,
=> unreachable,
- .register => |src_reg| switch (ty.zigTypeTag()) {
- .Float => {
- if (!Target.x86.featureSetHas(self.target.cpu.features, .sse))
- return self.fail("TODO genBinOpMir for {s} {} without sse", .{
- @tagName(mir_tag), ty.fmt(self.bin_file.options.module.?),
- });
- return self.asmRegisterRegister(mir_tag, dst_reg.to128(), src_reg.to128());
- },
- else => try self.asmRegisterRegister(
- mir_tag,
- dst_alias,
- registerAlias(src_reg, abi_size),
- ),
- },
+ .register => |src_reg| try self.asmRegisterRegister(
+ mir_tag,
+ dst_alias,
+ registerAlias(src_reg, abi_size),
+ ),
.immediate => |imm| switch (self.regBitSize(ty)) {
8 => try self.asmRegisterImmediate(
mir_tag,
@@ -9646,7 +9761,7 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
lock.* = self.register_manager.lockRegAssumeUnused(reg);
}
- const tag = if (@as(
+ const mir_tag = if (@as(
?Mir.Inst.Tag,
if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 }))
switch (ty.zigTypeTag()) {
@@ -9741,20 +9856,17 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
const abi_size = @intCast(u32, ty.abiSize(self.target.*));
const mop1_reg = registerAlias(mops[0].getReg().?, abi_size);
const mop2_reg = registerAlias(mops[1].getReg().?, abi_size);
- if (mops[2].isRegister())
- try self.asmRegisterRegisterRegister(
- tag,
- mop1_reg,
- mop2_reg,
- registerAlias(mops[2].getReg().?, abi_size),
- )
- else
- try self.asmRegisterRegisterMemory(
- tag,
- mop1_reg,
- mop2_reg,
- mops[2].mem(Memory.PtrSize.fromSize(abi_size)),
- );
+ if (mops[2].isRegister()) try self.asmRegisterRegisterRegister(
+ mir_tag,
+ mop1_reg,
+ mop2_reg,
+ registerAlias(mops[2].getReg().?, abi_size),
+ ) else try self.asmRegisterRegisterMemory(
+ mir_tag,
+ mop1_reg,
+ mop2_reg,
+ mops[2].mem(Memory.PtrSize.fromSize(abi_size)),
+ );
return self.finishAir(inst, mops[0], ops);
}
src/arch/x86_64/Encoding.zig
@@ -262,61 +262,69 @@ pub const Mnemonic = enum {
// MMX
movd,
// SSE
- addss,
+ addps, addss,
andps,
andnps,
cmpss,
cvtsi2ss,
- divss,
- maxss, minss,
+ divps, divss,
+ maxps, maxss,
+ minps, minss,
movaps, movss, movups,
- mulss,
+ mulps, mulss,
orps,
pextrw, pinsrw,
- sqrtps,
- sqrtss,
- subss,
+ sqrtps, sqrtss,
+ subps, subss,
ucomiss,
xorps,
// SSE2
- addsd,
+ addpd, addsd,
andpd,
andnpd,
//cmpsd,
cvtsd2ss, cvtsi2sd, cvtss2sd,
- divsd,
- maxsd, minsd,
+ divpd, divsd,
+ maxpd, maxsd,
+ minpd, minsd,
movapd,
movq, //movd, movsd,
movupd,
- mulsd,
+ mulpd, mulsd,
orpd,
pshufhw, pshuflw,
psrld, psrlq, psrlw,
punpckhbw, punpckhdq, punpckhqdq, punpckhwd,
punpcklbw, punpckldq, punpcklqdq, punpcklwd,
sqrtpd, sqrtsd,
- subsd,
+ subpd, subsd,
ucomisd,
xorpd,
// SSE3
movddup, movshdup, movsldup,
// SSE4.1
- roundsd, roundss,
+ roundpd, roundps, roundsd, roundss,
// AVX
+ vaddpd, vaddps, vaddsd, vaddss,
vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd,
+ vdivpd, vdivps, vdivsd, vdivss,
+ vmaxpd, vmaxps, vmaxsd, vmaxss,
+ vminpd, vminps, vminsd, vminss,
vmovapd, vmovaps,
vmovddup,
vmovsd,
vmovshdup, vmovsldup,
vmovss,
vmovupd, vmovups,
+ vmulpd, vmulps, vmulsd, vmulss,
vpextrw, vpinsrw,
vpshufhw, vpshuflw,
vpsrld, vpsrlq, vpsrlw,
vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd,
vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd,
+ vroundpd, vroundps, vroundsd, vroundss,
vsqrtpd, vsqrtps, vsqrtsd, vsqrtss,
+ vsubpd, vsubps, vsubsd, vsubss,
// F16C
vcvtph2ps, vcvtps2ph,
// FMA
src/arch/x86_64/encodings.zig
@@ -837,6 +837,8 @@ pub const table = [_]Entry{
.{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none },
// SSE
+ .{ .addps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .none, .sse },
+
.{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .none, .sse },
.{ .andnps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .none, .sse },
@@ -848,10 +850,16 @@ pub const table = [_]Entry{
.{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .none, .sse },
.{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .long, .sse },
+ .{ .divps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .none, .sse },
+
.{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .none, .sse },
+ .{ .maxps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5f }, 0, .none, .sse },
+
.{ .maxss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .none, .sse },
+ .{ .minps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5d }, 0, .none, .sse },
+
.{ .minss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .none, .sse },
.{ .movaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .none, .sse },
@@ -863,10 +871,14 @@ pub const table = [_]Entry{
.{ .movups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .none, .sse },
.{ .movups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .none, .sse },
+ .{ .mulps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x59 }, 0, .none, .sse },
+
.{ .mulss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .none, .sse },
.{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .none, .sse },
+ .{ .subps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse },
+
.{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse },
.{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse },
@@ -878,6 +890,8 @@ pub const table = [_]Entry{
.{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .none, .sse },
// SSE2
+ .{ .addpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .none, .sse2 },
+
.{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .none, .sse2 },
.{ .andnpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .none, .sse2 },
@@ -893,10 +907,16 @@ pub const table = [_]Entry{
.{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .none, .sse2 },
+ .{ .divpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .none, .sse2 },
+
.{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .none, .sse2 },
+ .{ .maxpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .none, .sse2 },
+
.{ .maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .none, .sse2 },
+ .{ .minpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5d }, 0, .none, .sse2 },
+
.{ .minsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .none, .sse2 },
.{ .movapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .none, .sse2 },
@@ -914,6 +934,8 @@ pub const table = [_]Entry{
.{ .movupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .none, .sse2 },
.{ .movupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .none, .sse2 },
+ .{ .mulpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x59 }, 0, .none, .sse2 },
+
.{ .mulsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .none, .sse2 },
.{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 },
@@ -947,6 +969,8 @@ pub const table = [_]Entry{
.{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 },
+ .{ .subpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5c }, 0, .none, .sse2 },
+
.{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .none, .sse2 },
.{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .none, .sse2 },
@@ -966,10 +990,25 @@ pub const table = [_]Entry{
// SSE4.1
.{ .pextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 },
- .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 },
+ .{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 },
+
+ .{ .roundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .none, .sse4_1 },
+
.{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .none, .sse4_1 },
+ .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 },
+
// AVX
+ .{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx },
+ .{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx },
+
+ .{ .vaddps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .vex_128_wig, .avx },
+ .{ .vaddps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x58 }, 0, .vex_256_wig, .avx },
+
+ .{ .vaddsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx },
+
+ .{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx },
+
.{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
.{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx },
@@ -980,6 +1019,36 @@ pub const table = [_]Entry{
 .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
+ .{ .vdivpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_128_wig, .avx },
+ .{ .vdivpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_256_wig, .avx },
+
+ .{ .vdivps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .vex_128_wig, .avx },
+ .{ .vdivps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5e }, 0, .vex_256_wig, .avx },
+
+ .{ .vdivsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx },
+
+ .{ .vdivss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx },
+
+ .{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx },
+ .{ .vmaxpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_256_wig, .avx },
+
+ .{ .vmaxps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5f }, 0, .vex_128_wig, .avx },
+ .{ .vmaxps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5f }, 0, .vex_256_wig, .avx },
+
+ .{ .vmaxsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .vex_lig_wig, .avx },
+
+ .{ .vmaxss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .vex_lig_wig, .avx },
+
+ .{ .vminpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_128_wig, .avx },
+ .{ .vminpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_256_wig, .avx },
+
+ .{ .vminps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5d }, 0, .vex_128_wig, .avx },
+ .{ .vminps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5d }, 0, .vex_256_wig, .avx },
+
+ .{ .vminsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .vex_lig_wig, .avx },
+
+ .{ .vminss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .vex_lig_wig, .avx },
+
.{ .vmovapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_128_wig, .avx },
.{ .vmovapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_128_wig, .avx },
.{ .vmovapd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_256_wig, .avx },
@@ -1019,6 +1088,16 @@ pub const table = [_]Entry{
.{ .vmovups, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x10 }, 0, .vex_256_wig, .avx },
.{ .vmovups, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x11 }, 0, .vex_256_wig, .avx },
+ .{ .vmulpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x59 }, 0, .vex_128_wig, .avx },
+ .{ .vmulpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x59 }, 0, .vex_256_wig, .avx },
+
+ .{ .vmulps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x59 }, 0, .vex_128_wig, .avx },
+ .{ .vmulps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x59 }, 0, .vex_256_wig, .avx },
+
+ .{ .vmulsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx },
+
+ .{ .vmulss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx },
+
 .{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .vex_128_wig, .avx },
.{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_wig, .avx },
@@ -1041,6 +1120,16 @@ pub const table = [_]Entry{
.{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx },
.{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx },
+ .{ .vroundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_128_wig, .avx },
+ .{ .vroundpd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_256_wig, .avx },
+
+ .{ .vroundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .vex_128_wig, .avx },
+ .{ .vroundps, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .vex_256_wig, .avx },
+
+ .{ .vroundsd, .rvmi, &.{ .xmm, .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .vex_lig_wig, .avx },
+
+ .{ .vroundss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .vex_lig_wig, .avx },
+
.{ .vsqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_128_wig, .avx },
.{ .vsqrtpd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_256_wig, .avx },
@@ -1051,6 +1140,16 @@ pub const table = [_]Entry{
.{ .vsqrtss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .vex_lig_wig, .avx },
+ .{ .vsubpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5c }, 0, .vex_128_wig, .avx },
+ .{ .vsubpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5c }, 0, .vex_256_wig, .avx },
+
+ .{ .vsubps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .vex_128_wig, .avx },
+ .{ .vsubps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5c }, 0, .vex_256_wig, .avx },
+
+ .{ .vsubsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx },
+
+ .{ .vsubss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx },
+
// F16C
.{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c },
.{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c },
src/arch/x86_64/Lower.zig
@@ -124,27 +124,34 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.xchg,
.xor,
+ .addps,
.addss,
.andnps,
.andps,
.cmpss,
.cvtsi2ss,
+ .divps,
.divss,
+ .maxps,
.maxss,
+ .minps,
.minss,
.movaps,
.movss,
.movups,
+ .mulps,
.mulss,
.orps,
.pextrw,
.pinsrw,
.sqrtps,
.sqrtss,
+ .subps,
.subss,
.ucomiss,
.xorps,
+ .addpd,
.addsd,
.andnpd,
.andpd,
@@ -152,10 +159,14 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.cvtsd2ss,
.cvtsi2sd,
.cvtss2sd,
+ .divpd,
.divsd,
+ .maxpd,
.maxsd,
+ .minpd,
.minsd,
.movsd,
+ .mulpd,
.mulsd,
.orpd,
.pshufhw,
@@ -173,6 +184,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.punpcklwd,
.sqrtpd,
.sqrtsd,
+ .subpd,
.subsd,
.ucomisd,
.xorpd,
@@ -181,13 +193,31 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.movshdup,
.movsldup,
+ .roundpd,
+ .roundps,
.roundsd,
.roundss,
+ .vaddpd,
+ .vaddps,
+ .vaddsd,
+ .vaddss,
.vcvtsd2ss,
.vcvtsi2sd,
.vcvtsi2ss,
.vcvtss2sd,
+ .vdivpd,
+ .vdivps,
+ .vdivsd,
+ .vdivss,
+ .vmaxpd,
+ .vmaxps,
+ .vmaxsd,
+ .vmaxss,
+ .vminpd,
+ .vminps,
+ .vminsd,
+ .vminss,
.vmovapd,
.vmovaps,
.vmovddup,
@@ -197,6 +227,10 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.vmovss,
.vmovupd,
.vmovups,
+ .vmulpd,
+ .vmulps,
+ .vmulsd,
+ .vmulss,
.vpextrw,
.vpinsrw,
.vpshufhw,
@@ -212,10 +246,18 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.vpunpckldq,
.vpunpcklqdq,
.vpunpcklwd,
+ .vroundpd,
+ .vroundps,
+ .vroundsd,
+ .vroundss,
.vsqrtpd,
.vsqrtps,
.vsqrtsd,
.vsqrtss,
+ .vsubpd,
+ .vsubps,
+ .vsubsd,
+ .vsubss,
.vcvtph2ps,
.vcvtps2ph,
@@ -304,6 +346,7 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
.lock_mi_rip_s,
=> Immediate.s(@bitCast(i32, i)),
+ .rrri,
.rri_u,
.ri_u,
.i_u,
@@ -429,6 +472,12 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void {
.{ .reg = inst.data.rrr.r2 },
.{ .reg = inst.data.rrr.r3 },
},
+ .rrri => &.{
+ .{ .reg = inst.data.rrri.r1 },
+ .{ .reg = inst.data.rrri.r2 },
+ .{ .reg = inst.data.rrri.r3 },
+ .{ .imm = lower.imm(inst.ops, inst.data.rrri.i) },
+ },
.ri_s, .ri_u => &.{
.{ .reg = inst.data.ri.r },
.{ .imm = lower.imm(inst.ops, inst.data.ri.i) },
src/arch/x86_64/Mir.zig
@@ -166,7 +166,9 @@ pub const Inst = struct {
/// Logical exclusive-or
xor,
- /// Add single precision floating point values
+ /// Add packed single-precision floating-point values
+ addps,
+ /// Add scalar single-precision floating-point values
addss,
/// Bitwise logical and of packed single precision floating-point values
andps,
@@ -176,11 +178,17 @@ pub const Inst = struct {
cmpss,
/// Convert doubleword integer to scalar single-precision floating-point value
cvtsi2ss,
+ /// Divide packed single-precision floating-point values
+ divps,
/// Divide scalar single-precision floating-point values
divss,
- /// Return maximum single-precision floating-point value
+ /// Maximum of packed single-precision floating-point values
+ maxps,
+ /// Maximum of scalar single-precision floating-point values
maxss,
- /// Return minimum single-precision floating-point value
+ /// Minimum of packed single-precision floating-point values
+ minps,
+ /// Minimum of scalar single-precision floating-point values
minss,
/// Move aligned packed single-precision floating-point values
movaps,
@@ -188,6 +196,8 @@ pub const Inst = struct {
movss,
/// Move unaligned packed single-precision floating-point values
movups,
+ /// Multiply packed single-precision floating-point values
+ mulps,
/// Multiply scalar single-precision floating-point values
mulss,
/// Bitwise logical or of packed single precision floating-point values
@@ -196,18 +206,22 @@ pub const Inst = struct {
pextrw,
/// Insert word
pinsrw,
- /// Square root of scalar single precision floating-point value
+ /// Square root of packed single-precision floating-point values
sqrtps,
- /// Subtract scalar single-precision floating-point values
+ /// Square root of scalar single-precision floating-point value
sqrtss,
- /// Square root of single precision floating-point values
+ /// Subtract packed single-precision floating-point values
+ subps,
+ /// Subtract scalar single-precision floating-point values
subss,
/// Unordered compare scalar single-precision floating-point values
ucomiss,
/// Bitwise logical xor of packed single precision floating-point values
xorps,
- /// Add double precision floating point values
+ /// Add packed double-precision floating-point values
+ addpd,
+ /// Add scalar double-precision floating-point values
addsd,
/// Bitwise logical and not of packed double precision floating-point values
andnpd,
@@ -221,14 +235,22 @@ pub const Inst = struct {
cvtsi2sd,
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
cvtss2sd,
+ /// Divide packed double-precision floating-point values
+ divpd,
/// Divide scalar double-precision floating-point values
divsd,
- /// Return maximum double-precision floating-point value
+ /// Maximum of packed double-precision floating-point values
+ maxpd,
+ /// Maximum of scalar double-precision floating-point values
maxsd,
- /// Return minimum double-precision floating-point value
+ /// Minimum of packed double-precision floating-point values
+ minpd,
+ /// Minimum of scalar double-precision floating-point values
minsd,
/// Move scalar double-precision floating-point value
movsd,
+ /// Multiply packed double-precision floating-point values
+ mulpd,
/// Multiply scalar double-precision floating-point values
mulsd,
/// Bitwise logical or of packed double precision floating-point values
@@ -263,6 +285,8 @@ pub const Inst = struct {
sqrtpd,
/// Square root of scalar double precision floating-point value
sqrtsd,
+ /// Subtract packed double-precision floating-point values
+ subpd,
/// Subtract scalar double-precision floating-point values
subsd,
/// Unordered compare scalar double-precision floating-point values
@@ -277,11 +301,23 @@ pub const Inst = struct {
/// Replicate single floating-point values
movsldup,
- /// Round scalar double-precision floating-point values
+ /// Round packed double-precision floating-point values
+ roundpd,
+ /// Round packed single-precision floating-point values
+ roundps,
+ /// Round scalar double-precision floating-point value
roundsd,
- /// Round scalar single-precision floating-point values
+ /// Round scalar single-precision floating-point value
roundss,
+ /// Add packed double-precision floating-point values
+ vaddpd,
+ /// Add packed single-precision floating-point values
+ vaddps,
+ /// Add scalar double-precision floating-point values
+ vaddsd,
+ /// Add scalar single-precision floating-point values
+ vaddss,
/// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
vcvtsd2ss,
/// Convert doubleword integer to scalar double-precision floating-point value
@@ -290,6 +326,30 @@ pub const Inst = struct {
vcvtsi2ss,
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
vcvtss2sd,
+ /// Divide packed double-precision floating-point values
+ vdivpd,
+ /// Divide packed single-precision floating-point values
+ vdivps,
+ /// Divide scalar double-precision floating-point values
+ vdivsd,
+ /// Divide scalar single-precision floating-point values
+ vdivss,
+ /// Maximum of packed double-precision floating-point values
+ vmaxpd,
+ /// Maximum of packed single-precision floating-point values
+ vmaxps,
+ /// Maximum of scalar double-precision floating-point values
+ vmaxsd,
+ /// Maximum of scalar single-precision floating-point values
+ vmaxss,
+ /// Minimum of packed double-precision floating-point values
+ vminpd,
+ /// Minimum of packed single-precision floating-point values
+ vminps,
+ /// Minimum of scalar double-precision floating-point values
+ vminsd,
+ /// Minimum of scalar single-precision floating-point values
+ vminss,
/// Move aligned packed double-precision floating-point values
vmovapd,
/// Move aligned packed single-precision floating-point values
@@ -308,6 +368,14 @@ pub const Inst = struct {
vmovupd,
/// Move unaligned packed single-precision floating-point values
vmovups,
+ /// Multiply packed double-precision floating-point values
+ vmulpd,
+ /// Multiply packed single-precision floating-point values
+ vmulps,
+ /// Multiply scalar double-precision floating-point values
+ vmulsd,
+ /// Multiply scalar single-precision floating-point values
+ vmulss,
/// Extract word
vpextrw,
/// Insert word
@@ -338,6 +406,14 @@ pub const Inst = struct {
vpunpcklqdq,
/// Unpack low data
vpunpcklwd,
+ /// Round packed double-precision floating-point values
+ vroundpd,
+ /// Round packed single-precision floating-point values
+ vroundps,
+ /// Round scalar double-precision floating-point value
+ vroundsd,
+ /// Round scalar single-precision floating-point value
+ vroundss,
 /// Square root of packed double-precision floating-point values
vsqrtpd,
 /// Square root of packed single-precision floating-point values
@@ -346,6 +422,14 @@ pub const Inst = struct {
vsqrtsd,
/// Square root of scalar single-precision floating-point value
vsqrtss,
+ /// Subtract packed double-precision floating-point values
+ vsubpd,
+ /// Subtract packed single-precision floating-point values
+ vsubps,
+ /// Subtract scalar double-precision floating-point values
+ vsubsd,
+ /// Subtract scalar single-precision floating-point values
+ vsubss,
/// Convert 16-bit floating-point values to single-precision floating-point values
vcvtph2ps,
@@ -442,6 +526,9 @@ pub const Inst = struct {
/// Register, register, register operands.
/// Uses `rrr` payload.
rrr,
+ /// Register, register, register, immediate (byte) operands.
+ /// Uses `rrri` payload.
+ rrri,
/// Register, register, immediate (sign-extended) operands.
/// Uses `rri` payload.
rri_s,
@@ -625,6 +712,12 @@ pub const Inst = struct {
r2: Register,
r3: Register,
},
+ rrri: struct {
+ r1: Register,
+ r2: Register,
+ r3: Register,
+ i: u8,
+ },
rri: struct {
r1: Register,
r2: Register,