Commit 6c6d8d67cf
Changed files (8)
src/arch/x86_64/CodeGen.zig
@@ -4681,61 +4681,136 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
}
fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
+ const tag = self.air.instructions.items(.tag)[inst];
const un_op = self.air.instructions.items(.data)[inst].un_op;
const ty = self.air.typeOf(un_op);
- const ty_bits = ty.floatBits(self.target.*);
+ const abi_size: u32 = switch (ty.abiSize(self.target.*)) {
+ 1...16 => 16,
+ 17...32 => 32,
+ else => return self.fail("TODO implement airFloatSign for {}", .{
+ ty.fmt(self.bin_file.options.module.?),
+ }),
+ };
+ const scalar_bits = ty.scalarType().floatBits(self.target.*);
+
+ const src_mcv = try self.resolveInst(un_op);
+ const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
+ defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
+
+ const dst_mcv: MCValue = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
+ src_mcv
+ else if (self.hasFeature(.avx))
+ .{ .register = try self.register_manager.allocReg(inst, sse) }
+ else
+ try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+ const dst_lock = self.register_manager.lockReg(dst_reg);
+ defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
var arena = std.heap.ArenaAllocator.init(self.gpa);
defer arena.deinit();
- const ExpectedContents = union {
- f16: Value.Payload.Float_16,
- f32: Value.Payload.Float_32,
- f64: Value.Payload.Float_64,
- f80: Value.Payload.Float_80,
- f128: Value.Payload.Float_128,
+ const ExpectedContents = struct {
+ scalar: union {
+ i64: Value.Payload.I64,
+ big: struct {
+ limbs: [
+ @max(
+ std.math.big.int.Managed.default_capacity,
+ std.math.big.int.calcTwosCompLimbCount(128),
+ )
+ ]std.math.big.Limb,
+ pl: Value.Payload.BigInt,
+ },
+ },
+ repeated: Value.Payload.SubValue,
};
var stack align(@alignOf(ExpectedContents)) =
std.heap.stackFallback(@sizeOf(ExpectedContents), arena.allocator());
+ var int_pl = Type.Payload.Bits{
+ .base = .{ .tag = .int_signed },
+ .data = scalar_bits,
+ };
var vec_pl = Type.Payload.Array{
.base = .{ .tag = .vector },
.data = .{
- .len = @divExact(128, ty_bits),
- .elem_type = ty,
+ .len = @divExact(abi_size * 8, scalar_bits),
+ .elem_type = Type.initPayload(&int_pl.base),
},
};
const vec_ty = Type.initPayload(&vec_pl.base);
-
- var sign_pl = Value.Payload.SubValue{
- .base = .{ .tag = .repeated },
- .data = try Value.floatToValue(-0.0, stack.get(), ty, self.target.*),
+ const sign_val = switch (tag) {
+ .neg => try vec_ty.minInt(stack.get(), self.target.*),
+ .fabs => try vec_ty.maxInt(stack.get(), self.target.*),
+ else => unreachable,
};
- const sign_val = Value.initPayload(&sign_pl.base);
const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val });
-
- const src_mcv = try self.resolveInst(un_op);
- const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
- src_mcv
+ const sign_mem = if (sign_mcv.isMemory())
+ sign_mcv.mem(Memory.PtrSize.fromSize(abi_size))
else
- try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
- const dst_lock = self.register_manager.lockReg(dst_mcv.register);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+ Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
+ .base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) },
+ });
- const tag = self.air.instructions.items(.tag)[inst];
- try self.genBinOpMir(switch (ty_bits) {
- // No point using an extra prefix byte for *pd which performs the same operation.
- 16, 32, 64, 128 => switch (tag) {
- .neg => .{ ._ps, .xor },
- .fabs => .{ ._ps, .andn },
+ if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory(
+ switch (scalar_bits) {
+ 16, 128 => if (abi_size <= 16 or self.hasFeature(.avx2)) switch (tag) {
+ .neg => .{ .vp_, .xor },
+ .fabs => .{ .vp_, .@"and" },
+ else => unreachable,
+ } else switch (tag) {
+ .neg => .{ .v_ps, .xor },
+ .fabs => .{ .v_ps, .@"and" },
+ else => unreachable,
+ },
+ 32 => switch (tag) {
+ .neg => .{ .v_ps, .xor },
+ .fabs => .{ .v_ps, .@"and" },
+ else => unreachable,
+ },
+ 64 => switch (tag) {
+ .neg => .{ .v_pd, .xor },
+ .fabs => .{ .v_pd, .@"and" },
+ else => unreachable,
+ },
+ 80 => return self.fail("TODO implement airFloatSign for {}", .{
+ ty.fmt(self.bin_file.options.module.?),
+ }),
else => unreachable,
},
- 80 => return self.fail("TODO implement airFloatSign for {}", .{
- ty.fmt(self.bin_file.options.module.?),
- }),
- else => unreachable,
- }, vec_ty, dst_mcv, sign_mcv);
+ registerAlias(dst_reg, abi_size),
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(ty, src_mcv), abi_size),
+ sign_mem,
+ ) else try self.asmRegisterMemory(
+ switch (scalar_bits) {
+ 16, 128 => switch (tag) {
+ .neg => .{ .p_, .xor },
+ .fabs => .{ .p_, .@"and" },
+ else => unreachable,
+ },
+ 32 => switch (tag) {
+ .neg => .{ ._ps, .xor },
+ .fabs => .{ ._ps, .@"and" },
+ else => unreachable,
+ },
+ 64 => switch (tag) {
+ .neg => .{ ._pd, .xor },
+ .fabs => .{ ._pd, .@"and" },
+ else => unreachable,
+ },
+ 80 => return self.fail("TODO implement airFloatSign for {}", .{
+ ty.fmt(self.bin_file.options.module.?),
+ }),
+ else => unreachable,
+ },
+ registerAlias(dst_reg, abi_size),
+ sign_mem,
+ );
return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
}
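
For context on the lowering above: both operations reduce to a single integer bitwise op against a constant mask. Negation XORs every lane with minInt of the matching signed-int type (only the sign bit set), and @fabs ANDs with maxInt (every bit except the sign). A minimal scalar sketch of the same trick in plain Zig (illustrative only; uses the two-argument @bitCast of this Zig era):

const std = @import("std");

fn negViaXor(x: f32) f32 {
    return @bitCast(f32, @bitCast(u32, x) ^ 0x8000_0000); // flip only the sign bit (the minInt(i32) pattern)
}

fn fabsViaAnd(x: f32) f32 {
    return @bitCast(f32, @bitCast(u32, x) & 0x7fff_ffff); // clear only the sign bit (the maxInt(i32) pattern)
}

test "sign-bit tricks" {
    try std.testing.expectEqual(@as(f32, -1.5), negViaXor(1.5));
    try std.testing.expectEqual(@as(f32, 1.5), fabsViaAnd(-1.5));
}
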
@@ -8593,7 +8668,6 @@ const MoveStrategy = union(enum) {
const InsertExtract = struct {
insert: Mir.Inst.FixedTag,
extract: Mir.Inst.FixedTag,
- imm: Immediate,
};
};
fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
@@ -8603,17 +8677,15 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
.insert = .{ .vp_w, .insr },
.extract = .{ .vp_w, .extr },
- .imm = Immediate.u(0),
} } else .{ .insert_extract = .{
.insert = .{ .p_w, .insr },
.extract = .{ .p_w, .extr },
- .imm = Immediate.u(0),
} },
32 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov } },
64 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov } },
128 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
- else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
else => {},
},
.Vector => switch (ty.childType().zigTypeTag()) {
@@ -8622,101 +8694,120 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
.insert = .{ .vp_b, .insr },
.extract = .{ .vp_b, .extr },
- .imm = Immediate.u(0),
} } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{
.insert = .{ .p_b, .insr },
.extract = .{ .p_b, .extr },
- .imm = Immediate.u(0),
} },
2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
.insert = .{ .vp_w, .insr },
.extract = .{ .vp_w, .extr },
- .imm = Immediate.u(0),
} } else .{ .insert_extract = .{
.insert = .{ .p_w, .insr },
.extract = .{ .p_w, .extr },
- .imm = Immediate.u(0),
} },
3...4 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_ss, .mov }
+ .{ .v_d, .mov }
else
- .{ ._ss, .mov } },
+ .{ ._d, .mov } },
5...8 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_sd, .mov }
+ .{ .v_q, .mov }
else
- .{ ._sd, .mov } },
+ .{ ._q, .mov } },
+ 9...16 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 17...32 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
16 => switch (ty.vectorLen()) {
1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
.insert = .{ .vp_w, .insr },
.extract = .{ .vp_w, .extr },
- .imm = Immediate.u(0),
} } else .{ .insert_extract = .{
.insert = .{ .p_w, .insr },
.extract = .{ .p_w, .extr },
- .imm = Immediate.u(0),
} },
2 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_ss, .mov }
+ .{ .v_d, .mov }
else
- .{ ._ss, .mov } },
+ .{ ._d, .mov } },
3...4 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_sd, .mov }
+ .{ .v_q, .mov }
else
- .{ ._sd, .mov } },
+ .{ ._q, .mov } },
5...8 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_ps, .mov }
- else
- .{ ._ps, .mov } },
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 9...16 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
32 => switch (ty.vectorLen()) {
1 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_ss, .mov }
+ .{ .v_d, .mov }
else
- .{ ._ss, .mov } },
+ .{ ._d, .mov } },
2 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_sd, .mov }
+ .{ .v_q, .mov }
else
- .{ ._sd, .mov } },
+ .{ ._q, .mov } },
3...4 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
- else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
5...8 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
64 => switch (ty.vectorLen()) {
1 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_sd, .mov }
+ .{ .v_q, .mov }
else
- .{ ._sd, .mov } },
+ .{ ._q, .mov } },
2 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
- else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
3...4 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
+ else => {},
+ },
+ 128 => switch (ty.vectorLen()) {
+ 1 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 2 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
+ else => {},
+ },
+ 256 => switch (ty.vectorLen()) {
+ 1 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
else => {},
},
.Float => switch (ty.childType().floatBits(self.target.*)) {
16 => switch (ty.vectorLen()) {
- 1 => {},
+ 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
+ .insert = .{ .vp_w, .insr },
+ .extract = .{ .vp_w, .extr },
+ } } else .{ .insert_extract = .{
+ .insert = .{ .p_w, .insr },
+ .extract = .{ .p_w, .extr },
+ } },
2 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_ss, .mov }
+ .{ .v_d, .mov }
else
- .{ ._ss, .mov } },
+ .{ ._d, .mov } },
3...4 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_sd, .mov }
+ .{ .v_q, .mov }
else
- .{ ._sd, .mov } },
+ .{ ._q, .mov } },
5...8 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
- else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
9...16 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
32 => switch (ty.vectorLen()) {
@@ -8741,18 +8832,18 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
else
.{ ._sd, .mov } },
2 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
- else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+ if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
+ else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
3...4 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
+ return .{ .move = if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } },
else => {},
},
128 => switch (ty.vectorLen()) {
1 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
- else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
2 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
else => {},
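
Net effect of the rewritten tables: 3...4 and 5...8 byte payloads now move through the integer movd/movq instead of the scalar-float movss/movsd, and 16/32-byte integer payloads use movdqa/movdqu; only f32/f64 vectors keep the ps/pd moves. A small check of the sizes that drive the selection (the instruction mapping is an assumption from the tables above; the sizes themselves hold in any Zig of this era):

const std = @import("std");

test "payload sizes behind the move selection" {
    try std.testing.expectEqual(@as(usize, 4), @sizeOf(@Vector(2, f16))); // -> (v)movd
    try std.testing.expectEqual(@as(usize, 8), @sizeOf(@Vector(4, f16))); // -> (v)movq
    try std.testing.expectEqual(@as(usize, 16), @sizeOf(@Vector(8, f16))); // -> (v)movdqa / (v)movdqu
}
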
@@ -8860,29 +8951,69 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
);
}
},
- .register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister(
- if ((dst_reg.class() == .sse) == (src_reg.class() == .sse))
- switch (ty.zigTypeTag()) {
- else => .{ ._, .mov },
- .Float, .Vector => .{ ._ps, .mova },
- }
- else switch (abi_size) {
- 2 => return try self.asmRegisterRegisterImmediate(
- if (dst_reg.class() == .sse) .{ .p_w, .insr } else .{ .p_w, .extr },
- registerAlias(dst_reg, 4),
- registerAlias(src_reg, 4),
- Immediate.u(0),
+ .register => |src_reg| if (dst_reg.id() != src_reg.id()) switch (dst_reg.class()) {
+ .general_purpose => switch (src_reg.class()) {
+ .general_purpose => try self.asmRegisterRegister(
+ .{ ._, .mov },
+ registerAlias(dst_reg, abi_size),
+ registerAlias(src_reg, abi_size),
),
- 4 => .{ ._d, .mov },
- 8 => .{ ._q, .mov },
- else => return self.fail(
- "unsupported register copy from {s} to {s}",
- .{ @tagName(src_reg), @tagName(dst_reg) },
+ .segment => try self.asmRegisterRegister(
+ .{ ._, .mov },
+ registerAlias(dst_reg, abi_size),
+ src_reg,
),
+ .sse => try self.asmRegisterRegister(
+ switch (abi_size) {
+ 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
+ 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
+ else => unreachable,
+ },
+ registerAlias(dst_reg, @max(abi_size, 4)),
+ src_reg.to128(),
+ ),
+ .x87, .mmx => unreachable,
},
- registerAlias(dst_reg, abi_size),
- registerAlias(src_reg, abi_size),
- ),
+ .segment => try self.asmRegisterRegister(
+ .{ ._, .mov },
+ dst_reg,
+ switch (src_reg.class()) {
+ .general_purpose, .segment => registerAlias(src_reg, abi_size),
+ .sse => try self.copyToTmpRegister(ty, src_mcv),
+ .x87, .mmx => unreachable,
+ },
+ ),
+ .sse => switch (src_reg.class()) {
+ .general_purpose => try self.asmRegisterRegister(
+ switch (abi_size) {
+ 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
+ 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
+ else => unreachable,
+ },
+ dst_reg.to128(),
+ registerAlias(src_reg, @max(abi_size, 4)),
+ ),
+ .segment => try self.genSetReg(
+ dst_reg,
+ ty,
+ .{ .register = try self.copyToTmpRegister(ty, src_mcv) },
+ ),
+ .sse => try self.asmRegisterRegister(
+ switch (ty.scalarType().zigTypeTag()) {
+ else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
+ .Float => switch (ty.floatBits(self.target.*)) {
+ else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
+ 32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova },
+ 64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ ._pd, .mova },
+ },
+ },
+ registerAlias(dst_reg, abi_size),
+ registerAlias(src_reg, abi_size),
+ ),
+ .x87, .mmx => unreachable,
+ },
+ .x87, .mmx => unreachable,
+ },
.register_offset,
.indirect,
.load_frame,
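
With the .register case now dispatching on both register classes, a cross-domain copy such as float bits into a general-purpose register goes through (v)movd/(v)movq; the old 2-byte pinsrw/pextrw special case is gone. A user-level sketch that exercises such a transfer (hedged: whether a given build actually keeps the value in an sse register is up to register allocation):

const std = @import("std");

fn bitsOf(x: f32) u32 {
    return @bitCast(u32, x); // sse -> general-purpose move, assumed lowered via (v)movd
}

test "float bits through a gp register" {
    try std.testing.expectEqual(@as(u32, 0x8000_0000), bitsOf(-0.0));
}
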
@@ -8918,14 +9049,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
ie.insert,
dst_alias,
src_mem,
- ie.imm,
+ Immediate.u(0),
),
.vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
ie.insert,
dst_alias,
dst_alias,
src_mem,
- ie.imm,
+ Immediate.u(0),
),
}
},
@@ -8947,14 +9078,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
ie.insert,
dst_alias,
src_mem,
- ie.imm,
+ Immediate.u(0),
),
.vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
ie.insert,
dst_alias,
dst_alias,
src_mem,
- ie.imm,
+ Immediate.u(0),
),
}
},
@@ -8994,14 +9125,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
ie.insert,
dst_alias,
src_mem,
- ie.imm,
+ Immediate.u(0),
),
.vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
ie.insert,
dst_alias,
dst_alias,
src_mem,
- ie.imm,
+ Immediate.u(0),
),
}
},
@@ -9129,7 +9260,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal
ie.extract,
dst_mem,
src_alias,
- ie.imm,
+ Immediate.u(0),
),
}
},
@@ -10499,7 +10630,7 @@ fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void {
if (self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv;
const dst_mcv = try self.allocRegOrMem(inst, true);
- try self.genCopy(src_ty, dst_mcv, src_mcv);
+ try self.genCopy(union_ty, dst_mcv, src_mcv);
break :result dst_mcv;
}
@@ -11000,7 +11131,15 @@ fn registerAlias(reg: Register, size_bytes: u32) Register {
reg.to64()
else
unreachable,
- .segment, .x87, .mmx => unreachable,
+ .segment => if (size_bytes <= 2)
+ reg
+ else
+ unreachable,
+ .x87 => unreachable,
+ .mmx => if (size_bytes <= 8)
+ reg
+ else
+ unreachable,
.sse => if (size_bytes <= 16)
reg.to128()
else if (size_bytes <= 32)
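
registerAlias now tolerates segment registers up to 2 bytes and mmx up to 8, while the sse arm keeps bucketing at 16 and 32 bytes. A standalone mirror of that sse bucketing (hypothetical helper, not the real Register API):

const std = @import("std");

fn sseAliasBits(size_bytes: u32) u32 {
    return if (size_bytes <= 16) 128 else if (size_bytes <= 32) 256 else unreachable;
}

test "sse alias widths" {
    try std.testing.expectEqual(@as(u32, 128), sseAliasBits(@sizeOf(@Vector(8, f16)))); // xmm alias
    try std.testing.expectEqual(@as(u32, 256), sseAliasBits(@sizeOf(@Vector(16, f16)))); // ymm alias
}
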
src/arch/x86_64/Encoding.zig
@@ -261,7 +261,8 @@ pub const Mnemonic = enum {
// X87
fisttp, fld,
// MMX
- movd,
+ movd, movq,
+ pand, pandn, por, pxor,
// SSE
addps, addss,
andps,
@@ -293,7 +294,8 @@ pub const Mnemonic = enum {
maxpd, maxsd,
minpd, minsd,
movapd,
- movq, //movd, movsd,
+ movdqa, movdqu,
+ //movsd,
movupd,
mulpd, mulsd,
orpd,
@@ -316,6 +318,7 @@ pub const Mnemonic = enum {
roundpd, roundps, roundsd, roundss,
// AVX
vaddpd, vaddps, vaddsd, vaddss,
+ vandnpd, vandnps, vandpd, vandps,
vbroadcastf128, vbroadcastsd, vbroadcastss,
vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps,
vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss,
@@ -327,22 +330,31 @@ pub const Mnemonic = enum {
vmaxpd, vmaxps, vmaxsd, vmaxss,
vminpd, vminps, vminsd, vminss,
vmovapd, vmovaps,
- vmovddup, vmovhlps, vmovlhps,
+ vmovd,
+ vmovddup,
+ vmovdqa, vmovdqu,
+ vmovhlps, vmovlhps,
+ vmovq,
vmovsd,
vmovshdup, vmovsldup,
vmovss,
vmovupd, vmovups,
vmulpd, vmulps, vmulsd, vmulss,
+ vorpd, vorps,
+ vpand, vpandn,
vpextrb, vpextrd, vpextrq, vpextrw,
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
+ vpor,
vpshufhw, vpshuflw,
vpsrld, vpsrlq, vpsrlw,
vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd,
vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd,
+ vpxor,
vroundpd, vroundps, vroundsd, vroundss,
vshufpd, vshufps,
vsqrtpd, vsqrtps, vsqrtsd, vsqrtss,
vsubpd, vsubps, vsubsd, vsubss,
+ vxorpd, vxorps,
// F16C
vcvtph2ps, vcvtps2ph,
// FMA
src/arch/x86_64/encodings.zig
@@ -970,11 +970,16 @@ pub const table = [_]Entry{
.{ .movapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .none, .sse2 },
.{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .none, .sse2 },
- .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 },
-
.{ .movq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .long, .sse2 },
+ .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 },
.{ .movq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .long, .sse2 },
+ .{ .movdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .none, .sse2 },
+ .{ .movdqa, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .none, .sse2 },
+
+ .{ .movdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .none, .sse2 },
+ .{ .movdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .none, .sse2 },
+
.{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .none, .sse2 },
.{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .none, .sse2 },
@@ -987,10 +992,16 @@ pub const table = [_]Entry{
.{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 },
+ .{ .pand, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .none, .sse2 },
+
+ .{ .pandn, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .none, .sse2 },
+
.{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 },
.{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
+ .{ .por, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .none, .sse2 },
+
.{ .pshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 },
.{ .pshuflw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf2, 0x0f, 0x70 }, 0, .none, .sse2 },
@@ -1012,6 +1023,8 @@ pub const table = [_]Entry{
.{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 },
.{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 },
+ .{ .pxor, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .none, .sse2 },
+
.{ .shufpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .none, .sse2 },
.{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 },
@@ -1070,6 +1083,18 @@ pub const table = [_]Entry{
.{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx },
+ .{ .vandnpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_128_wig, .avx },
+ .{ .vandnpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_256_wig, .avx },
+
+ .{ .vandnps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .vex_128_wig, .avx },
+ .{ .vandnps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x55 }, 0, .vex_256_wig, .avx },
+
+ .{ .vandpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_128_wig, .avx },
+ .{ .vandpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_256_wig, .avx },
+
+ .{ .vandps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .vex_128_wig, .avx },
+ .{ .vandps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x54 }, 0, .vex_256_wig, .avx },
+
.{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx },
.{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx },
.{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx },
@@ -1169,13 +1194,31 @@ pub const table = [_]Entry{
.{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256_wig, .avx },
.{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256_wig, .avx },
+ .{ .vmovd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w0, .avx },
+ .{ .vmovq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w1, .avx },
+ .{ .vmovd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w0, .avx },
+ .{ .vmovq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w1, .avx },
+
.{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
.{ .vmovddup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_256_wig, .avx },
+ .{ .vmovdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_128_wig, .avx },
+ .{ .vmovdqa, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_128_wig, .avx },
+ .{ .vmovdqa, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_256_wig, .avx },
+ .{ .vmovdqa, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_256_wig, .avx },
+
+ .{ .vmovdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_128_wig, .avx },
+ .{ .vmovdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_128_wig, .avx },
+ .{ .vmovdqu, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_256_wig, .avx },
+ .{ .vmovdqu, .mr, &.{ .ymm_m256, .ymm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_256_wig, .avx },
+
.{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
.{ .vmovlhps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx },
+ .{ .vmovq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .vex_128_wig, .avx },
+ .{ .vmovq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .vex_128_wig, .avx },
+
.{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
.{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
.{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx },
@@ -1212,6 +1255,16 @@ pub const table = [_]Entry{
.{ .vmulss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx },
+ .{ .vorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
+ .{ .vorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
+
+ .{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
+ .{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
+
+ .{ .vpand, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_128_wig, .avx },
+
+ .{ .vpandn, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_128_wig, .avx },
+
.{ .vpextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx },
.{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx },
.{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx },
@@ -1225,6 +1278,8 @@ pub const table = [_]Entry{
.{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
+ .{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx },
+
.{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx },
.{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128_wig, .avx },
.{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128_wig, .avx },
@@ -1242,6 +1297,8 @@ pub const table = [_]Entry{
.{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx },
.{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx },
+ .{ .vpxor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_128_wig, .avx },
+
.{ .vroundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_128_wig, .avx },
.{ .vroundpd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_256_wig, .avx },
@@ -1278,6 +1335,12 @@ pub const table = [_]Entry{
.{ .vsubss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx },
+ .{ .vxorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
+ .{ .vxorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
+
+ .{ .vxorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
+ .{ .vxorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
+
// F16C
.{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c },
.{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c },
@@ -1313,6 +1376,12 @@ pub const table = [_]Entry{
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
+ .{ .vpand, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },
+
.{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 },
.{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 },
.{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 },
@@ -1329,5 +1398,7 @@ pub const table = [_]Entry{
.{ .vpunpcklwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_256_wig, .avx2 },
.{ .vpunpckldq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_256_wig, .avx2 },
.{ .vpunpcklqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpxor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_256_wig, .avx2 },
};
// zig fmt: on
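
A hand-check of one new SSE2 row (the modrm arithmetic below is the standard mod=0b11 register-direct form, computed here rather than taken from this repo): the .pxor entry encodes pxor xmm1, xmm2 as 66 0f ef ca.

const std = @import("std");

test "pxor xmm1, xmm2 modrm byte" {
    const mod: u8 = 0b11; // register-direct addressing
    const reg: u8 = 1; // xmm1 in the reg field
    const rm: u8 = 2; // xmm2 in the r/m field
    try std.testing.expectEqual(@as(u8, 0xca), (mod << 6) | (reg << 3) | rm);
}
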
src/arch/x86_64/Mir.zig
@@ -236,6 +236,14 @@ pub const Inst = struct {
/// VEX-Encoded ___
v_,
+ /// VEX-Encoded ___ Byte
+ v_b,
+ /// VEX-Encoded ___ Word
+ v_w,
+ /// VEX-Encoded ___ Doubleword
+ v_d,
+    /// VEX-Encoded ___ Quadword
+ v_q,
/// VEX-Encoded Packed ___
vp_,
/// VEX-Encoded Packed ___ Byte
@@ -526,6 +534,10 @@ pub const Inst = struct {
cvttps2dq,
/// Convert with truncation scalar double-precision floating-point value to doubleword integer
cvttsd2si,
+ /// Move aligned packed integer values
+ movdqa,
+ /// Move unaligned packed integer values
+ movdqu,
/// Packed interleave shuffle of quadruplets of single-precision floating-point values
/// Packed interleave shuffle of pairs of double-precision floating-point values
shuf,
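
The ___ in these doc comments marks where the mnemonic splices into the fixed tag: the text before the underscore becomes a prefix and the text after it a suffix, so .{ ._pd, .mova } names movapd, .{ .v_d, .mov } names vmovd, and .{ .vp_, .xor } names vpxor. A tiny standalone sketch of that splice (hypothetical helper, not the Mir implementation):

const std = @import("std");

fn spliceMnemonic(comptime fixes: []const u8, comptime mnemonic: []const u8) []const u8 {
    const i = comptime std.mem.indexOfScalar(u8, fixes, '_').?;
    return fixes[0..i] ++ mnemonic ++ fixes[i + 1 ..];
}

test "fixed-tag splice" {
    try std.testing.expectEqualStrings("movapd", spliceMnemonic("_pd", "mova"));
    try std.testing.expectEqualStrings("vmovd", spliceMnemonic("v_d", "mov"));
    try std.testing.expectEqualStrings("vpxor", spliceMnemonic("vp_", "xor"));
}
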
src/type.zig
@@ -5433,8 +5433,18 @@ pub const Type = extern union {
}
}
+    // Works for integers and vectors of integers.
+ pub fn maxInt(ty: Type, arena: Allocator, target: Target) !Value {
+ const scalar = try maxIntScalar(ty.scalarType(), arena, target);
+ if (ty.zigTypeTag() == .Vector and scalar.tag() != .the_only_possible_value) {
+ return Value.Tag.repeated.create(arena, scalar);
+ } else {
+ return scalar;
+ }
+ }
+
/// Asserts that self.zigTypeTag() == .Int.
- pub fn maxInt(self: Type, arena: Allocator, target: Target) !Value {
+ pub fn maxIntScalar(self: Type, arena: Allocator, target: Target) !Value {
assert(self.zigTypeTag() == .Int);
const info = self.intInfo(target);
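
A behavior-level sketch of what the new vector maxInt represents (plain Zig analogue, two-argument @splat of this era; the real function returns a .repeated Value wrapping the scalar max):

const std = @import("std");

test "scalar max replicated across lanes" {
    const lane_max = std.math.maxInt(i16); // the maxIntScalar analogue
    const v: @Vector(4, i16) = @splat(4, @as(i16, lane_max)); // the .repeated analogue
    try std.testing.expectEqual(@as(i16, 0x7fff), v[0]);
}
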
test/behavior/floatop.zig
@@ -532,7 +532,6 @@ fn testFabs() !void {
test "@fabs with vectors" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
test/behavior/math.zig
@@ -1612,7 +1612,6 @@ test "absFloat" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
test/behavior/translate_c_macros.zig
@@ -65,7 +65,6 @@ test "cast negative integer to pointer" {
test "casting to union with a macro" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO