Commit 941d3a2bb1
Changed files (1)
src
arch
x86_64
src/arch/x86_64/CodeGen.zig
@@ -5653,10 +5653,10 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
const dst_lock = self.register_manager.lockReg(dst_reg);
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+ const wide_ty = if (src_bits <= 8) Type.u16 else src_ty;
if (self.hasFeature(.bmi)) {
if (src_bits <= 64) {
const extra_bits = self.regExtraBits(src_ty) + @as(u64, if (src_bits <= 8) 8 else 0);
- const wide_ty = if (src_bits <= 8) Type.u16 else src_ty;
const masked_mcv = if (extra_bits > 0) masked: {
const tmp_mcv = tmp: {
if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0))
@@ -5718,7 +5718,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
defer self.register_manager.unlockReg(wide_lock);
try self.truncateRegister(src_ty, wide_reg);
- try self.genBinOpMir(.{ ._, .bsf }, Type.u16, dst_mcv, .{ .register = wide_reg });
+ try self.genBinOpMir(.{ ._, .bsf }, wide_ty, dst_mcv, .{ .register = wide_reg });
} else try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv);
const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(mod))), 2);
@@ -5890,6 +5890,7 @@ fn genByteSwap(
) !MCValue {
const mod = self.bin_file.comp.module.?;
const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
+ const have_movbe = self.hasFeature(.movbe);
if (src_ty.zigTypeTag(mod) == .Vector) return self.fail(
"TODO implement genByteSwap for {}",
@@ -5935,46 +5936,42 @@ fn genByteSwap(
const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs);
defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
- if (src_mcv.isMemory()) {
- try self.asmRegisterMemory(
- .{ ._, .movbe },
- dst_regs[0],
- try src_mcv.address().offset(8).deref().mem(self, .qword),
- );
- try self.asmRegisterMemory(.{ ._, .movbe }, dst_regs[1], try src_mcv.mem(self, .qword));
- } else for (dst_regs, src_mcv.register_pair) |dst_reg, src_reg| {
- try self.asmRegisterRegister(.{ ._, .mov }, dst_reg.to64(), src_reg.to64());
- try self.asmRegister(.{ ._, .bswap }, dst_reg.to64());
+ for (dst_regs, 0..) |dst_reg, limb_index| {
+ if (src_mcv.isMemory()) {
+ try self.asmRegisterMemory(
+ .{ ._, if (have_movbe) .movbe else .mov },
+ dst_reg.to64(),
+ try src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .qword),
+ );
+ if (!have_movbe) try self.asmRegister(.{ ._, .bswap }, dst_reg.to64());
+ } else {
+ try self.asmRegisterRegister(
+ .{ ._, .mov },
+ dst_reg.to64(),
+ src_mcv.register_pair[limb_index].to64(),
+ );
+ try self.asmRegister(.{ ._, .bswap }, dst_reg.to64());
+ }
}
- return .{ .register_pair = dst_regs };
+ return .{ .register_pair = .{ dst_regs[1], dst_regs[0] } };
},
}
- if (src_mcv.isRegister()) {
- const dst_mcv: MCValue = if (mem_ok)
- try self.allocRegOrMem(inst, true)
- else
- .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.gp) };
- if (dst_mcv.isRegister()) {
- const dst_lock = self.register_manager.lockRegAssumeUnused(dst_mcv.register);
- defer self.register_manager.unlockReg(dst_lock);
-
- try self.genSetReg(dst_mcv.register, src_ty, src_mcv);
- switch (abi_size) {
- else => unreachable,
- 2 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }),
- 3...8 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv),
- }
- } else try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv);
- return dst_mcv;
- }
-
- const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
- const dst_mcv = MCValue{ .register = dst_reg };
- const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
- defer self.register_manager.unlockReg(dst_lock);
+ const dst_mcv: MCValue = if (mem_ok and have_movbe and src_mcv.isRegister())
+ try self.allocRegOrMem(inst, true)
+ else
+ .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.gp) };
+ if (dst_mcv.getReg()) |dst_reg| {
+ const dst_lock = self.register_manager.lockRegAssumeUnused(dst_mcv.register);
+ defer self.register_manager.unlockReg(dst_lock);
- try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv);
+ try self.genSetReg(dst_reg, src_ty, src_mcv);
+ switch (abi_size) {
+ else => unreachable,
+ 2 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }),
+ 3...8 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv),
+ }
+ } else try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv);
return dst_mcv;
}