Commit 606f157a6b
Changed files (3):
src-self-hosted/codegen/x86_64.zig
src-self-hosted/codegen.zig
test/stage2/compare_output.zig
src-self-hosted/codegen/x86_64.zig
@@ -81,6 +81,26 @@ pub const Register = enum(u8) {
else => null,
};
}
+
+ /// Convert from any register to its 64 bit alias.
+ pub fn to64(self: Register) Register {
+ return @intToEnum(Register, self.id());
+ }
+
+ /// Convert from any register to its 32 bit alias.
+ pub fn to32(self: Register) Register {
+ return @intToEnum(Register, @as(u8, self.id()) + 16);
+ }
+
+ /// Convert from any register to its 16 bit alias.
+ pub fn to16(self: Register) Register {
+ return @intToEnum(Register, @as(u8, self.id()) + 32);
+ }
+
+ /// Convert from any register to its 8 bit alias.
+ pub fn to8(self: Register) Register {
+ return @intToEnum(Register, @as(u8, self.id()) + 48);
+ }
};
// zig fmt: on
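
The width aliases rely on the Register enum packing each width class into a block of 16 entries (64-bit ids 0-15, then the 32-, 16-, and 8-bit blocks), with id() returning the low four bits. A minimal sketch of the expected behavior, assuming std is imported in this file and the usual register names exist in the enum:

    test "register width aliases" {
        // rax has id 0, rcx id 1, rdx id 2; each narrower width adds 16 to the enum value.
        std.testing.expectEqual(Register.eax, Register.rax.to32());
        std.testing.expectEqual(Register.cx, Register.rcx.to16());
        std.testing.expectEqual(Register.dl, Register.rdx.to8());
        // to64 drops the width offset again, so round-tripping is lossless.
        std.testing.expectEqual(Register.rax, Register.eax.to64());
    }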
src-self-hosted/codegen.zig
@@ -328,6 +328,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
self.free_registers |= @as(FreeRegInt, 1) << shift;
}
+ /// Before calling, must ensureCapacity + 1 on branch.registers.
+ /// Returns `null` if all registers are allocated.
+ fn allocReg(self: *Branch, inst: *ir.Inst) ?Register {
+ const free_index = @ctz(FreeRegInt, self.free_registers);
+ if (free_index >= callee_preserved_regs.len) {
+ return null;
+ }
+ self.free_registers &= ~(@as(FreeRegInt, 1) << free_index);
+ const reg = callee_preserved_regs[free_index];
+ self.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst });
+ return reg;
+ }
+
fn deinit(self: *Branch, gpa: *Allocator) void {
self.inst_table.deinit(gpa);
self.registers.deinit(gpa);
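
allocReg centralizes the bit-twiddling that allocRegOrMem and copyToNewRegister previously duplicated: @ctz picks the lowest set bit of the free_registers mask (the lowest-index free callee-preserved register), that bit is cleared to mark the register in use, and the register is recorded against the owning instruction. A sketch of the mask bookkeeping, with u8 standing in for FreeRegInt:

    test "free register mask" {
        var free: u8 = 0b11110110;             // slots 0 and 3 already allocated
        const free_index = @ctz(u8, free);     // 1: the lowest free slot
        free &= ~(@as(u8, 1) << @intCast(u3, free_index));
        std.testing.expectEqual(@as(u8, 0b11110100), free);
        // If free_index >= callee_preserved_regs.len, every register is taken
        // and allocReg returns null instead.
    }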
@@ -502,8 +515,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
entry.value = .dead;
switch (prev_value) {
.register => |reg| {
- _ = branch.registers.remove(reg);
- branch.markRegFree(reg);
+ const reg64 = reg.to64();
+ _ = branch.registers.remove(reg64);
+ branch.markRegFree(reg64);
},
else => {}, // TODO process stack allocation death
}
@@ -582,30 +596,26 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
self.stack_align = abi_align;
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
- // TODO Make sure the type can fit in a register before we try to allocate one.
- const free_index = @ctz(FreeRegInt, branch.free_registers);
- if (free_index >= callee_preserved_regs.len) {
- const stack_offset = try self.allocMem(inst, abi_size, abi_align);
- return MCValue{ .stack_offset = stack_offset };
+ // Make sure the type can fit in a register before we try to allocate one.
+ const ptr_bits = arch.ptrBitWidth();
+ const ptr_bytes: u64 = @divExact(ptr_bits, 8);
+ if (abi_size <= ptr_bytes) {
+ try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1);
+ if (branch.allocReg(inst)) |reg| {
+ return MCValue{ .register = registerAlias(reg, abi_size) };
+ }
}
- branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index);
- const reg = callee_preserved_regs[free_index];
- try branch.registers.putNoClobber(self.gpa, reg, .{ .inst = inst });
- return MCValue{ .register = reg };
+ const stack_offset = try self.allocMem(inst, abi_size, abi_align);
+ return MCValue{ .stack_offset = stack_offset };
}
/// Does not "move" the instruction.
fn copyToNewRegister(self: *Self, inst: *ir.Inst) !MCValue {
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1);
- try branch.inst_table.ensureCapacity(self.gpa, branch.inst_table.items().len + 1);
- const free_index = @ctz(FreeRegInt, branch.free_registers);
- if (free_index >= callee_preserved_regs.len)
+ const reg = branch.allocReg(inst) orelse
return self.fail(inst.src, "TODO implement spilling register to stack", .{});
- branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index);
- const reg = callee_preserved_regs[free_index];
- branch.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst });
const old_mcv = branch.inst_table.get(inst).?;
const new_mcv: MCValue = .{ .register = reg };
try self.genSetReg(inst.src, reg, old_mcv);
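
Both allocation sites now go through allocReg; they differ only in what happens on failure (allocRegOrMem falls back to a stack slot, copyToNewRegister still bails out with a TODO) and in that allocRegOrMem narrows the result with registerAlias so the MCValue width matches the value's ABI size. For example (hypothetical register choice, whichever callee-preserved register happens to be free):

    // A u32 local (abi_size = 4) might be tracked as the 32-bit alias:
    //     MCValue{ .register = registerAlias(.rbx, 4) }   // == .{ .register = .ebx }
    // A 16-byte value fails the abi_size <= ptr_bytes check and gets a stack_offset instead.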
@@ -1131,7 +1141,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// test reg, 1
// TODO detect al, ax, eax
try self.code.ensureCapacity(self.code.items.len + 4);
- self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 });
+ // TODO audit this codegen: we force w = true here to make
+ // the value affect the big register
+ self.rex(.{ .b = reg.isExtended(), .w = true });
self.code.appendSliceAssumeCapacity(&[_]u8{
0xf6,
@as(u8, 0xC0) | (0 << 3) | @truncate(u3, reg.id()),
@@ -1319,7 +1331,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
if (!self.wantSafety())
return; // The already existing value will do just fine.
// TODO Upgrade this to a memset call when we have that available.
- return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa });
+ switch (ty.abiSize(self.target.*)) {
+ 1 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaa }),
+ 2 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaa }),
+ 4 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa }),
+ 8 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
+ else => return self.fail(src, "TODO implement memset", .{}),
+ }
},
.compare_flags_unsigned => |op| {
return self.fail(src, "TODO implement set stack variable with compare flags value (unsigned)", .{});
@@ -1328,24 +1346,35 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{});
},
.immediate => |x_big| {
- if (ty.abiSize(self.target.*) != 4) {
- // TODO after fixing this, need to update the undef case above
- return self.fail(src, "TODO implement set non 4 abi size stack variable with immediate", .{});
+ if (stack_offset > 128) {
+ return self.fail(src, "TODO implement set stack variable with large stack offset", .{});
}
- try self.code.ensureCapacity(self.code.items.len + 7);
- if (x_big <= math.maxInt(u32)) {
- const x = @intCast(u32, x_big);
- if (stack_offset > 128) {
- return self.fail(src, "TODO implement set stack variable with large stack offset", .{});
- }
- // We have a positive stack offset value but we want a twos complement negative
- // offset from rbp, which is at the top of the stack frame.
- const negative_offset = @intCast(i8, -@intCast(i32, stack_offset));
- const twos_comp = @bitCast(u8, negative_offset);
- // mov DWORD PTR [rbp+offset], immediate
- self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp });
- mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x);
- } else {
+ try self.code.ensureCapacity(self.code.items.len + 8);
+ switch (ty.abiSize(self.target.*)) {
+ 1 => {
+ return self.fail(src, "TODO implement set abi_size=1 stack variable with immediate", .{});
+ },
+ 2 => {
+ return self.fail(src, "TODO implement set abi_size=2 stack variable with immediate", .{});
+ },
+ 4 => {
+ const x = @intCast(u32, x_big);
+ // We have a positive stack offset value but we want a twos complement negative
+ // offset from rbp, which is at the top of the stack frame.
+ const negative_offset = @intCast(i8, -@intCast(i32, stack_offset));
+ const twos_comp = @bitCast(u8, negative_offset);
+ // mov DWORD PTR [rbp+offset], immediate
+ self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp });
+ mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x);
+ },
+ 8 => {
+ return self.fail(src, "TODO implement set abi_size=8 stack variable with immediate", .{});
+ },
+ else => {
+ return self.fail(src, "TODO implement set abi_size=large stack variable with immediate", .{});
+ },
+ }
+ if (x_big <= math.maxInt(u32)) {} else {
return self.fail(src, "TODO implement set stack variable with large immediate", .{});
}
},
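
The 4-byte immediate case keeps the previous encoding: opcode 0xC7 /0 with an rbp-relative disp8 followed by a 32-bit little-endian immediate. A worked example of the bytes this path emits, assuming stack_offset = 4 and an immediate of 10:

    // negative_offset = -4  ->  twos_comp = 0xFC
    // C7 45 FC 0A 00 00 00      mov DWORD PTR [rbp-0x4], 0xA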
@@ -1407,7 +1436,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
},
.compare_flags_unsigned => |op| {
try self.code.ensureCapacity(self.code.items.len + 3);
- self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 });
+ // TODO audit this codegen: we force w = true here to make
+ // the value affect the big register
+ self.rex(.{ .b = reg.isExtended(), .w = true });
const opcode: u8 = switch (op) {
.gte => 0x93,
.gt => 0x97,
@@ -1423,9 +1454,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
return self.fail(src, "TODO set register with compare flags value (signed)", .{});
},
.immediate => |x| {
- if (reg.size() != 64) {
- return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
- }
// 32-bit moves zero-extend to 64-bit, so xoring the 32-bit
// register is the fastest way to zero a register.
if (x == 0) {
@@ -1478,16 +1506,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
//
// In this case, the encoding of the REX byte is 0b0100100B
try self.code.ensureCapacity(self.code.items.len + 10);
- self.rex(.{ .w = true, .b = reg.isExtended() });
+ self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
self.code.items.len += 9;
self.code.items[self.code.items.len - 9] = 0xB8 | @as(u8, reg.id() & 0b111);
const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8];
mem.writeIntLittle(u64, imm_ptr, x);
},
.embedded_in_code => |code_offset| {
- if (reg.size() != 64) {
- return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
- }
// We need the offset from RIP in a signed i32 twos complement.
// The instruction is 7 bytes long and RIP points to the next instruction.
try self.code.ensureCapacity(self.code.items.len + 7);
@@ -1495,7 +1520,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// but the operation size is unchanged. Since we're using a disp32, we want mode 0 and lower three
// bits as five.
// REX 0x8D 0b00RRR101, where RRR is the lower three bits of the id.
- self.rex(.{ .w = true, .b = reg.isExtended() });
+ self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
self.code.items.len += 6;
const rip = self.code.items.len;
const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip);
@@ -1507,12 +1532,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
},
.register => |src_reg| {
// If the registers are the same, nothing to do.
- if (src_reg == reg)
+ if (src_reg.id() == reg.id())
return;
- if (reg.size() != 64) {
- return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
- }
// This is a variant of 8B /r. Since we're using 64-bit moves, we require a REX.
// This is thus three bytes: REX 0x8B R/M.
// If the destination is extended, the R field must be 1.
@@ -1520,14 +1542,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// Since the register is being accessed directly, the R/M mode is three. The reg field (the middle
// three bits) contain the destination, and the R/M field (the lower three bits) contain the source.
try self.code.ensureCapacity(self.code.items.len + 3);
- self.rex(.{ .w = true, .r = reg.isExtended(), .b = src_reg.isExtended() });
+ self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended(), .b = src_reg.isExtended() });
const R = 0xC0 | (@as(u8, reg.id() & 0b111) << 3) | @as(u8, src_reg.id() & 0b111);
self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, R });
},
.memory => |x| {
- if (reg.size() != 64) {
- return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
- }
if (x <= math.maxInt(u32)) {
// Moving from memory to a register is a variant of `8B /r`.
// Since we're using 64-bit moves, we require a REX.
@@ -1537,7 +1556,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// 0b00RRR100, where RRR is the lower three bits of the register ID.
// The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32.
try self.code.ensureCapacity(self.code.items.len + 8);
- self.rex(.{ .w = true, .b = reg.isExtended() });
+ self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
self.code.appendSliceAssumeCapacity(&[_]u8{
0x8B,
0x04 | (@as(u8, reg.id() & 0b111) << 3), // R
@@ -1580,18 +1599,15 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
//
// Furthermore, if this is an extended register, both B and R must be set in the REX byte, as *both*
// register operands need to be marked as extended.
- self.rex(.{ .w = true, .b = reg.isExtended(), .r = reg.isExtended() });
+ self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended(), .r = reg.isExtended() });
const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id());
self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, RM });
}
}
},
.stack_offset => |off| {
- if (reg.size() != 64) {
- return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
- }
try self.code.ensureCapacity(self.code.items.len + 7);
- self.rex(.{ .w = true, .r = reg.isExtended() });
+ self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() });
const reg_id: u8 = @truncate(u3, reg.id());
if (off <= 128) {
// Example: 48 8b 4d 7f mov rcx,QWORD PTR [rbp+0x7f]
@@ -1750,11 +1766,16 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
for (param_types) |ty, i| {
switch (ty.zigTypeTag()) {
.Bool, .Int => {
+ const param_size = @intCast(u32, ty.abiSize(self.target.*));
if (next_int_reg >= c_abi_int_param_regs.len) {
result.args[i] = .{ .stack_offset = next_stack_offset };
- next_stack_offset += @intCast(u32, ty.abiSize(self.target.*));
+ next_stack_offset += param_size;
} else {
- result.args[i] = .{ .register = c_abi_int_param_regs[next_int_reg] };
+ const aliased_reg = registerAlias(
+ c_abi_int_param_regs[next_int_reg],
+ param_size,
+ );
+ result.args[i] = .{ .register = aliased_reg };
next_int_reg += 1;
}
},
@@ -1778,7 +1799,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
.x86_64 => switch (cc) {
.Naked => unreachable,
.Unspecified, .C => {
- result.return_value = .{ .register = c_abi_int_return_regs[0] };
+ const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*));
+ const aliased_reg = registerAlias(c_abi_int_return_regs[0], ret_ty_size);
+ result.return_value = .{ .register = aliased_reg };
},
else => return self.fail(src, "TODO implement function return values for {}", .{cc}),
},
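
Combined with the parameter hunk above, this is what lets the new test case below pass: integer parameters and return values are now tracked in the register alias matching their ABI size. Assuming the usual SysV ordering for c_abi_int_param_regs (rdi, rsi, ...) and rax as c_abi_int_return_regs[0], the test's add function would be lowered roughly as:

    // fn add(a: u32, b: u32) u32
    //   a      -> .{ .register = .edi }   // registerAlias(.rdi, 4)
    //   b      -> .{ .register = .esi }   // registerAlias(.rsi, 4)
    //   result -> .{ .register = .eax }   // registerAlias(.rax, 4)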
@@ -1825,5 +1848,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
fn parseRegName(name: []const u8) ?Register {
return std.meta.stringToEnum(Register, name);
}
+
+ fn registerAlias(reg: Register, size_bytes: u32) Register {
+ switch (arch) {
+ // For x86_64 we have to pick a smaller register alias depending on abi size.
+ .x86_64 => switch (size_bytes) {
+ 1 => return reg.to8(),
+ 2 => return reg.to16(),
+ 4 => return reg.to32(),
+ 8 => return reg.to64(),
+ else => unreachable,
+ },
+ else => return reg,
+ }
+ }
};
}
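
Several genSetReg cases above also stop hardcoding REX.W and instead derive it from the destination register's width (reg.size() == 64), and the register-to-register case compares id()s so a move between two width aliases of the same physical register is skipped. For the plain 8B /r move the intended encodings are (whether a redundant REX prefix still gets emitted for the 32-bit form depends on the rex() helper):

    // mov rcx, rax   ->  REX.W + 8B /r  ->  48 8B C8
    // mov ecx, eax   ->          8B /r  ->     8B C8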
test/stage2/compare_output.zig
@@ -363,5 +363,39 @@ pub fn addCases(ctx: *TestContext) !void {
,
"",
);
+
+ // Local mutable variables.
+ case.addCompareOutput(
+ \\export fn _start() noreturn {
+ \\ assert(add(3, 4) == 7);
+ \\ assert(add(20, 10) == 30);
+ \\
+ \\ exit();
+ \\}
+ \\
+ \\fn add(a: u32, b: u32) u32 {
+ \\ var x: u32 = undefined;
+ \\ x = 0;
+ \\ x += a;
+ \\ x += b;
+ \\ return x;
+ \\}
+ \\
+ \\pub fn assert(ok: bool) void {
+ \\ if (!ok) unreachable; // assertion failure
+ \\}
+ \\
+ \\fn exit() noreturn {
+ \\ asm volatile ("syscall"
+ \\ :
+ \\ : [number] "{rax}" (231),
+ \\ [arg1] "{rdi}" (0)
+ \\ : "rcx", "r11", "memory"
+ \\ );
+ \\ unreachable;
+ \\}
+ ,
+ "",
+ );
}
}