Commit d211dc37c8

joachimschmidt557 <joachim.schmidt557@outlook.com>
2021-05-08 09:33:01
stage2 ARM: Overhaul of genArmBinOp
1 parent 2299e5f
Changed files (2)
src/codegen.zig
@@ -928,7 +928,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     // TODO separate architectures with registers from
                     // stack-based architectures (spu_2)
                     if (callee_preserved_regs.len > 0) {
-                        if (self.register_manager.tryAllocReg(inst)) |reg| {
+                        if (self.register_manager.tryAllocReg(inst, &.{})) |reg| {
                             return MCValue{ .register = registerAlias(reg, abi_size) };
                         }
                     }
@@ -940,6 +940,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
 
         pub fn spillInstruction(self: *Self, src: LazySrcLoc, reg: Register, inst: *ir.Inst) !void {
             const stack_mcv = try self.allocRegOrMem(inst, false);
+            log.debug("spilling {*} to stack mcv {any}", .{ inst, stack_mcv });
             const reg_mcv = self.getResolvedInstValue(inst);
             assert(reg == toCanonicalReg(reg_mcv.register));
             const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
@@ -951,7 +952,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
         /// allocated. A second call to `copyToTmpRegister` may return the same register.
         /// This can have a side effect of spilling instructions to the stack to free up a register.
         fn copyToTmpRegister(self: *Self, src: LazySrcLoc, ty: Type, mcv: MCValue) !Register {
-            const reg = try self.register_manager.allocRegWithoutTracking();
+            const reg = try self.register_manager.allocRegWithoutTracking(&.{});
             try self.genSetReg(src, ty, reg, mcv);
             return reg;
         }
@@ -960,7 +961,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
         /// `reg_owner` is the instruction that gets associated with the register in the register table.
         /// This can have a side effect of spilling instructions to the stack to free up a register.
         fn copyToNewRegister(self: *Self, reg_owner: *ir.Inst, mcv: MCValue) !MCValue {
-            const reg = try self.register_manager.allocReg(reg_owner);
+            const reg = try self.register_manager.allocReg(reg_owner, &.{});
             try self.genSetReg(reg_owner.src, reg_owner.ty, reg, mcv);
             return MCValue{ .register = reg };
         }
@@ -1380,36 +1381,124 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             }
         }
 
+        fn armOperandShouldBeRegister(self: *Self, src: LazySrcLoc, mcv: MCValue) !bool {
+            return switch (mcv) {
+                .none => unreachable,
+                .undef => unreachable,
+                .dead, .unreach => unreachable,
+                .compare_flags_unsigned => unreachable,
+                .compare_flags_signed => unreachable,
+                .ptr_stack_offset => unreachable,
+                .ptr_embedded_in_code => unreachable,
+                .immediate => |imm| blk: {
+                    if (imm > std.math.maxInt(u32)) return self.fail(src, "TODO ARM binary arithmetic immediate larger than u32", .{});
+
+                    // Load immediate into register if it doesn't fit
+                    // in an operand
+                    break :blk Instruction.Operand.fromU32(@intCast(u32, imm)) == null;
+                },
+                .register => true,
+                .stack_offset,
+                .embedded_in_code,
+                .memory,
+                => true,
+            };
+        }
+
         fn genArmBinOp(self: *Self, inst: *ir.Inst, op_lhs: *ir.Inst, op_rhs: *ir.Inst, op: ir.Inst.Tag) !MCValue {
             const lhs = try self.resolveInst(op_lhs);
             const rhs = try self.resolveInst(op_rhs);
 
+            const lhs_is_register = lhs == .register;
+            const rhs_is_register = rhs == .register;
+            const lhs_should_be_register = try self.armOperandShouldBeRegister(op_lhs.src, lhs);
+            const rhs_should_be_register = try self.armOperandShouldBeRegister(op_rhs.src, rhs);
+            const reuse_lhs = lhs_is_register and self.reuseOperand(inst, 0, lhs);
+            const reuse_rhs = !reuse_lhs and rhs_is_register and self.reuseOperand(inst, 1, rhs);
+
             // Destination must be a register
             var dst_mcv: MCValue = undefined;
-            var lhs_mcv: MCValue = undefined;
-            var rhs_mcv: MCValue = undefined;
-            if (self.reuseOperand(inst, 0, lhs)) {
-                // LHS is the destination
-                // RHS is the source
-                lhs_mcv = if (lhs != .register) try self.copyToNewRegister(inst, lhs) else lhs;
-                rhs_mcv = rhs;
-                dst_mcv = lhs_mcv;
-            } else if (self.reuseOperand(inst, 1, rhs)) {
-                // RHS is the destination
-                // LHS is the source
-                lhs_mcv = lhs;
-                rhs_mcv = if (rhs != .register) try self.copyToNewRegister(inst, rhs) else rhs;
-                dst_mcv = rhs_mcv;
+            var lhs_mcv = lhs;
+            var rhs_mcv = rhs;
+            var swap_lhs_and_rhs = false;
+
+            // Allocate registers for operands and/or destination
+            const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
+            if (reuse_lhs) {
+                // Allocate 0 or 1 registers
+                if (!rhs_is_register and rhs_should_be_register) {
+                    rhs_mcv = MCValue{ .register = try self.register_manager.allocReg(op_rhs, &.{lhs.register}) };
+                    branch.inst_table.putAssumeCapacity(op_rhs, rhs_mcv);
+                }
+                dst_mcv = lhs;
+            } else if (reuse_rhs) {
+                // Allocate 0 or 1 registers
+                if (!lhs_is_register and lhs_should_be_register) {
+                    lhs_mcv = MCValue{ .register = try self.register_manager.allocReg(op_lhs, &.{rhs.register}) };
+                    branch.inst_table.putAssumeCapacity(op_lhs, lhs_mcv);
+                }
+                dst_mcv = rhs;
+
+                swap_lhs_and_rhs = true;
             } else {
-                // TODO save 1 copy instruction by directly allocating the destination register
-                // LHS is the destination
-                // RHS is the source
-                lhs_mcv = try self.copyToNewRegister(inst, lhs);
-                rhs_mcv = rhs;
-                dst_mcv = lhs_mcv;
+                // Allocate 1 or 2 registers
+                if (lhs_should_be_register and rhs_should_be_register) {
+                    if (lhs_is_register and rhs_is_register) {
+                        dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{ lhs.register, rhs.register }) };
+                    } else if (lhs_is_register) {
+                        // Move RHS to register
+                        dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{lhs.register}) };
+                        rhs_mcv = dst_mcv;
+                    } else if (rhs_is_register) {
+                        // Move LHS to register
+                        dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{rhs.register}) };
+                        lhs_mcv = dst_mcv;
+                    } else {
+                        // Move LHS and RHS to register
+                        const regs = try self.register_manager.allocRegs(2, .{ inst, op_rhs }, &.{});
+                        lhs_mcv = MCValue{ .register = regs[0] };
+                        rhs_mcv = MCValue{ .register = regs[1] };
+                        dst_mcv = lhs_mcv;
+
+                        branch.inst_table.putAssumeCapacity(op_rhs, rhs_mcv);
+                    }
+                } else if (lhs_should_be_register) {
+                    // RHS is immediate
+                    if (lhs_is_register) {
+                        dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{lhs.register}) };
+                    } else {
+                        dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{}) };
+                        lhs_mcv = dst_mcv;
+                    }
+                } else if (rhs_should_be_register) {
+                    // LHS is immediate
+                    if (rhs_is_register) {
+                        dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{rhs.register}) };
+                    } else {
+                        dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{}) };
+                        rhs_mcv = dst_mcv;
+                    }
+
+                    swap_lhs_and_rhs = true;
+                } else unreachable; // binary operation on two immediates
             }
 
-            try self.genArmBinOpCode(inst.src, dst_mcv.register, lhs_mcv, rhs_mcv, op);
+            // Move the operands to the newly allocated registers
+            if (lhs_mcv == .register and !lhs_is_register) {
+                try self.genSetReg(op_lhs.src, op_lhs.ty, lhs_mcv.register, lhs);
+            }
+            if (rhs_mcv == .register and !rhs_is_register) {
+                try self.genSetReg(op_rhs.src, op_rhs.ty, rhs_mcv.register, rhs);
+            }
+
+            try self.genArmBinOpCode(
+                inst.src,
+                dst_mcv.register,
+                lhs_mcv,
+                rhs_mcv,
+                swap_lhs_and_rhs,
+                op,
+            );
             return dst_mcv;
         }
 
@@ -1419,11 +1508,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             dst_reg: Register,
             lhs_mcv: MCValue,
             rhs_mcv: MCValue,
+            swap_lhs_and_rhs: bool,
             op: ir.Inst.Tag,
         ) !void {
-            assert(lhs_mcv == .register or lhs_mcv == .register);
+            assert(lhs_mcv == .register or rhs_mcv == .register);
 
-            const swap_lhs_and_rhs = rhs_mcv == .register and lhs_mcv != .register;
             const op1 = if (swap_lhs_and_rhs) rhs_mcv.register else lhs_mcv.register;
             const op2 = if (swap_lhs_and_rhs) lhs_mcv else rhs_mcv;
 
@@ -1435,19 +1524,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 .compare_flags_signed => unreachable,
                 .ptr_stack_offset => unreachable,
                 .ptr_embedded_in_code => unreachable,
-                .immediate => |imm| blk: {
-                    if (imm > std.math.maxInt(u32)) return self.fail(src, "TODO ARM binary arithmetic immediate larger than u32", .{});
-
-                    // Load immediate into register if it doesn't fit
-                    // as an operand
-                    break :blk Instruction.Operand.fromU32(@intCast(u32, imm)) orelse
-                        Instruction.Operand.reg(try self.copyToTmpRegister(src, Type.initTag(.u32), op2), Instruction.Operand.Shift.none);
-                },
+                .immediate => |imm| Instruction.Operand.fromU32(@intCast(u32, imm)).?,
                 .register => |reg| Instruction.Operand.reg(reg, Instruction.Operand.Shift.none),
                 .stack_offset,
                 .embedded_in_code,
                 .memory,
-                => Instruction.Operand.reg(try self.copyToTmpRegister(src, Type.initTag(.u32), op2), Instruction.Operand.Shift.none),
+                => unreachable,
             };
 
             switch (op) {
@@ -2613,10 +2695,42 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     const lhs = try self.resolveInst(inst.lhs);
                     const rhs = try self.resolveInst(inst.rhs);
 
-                    const src_mcv = rhs;
-                    const dst_mcv = if (lhs != .register) try self.copyToNewRegister(inst.lhs, lhs) else lhs;
+                    const lhs_is_register = lhs == .register;
+                    const rhs_is_register = rhs == .register;
+                    // lhs should always be a register
+                    const rhs_should_be_register = try self.armOperandShouldBeRegister(inst.rhs.src, rhs);
+
+                    var lhs_mcv = lhs;
+                    var rhs_mcv = rhs;
+
+                    // Allocate registers
+                    if (rhs_should_be_register) {
+                        if (!lhs_is_register and !rhs_is_register) {
+                            const regs = try self.register_manager.allocRegs(2, .{ inst.rhs, inst.lhs }, &.{});
+                            lhs_mcv = MCValue{ .register = regs[0] };
+                            rhs_mcv = MCValue{ .register = regs[1] };
+                        } else if (!rhs_is_register) {
+                            rhs_mcv = MCValue{ .register = try self.register_manager.allocReg(inst.rhs, &.{}) };
+                        }
+                    }
+                    if (!lhs_is_register) {
+                        lhs_mcv = MCValue{ .register = try self.register_manager.allocReg(inst.lhs, &.{}) };
+                    }
+
+                    // Move the operands to the newly allocated registers
+                    const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
+                    if (lhs_mcv == .register and !lhs_is_register) {
+                        try self.genSetReg(inst.lhs.src, inst.lhs.ty, lhs_mcv.register, lhs);
+                        branch.inst_table.putAssumeCapacity(inst.lhs, lhs);
+                    }
+                    if (rhs_mcv == .register and !rhs_is_register) {
+                        try self.genSetReg(inst.rhs.src, inst.rhs.ty, rhs_mcv.register, rhs);
+                        branch.inst_table.putAssumeCapacity(inst.rhs, rhs);
+                    }
+
+                    // The destination register is not present in the cmp instruction
+                    try self.genArmBinOpCode(inst.base.src, undefined, lhs_mcv, rhs_mcv, false, .cmp_eq);
 
-                    try self.genArmBinOpCode(inst.base.src, dst_mcv.register, dst_mcv, src_mcv, .cmp_eq);
                     const info = inst.lhs.ty.intInfo(self.target.*);
                     return switch (info.signedness) {
                         .signed => MCValue{ .compare_flags_signed = op },
src/register_manager.zig
@@ -1,5 +1,6 @@
 const std = @import("std");
 const math = std.math;
+const mem = std.mem;
 const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 const ir = @import("ir.zig");
@@ -66,8 +67,13 @@ pub fn RegisterManager(
         }
 
         /// Returns `null` if all registers are allocated.
-        pub fn tryAllocRegs(self: *Self, comptime count: comptime_int, insts: [count]*ir.Inst) ?[count]Register {
-            if (self.tryAllocRegsWithoutTracking(count)) |regs| {
+        pub fn tryAllocRegs(
+            self: *Self,
+            comptime count: comptime_int,
+            insts: [count]*ir.Inst,
+            exceptions: []Register,
+        ) ?[count]Register {
+            if (self.tryAllocRegsWithoutTracking(count, exceptions)) |regs| {
                 for (regs) |reg, i| {
                     const index = reg.allocIndex().?; // allocIndex() on a callee-preserved reg should never return null
                     self.registers[index] = insts[i];
@@ -81,21 +87,30 @@ pub fn RegisterManager(
         }
 
         /// Returns `null` if all registers are allocated.
-        pub fn tryAllocReg(self: *Self, inst: *ir.Inst) ?Register {
-            return if (tryAllocRegs(self, 1, .{inst})) |regs| regs[0] else null;
+        pub fn tryAllocReg(self: *Self, inst: *ir.Inst, exceptions: []Register) ?Register {
+            return if (tryAllocRegs(self, 1, .{inst}, exceptions)) |regs| regs[0] else null;
         }
 
-        pub fn allocRegs(self: *Self, comptime count: comptime_int, insts: [count]*ir.Inst) ![count]Register {
+        pub fn allocRegs(
+            self: *Self,
+            comptime count: comptime_int,
+            insts: [count]*ir.Inst,
+            exceptions: []Register,
+        ) ![count]Register {
             comptime assert(count > 0 and count <= callee_preserved_regs.len);
+            assert(count + exceptions.len <= callee_preserved_regs.len);
 
-            return self.tryAllocRegs(count, insts) orelse blk: {
+            return self.tryAllocRegs(count, insts, exceptions) orelse blk: {
                 // We'll take over the first count registers. Spill
                // the instructions that were previously there to
                // stack allocations.
                 var regs: [count]Register = undefined;
-                std.mem.copy(Register, &regs, callee_preserved_regs[0..count]);
+                var i: usize = 0;
+                for (callee_preserved_regs) |reg| {
+                    if (i >= count) break;
+                    if (mem.indexOfScalar(Register, exceptions, reg) != null) continue;
+                    regs[i] = reg;
 
-                for (regs) |reg, i| {
                     const index = reg.allocIndex().?; // allocIndex() on a callee-preserved reg should never return null
                     if (self.isRegFree(reg)) {
                         self.markRegUsed(reg);
@@ -104,21 +119,28 @@ pub fn RegisterManager(
                         try self.getFunction().spillInstruction(spilled_inst.src, reg, spilled_inst);
                     }
                     self.registers[index] = insts[i];
+
+                    i += 1;
                 }
 
                 break :blk regs;
             };
         }
 
-        pub fn allocReg(self: *Self, inst: *ir.Inst) !Register {
-            return (try self.allocRegs(1, .{inst}))[0];
+        pub fn allocReg(self: *Self, inst: *ir.Inst, exceptions: []Register) !Register {
+            return (try self.allocRegs(1, .{inst}, exceptions))[0];
         }
 
         /// Does not track the registers.
         /// Returns `null` if not enough registers are free.
-        pub fn tryAllocRegsWithoutTracking(self: *Self, comptime count: comptime_int) ?[count]Register {
+        pub fn tryAllocRegsWithoutTracking(
+            self: *Self,
+            comptime count: comptime_int,
+            exceptions: []Register,
+        ) ?[count]Register {
             comptime if (callee_preserved_regs.len == 0) return null;
             comptime assert(count > 0 and count <= callee_preserved_regs.len);
+            assert(count + exceptions.len <= callee_preserved_regs.len);
 
             const free_registers = @popCount(FreeRegInt, self.free_registers);
             if (free_registers < count) return null;
@@ -127,30 +149,35 @@ pub fn RegisterManager(
             var i: usize = 0;
             for (callee_preserved_regs) |reg| {
                 if (i >= count) break;
+                if (mem.indexOfScalar(Register, exceptions, reg) != null) continue;
                 if (self.isRegFree(reg)) {
                     regs[i] = reg;
                     i += 1;
                 }
             }
-            return regs;
+
+            return if (i < count) null else regs;
         }
 
         /// Does not track the register.
         /// Returns `null` if all registers are allocated.
-        pub fn tryAllocRegWithoutTracking(self: *Self) ?Register {
-            return if (self.tryAllocRegsWithoutTracking(1)) |regs| regs[0] else null;
+        pub fn tryAllocRegWithoutTracking(self: *Self, exceptions: []Register) ?Register {
+            return if (self.tryAllocRegsWithoutTracking(1, exceptions)) |regs| regs[0] else null;
         }
 
         /// Does not track the registers
-        pub fn allocRegsWithoutTracking(self: *Self, comptime count: comptime_int) ![count]Register {
-            return self.tryAllocRegsWithoutTracking(count) orelse blk: {
+        pub fn allocRegsWithoutTracking(self: *Self, comptime count: comptime_int, exceptions: []Register) ![count]Register {
+            return self.tryAllocRegsWithoutTracking(count, exceptions) orelse blk: {
                 // We'll take over the first count registers. Spill
                // the instructions that were previously there to
                // stack allocations.
                 var regs: [count]Register = undefined;
-                std.mem.copy(Register, &regs, callee_preserved_regs[0..count]);
+                var i: usize = 0;
+                for (callee_preserved_regs) |reg| {
+                    if (i >= count) break;
+                    if (mem.indexOfScalar(Register, exceptions, reg) != null) continue;
+                    regs[i] = reg;
 
-                for (regs) |reg, i| {
                     const index = reg.allocIndex().?; // allocIndex() on a callee-preserved reg should never return null
                     if (!self.isRegFree(reg)) {
                         const spilled_inst = self.registers[index].?;
@@ -158,6 +185,8 @@ pub fn RegisterManager(
                         self.registers[index] = null;
                         self.markRegFree(reg);
                     }
+
+                    i += 1;
                 }
 
                 break :blk regs;
@@ -165,8 +194,8 @@ pub fn RegisterManager(
         }
 
         /// Does not track the register.
-        pub fn allocRegWithoutTracking(self: *Self) !Register {
-            return (try self.allocRegsWithoutTracking(1))[0];
+        pub fn allocRegWithoutTracking(self: *Self, exceptions: []Register) !Register {
+            return (try self.allocRegsWithoutTracking(1, exceptions))[0];
         }
 
         /// Allocates the specified register with the specified
@@ -270,9 +299,9 @@ test "tryAllocReg: no spilling" {
     try std.testing.expect(!function.register_manager.isRegAllocated(.r2));
     try std.testing.expect(!function.register_manager.isRegAllocated(.r3));
 
-    try std.testing.expectEqual(@as(?MockRegister, .r2), function.register_manager.tryAllocReg(&mock_instruction));
-    try std.testing.expectEqual(@as(?MockRegister, .r3), function.register_manager.tryAllocReg(&mock_instruction));
-    try std.testing.expectEqual(@as(?MockRegister, null), function.register_manager.tryAllocReg(&mock_instruction));
+    try std.testing.expectEqual(@as(?MockRegister, .r2), function.register_manager.tryAllocReg(&mock_instruction, &.{}));
+    try std.testing.expectEqual(@as(?MockRegister, .r3), function.register_manager.tryAllocReg(&mock_instruction, &.{}));
+    try std.testing.expectEqual(@as(?MockRegister, null), function.register_manager.tryAllocReg(&mock_instruction, &.{}));
 
     try std.testing.expect(function.register_manager.isRegAllocated(.r2));
     try std.testing.expect(function.register_manager.isRegAllocated(.r3));
@@ -301,16 +330,16 @@ test "allocReg: spilling" {
     try std.testing.expect(!function.register_manager.isRegAllocated(.r2));
     try std.testing.expect(!function.register_manager.isRegAllocated(.r3));
 
-    try std.testing.expectEqual(@as(?MockRegister, .r2), try function.register_manager.allocReg(&mock_instruction));
-    try std.testing.expectEqual(@as(?MockRegister, .r3), try function.register_manager.allocReg(&mock_instruction));
+    try std.testing.expectEqual(@as(?MockRegister, .r2), try function.register_manager.allocReg(&mock_instruction, &.{}));
+    try std.testing.expectEqual(@as(?MockRegister, .r3), try function.register_manager.allocReg(&mock_instruction, &.{}));
 
     // Spill a register
-    try std.testing.expectEqual(@as(?MockRegister, .r2), try function.register_manager.allocReg(&mock_instruction));
+    try std.testing.expectEqual(@as(?MockRegister, .r2), try function.register_manager.allocReg(&mock_instruction, &.{}));
     try std.testing.expectEqualSlices(MockRegister, &[_]MockRegister{.r2}, function.spilled.items);
 
     // No spilling necessary
     function.register_manager.freeReg(.r3);
-    try std.testing.expectEqual(@as(?MockRegister, .r3), try function.register_manager.allocReg(&mock_instruction));
+    try std.testing.expectEqual(@as(?MockRegister, .r3), try function.register_manager.allocReg(&mock_instruction, &.{}));
     try std.testing.expectEqualSlices(MockRegister, &[_]MockRegister{.r2}, function.spilled.items);
 }