Commit 8204ad1937

joachimschmidt557 <joachim.schmidt557@outlook.com>
2022-02-05 20:51:11
stage2 AArch64: implement slice_len and slice_elem_val
1 parent 82f91ad
Changed files (3)
src/arch/aarch64/CodeGen.zig
@@ -1164,7 +1164,20 @@ fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void {
 
 fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void {
     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
-    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement slice_len for {}", .{self.target.cpu.arch});
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: {
+        const mcv = try self.resolveInst(ty_op.operand);
+        switch (mcv) {
+            .dead, .unreach => unreachable,
+            .register => unreachable, // a slice doesn't fit in one register
+            .stack_offset => |off| {
+                break :result MCValue{ .stack_offset = off + 8 };
+            },
+            .memory => |addr| {
+                break :result MCValue{ .memory = addr + 8 };
+            },
+            else => return self.fail("TODO implement slice_len for {}", .{mcv}),
+        }
+    };
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
@@ -1183,10 +1196,114 @@ fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void {
 fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
     const is_volatile = false; // TODO
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
-    const result: MCValue = if (!is_volatile and self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement slice_elem_val for {}", .{self.target.cpu.arch});
+
+    if (!is_volatile and self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none });
+    const result: MCValue = result: {
+        const slice_mcv = try self.resolveInst(bin_op.lhs);
+
+        // TODO optimize for the case where the index is a constant,
+        // i.e. index_mcv == .immediate
+        const index_mcv = try self.resolveInst(bin_op.rhs);
+        const index_is_register = index_mcv == .register;
+
+        const slice_ty = self.air.typeOf(bin_op.lhs);
+        const elem_ty = slice_ty.childType();
+        const elem_size = elem_ty.abiSize(self.target.*);
+
+        var buf: Type.SlicePtrFieldTypeBuffer = undefined;
+        const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf);
+
+        if (index_is_register) self.register_manager.freezeRegs(&.{index_mcv.register});
+        defer if (index_is_register) self.register_manager.unfreezeRegs(&.{index_mcv.register});
+
+        const base_mcv: MCValue = switch (slice_mcv) {
+            .stack_offset => |off| .{ .register = try self.copyToTmpRegister(slice_ptr_field_type, .{ .stack_offset = off + 8 }) },
+            else => return self.fail("TODO slice_elem_val when slice is {}", .{slice_mcv}),
+        };
+        self.register_manager.freezeRegs(&.{base_mcv.register});
+
+        // TODO implement optimized ldr for airSliceElemVal
+        const dst_mcv = try self.allocRegOrMem(inst, true);
+
+        const offset_mcv = try self.genMulConstant(bin_op.rhs, @intCast(u32, elem_size));
+        assert(offset_mcv == .register); // result of multiplication should always be register
+        self.register_manager.freezeRegs(&.{offset_mcv.register});
+
+        const addr_reg = try self.register_manager.allocReg(null);
+        self.register_manager.freezeRegs(&.{addr_reg});
+        defer self.register_manager.unfreezeRegs(&.{addr_reg});
+
+        _ = try self.addInst(.{
+            .tag = .add_shifted_register,
+            .data = .{ .rrr_imm6_shift = .{
+                .rd = addr_reg,
+                .rn = base_mcv.register,
+                .rm = offset_mcv.register,
+                .imm6 = 0,
+                .shift = .lsl,
+            } },
+        });
+
+        // At this point in time, neither the base register
+        // nor the offset register contains any valuable data
+        // anymore.
+        self.register_manager.unfreezeRegs(&.{ base_mcv.register, offset_mcv.register });
+
+        try self.load(dst_mcv, .{ .register = addr_reg }, slice_ptr_field_type);
+
+        break :result dst_mcv;
+    };
     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
+fn genMulConstant(self: *Self, op: Air.Inst.Ref, imm: u32) !MCValue {
+    const lhs = try self.resolveInst(op);
+    const rhs = MCValue{ .immediate = imm };
+
+    const lhs_is_register = lhs == .register;
+
+    if (lhs_is_register) self.register_manager.freezeRegs(&.{lhs.register});
+    defer if (lhs_is_register) self.register_manager.unfreezeRegs(&.{lhs.register});
+
+    // Destination must be a register
+    // LHS must be a register
+    // RHS must be a register
+    var dst_mcv: MCValue = undefined;
+    var lhs_mcv: MCValue = lhs;
+    var rhs_mcv: MCValue = rhs;
+
+    // Allocate registers for operands and/or destination
+    // Allocate 1 or 2 registers
+    if (lhs_is_register) {
+        // Move RHS to register
+        dst_mcv = MCValue{ .register = try self.register_manager.allocReg(null) };
+        rhs_mcv = dst_mcv;
+    } else {
+        // Move LHS and RHS to register
+        const regs = try self.register_manager.allocRegs(2, .{ null, null });
+        lhs_mcv = MCValue{ .register = regs[0] };
+        rhs_mcv = MCValue{ .register = regs[1] };
+        dst_mcv = lhs_mcv;
+    }
+
+    // Move the operands to the newly allocated registers
+    if (!lhs_is_register) {
+        try self.genSetReg(self.air.typeOf(op), lhs_mcv.register, lhs);
+    }
+    try self.genSetReg(Type.initTag(.usize), rhs_mcv.register, rhs);
+
+    _ = try self.addInst(.{
+        .tag = .mul,
+        .data = .{ .rrr = .{
+            .rd = dst_mcv.register,
+            .rn = lhs_mcv.register,
+            .rm = rhs_mcv.register,
+        } },
+    });
+
+    return dst_mcv;
+}
+
 fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void {
     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
     const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
@@ -1310,6 +1427,16 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo
                 .undef => unreachable,
                 .compare_flags_signed, .compare_flags_unsigned => unreachable,
                 .embedded_in_code => unreachable,
+                .register => |dst_reg| {
+                    _ = try self.addInst(.{
+                        .tag = .ldr_immediate,
+                        .data = .{ .load_store_register_immediate = .{
+                            .rt = dst_reg,
+                            .rn = addr_reg,
+                            .offset = Instruction.LoadStoreOffset.none.immediate,
+                        } },
+                    });
+                },
                 .stack_offset => |off| {
                     if (elem_ty.abiSize(self.target.*) <= 8) {
                         const tmp_reg = try self.register_manager.allocReg(null);
@@ -2590,8 +2717,61 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
             if (stack_offset == off)
                 return; // Copy stack variable to itself; nothing to do.
 
-            const reg = try self.copyToTmpRegister(ty, mcv);
-            return self.genSetStack(ty, stack_offset, MCValue{ .register = reg });
+            const ptr_bits = self.target.cpu.arch.ptrBitWidth();
+            const ptr_bytes: u64 = @divExact(ptr_bits, 8);
+            if (ty.abiSize(self.target.*) <= ptr_bytes) {
+                const reg = try self.copyToTmpRegister(ty, mcv);
+                return self.genSetStack(ty, stack_offset, MCValue{ .register = reg });
+            } else {
+                // TODO optimize the register allocation
+                const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null });
+                self.register_manager.freezeRegs(&regs);
+                defer self.register_manager.unfreezeRegs(&regs);
+
+                const src_reg = regs[0];
+                const dst_reg = regs[1];
+                const len_reg = regs[2];
+                const count_reg = regs[3];
+                const tmp_reg = regs[4];
+
+                // sub src_reg, fp, #off
+                const adj_src_offset = off + @intCast(u32, ty.abiSize(self.target.*));
+                const src_offset = math.cast(u12, adj_src_offset) catch return self.fail("TODO load: larger stack offsets", .{});
+                _ = try self.addInst(.{
+                    .tag = .sub_immediate,
+                    .data = .{ .rr_imm12_sh = .{
+                        .rd = src_reg,
+                        .rn = .x29,
+                        .imm12 = src_offset,
+                    } },
+                });
+
+                // sub dst_reg, fp, #stack_offset
+                const adj_dst_off = stack_offset + @intCast(u32, ty.abiSize(self.target.*));
+                const dst_offset = math.cast(u12, adj_dst_off) catch return self.fail("TODO load: larger stack offsets", .{});
+                _ = try self.addInst(.{
+                    .tag = .sub_immediate,
+                    .data = .{ .rr_imm12_sh = .{
+                        .rd = dst_reg,
+                        .rn = .x29,
+                        .imm12 = dst_offset,
+                    } },
+                });
+
+                // mov len, #elem_size
+                const elem_size = @intCast(u32, ty.abiSize(self.target.*));
+                const len_imm = math.cast(u16, elem_size) catch return self.fail("TODO load: larger stack offsets", .{});
+                _ = try self.addInst(.{
+                    .tag = .movk,
+                    .data = .{ .r_imm16_sh = .{
+                        .rd = len_reg,
+                        .imm16 = len_imm,
+                    } },
+                });
+
+                // memcpy(src, dst, len)
+                try self.genInlineMemcpy(src_reg, dst_reg, len_reg, count_reg, tmp_reg);
+            }
         },
     }
 }
@@ -2711,7 +2891,8 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
                         } },
                     });
                 },
-                else => return self.fail("TODO implement genSetReg other types abi_size={}", .{abi_size}),
+                3 => return self.fail("TODO implement genSetReg types size 3", .{}),
+                else => unreachable,
             }
         },
         else => return self.fail("TODO implement genSetReg for aarch64 {}", .{mcv}),
src/arch/aarch64/Emit.zig
@@ -91,6 +91,7 @@ pub fn emitMir(
 
             .call_extern => try emit.mirCallExtern(inst),
 
+            .add_shifted_register => try emit.mirAddSubtractShiftedRegister(inst),
             .cmp_shifted_register => try emit.mirAddSubtractShiftedRegister(inst),
 
             .cset => try emit.mirConditionalSelect(inst),
@@ -132,6 +133,8 @@ pub fn emitMir(
             .movk => try emit.mirMoveWideImmediate(inst),
             .movz => try emit.mirMoveWideImmediate(inst),
 
+            .mul => try emit.mirDataProcessing3Source(inst),
+
             .nop => try emit.mirNop(),
 
             .push_regs => try emit.mirPushPopRegs(inst),
@@ -201,6 +204,12 @@ fn instructionSize(emit: *Emit, inst: Mir.Inst.Index) usize {
                 return 5 * 4;
             }
         },
+        .pop_regs, .push_regs => {
+            const reg_list = emit.mir.instructions.items(.data)[inst].reg_list;
+            const number_of_regs = @popCount(u32, reg_list);
+            const number_of_insts = std.math.divCeil(u6, number_of_regs, 2) catch unreachable;
+            return number_of_insts * 4;
+        },
         .call_extern => return 4,
         .dbg_line,
         .dbg_epilogue_begin,
@@ -565,15 +574,15 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void {
 fn mirAddSubtractShiftedRegister(emit: *Emit, inst: Mir.Inst.Index) !void {
     const tag = emit.mir.instructions.items(.tag)[inst];
     const rrr_imm6_shift = emit.mir.instructions.items(.data)[inst].rrr_imm6_shift;
+    const rd = rrr_imm6_shift.rd;
+    const rn = rrr_imm6_shift.rn;
+    const rm = rrr_imm6_shift.rm;
+    const shift = rrr_imm6_shift.shift;
+    const imm6 = rrr_imm6_shift.imm6;
 
     switch (tag) {
-        .cmp_shifted_register => try emit.writeInstruction(Instruction.subsShiftedRegister(
-            rrr_imm6_shift.rd,
-            rrr_imm6_shift.rn,
-            rrr_imm6_shift.rm,
-            rrr_imm6_shift.shift,
-            rrr_imm6_shift.imm6,
-        )),
+        .cmp_shifted_register => try emit.writeInstruction(Instruction.subsShiftedRegister(rd, rn, rm, shift, imm6)),
+        .add_shifted_register => try emit.writeInstruction(Instruction.addShiftedRegister(rd, rn, rm, shift, imm6)),
         else => unreachable,
     }
 }
@@ -802,6 +811,16 @@ fn mirMoveWideImmediate(emit: *Emit, inst: Mir.Inst.Index) !void {
     }
 }
 
+fn mirDataProcessing3Source(emit: *Emit, inst: Mir.Inst.Index) !void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    const rrr = emit.mir.instructions.items(.data)[inst].rrr;
+
+    switch (tag) {
+        .mul => try emit.writeInstruction(Instruction.mul(rrr.rd, rrr.rn, rrr.rm)),
+        else => unreachable,
+    }
+}
+
 fn mirNop(emit: *Emit) !void {
     try emit.writeInstruction(Instruction.nop());
 }
src/arch/aarch64/Mir.zig
@@ -26,6 +26,8 @@ pub const Inst = struct {
     pub const Tag = enum(u16) {
         /// Add (immediate)
         add_immediate,
+        /// Add (shifted register)
+        add_shifted_register,
         /// Branch conditionally
         b_cond,
         /// Branch
@@ -82,6 +84,8 @@ pub const Inst = struct {
         movk,
         /// Move wide with zero
         movz,
+        /// Multiply
+        mul,
         /// No Operation
         nop,
         /// Pseudo-instruction: Pop multiple registers
@@ -187,6 +191,14 @@ pub const Inst = struct {
             imm12: u12,
             sh: u1 = 0,
         },
+        /// Two registers
+        ///
+        /// Used by e.g. mul
+        rrr: struct {
+            rd: Register,
+            rn: Register,
+            rm: Register,
+        },
         /// Three registers and a shift (shift type and 6-bit amount)
         ///
         /// Used by e.g. cmp_shifted_register
@@ -208,7 +220,7 @@ pub const Inst = struct {
         },
         /// Two registers and a LoadStoreOffsetImmediate
         ///
-        /// Used by e.g. str_register
+        /// Used by e.g. str_immediate
         load_store_register_immediate: struct {
             rt: Register,
             rn: Register,