Commit 04f379dd41

joachimschmidt557 <joachim.schmidt557@outlook.com>
2022-02-03 20:31:01
stage2 ARM: optimize airSliceElemVal for elem_size 1 or 4
In these cases, the AIR instruction can be lowered to a single ldr (or ldrb) instruction that uses a shifted register offset. Also fixes the encoding of shifts in arm.bits.Offset.
1 parent 71321b6
Changed files (2)
src
src/arch/arm/bits.zig
@@ -343,11 +343,11 @@ pub const Instruction = union(enum) {
     /// which can either be content from a register or an immediate
     /// value
     pub const Operand = union(enum) {
-        Register: packed struct {
+        register: packed struct {
             rm: u4,
             shift: u8,
         },
-        Immediate: packed struct {
+        immediate: packed struct {
             imm: u8,
             rotate: u4,
         },
@@ -356,12 +356,12 @@ pub const Instruction = union(enum) {
         /// register can be shifted by a specific immediate value or
         /// by the contents of another register
         pub const Shift = union(enum) {
-            Immediate: packed struct {
+            immediate: packed struct {
                 fixed: u1 = 0b0,
                 typ: u2,
                 amount: u5,
             },
-            Register: packed struct {
+            register: packed struct {
                 fixed_1: u1 = 0b1,
                 typ: u2,
                 fixed_2: u1 = 0b0,
@@ -376,7 +376,7 @@ pub const Instruction = union(enum) {
             };
 
             pub const none = Shift{
-                .Immediate = .{
+                .immediate = .{
                     .amount = 0,
                     .typ = 0,
                 },
@@ -384,14 +384,14 @@ pub const Instruction = union(enum) {
 
             pub fn toU8(self: Shift) u8 {
                 return switch (self) {
-                    .Register => |v| @bitCast(u8, v),
-                    .Immediate => |v| @bitCast(u8, v),
+                    .register => |v| @bitCast(u8, v),
+                    .immediate => |v| @bitCast(u8, v),
                 };
             }
 
             pub fn reg(rs: Register, typ: Type) Shift {
                 return Shift{
-                    .Register = .{
+                    .register = .{
                         .rs = rs.id(),
                         .typ = @enumToInt(typ),
                     },
@@ -400,7 +400,7 @@ pub const Instruction = union(enum) {
 
             pub fn imm(amount: u5, typ: Type) Shift {
                 return Shift{
-                    .Immediate = .{
+                    .immediate = .{
                         .amount = amount,
                         .typ = @enumToInt(typ),
                     },
@@ -410,14 +410,14 @@ pub const Instruction = union(enum) {
 
         pub fn toU12(self: Operand) u12 {
             return switch (self) {
-                .Register => |v| @bitCast(u12, v),
-                .Immediate => |v| @bitCast(u12, v),
+                .register => |v| @bitCast(u12, v),
+                .immediate => |v| @bitCast(u12, v),
             };
         }
 
         pub fn reg(rm: Register, shift: Shift) Operand {
             return Operand{
-                .Register = .{
+                .register = .{
                     .rm = rm.id(),
                     .shift = shift.toU8(),
                 },
@@ -426,7 +426,7 @@ pub const Instruction = union(enum) {
 
         pub fn imm(immediate: u8, rotate: u4) Operand {
             return Operand{
-                .Immediate = .{
+                .immediate = .{
                     .imm = immediate,
                     .rotate = rotate,
                 },
@@ -447,7 +447,7 @@ pub const Instruction = union(enum) {
             return for (masks) |mask, i| {
                 if (x & mask == x) {
                     break Operand{
-                        .Immediate = .{
+                        .immediate = .{
                             .imm = @intCast(u8, std.math.rotl(u32, x, 2 * i)),
                             .rotate = @intCast(u4, i),
                         },
@@ -461,35 +461,67 @@ pub const Instruction = union(enum) {
     /// instruction. Data can be loaded from memory with either an
     /// immediate offset or an offset that is stored in some register.
     pub const Offset = union(enum) {
-        Immediate: u12,
-        Register: packed struct {
+        immediate: u12,
+        register: packed struct {
             rm: u4,
-            shift: u8,
+            fixed: u1 = 0b0,
+            stype: u2,
+            imm5: u5,
         },
 
+        pub const Shift = union(enum) {
+            /// No shift
+            none,
+            /// Logical shift left
+            lsl: u5,
+            /// Logical shift right
+            lsr: u5,
+            /// Arithmetic shift right
+            asr: u5,
+            /// Rotate right
+            ror: u5,
+            /// Rotate right one bit, with extend
+            rrx,
+        };
+
         pub const none = Offset{
-            .Immediate = 0,
+            .immediate = 0,
         };
 
         pub fn toU12(self: Offset) u12 {
             return switch (self) {
-                .Register => |v| @bitCast(u12, v),
-                .Immediate => |v| v,
+                .register => |v| @bitCast(u12, v),
+                .immediate => |v| v,
             };
         }
 
-        pub fn reg(rm: Register, shift: u8) Offset {
+        pub fn reg(rm: Register, shift: Shift) Offset {
             return Offset{
-                .Register = .{
+                .register = .{
                     .rm = rm.id(),
-                    .shift = shift,
+                    .stype = switch (shift) {
+                        .none => 0b00,
+                        .lsl => 0b00,
+                        .lsr => 0b01,
+                        .asr => 0b10,
+                        .ror => 0b11,
+                        .rrx => 0b11,
+                    },
+                    .imm5 = switch (shift) {
+                        .none => 0,
+                        .lsl => |n| n,
+                        .lsr => |n| n,
+                        .asr => |n| n,
+                        .ror => |n| n,
+                        .rrx => 0,
+                    },
                 },
             };
         }
 
         pub fn imm(immediate: u12) Offset {
             return Offset{
-                .Immediate = immediate,
+                .immediate = immediate,
             };
         }
     };
@@ -567,7 +599,7 @@ pub const Instruction = union(enum) {
         return Instruction{
             .data_processing = .{
                 .cond = @enumToInt(cond),
-                .i = @boolToInt(op2 == .Immediate),
+                .i = @boolToInt(op2 == .immediate),
                 .opcode = @enumToInt(opcode),
                 .s = s,
                 .rn = rn.id(),
@@ -681,7 +713,7 @@ pub const Instruction = union(enum) {
                 .byte_word = byte_word,
                 .up_down = @boolToInt(positive),
                 .pre_post = @boolToInt(pre_index),
-                .imm = @boolToInt(offset != .Immediate),
+                .imm = @boolToInt(offset != .immediate),
             },
         };
     }
src/arch/arm/CodeGen.zig
@@ -1222,9 +1222,16 @@ fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void {
 fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
     const is_volatile = false; // TODO
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
-    const result: MCValue = if (!is_volatile and self.liveness.isUnused(inst)) .dead else result: {
+
+    if (!is_volatile and self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none });
+    const result: MCValue = result: {
         const slice_mcv = try self.resolveInst(bin_op.lhs);
 
+        // TODO optimize for the case where the index is a constant,
+        // i.e. index_mcv == .immediate
+        const index_mcv = try self.resolveInst(bin_op.rhs);
+        const index_is_register = index_mcv == .register;
+
         const slice_ty = self.air.typeOf(bin_op.lhs);
         const elem_ty = slice_ty.childType();
         const elem_size = elem_ty.abiSize(self.target.*);
@@ -1232,12 +1239,8 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
         var buf: Type.SlicePtrFieldTypeBuffer = undefined;
         const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf);
 
-        // TODO optimize this for the case when elem_size is a power
-        // of two (includes elem_size == 1)
-        const offset_mcv = try self.genArmMulConstant(inst, bin_op.rhs, 1, @intCast(u32, elem_size));
-        assert(offset_mcv == .register); // result of multiplication should always be register
-        self.register_manager.freezeRegs(&.{offset_mcv.register});
-        defer self.register_manager.unfreezeRegs(&.{offset_mcv.register});
+        if (index_is_register) self.register_manager.freezeRegs(&.{index_mcv.register});
+        defer if (index_is_register) self.register_manager.unfreezeRegs(&.{index_mcv.register});
 
         const base_mcv: MCValue = switch (slice_mcv) {
             .stack_offset => .{ .register = try self.copyToTmpRegister(slice_ptr_field_type, slice_mcv) },
@@ -1246,61 +1249,67 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
         self.register_manager.freezeRegs(&.{base_mcv.register});
         defer self.register_manager.unfreezeRegs(&.{base_mcv.register});
 
-        if (elem_size <= 4) {
-            const dst_reg = try self.register_manager.allocReg(inst);
-            self.register_manager.freezeRegs(&.{dst_reg});
-            defer self.register_manager.unfreezeRegs(&.{dst_reg});
+        switch (elem_size) {
+            1, 4 => {
+                const dst_reg = try self.register_manager.allocReg(inst);
+                const dst_mcv = MCValue{ .register = dst_reg };
+                self.register_manager.freezeRegs(&.{dst_reg});
+                defer self.register_manager.unfreezeRegs(&.{dst_reg});
 
-            switch (elem_size) {
-                1, 4 => {
-                    const tag: Mir.Inst.Tag = switch (elem_size) {
-                        1 => .ldrb,
-                        4 => .ldr,
-                        else => unreachable,
-                    };
+                const index_reg: Register = switch (index_mcv) {
+                    .register => |reg| reg,
+                    else => try self.copyToTmpRegister(Type.usize, index_mcv),
+                };
+                self.register_manager.freezeRegs(&.{index_reg});
+                defer self.register_manager.unfreezeRegs(&.{index_reg});
 
-                    _ = try self.addInst(.{
-                        .tag = tag,
-                        .data = .{ .rr_offset = .{
-                            .rt = dst_reg,
-                            .rn = base_mcv.register,
-                            .offset = .{ .offset = Instruction.Offset.reg(offset_mcv.register, 0) },
-                        } },
-                    });
-                },
-                2 => {
-                    _ = try self.addInst(.{
-                        .tag = .ldrh,
-                        .data = .{ .rr_extra_offset = .{
-                            .rt = dst_reg,
-                            .rn = base_mcv.register,
-                            .offset = .{ .offset = Instruction.ExtraLoadStoreOffset.reg(offset_mcv.register) },
-                        } },
-                    });
-                },
-                else => unreachable,
-            }
+                const tag: Mir.Inst.Tag = switch (elem_size) {
+                    1 => .ldrb,
+                    4 => .ldr,
+                    else => unreachable,
+                };
+                const shift: u5 = switch (elem_size) {
+                    1 => 0,
+                    4 => 2,
+                    else => unreachable,
+                };
 
-            break :result MCValue{ .register = dst_reg };
-        } else {
-            const dst_mcv = try self.allocRegOrMem(inst, false);
+                _ = try self.addInst(.{
+                    .tag = tag,
+                    .data = .{ .rr_offset = .{
+                        .rt = dst_reg,
+                        .rn = base_mcv.register,
+                        .offset = .{ .offset = Instruction.Offset.reg(index_reg, .{ .lsl = shift }) },
+                    } },
+                });
 
-            const addr_reg = try self.register_manager.allocReg(null);
-            self.register_manager.freezeRegs(&.{addr_reg});
-            defer self.register_manager.unfreezeRegs(&.{addr_reg});
+                break :result dst_mcv;
+            },
+            else => {
+                const dst_mcv = try self.allocRegOrMem(inst, true);
+
+                const offset_mcv = try self.genArmMulConstant(bin_op.rhs, @intCast(u32, elem_size));
+                assert(offset_mcv == .register); // result of multiplication should always be register
+                self.register_manager.freezeRegs(&.{offset_mcv.register});
+                defer self.register_manager.unfreezeRegs(&.{offset_mcv.register});
 
-            try self.genArmBinOpCode(addr_reg, base_mcv, offset_mcv, false, .add, .unsigned);
+                const addr_reg = try self.register_manager.allocReg(null);
+                self.register_manager.freezeRegs(&.{addr_reg});
+                defer self.register_manager.unfreezeRegs(&.{addr_reg});
 
-            // I know we will unfreeze these registers at the end of
-            // the scope of :result. However, at this point in time,
-            // neither the base register nor the offset register
-            // contains any valuable data anymore. In order to reduce
-            // register pressure, unfreeze them prematurely
-            self.register_manager.unfreezeRegs(&.{ base_mcv.register, offset_mcv.register });
+                try self.genArmBinOpCode(addr_reg, base_mcv, offset_mcv, false, .add, .unsigned);
 
-            try self.load(dst_mcv, .{ .register = addr_reg }, slice_ptr_field_type);
+                // I know we will unfreeze these registers at the end of
+                // the scope of :result. However, at this point in time,
+                // neither the base register nor the offset register
+                // contains any valuable data anymore. In order to reduce
+                // register pressure, unfreeze them prematurely
+                self.register_manager.unfreezeRegs(&.{ base_mcv.register, offset_mcv.register });
 
-            break :result dst_mcv;
+                try self.load(dst_mcv, .{ .register = addr_reg }, slice_ptr_field_type);
+
+                break :result dst_mcv;
+            },
         }
     };
     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
@@ -1931,8 +1940,8 @@ fn genArmBinOpCode(
         .shl, .shr => {
             assert(!swap_lhs_and_rhs);
             const shift_amount = switch (operand) {
-                .Register => |reg_op| Instruction.ShiftAmount.reg(@intToEnum(Register, reg_op.rm)),
-                .Immediate => |imm_op| Instruction.ShiftAmount.imm(@intCast(u5, imm_op.imm)),
+                .register => |reg_op| Instruction.ShiftAmount.reg(@intToEnum(Register, reg_op.rm)),
+                .immediate => |imm_op| Instruction.ShiftAmount.imm(@intCast(u5, imm_op.imm)),
             };
 
             const tag: Mir.Inst.Tag = switch (op) {
@@ -2036,12 +2045,11 @@ fn genArmMul(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: Ai
     return dst_mcv;
 }
 
-fn genArmMulConstant(self: *Self, inst: Air.Inst.Index, op: Air.Inst.Ref, op_index: Liveness.OperandInt, imm: u32) !MCValue {
+fn genArmMulConstant(self: *Self, op: Air.Inst.Ref, imm: u32) !MCValue {
     const lhs = try self.resolveInst(op);
     const rhs = MCValue{ .immediate = imm };
 
     const lhs_is_register = lhs == .register;
-    const reuse_lhs = lhs_is_register and self.reuseOperand(inst, op, op_index, lhs);
 
     if (lhs_is_register) self.register_manager.freezeRegs(&.{lhs.register});
     defer if (lhs_is_register) self.register_manager.unfreezeRegs(&.{lhs.register});
@@ -2054,23 +2062,17 @@ fn genArmMulConstant(self: *Self, inst: Air.Inst.Index, op: Air.Inst.Ref, op_ind
     var rhs_mcv: MCValue = rhs;
 
     // Allocate registers for operands and/or destination
-    if (reuse_lhs) {
-        // Allocate 1 register
-        rhs_mcv = MCValue{ .register = try self.register_manager.allocReg(null) };
-        dst_mcv = lhs;
+    // Allocate 1 or 2 registers
+    if (lhs_is_register) {
+        // Move RHS to register
+        dst_mcv = MCValue{ .register = try self.register_manager.allocReg(null) };
+        rhs_mcv = dst_mcv;
     } else {
-        // Allocate 1 or 2 registers
-        if (lhs_is_register) {
-            // Move RHS to register
-            dst_mcv = MCValue{ .register = try self.register_manager.allocReg(null) };
-            rhs_mcv = dst_mcv;
-        } else {
-            // Move LHS and RHS to register
-            const regs = try self.register_manager.allocRegs(2, .{ null, null });
-            lhs_mcv = MCValue{ .register = regs[0] };
-            rhs_mcv = MCValue{ .register = regs[1] };
-            dst_mcv = lhs_mcv;
-        }
+        // Move LHS and RHS to register
+        const regs = try self.register_manager.allocRegs(2, .{ null, null });
+        lhs_mcv = MCValue{ .register = regs[0] };
+        rhs_mcv = MCValue{ .register = regs[1] };
+        dst_mcv = lhs_mcv;
     }
 
     // Move the operands to the newly allocated registers
@@ -2132,7 +2134,7 @@ fn genArmInlineMemcpy(
         .data = .{ .rr_offset = .{
             .rt = tmp,
             .rn = src,
-            .offset = .{ .offset = Instruction.Offset.reg(count, 0) },
+            .offset = .{ .offset = Instruction.Offset.reg(count, .none) },
         } },
     });
 
@@ -2142,7 +2144,7 @@ fn genArmInlineMemcpy(
         .data = .{ .rr_offset = .{
             .rt = tmp,
             .rn = dst,
-            .offset = .{ .offset = Instruction.Offset.reg(count, 0) },
+            .offset = .{ .offset = Instruction.Offset.reg(count, .none) },
         } },
     });
 
@@ -3126,7 +3128,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
                 1, 4 => {
                     const offset = if (math.cast(u12, adj_off)) |imm| blk: {
                         break :blk Instruction.Offset.imm(imm);
-                    } else |_| Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), 0);
+                    } else |_| Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), .none);
 
                     const tag: Mir.Inst.Tag = switch (abi_size) {
                         1 => .strb,
@@ -3450,7 +3452,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
                 1, 4 => {
                     const offset = if (adj_off <= math.maxInt(u12)) blk: {
                         break :blk Instruction.Offset.imm(@intCast(u12, adj_off));
-                    } else Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), 0);
+                    } else Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), .none);
 
                     const tag: Mir.Inst.Tag = switch (abi_size) {
                         1 => .ldrb,
@@ -3536,7 +3538,7 @@ fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) I
                 1, 4 => {
                     const offset = if (math.cast(u12, adj_off)) |imm| blk: {
                         break :blk Instruction.Offset.imm(imm);
-                    } else |_| Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), 0);
+                    } else |_| Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = adj_off }), .none);
 
                     const tag: Mir.Inst.Tag = switch (abi_size) {
                         1 => .strb,