Commit fa8935426b

Eleanor Bartle <eleanor@eleanor-nb.com>
2020-08-18 06:30:00
Cleaned up RISC-V instruction creation, added 32-bit immediates (#6077)
* Implemented all R-type arithmetic/logical instructions * Implemented all I-type arithmetic/logical instructions * Implemented all load and store instructions * Implemented all of RV64I except FENCE
1 parent 3cc1f8b
Changed files (2)
src-self-hosted
src-self-hosted/codegen/riscv64.zig
@@ -1,50 +1,387 @@
 const std = @import("std");
 const DW = std.dwarf;
 
-pub const instructions = struct {
-    pub const CallBreak = packed struct {
-        pub const Mode = packed enum(u12) { ecall, ebreak };
-        opcode: u7 = 0b1110011,
-        unused1: u5 = 0,
-        unused2: u3 = 0,
-        unused3: u5 = 0,
-        mode: u12, //: Mode
-    };
-    // I-type
-    pub const Addi = packed struct {
-        pub const Mode = packed enum(u3) { addi = 0b000, slti = 0b010, sltiu = 0b011, xori = 0b100, ori = 0b110, andi = 0b111 };
-        opcode: u7 = 0b0010011,
+// TODO: this is only tagged to facilitate the monstrosity.
+// Once packed structs work make it packed.
+pub const Instruction = union(enum) {
+    R: packed struct {
+        opcode: u7,
         rd: u5,
-        mode: u3, //: Mode
+        funct3: u3,
         rs1: u5,
-        imm: i12,
-    };
-    pub const Lui = packed struct {
-        opcode: u7 = 0b0110111,
+        rs2: u5,
+        funct7: u7,
+    },
+    I: packed struct {
+        opcode: u7,
         rd: u5,
-        imm: i20,
-    };
-    // I_type
-    pub const Load = packed struct {
-        pub const Mode = packed enum(u3) { ld = 0b011, lwu = 0b110 };
-        opcode: u7 = 0b0000011,
-        rd: u5,
-        mode: u3, //: Mode
+        funct3: u3,
         rs1: u5,
-        offset: i12,
-    };
-    // I-type
-    pub const Jalr = packed struct {
-        opcode: u7 = 0b1100111,
-        rd: u5,
-        mode: u3 = 0,
+        imm0_11: u12,
+    },
+    S: packed struct {
+        opcode: u7,
+        imm0_4: u5,
+        funct3: u3,
+        rs1: u5,
+        rs2: u5,
+        imm5_11: u7,
+    },
+    B: packed struct {
+        opcode: u7,
+        imm11: u1,
+        imm1_4: u4,
+        funct3: u3,
         rs1: u5,
-        offset: i12,
-    };
+        rs2: u5,
+        imm5_10: u6,
+        imm12: u1,
+    },
+    U: packed struct {
+        opcode: u7,
+        rd: u5,
+        imm12_31: u20,
+    },
+    J: packed struct {
+        opcode: u7,
+        rd: u5,
+        imm12_19: u8,
+        imm11: u1,
+        imm1_10: u10,
+        imm20: u1,
+    },
+
+    // TODO: once packed structs work we can remove this monstrosity.
+    /// Returns the 32-bit machine word for this instruction.
+    ///
+    /// Every format except B is reinterpreted directly from its packed struct.
+    /// The B format is assembled by hand, placing each field at its bit offset
+    /// (presumably a workaround for the packed-struct issue in the TODO above).
+    pub fn toU32(self: Instruction) u32 {
+        return switch (self) {
+            .R => |fields| @bitCast(u32, fields),
+            .I => |fields| @bitCast(u32, fields),
+            .S => |fields| @bitCast(u32, fields),
+            .U => |fields| @bitCast(u32, fields),
+            .J => |fields| @bitCast(u32, fields),
+            // The fields occupy disjoint bit ranges, so OR-ing them together
+            // yields the same word as summing them.
+            .B => |b| @as(u32, b.opcode) |
+                (@as(u32, b.imm11) << 7) |
+                (@as(u32, b.imm1_4) << 8) |
+                (@as(u32, b.funct3) << 12) |
+                (@as(u32, b.rs1) << 15) |
+                (@as(u32, b.rs2) << 20) |
+                (@as(u32, b.imm5_10) << 25) |
+                (@as(u32, b.imm12) << 31),
+        };
+    }
+
+    /// Builds an R-format instruction from an opcode, the funct3/funct7
+    /// selectors, a destination register and two source registers.
+    /// Fields are listed in the same order as the packed `R` layout.
+    fn rType(op: u7, fn3: u3, fn7: u7, rd: Register, r1: Register, r2: Register) Instruction {
+        return Instruction{
+            .R = .{
+                .opcode = op,
+                .rd = @enumToInt(rd),
+                .funct3 = fn3,
+                .rs1 = @enumToInt(r1),
+                .rs2 = @enumToInt(r2),
+                .funct7 = fn7,
+            },
+        };
+    }
+
+    /// Builds an I-format instruction.
+    ///
+    /// Callers pass the immediate as a signed 12-bit value; the same bit
+    /// pattern is stored in the unsigned `imm0_11` field.
+    fn iType(op: u7, fn3: u3, rd: Register, r1: Register, imm: i12) Instruction {
+        return Instruction{
+            .I = .{
+                .opcode = op,
+                .rd = @enumToInt(rd),
+                .funct3 = fn3,
+                .rs1 = @enumToInt(r1),
+                .imm0_11 = @bitCast(u12, imm),
+            },
+        };
+    }
+
+    /// Builds an S-format instruction.
+    ///
+    /// The signed 12-bit immediate is split into its low 5 bits (`imm0_4`)
+    /// and its high 7 bits (`imm5_11`).
+    fn sType(op: u7, fn3: u3, r1: Register, r2: Register, imm: i12) Instruction {
+        const bits = @bitCast(u12, imm);
+        const low5 = @truncate(u5, bits);
+        const high7 = @truncate(u7, bits >> 5);
+
+        return Instruction{
+            .S = .{
+                .opcode = op,
+                .imm0_4 = low5,
+                .funct3 = fn3,
+                .rs1 = @enumToInt(r1),
+                .rs2 = @enumToInt(r2),
+                .imm5_11 = high7,
+            },
+        };
+    }
+
+    /// Builds a B-format instruction.
+    ///
+    /// `imm` is the offset itself (bit 0 included), not a pre-shifted field
+    /// value, so call sites need no adjustment and an odd offset can be
+    /// rejected here. The J-format builder follows the same convention.
+    /// Panics if bit 0 of `imm` is set.
+    fn bType(op: u7, fn3: u3, r1: Register, r2: Register, imm: i13) Instruction {
+        const offset_bits = @bitCast(u13, imm);
+        if (@truncate(u1, offset_bits) == 1) @panic("Internal error: misaligned branch target");
+
+        return Instruction{
+            .B = .{
+                .opcode = op,
+                .imm11 = @truncate(u1, offset_bits >> 11),
+                .imm1_4 = @truncate(u4, offset_bits >> 1),
+                .funct3 = fn3,
+                .rs1 = @enumToInt(r1),
+                .rs2 = @enumToInt(r2),
+                .imm5_10 = @truncate(u6, offset_bits >> 5),
+                .imm12 = @truncate(u1, offset_bits >> 12),
+            },
+        };
+    }
+
+    /// Builds a U-format instruction.
+    ///
+    /// `imm` is stored as-is in the 20-bit `imm12_31` field: the caller passes
+    /// the already-extracted upper 20 bits rather than a full-width constant.
+    fn uType(op: u7, rd: Register, imm: i20) Instruction {
+        return Instruction{
+            .U = .{
+                .opcode = op,
+                .rd = @enumToInt(rd),
+                .imm12_31 = @bitCast(u20, imm),
+            },
+        };
+    }
+
+    /// Builds a J-format instruction.
+    ///
+    /// `imm` is the offset itself (bit 0 included), not a pre-shifted field
+    /// value. Panics if bit 0 of `imm` is set. The offset bits are scattered
+    /// into the `imm12_19`, `imm11`, `imm1_10` and `imm20` fields.
+    fn jType(op: u7, rd: Register, imm: i21) Instruction {
+        const umm = @bitCast(u21, imm);
+        if (umm % 2 != 0) @panic("Internal error: misaligned jump target");
+
+        return Instruction{
+            .J = .{
+                .opcode = op,
+                .rd = @enumToInt(rd),
+                .imm12_19 = @truncate(u8, umm >> 12),
+                // Bit 11 of the offset, not bit 1.
+                .imm11 = @truncate(u1, umm >> 11),
+                .imm1_10 = @truncate(u10, umm >> 1),
+                .imm20 = @truncate(u1, umm >> 20),
+            },
+        };
+    }
+
+    // The meat and potatoes. Arguments are in the order in which they would appear in assembly code.
+
+    // Arithmetic/Logical, Register-Register
+
+    // Every 64-bit register-register operation below shares this opcode;
+    // funct3 and funct7 select the individual operation.
+    const op_reg: u7 = 0b0110011;
+
+    /// Encodes `add rd, r1, r2`.
+    pub fn add(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg, 0b000, 0b0000000, rd, r1, r2);
+    }
+
+    /// Encodes `sub rd, r1, r2` (same funct3 as `add`, different funct7).
+    pub fn sub(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg, 0b000, 0b0100000, rd, r1, r2);
+    }
+
+    /// Encodes `and rd, r1, r2`.
+    pub fn @"and"(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg, 0b111, 0b0000000, rd, r1, r2);
+    }
+
+    /// Encodes `or rd, r1, r2`.
+    pub fn @"or"(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg, 0b110, 0b0000000, rd, r1, r2);
+    }
+
+    /// Encodes `xor rd, r1, r2`.
+    pub fn xor(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg, 0b100, 0b0000000, rd, r1, r2);
+    }
+
+    /// Encodes `sll rd, r1, r2`.
+    pub fn sll(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg, 0b001, 0b0000000, rd, r1, r2);
+    }
+
+    /// Encodes `srl rd, r1, r2`.
+    pub fn srl(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg, 0b101, 0b0000000, rd, r1, r2);
+    }
+
+    /// Encodes `sra rd, r1, r2` (same funct3 as `srl`, different funct7).
+    pub fn sra(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg, 0b101, 0b0100000, rd, r1, r2);
+    }
+
+    /// Encodes `slt rd, r1, r2`.
+    pub fn slt(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg, 0b010, 0b0000000, rd, r1, r2);
+    }
+
+    /// Encodes `sltu rd, r1, r2`.
+    pub fn sltu(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg, 0b011, 0b0000000, rd, r1, r2);
+    }
+
+    // Arithmetic/Logical, Register-Register (32-bit)
+
+    /// Encodes `addw rd, r1, r2`.
+    pub fn addw(rd: Register, r1: Register, r2: Register) Instruction {
+        // rType takes (opcode, funct3, funct7, rd, r1, r2). addw shares its
+        // opcode and funct3 with subw and is distinguished by funct7 = 0.
+        return rType(0b0111011, 0b000, 0b0000000, rd, r1, r2);
+    }
+
+    // Opcode shared by the 32-bit register-register operations below.
+    const op_reg32: u7 = 0b0111011;
+
+    /// Encodes `subw rd, r1, r2`.
+    pub fn subw(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg32, 0b000, 0b0100000, rd, r1, r2);
+    }
+
+    /// Encodes `sllw rd, r1, r2`.
+    pub fn sllw(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg32, 0b001, 0b0000000, rd, r1, r2);
+    }
+
+    /// Encodes `srlw rd, r1, r2`.
+    pub fn srlw(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg32, 0b101, 0b0000000, rd, r1, r2);
+    }
+
+    /// Encodes `sraw rd, r1, r2` (same funct3 as `srlw`, different funct7).
+    pub fn sraw(rd: Register, r1: Register, r2: Register) Instruction {
+        return rType(op_reg32, 0b101, 0b0100000, rd, r1, r2);
+    }
+
+    // Arithmetic/Logical, Register-Immediate
+
+    // Opcode shared by the 64-bit register-immediate operations below.
+    const op_imm: u7 = 0b0010011;
+
+    /// Encodes `addi rd, r1, imm`.
+    pub fn addi(rd: Register, r1: Register, imm: i12) Instruction {
+        return iType(op_imm, 0b000, rd, r1, imm);
+    }
+
+    /// Encodes `andi rd, r1, imm`.
+    pub fn andi(rd: Register, r1: Register, imm: i12) Instruction {
+        return iType(op_imm, 0b111, rd, r1, imm);
+    }
+
+    /// Encodes `ori rd, r1, imm`.
+    pub fn ori(rd: Register, r1: Register, imm: i12) Instruction {
+        return iType(op_imm, 0b110, rd, r1, imm);
+    }
+
+    /// Encodes `xori rd, r1, imm`.
+    pub fn xori(rd: Register, r1: Register, imm: i12) Instruction {
+        return iType(op_imm, 0b100, rd, r1, imm);
+    }
+
+    /// Encodes `slli rd, r1, shamt`; the 6-bit shift amount occupies the low
+    /// bits of the immediate field.
+    pub fn slli(rd: Register, r1: Register, shamt: u6) Instruction {
+        return iType(op_imm, 0b001, rd, r1, shamt);
+    }
+
+    /// Encodes `srli rd, r1, shamt`; the 6-bit shift amount occupies the low
+    /// bits of the immediate field.
+    pub fn srli(rd: Register, r1: Register, shamt: u6) Instruction {
+        return iType(op_imm, 0b101, rd, r1, shamt);
+    }
+
+    /// Encodes `srai rd, r1, shamt`.
+    ///
+    /// Uses the same opcode and funct3 as `srli`; bit 10 of the immediate
+    /// field is what distinguishes the two encodings.
+    pub fn srai(rd: Register, r1: Register, shamt: u6) Instruction {
+        // Widen the shift amount to i12 before combining: mixing the literal
+        // with a u6 would force 1024 into a u6, which it does not fit.
+        const imm: i12 = @as(i12, shamt) | (1 << 10);
+        return iType(0b0010011, 0b101, rd, r1, imm);
+    }
+
+    /// Encodes `slti rd, r1, imm`.
+    pub fn slti(rd: Register, r1: Register, imm: i12) Instruction {
+        const opcode: u7 = 0b0010011;
+        const funct3: u3 = 0b010;
+        return iType(opcode, funct3, rd, r1, imm);
+    }
+
+    /// Encodes `sltiu rd, r1, imm`.
+    ///
+    /// The immediate is accepted as an unsigned 12-bit value and its bit
+    /// pattern is passed unchanged to the I-format builder.
+    pub fn sltiu(rd: Register, r1: Register, imm: u12) Instruction {
+        const opcode: u7 = 0b0010011;
+        const funct3: u3 = 0b011;
+        const raw = @bitCast(i12, imm);
+        return iType(opcode, funct3, rd, r1, raw);
+    }
+
+    // Arithmetic/Logical, Register-Immediate (32-bit)
+
+    // Opcode shared by the 32-bit register-immediate operations below.
+    const op_imm32: u7 = 0b0011011;
+
+    /// Encodes `addiw rd, r1, imm`.
+    pub fn addiw(rd: Register, r1: Register, imm: i12) Instruction {
+        return iType(op_imm32, 0b000, rd, r1, imm);
+    }
+
+    /// Encodes `slliw rd, r1, shamt`; the 5-bit shift amount occupies the low
+    /// bits of the immediate field.
+    pub fn slliw(rd: Register, r1: Register, shamt: u5) Instruction {
+        return iType(op_imm32, 0b001, rd, r1, shamt);
+    }
+
+    /// Encodes `srliw rd, r1, shamt`; the 5-bit shift amount occupies the low
+    /// bits of the immediate field.
+    pub fn srliw(rd: Register, r1: Register, shamt: u5) Instruction {
+        return iType(op_imm32, 0b101, rd, r1, shamt);
+    }
+
+    /// Encodes `sraiw rd, r1, shamt`.
+    ///
+    /// Uses the same opcode and funct3 as `srliw`; bit 10 of the immediate
+    /// field is what distinguishes the two encodings.
+    pub fn sraiw(rd: Register, r1: Register, shamt: u5) Instruction {
+        // Widen the shift amount to i12 before combining: mixing the literal
+        // with a u5 would force 1024 into a u5, which it does not fit.
+        const imm: i12 = @as(i12, shamt) | (1 << 10);
+        return iType(0b0011011, 0b101, rd, r1, imm);
+    }
+
+    // Upper Immediate
+
+    /// Encodes `lui rd, imm`; `imm` is the 20-bit field value stored in bits 12..31.
+    pub fn lui(rd: Register, imm: i20) Instruction {
+        const opcode: u7 = 0b0110111;
+        return uType(opcode, rd, imm);
+    }
+
+    /// Encodes `auipc rd, imm`; `imm` is the 20-bit field value stored in bits 12..31.
+    pub fn auipc(rd: Register, imm: i20) Instruction {
+        const opcode: u7 = 0b0010111;
+        return uType(opcode, rd, imm);
+    }
+
+    // Load
+
+    // All loads are I-format with the same opcode; funct3 selects the variant.
+    // Arguments follow the assembly order `rd, offset(base)`.
+    fn loadType(fn3: u3, rd: Register, offset: i12, base: Register) Instruction {
+        return iType(0b0000011, fn3, rd, base, offset);
+    }
+
+    /// Encodes `ld rd, offset(base)`.
+    pub fn ld(rd: Register, offset: i12, base: Register) Instruction {
+        return loadType(0b011, rd, offset, base);
+    }
+
+    /// Encodes `lw rd, offset(base)`.
+    pub fn lw(rd: Register, offset: i12, base: Register) Instruction {
+        return loadType(0b010, rd, offset, base);
+    }
+
+    /// Encodes `lwu rd, offset(base)`.
+    pub fn lwu(rd: Register, offset: i12, base: Register) Instruction {
+        return loadType(0b110, rd, offset, base);
+    }
+
+    /// Encodes `lh rd, offset(base)`.
+    pub fn lh(rd: Register, offset: i12, base: Register) Instruction {
+        return loadType(0b001, rd, offset, base);
+    }
+
+    /// Encodes `lhu rd, offset(base)`.
+    pub fn lhu(rd: Register, offset: i12, base: Register) Instruction {
+        return loadType(0b101, rd, offset, base);
+    }
+
+    /// Encodes `lb rd, offset(base)`.
+    pub fn lb(rd: Register, offset: i12, base: Register) Instruction {
+        return loadType(0b000, rd, offset, base);
+    }
+
+    /// Encodes `lbu rd, offset(base)`.
+    pub fn lbu(rd: Register, offset: i12, base: Register) Instruction {
+        return loadType(0b100, rd, offset, base);
+    }
+
+    // Store
+
+    // All stores are S-format with the same opcode; funct3 selects the variant.
+    // The base register goes in rs1 and the stored register in rs2.
+    fn storeType(fn3: u3, rs: Register, offset: i12, base: Register) Instruction {
+        return sType(0b0100011, fn3, base, rs, offset);
+    }
+
+    /// Encodes `sd rs, offset(base)`.
+    pub fn sd(rs: Register, offset: i12, base: Register) Instruction {
+        return storeType(0b011, rs, offset, base);
+    }
+
+    /// Encodes `sw rs, offset(base)`.
+    pub fn sw(rs: Register, offset: i12, base: Register) Instruction {
+        return storeType(0b010, rs, offset, base);
+    }
+
+    /// Encodes `sh rs, offset(base)`.
+    pub fn sh(rs: Register, offset: i12, base: Register) Instruction {
+        return storeType(0b001, rs, offset, base);
+    }
+
+    /// Encodes `sb rs, offset(base)`.
+    pub fn sb(rs: Register, offset: i12, base: Register) Instruction {
+        return storeType(0b000, rs, offset, base);
+    }
+
+    // Fence
+    // TODO: implement fence
+
+    // Branch
+
+    // The branch encoders take the offset as a raw 13-bit pattern (`u13`),
+    // while `bType` expects the signed `i13` view of those bits. There is no
+    // implicit u13 -> i13 coercion, so each encoder reinterprets the bits
+    // explicitly. A backward branch is passed as its two's-complement pattern.
+    // `bType` panics if bit 0 of the offset is set.
+
+    /// Encodes `beq r1, r2, offset`.
+    pub fn beq(r1: Register, r2: Register, offset: u13) Instruction {
+        return bType(0b1100011, 0b000, r1, r2, @bitCast(i13, offset));
+    }
+
+    /// Encodes `bne r1, r2, offset`.
+    pub fn bne(r1: Register, r2: Register, offset: u13) Instruction {
+        return bType(0b1100011, 0b001, r1, r2, @bitCast(i13, offset));
+    }
+
+    /// Encodes `blt r1, r2, offset`.
+    pub fn blt(r1: Register, r2: Register, offset: u13) Instruction {
+        return bType(0b1100011, 0b100, r1, r2, @bitCast(i13, offset));
+    }
+
+    /// Encodes `bge r1, r2, offset`.
+    pub fn bge(r1: Register, r2: Register, offset: u13) Instruction {
+        return bType(0b1100011, 0b101, r1, r2, @bitCast(i13, offset));
+    }
+
+    /// Encodes `bltu r1, r2, offset`.
+    pub fn bltu(r1: Register, r2: Register, offset: u13) Instruction {
+        return bType(0b1100011, 0b110, r1, r2, @bitCast(i13, offset));
+    }
+
+    /// Encodes `bgeu r1, r2, offset`.
+    pub fn bgeu(r1: Register, r2: Register, offset: u13) Instruction {
+        return bType(0b1100011, 0b111, r1, r2, @bitCast(i13, offset));
+    }
+
+    // Jump
+
+    /// Encodes `jal link, offset` as a J-format instruction.
+    /// The J-format builder panics if bit 0 of `offset` is set.
+    pub fn jal(link: Register, offset: i21) Instruction {
+        const opcode: u7 = 0b1101111;
+        return jType(opcode, link, offset);
+    }
+
+    /// Encodes `jalr link, offset(base)` as an I-format instruction:
+    /// `link` is the destination register and `base` is rs1.
+    pub fn jalr(link: Register, offset: i12, base: Register) Instruction {
+        const opcode: u7 = 0b1100111;
+        const funct3: u3 = 0b000;
+        return iType(opcode, funct3, link, base, offset);
+    }
+
+    // System
+
+    // Both system instructions are I-format with every register field set to
+    // `zero`; only the immediate differs.
+    const op_system: u7 = 0b1110011;
+
+    /// `ecall`: immediate 0.
+    pub const ecall: Instruction = iType(op_system, 0b000, .zero, .zero, 0x000);
+    /// `ebreak`: immediate 1.
+    pub const ebreak: Instruction = iType(op_system, 0b000, .zero, .zero, 0x001);
 };
 
 // zig fmt: off
-pub const RawRegister = enum(u8) {
+pub const RawRegister = enum(u5) {
     x0,  x1,  x2,  x3,  x4,  x5,  x6,  x7,
     x8,  x9,  x10, x11, x12, x13, x14, x15,
     x16, x17, x18, x19, x20, x21, x22, x23,
@@ -55,7 +392,7 @@ pub const RawRegister = enum(u8) {
     }
 };
 
-pub const Register = enum(u8) {
+pub const Register = enum(u5) {
     // 64 bit registers
     zero, // zero
     ra, // return address. caller saved
@@ -76,11 +413,6 @@ pub const Register = enum(u8) {
         return null;
     }
 
-    /// Returns the register's id.
-    pub fn id(self: @This()) u5 {
-        return @truncate(u5, @enumToInt(self));
-    }
-
     /// Returns the index into `callee_preserved_regs`.
     pub fn allocIndex(self: Register) ?u4 {
         inline for(callee_preserved_regs) |cpreg, i| {
@@ -90,7 +422,7 @@ pub const Register = enum(u8) {
     }
 
     pub fn dwarfLocOp(reg: Register) u8 {
-        return @enumToInt(reg) + DW.OP_reg0;
+        return @as(u8, @enumToInt(reg)) + DW.OP_reg0;
     }
 };
 
src-self-hosted/codegen.zig
@@ -1113,11 +1113,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     try self.code.append(0xcc); // int3
                 },
                 .riscv64 => {
-                    const full = @bitCast(u32, instructions.CallBreak{
-                        .mode = @enumToInt(instructions.CallBreak.Mode.ebreak),
-                    });
-
-                    mem.writeIntLittle(u32, try self.code.addManyAsArray(4), full);
+                    mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ebreak.toU32());
                 },
                 else => return self.fail(src, "TODO implement @breakpoint() for {}", .{self.target.cpu.arch}),
             }
@@ -1193,12 +1189,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                             const got_addr = @intCast(u32, got.p_vaddr + func.owner_decl.link.elf.offset_table_index * ptr_bytes);
 
                             try self.genSetReg(inst.base.src, .ra, .{ .memory = got_addr });
-                            const jalr = instructions.Jalr{
-                                .rd = Register.ra.id(),
-                                .rs1 = Register.ra.id(),
-                                .offset = 0,
-                            };
-                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), @bitCast(u32, jalr));
+                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.jalr(.ra, 0, .ra).toU32());
                         } else {
                             return self.fail(inst.base.src, "TODO implement calling bitcasted functions", .{});
                         }
@@ -1255,12 +1246,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4);
                 },
                 .riscv64 => {
-                    const jalr = instructions.Jalr{
-                        .rd = Register.zero.id(),
-                        .rs1 = Register.ra.id(),
-                        .offset = 0,
-                    };
-                    mem.writeIntLittle(u32, try self.code.addManyAsArray(4), @bitCast(u32, jalr));
+                    mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.jalr(.zero, 0, .ra).toU32());
                 },
                 else => return self.fail(src, "TODO implement return for {}", .{self.target.cpu.arch}),
             }
@@ -1512,11 +1498,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     }
 
                     if (mem.eql(u8, inst.asm_source, "ecall")) {
-                        const full = @bitCast(u32, instructions.CallBreak{
-                            .mode = @enumToInt(instructions.CallBreak.Mode.ecall),
-                        });
-
-                        mem.writeIntLittle(u32, try self.code.addManyAsArray(4), full);
+                        mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ecall.toU32());
                     } else {
                         return self.fail(inst.base.src, "TODO implement support for more riscv64 assembly instructions", .{});
                     }
@@ -1723,36 +1705,17 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     .immediate => |unsigned_x| {
                         const x = @bitCast(i64, unsigned_x);
                         if (math.minInt(i12) <= x and x <= math.maxInt(i12)) {
-                            const instruction = @bitCast(u32, instructions.Addi{
-                                .mode = @enumToInt(instructions.Addi.Mode.addi),
-                                .imm = @truncate(i12, x),
-                                .rs1 = Register.zero.id(),
-                                .rd = reg.id(),
-                            });
-
-                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), instruction);
+                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.addi(reg, .zero, @truncate(i12, x)).toU32());
                             return;
                         }
                         if (math.minInt(i32) <= x and x <= math.maxInt(i32)) {
-                            const split = @bitCast(packed struct {
-                                low12: i12,
-                                up20: i20,
-                            }, @truncate(i32, x));
-                            if (split.low12 < 0) return self.fail(src, "TODO support riscv64 genSetReg i32 immediates with 12th bit set to 1", .{});
-
-                            const lui = @bitCast(u32, instructions.Lui{
-                                .imm = split.up20,
-                                .rd = reg.id(),
-                            });
-                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), lui);
+                            const lo12 = @truncate(i12, x);
+                            const carry: i32 = if (lo12 < 0) 1 else 0;
+                            const hi20 = @truncate(i20, (x >> 12) +% carry);
 
-                            const addi = @bitCast(u32, instructions.Addi{
-                                .mode = @enumToInt(instructions.Addi.Mode.addi),
-                                .imm = @truncate(i12, split.low12),
-                                .rs1 = reg.id(),
-                                .rd = reg.id(),
-                            });
-                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), addi);
+                            // TODO: add test case for 32-bit immediate
+                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.lui(reg, hi20).toU32());
+                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.addi(reg, reg, lo12).toU32());
                             return;
                         }
                         // li rd, immediate
@@ -1764,14 +1727,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         // If the type is a pointer, it means the pointer address is at this memory location.
                         try self.genSetReg(src, reg, .{ .immediate = addr });
 
-                        const ld = @bitCast(u32, instructions.Load{
-                            .mode = @enumToInt(instructions.Load.Mode.ld),
-                            .rs1 = reg.id(),
-                            .rd = reg.id(),
-                            .offset = 0,
-                        });
-
-                        mem.writeIntLittle(u32, try self.code.addManyAsArray(4), ld);
+                        mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ld(reg, 0, reg).toU32());
                         // LOAD imm=[i12 offset = 0], rs1 =
 
                         // return self.fail("TODO implement genSetReg memory for riscv64");