Commit 031d8248e0

David Rubin <daviru007@icloud.com>
2024-05-11 08:11:27
riscv: first sign of floats!
1 parent 7ed2f21
src/arch/riscv64/abi.zig
@@ -238,62 +238,81 @@ fn classifyStruct(
     }
 }
 
-pub const callee_preserved_regs = [_]Register{
-    // .s0 is ommited to be used as a frame pointer
-    .s1, .s2, .s3, .s4, .s5, .s6, .s7, .s8, .s9, .s10, .s11,
-};
+const allocatable_registers = Registers.Integer.all_regs ++ Registers.Float.all_regs;
+pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers);
 
-pub const function_arg_regs = [_]Register{
-    .a0, .a1, .a2, .a3, .a4, .a5, .a6, .a7,
-};
+// Register classes
+const RegisterBitSet = RegisterManager.RegisterBitSet;
 
-pub const function_ret_regs = [_]Register{
-    .a0, .a1,
+pub const RegisterClass = enum {
+    int,
+    float,
 };
 
-pub const temporary_regs = [_]Register{
-    .t0, .t1, .t2, .t3, .t4, .t5, .t6,
-};
+pub const Registers = struct {
+    pub const all_preserved = Integer.callee_preserved_regs ++ Float.callee_preserved_regs;
 
-const allocatable_registers = callee_preserved_regs ++ function_arg_regs ++ temporary_regs;
-pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers);
+    pub const Integer = struct {
+        // zig fmt: off
+        pub const general_purpose = initRegBitSet(0,                                                 callee_preserved_regs.len);
+        pub const function_arg    = initRegBitSet(callee_preserved_regs.len,                         function_arg_regs.len);
+        pub const function_ret    = initRegBitSet(callee_preserved_regs.len,                         function_ret_regs.len);
+        pub const temporary       = initRegBitSet(callee_preserved_regs.len + function_arg_regs.len, temporary_regs.len);
+        // zig fmt: on
 
-// Register classes
-const RegisterBitSet = RegisterManager.RegisterBitSet;
-pub const RegisterClass = struct {
-    pub const gp: RegisterBitSet = blk: {
-        var set = RegisterBitSet.initEmpty();
-        set.setRangeValue(.{
-            .start = 0,
-            .end = callee_preserved_regs.len,
-        }, true);
-        break :blk set;
-    };
+        pub const callee_preserved_regs = [_]Register{
+            // .s0 is omitted to be used as the frame pointer register
+            .s1, .s2, .s3, .s4, .s5, .s6, .s7, .s8, .s9, .s10, .s11,
+        };
 
-    pub const fa: RegisterBitSet = blk: {
-        var set = RegisterBitSet.initEmpty();
-        set.setRangeValue(.{
-            .start = callee_preserved_regs.len,
-            .end = callee_preserved_regs.len + function_arg_regs.len,
-        }, true);
-        break :blk set;
-    };
+        pub const function_arg_regs = [_]Register{
+            .a0, .a1, .a2, .a3, .a4, .a5, .a6, .a7,
+        };
+
+        pub const function_ret_regs = [_]Register{
+            .a0, .a1,
+        };
+
+        pub const temporary_regs = [_]Register{
+            .t0, .t1, .t2, .t3, .t4, .t5, .t6,
+        };
 
-    pub const fr: RegisterBitSet = blk: {
-        var set = RegisterBitSet.initEmpty();
-        set.setRangeValue(.{
-            .start = callee_preserved_regs.len,
-            .end = callee_preserved_regs.len + function_ret_regs.len,
-        }, true);
-        break :blk set;
+        pub const all_regs = callee_preserved_regs ++ function_arg_regs ++ temporary_regs;
     };
 
-    pub const tp: RegisterBitSet = blk: {
-        var set = RegisterBitSet.initEmpty();
-        set.setRangeValue(.{
-            .start = callee_preserved_regs.len + function_arg_regs.len,
-            .end = callee_preserved_regs.len + function_arg_regs.len + temporary_regs.len,
-        }, true);
-        break :blk set;
+    pub const Float = struct {
+        // zig fmt: off
+        pub const general_purpose = initRegBitSet(Integer.all_regs.len,                                                     callee_preserved_regs.len);
+        pub const function_arg    = initRegBitSet(Integer.all_regs.len + callee_preserved_regs.len,                         function_arg_regs.len);
+        pub const function_ret    = initRegBitSet(Integer.all_regs.len + callee_preserved_regs.len,                         function_ret_regs.len);
+        pub const temporary       = initRegBitSet(Integer.all_regs.len + callee_preserved_regs.len + function_arg_regs.len, temporary_regs.len);
+        // zig fmt: on
+
+        pub const callee_preserved_regs = [_]Register{
+            .fs0, .fs1, .fs2, .fs3, .fs4, .fs5, .fs6, .fs7, .fs8, .fs9, .fs10, .fs11,
+        };
+
+        pub const function_arg_regs = [_]Register{
+            .fa0, .fa1, .fa2, .fa3, .fa4, .fa5, .fa6, .fa7,
+        };
+
+        pub const function_ret_regs = [_]Register{
+            .fa0, .fa1,
+        };
+
+        pub const temporary_regs = [_]Register{
+            .ft0, .ft1, .ft2, .ft3, .ft4, .ft5, .ft6, .ft7, .ft8, .ft9, .ft10, .ft11,
+        };
+
+        pub const all_regs = callee_preserved_regs ++ function_arg_regs ++ temporary_regs;
     };
 };
+
+fn initRegBitSet(start: usize, length: usize) RegisterBitSet {
+    var set = RegisterBitSet.initEmpty();
+    set.setRangeValue(.{
+        .start = start,
+        .end = start + length,
+    }, true);
+    return set;
+}
src/arch/riscv64/bits.zig
@@ -4,6 +4,7 @@ const assert = std.debug.assert;
 const testing = std.testing;
 const Encoding = @import("Encoding.zig");
 const Mir = @import("Mir.zig");
+const abi = @import("abi.zig");
 
 pub const Memory = struct {
     base: Base,
@@ -154,10 +155,10 @@ pub const Immediate = union(enum) {
     }
 };
 
-pub const Register = enum(u6) {
+pub const Register = enum(u8) {
     // zig fmt: off
 
-    // general purpose registers
+    // base extension registers
 
     zero, // zero
     ra, // return address. caller saved
@@ -178,12 +179,48 @@ pub const Register = enum(u6) {
     x24, x25, x26, x27, x28, x29, x30, x31,
 
 
+    // F extension registers
+
+    ft0, ft1, ft2, ft3, ft4, ft5, ft6, ft7, // float temporaries. caller saved.
+    fs0, fs1, // float saved. callee saved.
+    fa0, fa1, // float arg/ret. caller saved.
+    fa2, fa3, fa4, fa5, fa6, fa7, // float arg. caller saved.
+    fs2, fs3, fs4, fs5, fs6, fs7, fs8, fs9, fs10, fs11,  // float saved. callee saved.
+    ft8, ft9, ft10, ft11, // float temporaries. caller saved.
+
+    // this register is accessed only through API instructions instead of directly
+    // fcsr, 
+
+    f0, f1,  f2,  f3,  f4,  f5,  f6,  f7,  
+    f8, f9,  f10, f11, f12, f13, f14, f15, 
+    f16, f17, f18, f19, f20, f21, f22, f23, 
+    f24, f25, f26, f27, f28, f29, f30, f31, 
+
     // zig fmt: on
 
-    /// Returns the unique 5-bit ID of this register which is used in
-    /// the machine code
-    pub fn id(self: Register) u5 {
-        return @as(u5, @truncate(@intFromEnum(self)));
+    /// in RISC-V registers are stored as 5 bit IDs and a register can have
+    /// two names. Example being `zero` and `x0` are the same register and have the
+    /// same ID, but are two different entries in the enum. We store floating point
+    /// registers in the same enum. RISC-V uses the same IDs for `f0` and `x0` by
+    /// inferring which register is being talked about given the instruction it's in.
+    ///
+    /// The goal of this function is to return the same ID for `zero` and `x0` but two
+    /// separate IDs for `x0` and `f0`. We will assume that each register set has 32 registers
+    /// and is repeated twice, once for the named version, once for the number version.
+    pub fn id(reg: Register) u7 {
+        const base = switch (@intFromEnum(reg)) {
+            // zig fmt: off
+            @intFromEnum(Register.zero) ... @intFromEnum(Register.x31) => @intFromEnum(Register.zero),
+            @intFromEnum(Register.ft0)  ... @intFromEnum(Register.f31) => @intFromEnum(Register.ft0),
+            else => unreachable,
+            // zig fmt: on
+        };
+
+        return @intCast(base + reg.encodeId());
+    }
+
+    pub fn encodeId(reg: Register) u5 {
+        return @truncate(@intFromEnum(reg));
     }
 
     pub fn dwarfLocOp(reg: Register) u8 {
@@ -192,7 +229,21 @@ pub const Register = enum(u6) {
 
     pub fn bitSize(reg: Register) u32 {
         return switch (@intFromEnum(reg)) {
-            @intFromEnum(Register.zero)...@intFromEnum(Register.x31) => 64,
+            // zig fmt: off
+            @intFromEnum(Register.zero) ... @intFromEnum(Register.x31) => 64,
+            @intFromEnum(Register.ft0)  ... @intFromEnum(Register.f31) => 32,
+            else => unreachable,
+            // zig fmt: on
+        };
+    }
+
+    pub fn class(reg: Register) abi.RegisterClass {
+        return switch (@intFromEnum(reg)) {
+            // zig fmt: off
+            @intFromEnum(Register.zero) ... @intFromEnum(Register.x31) => .int,
+            @intFromEnum(Register.ft0)  ... @intFromEnum(Register.f31) => .float,
+            else => unreachable,
+            // zig fmt: on
         };
     }
 };
src/arch/riscv64/CodeGen.zig
@@ -38,23 +38,9 @@ const Memory = bits.Memory;
 const FrameIndex = bits.FrameIndex;
 const RegisterManager = abi.RegisterManager;
 const RegisterLock = RegisterManager.RegisterLock;
-const callee_preserved_regs = abi.callee_preserved_regs;
-/// General Purpose
-const gp = abi.RegisterClass.gp;
-/// Function Args
-const fa = abi.RegisterClass.fa;
-/// Function Returns
-const fr = abi.RegisterClass.fr;
-/// Temporary Use
-const tp = abi.RegisterClass.tp;
 
 const InnerError = CodeGenError || error{OutOfRegisters};
 
-const RegisterView = enum(u1) {
-    caller,
-    callee,
-};
-
 gpa: Allocator,
 air: Air,
 mod: *Package.Module,
@@ -919,10 +905,24 @@ pub fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 {
     return result;
 }
 
+const required_features = [_]Target.riscv.Feature{
+    .d,
+    .m,
+};
+
 fn gen(self: *Self) !void {
     const mod = self.bin_file.comp.module.?;
     const fn_info = mod.typeToFunc(self.fn_type).?;
 
+    inline for (required_features) |feature| {
+        if (!self.hasFeature(feature)) {
+            return self.fail(
+                "target missing required feature {s}",
+                .{@tagName(feature)},
+            );
+        }
+    }
+
     if (fn_info.cc != .Naked) {
         try self.addPseudoNone(.pseudo_dbg_prologue_end);
 
@@ -1454,9 +1454,9 @@ fn computeFrameLayout(self: *Self) !FrameLayout {
     }
 
     var save_reg_list = Mir.RegisterList{};
-    for (callee_preserved_regs) |reg| {
+    for (abi.Registers.all_preserved) |reg| {
         if (self.register_manager.isRegAllocated(reg)) {
-            save_reg_list.push(&callee_preserved_regs, reg);
+            save_reg_list.push(&abi.Registers.all_preserved, reg);
         }
     }
 
@@ -1600,6 +1600,33 @@ fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !FrameIndex {
     }));
 }
 
+fn typeRegClass(self: *Self, ty: Type) abi.RegisterClass {
+    const zcu = self.bin_file.comp.module.?;
+    return switch (ty.zigTypeTag(zcu)) {
+        .Float => .float,
+        .Vector => @panic("TODO: typeRegClass for Vectors"),
+        inline else => .int,
+    };
+}
+
+fn regGeneralClassForType(self: *Self, ty: Type) RegisterManager.RegisterBitSet {
+    const zcu = self.bin_file.comp.module.?;
+    return switch (ty.zigTypeTag(zcu)) {
+        .Float => abi.Registers.Float.general_purpose,
+        .Vector => @panic("TODO: regGeneralClassForType for Vectors"),
+        else => abi.Registers.Integer.general_purpose,
+    };
+}
+
+fn regTempClassForType(self: *Self, ty: Type) RegisterManager.RegisterBitSet {
+    const zcu = self.bin_file.comp.module.?;
+    return switch (ty.zigTypeTag(zcu)) {
+        .Float => abi.Registers.Float.temporary,
+        .Vector => @panic("TODO: regTempClassForType for Vectors"),
+        else => abi.Registers.Integer.temporary,
+    };
+}
+
 fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
     const zcu = self.bin_file.comp.module.?;
     const elem_ty = self.typeOfIndex(inst);
@@ -1608,11 +1635,15 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
         return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(zcu)});
     };
 
-    if (reg_ok) {
-        if (abi_size <= 8) {
-            if (self.register_manager.tryAllocReg(inst, gp)) |reg| {
-                return .{ .register = reg };
-            }
+    const min_size: u32 = switch (elem_ty.zigTypeTag(zcu)) {
+        .Float => 4,
+        .Vector => @panic("allocRegOrMem Vector"),
+        else => 8,
+    };
+
+    if (reg_ok and abi_size <= min_size) {
+        if (self.register_manager.tryAllocReg(inst, self.regGeneralClassForType(elem_ty))) |reg| {
+            return .{ .register = reg };
         }
     }
 
@@ -1623,19 +1654,37 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
 /// Allocates a register from the general purpose set and returns the Register and the Lock.
 ///
 /// Up to the caller to unlock the register later.
-fn allocReg(self: *Self) !struct { Register, RegisterLock } {
-    const reg = try self.register_manager.allocReg(null, gp);
+fn allocReg(self: *Self, reg_class: abi.RegisterClass) !struct { Register, RegisterLock } {
+    if (reg_class == .float and !self.hasFeature(.f))
+        std.debug.panic("allocReg class == float where F isn't enabled", .{});
+
+    const class = switch (reg_class) {
+        .int => abi.Registers.Integer.general_purpose,
+        .float => abi.Registers.Float.general_purpose,
+    };
+
+    const reg = try self.register_manager.allocReg(null, class);
     const lock = self.register_manager.lockRegAssumeUnused(reg);
     return .{ reg, lock };
 }
 
+/// Similar to `allocReg` but will copy the MCValue into the Register unless `operand` is already
+/// a register, in which case it will return a possible lock to that register.
+fn promoteReg(self: *Self, ty: Type, operand: MCValue) !struct { Register, ?RegisterLock } {
+    if (operand == .register) return .{ operand.register, self.register_manager.lockReg(operand.register) };
+
+    const reg, const lock = try self.allocReg(self.typeRegClass(ty));
+    try self.genSetReg(ty, reg, operand);
+    return .{ reg, lock };
+}
+
 fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Register {
     const reg: Register = blk: {
         switch (index) {
             .immediate => |imm| {
                 // Optimisation: if index MCValue is an immediate, we can multiply in `comptime`
                 // and set the register directly to the scaled offset as an immediate.
-                const reg = try self.register_manager.allocReg(null, gp);
+                const reg = try self.register_manager.allocReg(null, self.regGeneralClassForType(index_ty));
                 try self.genSetReg(index_ty, reg, .{ .immediate = imm * elem_size });
                 break :blk reg;
             },
@@ -1671,7 +1720,8 @@ pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void
 /// allocated. A second call to `copyToTmpRegister` may return the same register.
 /// This can have a side effect of spilling instructions to the stack to free up a register.
 fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
-    const reg = try self.register_manager.allocReg(null, tp);
+    log.debug("copyToTmpRegister ty: {}", .{ty.fmt(self.bin_file.comp.module.?)});
+    const reg = try self.register_manager.allocReg(null, self.regTempClassForType(ty));
     try self.genSetReg(ty, reg, mcv);
     return reg;
 }
@@ -1680,7 +1730,8 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
 /// `reg_owner` is the instruction that gets associated with the register in the register table.
 /// This can have a side effect of spilling instructions to the stack to free up a register.
 fn copyToNewRegister(self: *Self, reg_owner: Air.Inst.Index, mcv: MCValue) !MCValue {
-    const reg = try self.register_manager.allocReg(reg_owner, gp);
+    const ty = self.typeOfIndex(reg_owner);
+    const reg = try self.register_manager.allocReg(reg_owner, self.regGeneralClassForType(ty));
     try self.genSetReg(self.typeOfIndex(reg_owner), reg, mcv);
     return MCValue{ .register = reg };
 }
@@ -1797,7 +1848,7 @@ fn airNot(self: *Self, inst: Air.Inst.Index) !void {
                     if (self.reuseOperand(inst, ty_op.operand, 0, operand) and operand == .register)
                     operand.register
                 else
-                    try self.register_manager.allocReg(inst, gp);
+                    (try self.allocRegOrMem(inst, true)).register;
 
                 _ = try self.addInst(.{
                     .tag = .pseudo,
@@ -1843,7 +1894,7 @@ fn airMinMax(
         const lhs_reg, const lhs_lock = blk: {
             if (lhs == .register) break :blk .{ lhs.register, self.register_manager.lockReg(lhs.register) };
 
-            const lhs_reg, const lhs_lock = try self.allocReg();
+            const lhs_reg, const lhs_lock = try self.allocReg(.int);
             try self.genSetReg(lhs_ty, lhs_reg, lhs);
             break :blk .{ lhs_reg, lhs_lock };
         };
@@ -1852,16 +1903,16 @@ fn airMinMax(
         const rhs_reg, const rhs_lock = blk: {
             if (rhs == .register) break :blk .{ rhs.register, self.register_manager.lockReg(rhs.register) };
 
-            const rhs_reg, const rhs_lock = try self.allocReg();
+            const rhs_reg, const rhs_lock = try self.allocReg(.int);
             try self.genSetReg(rhs_ty, rhs_reg, rhs);
             break :blk .{ rhs_reg, rhs_lock };
         };
         defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
 
-        const mask_reg, const mask_lock = try self.allocReg();
+        const mask_reg, const mask_lock = try self.allocReg(.int);
         defer self.register_manager.unlockReg(mask_lock);
 
-        const result_reg, const result_lock = try self.allocReg();
+        const result_reg, const result_lock = try self.allocReg(.int);
         defer self.register_manager.unlockReg(result_lock);
 
         _ = try self.addInst(.{
@@ -1955,20 +2006,6 @@ fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
-/// For all your binary operation needs, this function will generate
-/// the corresponding Mir instruction(s). Returns the location of the
-/// result.
-///
-/// If the binary operation itself happens to be an Air instruction,
-/// pass the corresponding index in the inst parameter. That helps
-/// this function do stuff like reusing operands.
-///
-/// This function does not do any lowering to Mir itself, but instead
-/// looks at the lhs and rhs and determines which kind of lowering
-/// would be best suitable and then delegates the lowering to other
-/// functions.
-///
-/// `maybe_inst` **needs** to be a bin_op, make sure of that.
 fn binOp(
     self: *Self,
     tag: Air.Inst.Tag,
@@ -1991,11 +2028,18 @@ fn binOp(
         .cmp_lt,
         .cmp_lte,
         => {
+            assert(lhs_ty.eql(rhs_ty, zcu));
             switch (lhs_ty.zigTypeTag(zcu)) {
-                .Float => return self.fail("TODO binary operations on floats", .{}),
+                .Float => {
+                    const float_bits = lhs_ty.floatBits(zcu.getTarget());
+                    if (float_bits <= 32) {
+                        return self.binOpFloat(tag, lhs, lhs_ty, rhs, rhs_ty);
+                    } else {
+                        return self.fail("TODO: binary operations for  floats with bits > 32", .{});
+                    }
+                },
                 .Vector => return self.fail("TODO binary operations on vectors", .{}),
                 .Int, .Enum, .ErrorSet => {
-                    assert(lhs_ty.eql(rhs_ty, zcu));
                     const int_info = lhs_ty.intInfo(zcu);
                     if (int_info.bits <= 64) {
                         return self.binOpRegister(tag, lhs, lhs_ty, rhs, rhs_ty);
@@ -2071,14 +2115,7 @@ fn binOp(
         else => return self.fail("TODO binOp {}", .{tag}),
     }
 }
-/// Don't call this function directly. Use binOp instead.
-///
-/// Calling this function signals an intention to generate a Mir
-/// instruction of the form
-///
-///     op dest, lhs, rhs
-///
-/// Asserts that generating an instruction of that form is possible.
+
 fn binOpRegister(
     self: *Self,
     tag: Air.Inst.Tag,
@@ -2087,25 +2124,13 @@ fn binOpRegister(
     rhs: MCValue,
     rhs_ty: Type,
 ) !MCValue {
-    const lhs_reg, const lhs_lock = blk: {
-        if (lhs == .register) break :blk .{ lhs.register, self.register_manager.lockReg(lhs.register) };
-
-        const lhs_reg, const lhs_lock = try self.allocReg();
-        try self.genSetReg(lhs_ty, lhs_reg, lhs);
-        break :blk .{ lhs_reg, lhs_lock };
-    };
+    const lhs_reg, const lhs_lock = try self.promoteReg(lhs_ty, lhs);
     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
 
-    const rhs_reg, const rhs_lock = blk: {
-        if (rhs == .register) break :blk .{ rhs.register, self.register_manager.lockReg(rhs.register) };
-
-        const rhs_reg, const rhs_lock = try self.allocReg();
-        try self.genSetReg(rhs_ty, rhs_reg, rhs);
-        break :blk .{ rhs_reg, rhs_lock };
-    };
+    const rhs_reg, const rhs_lock = try self.promoteReg(rhs_ty, rhs);
     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
 
-    const dest_reg, const dest_lock = try self.allocReg();
+    const dest_reg, const dest_lock = try self.allocReg(.int);
     defer self.register_manager.unlockReg(dest_lock);
 
     const mir_tag: Mir.Inst.Tag = switch (tag) {
@@ -2184,7 +2209,50 @@ fn binOpRegister(
         else => unreachable,
     }
 
-    // generate the struct for OF checks
+    return MCValue{ .register = dest_reg };
+}
+
+fn binOpFloat(
+    self: *Self,
+    tag: Air.Inst.Tag,
+    lhs: MCValue,
+    lhs_ty: Type,
+    rhs: MCValue,
+    rhs_ty: Type,
+) !MCValue {
+    const zcu = self.bin_file.comp.module.?;
+    const float_bits = lhs_ty.floatBits(zcu.getTarget());
+
+    const lhs_reg, const lhs_lock = try self.promoteReg(lhs_ty, lhs);
+    defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
+
+    const rhs_reg, const rhs_lock = try self.promoteReg(rhs_ty, rhs);
+    defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
+
+    const mir_tag: Mir.Inst.Tag = switch (tag) {
+        .add => if (float_bits == 32) .fadds else .faddd,
+        .cmp_eq => if (float_bits == 32) .feqs else .feqd,
+        else => return self.fail("TODO: binOpFloat mir_tag {s}", .{@tagName(tag)}),
+    };
+
+    const return_class: abi.RegisterClass = switch (tag) {
+        .add => .float,
+        .cmp_eq => .int,
+        else => unreachable,
+    };
+
+    const dest_reg, const dest_lock = try self.allocReg(return_class);
+    defer self.register_manager.unlockReg(dest_lock);
+
+    _ = try self.addInst(.{
+        .tag = mir_tag,
+        .ops = .rrr,
+        .data = .{ .r_type = .{
+            .rd = dest_reg,
+            .rs1 = lhs_reg,
+            .rs2 = rhs_reg,
+        } },
+    });
 
     return MCValue{ .register = dest_reg };
 }
@@ -2279,7 +2347,7 @@ fn airAddWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
 
             const shift_amount: u6 = @intCast(Type.usize.bitSize(zcu) - int_info.bits);
 
-            const shift_reg, const shift_lock = try self.allocReg();
+            const shift_reg, const shift_lock = try self.allocReg(.int);
             defer self.register_manager.unlockReg(shift_lock);
 
             _ = try self.addInst(.{
@@ -2357,25 +2425,13 @@ fn airSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
         const result_mcv = try self.allocRegOrMem(inst, false);
         const offset = result_mcv.load_frame;
 
-        const lhs_reg, const lhs_lock = blk: {
-            if (lhs == .register) break :blk .{ lhs.register, self.register_manager.lockReg(lhs.register) };
-
-            const lhs_reg, const lhs_lock = try self.allocReg();
-            try self.genSetReg(lhs_ty, lhs_reg, lhs);
-            break :blk .{ lhs_reg, lhs_lock };
-        };
+        const lhs_reg, const lhs_lock = try self.promoteReg(lhs_ty, lhs);
         defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
 
-        const rhs_reg, const rhs_lock = blk: {
-            if (rhs == .register) break :blk .{ rhs.register, self.register_manager.lockReg(rhs.register) };
-
-            const rhs_reg, const rhs_lock = try self.allocReg();
-            try self.genSetReg(rhs_ty, rhs_reg, rhs);
-            break :blk .{ rhs_reg, rhs_lock };
-        };
+        const rhs_reg, const rhs_lock = try self.promoteReg(rhs_ty, rhs);
         defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
 
-        const dest_reg, const dest_lock = try self.allocReg();
+        const dest_reg, const dest_lock = try self.allocReg(.int);
         defer self.register_manager.unlockReg(dest_lock);
 
         switch (int_info.signedness) {
@@ -2503,18 +2559,12 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
                                         1...8 => {
                                             const max_val = std.math.pow(u16, 2, int_info.bits) - 1;
 
-                                            const overflow_reg, const overflow_lock = try self.allocReg();
-                                            defer self.register_manager.unlockReg(overflow_lock);
-
-                                            const add_reg, const add_lock = blk: {
-                                                if (dest == .register) break :blk .{ dest.register, null };
-
-                                                const add_reg, const add_lock = try self.allocReg();
-                                                try self.genSetReg(lhs_ty, add_reg, dest);
-                                                break :blk .{ add_reg, add_lock };
-                                            };
+                                            const add_reg, const add_lock = try self.promoteReg(lhs_ty, lhs);
                                             defer if (add_lock) |lock| self.register_manager.unlockReg(lock);
 
+                                            const overflow_reg, const overflow_lock = try self.allocReg(.int);
+                                            defer self.register_manager.unlockReg(overflow_lock);
+
                                             _ = try self.addInst(.{
                                                 .tag = .andi,
                                                 .ops = .rri,
@@ -2595,25 +2645,13 @@ fn airBitAnd(self: *Self, inst: Air.Inst.Index) !void {
         const lhs_ty = self.typeOf(bin_op.lhs);
         const rhs_ty = self.typeOf(bin_op.rhs);
 
-        const lhs_reg, const lhs_lock = blk: {
-            if (lhs == .register) break :blk .{ lhs.register, self.register_manager.lockReg(lhs.register) };
-
-            const lhs_reg, const lhs_lock = try self.allocReg();
-            try self.genSetReg(lhs_ty, lhs_reg, lhs);
-            break :blk .{ lhs_reg, lhs_lock };
-        };
+        const lhs_reg, const lhs_lock = try self.promoteReg(lhs_ty, lhs);
         defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
 
-        const rhs_reg, const rhs_lock = blk: {
-            if (rhs == .register) break :blk .{ rhs.register, self.register_manager.lockReg(rhs.register) };
-
-            const rhs_reg, const rhs_lock = try self.allocReg();
-            try self.genSetReg(rhs_ty, rhs_reg, rhs);
-            break :blk .{ rhs_reg, rhs_lock };
-        };
+        const rhs_reg, const rhs_lock = try self.promoteReg(rhs_ty, rhs);
         defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
 
-        const dest_reg, const dest_lock = try self.allocReg();
+        const dest_reg, const dest_lock = try self.allocReg(.int);
         defer self.register_manager.unlockReg(dest_lock);
 
         _ = try self.addInst(.{
@@ -2640,25 +2678,13 @@ fn airBitOr(self: *Self, inst: Air.Inst.Index) !void {
         const lhs_ty = self.typeOf(bin_op.lhs);
         const rhs_ty = self.typeOf(bin_op.rhs);
 
-        const lhs_reg, const lhs_lock = blk: {
-            if (lhs == .register) break :blk .{ lhs.register, self.register_manager.lockReg(lhs.register) };
-
-            const lhs_reg, const lhs_lock = try self.allocReg();
-            try self.genSetReg(lhs_ty, lhs_reg, lhs);
-            break :blk .{ lhs_reg, lhs_lock };
-        };
+        const lhs_reg, const lhs_lock = try self.promoteReg(lhs_ty, lhs);
         defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
 
-        const rhs_reg, const rhs_lock = blk: {
-            if (rhs == .register) break :blk .{ rhs.register, self.register_manager.lockReg(rhs.register) };
-
-            const rhs_reg, const rhs_lock = try self.allocReg();
-            try self.genSetReg(rhs_ty, rhs_reg, rhs);
-            break :blk .{ rhs_reg, rhs_lock };
-        };
+        const rhs_reg, const rhs_lock = try self.promoteReg(rhs_ty, rhs);
         defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
 
-        const dest_reg, const dest_lock = try self.allocReg();
+        const dest_reg, const dest_lock = try self.allocReg(.int);
         defer self.register_manager.unlockReg(dest_lock);
 
         _ = try self.addInst(.{
@@ -3102,7 +3128,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void {
         const elem_ty = array_ty.childType(zcu);
         const elem_abi_size = elem_ty.abiSize(zcu);
 
-        const addr_reg, const addr_reg_lock = try self.allocReg();
+        const addr_reg, const addr_reg_lock = try self.allocReg(.int);
         defer self.register_manager.unlockReg(addr_reg_lock);
 
         switch (array_mcv) {
@@ -3211,46 +3237,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void {
 
 fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-    const result: MCValue = if (self.liveness.isUnused(inst)) .unreach else result: {
-        const operand = try self.resolveInst(ty_op.operand);
-        const operand_ty = self.typeOf(ty_op.operand);
-
-        const dest_reg = try self.register_manager.allocReg(inst, gp);
-
-        const source_reg, const source_lock = blk: {
-            if (operand == .register) break :blk .{ operand.register, null };
-
-            const source_reg, const source_lock = try self.allocReg();
-            try self.genSetReg(operand_ty, source_reg, operand);
-            break :blk .{ source_reg, source_lock };
-        };
-        defer if (source_lock) |lock| self.register_manager.unlockReg(lock);
-
-        // TODO: the B extension for RISCV should have the ctz instruction, and we should use it.
-
-        try self.ctz(source_reg, dest_reg, operand_ty);
-
-        break :result .{ .register = dest_reg };
-    };
-    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn ctz(self: *Self, src: Register, dst: Register, ty: Type) !void {
-    const zcu = self.bin_file.comp.module.?;
-    const length = (ty.abiSize(zcu) * 8) - 1;
-
-    const count_reg, const count_lock = try self.allocReg();
-    defer self.register_manager.unlockReg(count_lock);
-
-    const len_reg, const len_lock = try self.allocReg();
-    defer self.register_manager.unlockReg(len_lock);
-
-    try self.genSetReg(Type.usize, count_reg, .{ .immediate = 0 });
-    try self.genSetReg(Type.usize, len_reg, .{ .immediate = length });
-
-    _ = src;
-    _ = dst;
-
+    _ = ty_op;
     return self.fail("TODO: finish ctz", .{});
 }
 
@@ -3267,38 +3254,18 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void {
         const ty = self.typeOf(ty_op.operand);
         const scalar_ty = ty.scalarType(zcu);
         const operand = try self.resolveInst(ty_op.operand);
+        _ = operand;
 
         switch (scalar_ty.zigTypeTag(zcu)) {
             .Int => if (ty.zigTypeTag(zcu) == .Vector) {
                 return self.fail("TODO implement airAbs for {}", .{ty.fmt(zcu)});
             } else {
-                const int_bits = ty.intInfo(zcu).bits;
-
-                if (int_bits > 32) {
-                    return self.fail("TODO: airAbs for larger than 32 bits", .{});
-                }
-
-                // promote the src into a register
-                const src_mcv = try self.copyToNewRegister(inst, operand);
-                // temp register for shift
-                const temp_reg = try self.register_manager.allocReg(inst, gp);
-
-                _ = try self.addInst(.{
-                    .tag = .abs,
-                    .ops = .rri,
-                    .data = .{
-                        .i_type = .{
-                            .rs1 = src_mcv.register,
-                            .rd = temp_reg,
-                            .imm12 = Immediate.s(int_bits - 1),
-                        },
-                    },
-                });
-
-                break :result src_mcv;
+                return self.fail("TODO: implement airAbs for Int", .{});
             },
             else => return self.fail("TODO: implement airAbs {}", .{scalar_ty.fmt(zcu)}),
         }
+
+        break :result .{.unreach};
     };
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
@@ -3317,15 +3284,24 @@ fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void {
             return self.finishAir(inst, operand, .{ ty_op.operand, .none, .none });
         }
 
-        const dest_reg = try self.register_manager.allocReg(null, gp);
-        try self.genSetReg(ty, dest_reg, operand);
-
-        const dest_mcv: MCValue = .{ .register = dest_reg };
+        const dest_mcv = try self.copyToNewRegister(inst, operand);
+        const dest_reg = dest_mcv.register;
 
         switch (int_bits) {
             16 => {
-                const temp = try self.binOp(.shr, dest_mcv, ty, .{ .immediate = 8 }, Type.u8);
-                assert(temp == .register);
+                const temp_reg, const temp_lock = try self.allocReg(.int);
+                defer self.register_manager.unlockReg(temp_lock);
+
+                _ = try self.addInst(.{
+                    .tag = .srli,
+                    .ops = .rri,
+                    .data = .{ .i_type = .{
+                        .imm12 = Immediate.s(8),
+                        .rd = temp_reg,
+                        .rs1 = dest_reg,
+                    } },
+                });
+
                 _ = try self.addInst(.{
                     .tag = .slli,
                     .ops = .rri,
@@ -3341,7 +3317,7 @@ fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void {
                     .data = .{ .r_type = .{
                         .rd = dest_reg,
                         .rs1 = dest_reg,
-                        .rs2 = temp.register,
+                        .rs2 = temp_reg,
                     } },
                 });
             },
@@ -3360,11 +3336,12 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
 }
 
 fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void {
+    const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)]; // only used to name the AIR op in the TODO message
     const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
     const result: MCValue = if (self.liveness.isUnused(inst))
         .unreach
     else
-        return self.fail("TODO implement airUnaryMath for {}", .{self.target.cpu.arch});
+        return self.fail("TODO implement airUnaryMath {s} for {}", .{ @tagName(tag), self.target.cpu.arch });
     return self.finishAir(inst, result, .{ un_op, .none, .none });
 }
 
@@ -3640,7 +3617,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
                             @intCast(field_bit_size),
                         );
 
-                        const dst_reg, const dst_lock = try self.allocReg();
+                        const dst_reg, const dst_lock = try self.allocReg(.int);
                         const dst_mcv = MCValue{ .register = dst_reg };
                         defer self.register_manager.unlockReg(dst_lock);
 
@@ -3658,7 +3635,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
                         break :dst dst_mcv;
                     };
                     if (field_abi_size * 8 > field_bit_size and dst_mcv.isMemory()) {
-                        const tmp_reg, const tmp_lock = try self.allocReg();
+                        const tmp_reg, const tmp_lock = try self.allocReg(.int);
                         defer self.register_manager.unlockReg(tmp_lock);
 
                         const hi_mcv =
@@ -3972,7 +3949,7 @@ fn genCall(
                 }
             } else {
                 assert(self.typeOf(callee).zigTypeTag(zcu) == .Pointer);
-                const addr_reg, const addr_lock = try self.allocReg();
+                const addr_reg, const addr_lock = try self.allocReg(.int);
                 defer self.register_manager.unlockReg(addr_lock);
                 try self.genSetReg(Type.usize, addr_reg, .{ .air_ref = callee });
 
@@ -4072,32 +4049,49 @@ fn airCmp(self: *Self, inst: Air.Inst.Index) !void {
         const rhs = try self.resolveInst(bin_op.rhs);
         const lhs_ty = self.typeOf(bin_op.lhs);
 
-        const int_ty = switch (lhs_ty.zigTypeTag(zcu)) {
-            .Vector => unreachable, // Handled by cmp_vector.
-            .Enum => lhs_ty.intTagType(zcu),
-            .Int => lhs_ty,
-            .Bool => Type.u1,
-            .Pointer => Type.usize,
-            .ErrorSet => Type.u16,
-            .Optional => blk: {
-                const payload_ty = lhs_ty.optionalChild(zcu);
-                if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
-                    break :blk Type.u1;
-                } else if (lhs_ty.isPtrLikeOptional(zcu)) {
-                    break :blk Type.usize;
+        switch (lhs_ty.zigTypeTag(zcu)) {
+            .Int,
+            .Enum,
+            .Bool,
+            .Pointer,
+            .ErrorSet,
+            .Optional,
+            => {
+                const int_ty = switch (lhs_ty.zigTypeTag(zcu)) {
+                    .Enum => lhs_ty.intTagType(zcu),
+                    .Int => lhs_ty,
+                    .Bool => Type.u1,
+                    .Pointer => Type.usize,
+                    .ErrorSet => Type.u16,
+                    .Optional => blk: {
+                        const payload_ty = lhs_ty.optionalChild(zcu);
+                        if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
+                            break :blk Type.u1;
+                        } else if (lhs_ty.isPtrLikeOptional(zcu)) {
+                            break :blk Type.usize;
+                        } else {
+                            return self.fail("TODO riscv cmp non-pointer optionals", .{});
+                        }
+                    },
+                    else => unreachable,
+                };
+
+                const int_info = int_ty.intInfo(zcu);
+                if (int_info.bits <= 64) {
+                    break :result try self.binOp(tag, lhs, int_ty, rhs, int_ty);
                 } else {
-                    return self.fail("TODO riscv cmp non-pointer optionals", .{});
+                    return self.fail("TODO riscv cmp for ints > 64 bits", .{});
                 }
             },
-            .Float => return self.fail("TODO riscv cmp floats", .{}),
-            else => unreachable,
-        };
+            .Float => {
+                const float_bits = lhs_ty.floatBits(self.target.*);
+                if (float_bits > 32) {
+                    return self.fail("TODO: airCmp float > 32 bits", .{});
+                }
 
-        const int_info = int_ty.intInfo(zcu);
-        if (int_info.bits <= 64) {
-            break :result try self.binOp(tag, lhs, int_ty, rhs, int_ty);
-        } else {
-            return self.fail("TODO riscv cmp for ints > 64 bits", .{});
+                break :result try self.binOpFloat(tag, lhs, lhs_ty, rhs, lhs_ty);
+            },
+            else => unreachable,
         }
     };
 
@@ -4716,25 +4710,13 @@ fn airBoolOp(self: *Self, inst: Air.Inst.Index) !void {
         const lhs_ty = Type.bool;
         const rhs_ty = Type.bool;
 
-        const lhs_reg, const lhs_lock = blk: {
-            if (lhs == .register) break :blk .{ lhs.register, self.register_manager.lockReg(lhs.register) };
-
-            const lhs_reg, const lhs_lock = try self.allocReg();
-            try self.genSetReg(lhs_ty, lhs_reg, lhs);
-            break :blk .{ lhs_reg, lhs_lock };
-        };
+        const lhs_reg, const lhs_lock = try self.promoteReg(lhs_ty, lhs);
         defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
 
-        const rhs_reg, const rhs_lock = blk: {
-            if (rhs == .register) break :blk .{ rhs.register, self.register_manager.lockReg(rhs.register) };
-
-            const rhs_reg, const rhs_lock = try self.allocReg();
-            try self.genSetReg(rhs_ty, rhs_reg, rhs);
-            break :blk .{ rhs_reg, rhs_lock };
-        };
+        const rhs_reg, const rhs_lock = try self.promoteReg(rhs_ty, rhs);
         defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
 
-        const result_reg, const result_lock = try self.allocReg();
+        const result_reg, const result_lock = try self.allocReg(.int);
         defer self.register_manager.unlockReg(result_lock);
 
         _ = try self.addInst(.{
@@ -4881,7 +4863,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAirResult(inst, result);
 }
 
-/// Sets the value without any modifications to register allocation metadata or stack allocation metadata.
+/// Sets the value of `dst_mcv` to the value of `src_mcv`.
 fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void {
     const zcu = self.bin_file.comp.module.?;
 
@@ -4890,7 +4872,7 @@ fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void {
 
     if (!dst_mcv.isMutable()) {
         // panic so we can see the trace
-        return self.fail("tried to genCopy immutable: {s}", .{@tagName(dst_mcv)});
+        return std.debug.panic("tried to genCopy immutable: {s}", .{@tagName(dst_mcv)});
     }
 
     switch (dst_mcv) {
@@ -4924,13 +4906,12 @@ fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void {
         ),
         .memory => return self.fail("TODO: genCopy memory", .{}),
         .register_pair => |dst_regs| {
-            const src_info: ?struct { addr_reg: Register, addr_lock: RegisterLock } = switch (src_mcv) {
+            const src_info: ?struct { addr_reg: Register, addr_lock: ?RegisterLock } = switch (src_mcv) {
                 .register_pair, .memory, .indirect, .load_frame => null,
                 .load_symbol => src: {
-                    const src_addr_reg, const src_addr_lock = try self.allocReg();
+                    const src_addr_reg, const src_addr_lock = try self.promoteReg(Type.usize, src_mcv.address());
                     errdefer self.register_manager.unlockReg(src_addr_lock);
 
-                    try self.genSetReg(Type.usize, src_addr_reg, src_mcv.address());
                     break :src .{ .addr_reg = src_addr_reg, .addr_lock = src_addr_lock };
                 },
                 .air_ref => |src_ref| return self.genCopy(
@@ -4940,7 +4921,12 @@ fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void {
                 ),
                 else => unreachable,
             };
-            defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock);
+
+            defer if (src_info) |info| {
+                if (info.addr_lock) |lock| {
+                    self.register_manager.unlockReg(lock);
+                }
+            };
 
             var part_disp: i32 = 0;
             for (dst_regs, try self.splitType(ty), 0..) |dst_reg, dst_ty, part_i| {
@@ -4966,7 +4952,7 @@ fn genInlineMemcpy(
     src_ptr: MCValue,
     len: MCValue,
 ) !void {
-    const regs = try self.register_manager.allocRegs(4, .{null} ** 4, tp);
+    const regs = try self.register_manager.allocRegs(4, .{null} ** 4, abi.Registers.Integer.temporary);
     const locks = self.register_manager.lockRegsAssumeUnused(4, regs);
     defer for (locks) |lock| self.register_manager.unlockReg(lock);
 
@@ -5060,9 +5046,7 @@ fn genInlineMemcpy(
     _ = try self.addInst(.{
         .tag = .pseudo,
         .ops = .pseudo_j,
-        .data = .{
-            .inst = first_inst,
-        },
+        .data = .{ .inst = first_inst },
     });
 }
 
@@ -5072,7 +5056,7 @@ fn genInlineMemset(
     src_value: MCValue,
     len: MCValue,
 ) !void {
-    const regs = try self.register_manager.allocRegs(3, .{null} ** 3, tp);
+    const regs = try self.register_manager.allocRegs(3, .{null} ** 3, abi.Registers.Integer.temporary);
     const locks = self.register_manager.lockRegsAssumeUnused(3, regs);
     defer for (locks) |lock| self.register_manager.unlockReg(lock);
 
@@ -5153,6 +5137,8 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
 
     if (abi_size > 8) return std.debug.panic("tried to set reg with size {}", .{abi_size});
 
+    const dst_reg_class = reg.class();
+
     switch (src_mcv) {
         .dead => unreachable,
         .unreach, .none => return, // Nothing to do.
@@ -5163,6 +5149,8 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
             return self.genSetReg(ty, reg, .{ .immediate = 0xaaaaaaaaaaaaaaaa });
         },
         .immediate => |unsigned_x| {
+            assert(dst_reg_class == .int);
+
             const x: i64 = @bitCast(unsigned_x);
             if (math.minInt(i12) <= x and x <= math.maxInt(i12)) {
                 _ = try self.addInst(.{
@@ -5200,7 +5188,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
                 // TODO: use a more advanced myriad seq to do this without a reg.
                 // see: https://github.com/llvm/llvm-project/blob/081a66ffacfe85a37ff775addafcf3371e967328/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp#L224
 
-                const temp, const temp_lock = try self.allocReg();
+                const temp, const temp_lock = try self.allocReg(.int);
                 defer self.register_manager.unlockReg(temp_lock);
 
                 const lo32: i32 = @truncate(x);
@@ -5236,6 +5224,19 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
             if (src_reg.id() == reg.id())
                 return;
 
+            const src_reg_class = src_reg.class();
+
+            if (src_reg_class == .float) {
+                if (dst_reg_class == .float) {
+                    return self.fail("TODO: genSetReg float -> float", .{});
+                }
+
+                assert(dst_reg_class == .int); // a bit of future proofing
+
+                // to move from float -> int, we use FMV.X.W
+                return self.fail("TODO: genSetReg float -> int", .{});
+            }
+
             // mv reg, src_reg
             _ = try self.addInst(.{
                 .tag = .pseudo,
@@ -5309,11 +5310,19 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
             });
         },
         .indirect => |reg_off| {
+            const float_class = dst_reg_class == .float;
+
             const load_tag: Mir.Inst.Tag = switch (abi_size) {
-                1 => .lb,
-                2 => .lh,
-                4 => .lw,
-                8 => .ld,
+                1 => if (float_class)
+                    unreachable // Zig does not support 8-bit floats
+                else
+                    .lb,
+                2 => if (float_class)
+                    return self.fail("TODO: genSetReg indirect 16-bit float", .{})
+                else
+                    .lh,
+                4 => if (float_class) .flw else .lw,
+                8 => if (float_class) .fld else .ld,
                 else => return std.debug.panic("TODO: genSetReg for size {d}", .{abi_size}),
             };
 
@@ -5336,15 +5345,18 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
                 .tag = .pseudo,
                 .ops = .pseudo_load_symbol,
                 .data = .{ .payload = try self.addExtra(Mir.LoadSymbolPayload{
-                    .register = reg.id(),
+                    .register = reg.encodeId(),
                     .atom_index = atom_index,
                     .sym_index = sym_off.sym,
                 }) },
             });
         },
         .load_symbol => {
-            try self.genSetReg(ty, reg, src_mcv.address());
-            try self.genSetReg(ty, reg, .{ .indirect = .{ .reg = reg } });
+            const addr_reg, const addr_lock = try self.allocReg(.int);
+            defer self.register_manager.unlockReg(addr_lock);
+
+            try self.genSetReg(ty, addr_reg, src_mcv.address());
+            try self.genSetReg(ty, reg, .{ .indirect = .{ .reg = addr_reg } });
         },
         .air_ref => |ref| try self.genSetReg(ty, reg, try self.resolveInst(ref)),
         else => return self.fail("TODO: genSetReg {s}", .{@tagName(src_mcv)}),
@@ -5386,7 +5398,8 @@ fn genSetMem(
         => switch (abi_size) {
             0 => {},
             1, 2, 4, 8 => {
-                const src_reg = try self.copyToTmpRegister(ty, src_mcv);
+                // no matter what type, it should use an integer register
+                const src_reg = try self.copyToTmpRegister(Type.usize, src_mcv);
                 const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
                 defer self.register_manager.unlockReg(src_lock);
 
@@ -5460,10 +5473,8 @@ fn genSetMem(
         .immediate => {
             // TODO: remove this lock in favor of a copyToTmpRegister when we load 64 bit immediates with
             // a register allocation.
-            const reg, const reg_lock = try self.allocReg();
-            defer self.register_manager.unlockReg(reg_lock);
-
-            try self.genSetReg(ty, reg, src_mcv);
+            const reg, const reg_lock = try self.promoteReg(ty, src_mcv);
+            defer if (reg_lock) |lock| self.register_manager.unlockReg(lock);
 
             return self.genSetMem(base, disp, ty, .{ .register = reg });
         },
@@ -5632,7 +5643,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
                 assert(len != 0); // prevented by Sema
                 try self.store(dst_ptr, src_val, elem_ptr_ty, elem_ty);
 
-                const second_elem_ptr_reg, const second_elem_ptr_lock = try self.allocReg();
+                const second_elem_ptr_reg, const second_elem_ptr_lock = try self.allocReg(.int);
                 defer self.register_manager.unlockReg(second_elem_ptr_lock);
 
                 const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg };
@@ -5677,7 +5688,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void {
     const err_lock = self.register_manager.lockRegAssumeUnused(err_reg);
     defer self.register_manager.unlockReg(err_lock);
 
-    const addr_reg, const addr_lock = try self.allocReg();
+    const addr_reg, const addr_lock = try self.allocReg(.int);
     defer self.register_manager.unlockReg(addr_lock);
 
     // this is now the base address of the error name table
@@ -5691,13 +5702,13 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void {
         return self.fail("TODO: riscv non-elf", .{});
     }
 
-    const start_reg, const start_lock = try self.allocReg();
+    const start_reg, const start_lock = try self.allocReg(.int);
     defer self.register_manager.unlockReg(start_lock);
 
-    const end_reg, const end_lock = try self.allocReg();
+    const end_reg, const end_lock = try self.allocReg(.int);
     defer self.register_manager.unlockReg(end_lock);
 
-    // const tmp_reg, const tmp_lock = try self.allocReg();
+    // const tmp_reg, const tmp_lock = try self.allocReg(.int);
     // defer self.register_manager.unlockReg(tmp_lock);
 
     // we move the base address forward by the following formula: base + (errno * 8)
@@ -6025,16 +6036,16 @@ fn resolveCallingConventionValues(
 
                 for (classes) |class| switch (class) {
                     .integer => {
-                        const ret_int_reg = abi.function_ret_regs[ret_int_reg_i];
+                        const ret_int_reg = abi.Registers.Integer.function_ret_regs[ret_int_reg_i];
                         ret_int_reg_i += 1;
 
                         ret_tracking[ret_tracking_i] = InstTracking.init(.{ .register = ret_int_reg });
                         ret_tracking_i += 1;
                     },
                     .memory => {
-                        const ret_int_reg = abi.function_ret_regs[ret_int_reg_i];
+                        const ret_int_reg = abi.Registers.Integer.function_ret_regs[ret_int_reg_i];
                         ret_int_reg_i += 1;
-                        const ret_indirect_reg = abi.function_arg_regs[param_int_reg_i];
+                        const ret_indirect_reg = abi.Registers.Integer.function_arg_regs[param_int_reg_i];
                         param_int_reg_i += 1;
 
                         ret_tracking[ret_tracking_i] = .{
@@ -6069,7 +6080,7 @@ fn resolveCallingConventionValues(
 
                 for (classes) |class| switch (class) {
                     .integer => {
-                        const param_int_regs = abi.function_arg_regs;
+                        const param_int_regs = abi.Registers.Integer.function_arg_regs;
                         if (param_int_reg_i >= param_int_regs.len) break;
 
                         const param_int_reg = param_int_regs[param_int_reg_i];
@@ -6079,7 +6090,7 @@ fn resolveCallingConventionValues(
                         arg_mcv_i += 1;
                     },
                     .memory => {
-                        const param_int_regs = abi.function_arg_regs;
+                        const param_int_regs = abi.Registers.Integer.function_arg_regs;
 
                         const param_int_reg = param_int_regs[param_int_reg_i];
                         param_int_reg_i += 1;
src/arch/riscv64/Encoding.zig
@@ -1,7 +1,65 @@
 mnemonic: Mnemonic,
 data: Data,
 
+const OpCode = enum(u7) { // 7-bit opcode value stored in `Enc.opcode`; mnemonics listed per tag are the ones `Mnemonic.encoding` currently maps to it
+    OP = 0b0110011, // add, sub, and, or, xor, sltu, slt, mul
+    OP_IMM = 0b0010011, // addi, andi, xori, sltiu, slli, srli, srai
+    OP_32 = 0b0111011, // sllw
+
+    BRANCH = 0b1100011, // beq
+    LOAD = 0b0000011, // ld, lw, lwu, lh, lhu, lb, lbu
+    STORE = 0b0100011, // sd, sw, sh, sb
+    SYSTEM = 0b1110011, // ecall, ebreak
+
+    OP_FP = 0b1010011, // fadds, faddd, feqs, feqd
+    LOAD_FP = 0b0000111, // fld, flw
+    STORE_FP = 0b0100111, // fsd, fsw
+
+    JALR = 0b1100111, // jalr
+    AUIPC = 0b0010111, // auipc
+    LUI = 0b0110111, // lui
+    JAL = 0b1101111, // jal
+    NONE = 0b0000000, // all-zero opcode, used only by `unimp`
+};
+
+const Fmt = enum(u2) { // 2-bit float format selector; carried in the `fmt` variant of `Enc.data`
+    /// 32-bit single-precision
+    S = 0b00,
+    /// 64-bit double-precision
+    D = 0b01,
+    _reserved = 0b10, // NOTE(review): the Zfh extension appears to assign 0b10 to half-precision (H) - confirm before treating it as reserved
+    /// 128-bit quad-precision
+    Q = 0b11,
+};
+
+const Enc = struct { // fixed (operand-independent) encoding fields of one mnemonic; produced by `Mnemonic.encoding`
+    opcode: OpCode,
+
+    data: union(enum) {
+        /// funct3 + funct7
+        ff: struct {
+            funct3: u3,
+            funct7: u7,
+        },
+        /// funct3 + offset
+        fo: struct {
+            funct3: u3,
+            offset: u12 = 0, // defaults to 0; `srai` is the only entry that overrides it (1 << 10)
+        },
+        /// funct5 + rm + fmt
+        fmt: struct {
+            funct5: u5,
+            rm: u3, // the table stores 0b111 for fadds/faddd and 0b010 for feqs/feqd
+            fmt: Fmt,
+        },
+        /// U-type; also used by `jal` (no funct fields, only the opcode)
+        none,
+    },
+};
+
 pub const Mnemonic = enum {
+    // base mnemonics
+
     // I Type
     ld,
     lw,
@@ -10,6 +68,7 @@ pub const Mnemonic = enum {
     lhu,
     lb,
     lbu,
+
     sltiu,
     xori,
     andi,
@@ -52,56 +111,130 @@ pub const Mnemonic = enum {
     ebreak,
     unimp,
 
+    // float mnemonics
+    fadds,
+    faddd,
+
+    feqs,
+    feqd,
+
+    fld,
+    flw,
+
+    fsd,
+    fsw,
+
     pub fn encoding(mnem: Mnemonic) Enc {
         return switch (mnem) {
             // zig fmt: off
-            .add    => .{ .opcode = 0b0110011, .funct3 = 0b000, .funct7 = 0b0000000 },
-            .sltu   => .{ .opcode = 0b0110011, .funct3 = 0b011, .funct7 = 0b0000000 },
-            .@"and" => .{ .opcode = 0b0110011, .funct3 = 0b111, .funct7 = 0b0000000 },
-            .@"or"  => .{ .opcode = 0b0110011, .funct3 = 0b110, .funct7 = 0b0000000 },
-            .sub    => .{ .opcode = 0b0110011, .funct3 = 0b000, .funct7 = 0b0100000 }, 
-
-            .ld     => .{ .opcode = 0b0000011, .funct3 = 0b011, .funct7 = null      },
-            .lw     => .{ .opcode = 0b0000011, .funct3 = 0b010, .funct7 = null      },
-            .lwu    => .{ .opcode = 0b0000011, .funct3 = 0b110, .funct7 = null      },
-            .lh     => .{ .opcode = 0b0000011, .funct3 = 0b001, .funct7 = null      },
-            .lhu    => .{ .opcode = 0b0000011, .funct3 = 0b101, .funct7 = null      },
-            .lb     => .{ .opcode = 0b0000011, .funct3 = 0b000, .funct7 = null      },
-            .lbu    => .{ .opcode = 0b0000011, .funct3 = 0b100, .funct7 = null      },
-
-            .sltiu  => .{ .opcode = 0b0010011, .funct3 = 0b011, .funct7 = null      },
-
-            .addi   => .{ .opcode = 0b0010011, .funct3 = 0b000, .funct7 = null      },
-            .andi   => .{ .opcode = 0b0010011, .funct3 = 0b111, .funct7 = null      },
-            .xori   => .{ .opcode = 0b0010011, .funct3 = 0b100, .funct7 = null      },
-            .jalr   => .{ .opcode = 0b1100111, .funct3 = 0b000, .funct7 = null      },
-            .slli   => .{ .opcode = 0b0010011, .funct3 = 0b001, .funct7 = null      },
-            .srli   => .{ .opcode = 0b0010011, .funct3 = 0b101, .funct7 = null      },
-            .srai   => .{ .opcode = 0b0010011, .funct3 = 0b101, .funct7 = null,   .offset = 1 << 10  },
+
+            // OP
+
+            .add    => .{ .opcode = .OP, .data = .{ .ff = .{ .funct3 = 0b000, .funct7 = 0b0000000 } } },
+            .sub    => .{ .opcode = .OP, .data = .{ .ff = .{ .funct3 = 0b000, .funct7 = 0b0100000 } } }, 
+
+            .@"and" => .{ .opcode = .OP, .data = .{ .ff = .{ .funct3 = 0b111, .funct7 = 0b0000000 } } },
+            .@"or"  => .{ .opcode = .OP, .data = .{ .ff = .{ .funct3 = 0b110, .funct7 = 0b0000000 } } },
+            .xor    => .{ .opcode = .OP, .data = .{ .ff = .{ .funct3 = 0b100, .funct7 = 0b0000000 } } },
+
+            .sltu   => .{ .opcode = .OP, .data = .{ .ff = .{ .funct3 = 0b011, .funct7 = 0b0000000 } } },
+            .slt    => .{ .opcode = .OP, .data = .{ .ff = .{ .funct3 = 0b010, .funct7 = 0b0000000 } } },
+
+            .mul    => .{ .opcode = .OP, .data = .{ .ff = .{ .funct3 = 0b000, .funct7 = 0b0000001 } } },
+
+
+            // OP_IMM
+
+            .addi   => .{ .opcode = .OP_IMM, .data = .{ .fo = .{ .funct3 = 0b000 } } },
+            .andi   => .{ .opcode = .OP_IMM, .data = .{ .fo = .{ .funct3 = 0b111 } } },
+            .xori   => .{ .opcode = .OP_IMM, .data = .{ .fo = .{ .funct3 = 0b100 } } },
             
-            .sllw   => .{ .opcode = 0b0111011, .funct3 = 0b001, .funct7 = 0b0000000 },
+            .sltiu  => .{ .opcode = .OP_IMM, .data = .{ .fo = .{ .funct3 = 0b011 } } },
+
+            .slli   => .{ .opcode = .OP_IMM, .data = .{ .fo = .{ .funct3 = 0b001 } } },
+            .srli   => .{ .opcode = .OP_IMM, .data = .{ .fo = .{ .funct3 = 0b101 } } },
+            .srai   => .{ .opcode = .OP_IMM, .data = .{ .fo = .{ .funct3 = 0b101, .offset = 1 << 10 } } }, // same funct3 as srli; the offset bit is the only field that tells them apart
+
+
+            // OP_FP
+
+            .fadds  => .{ .opcode = .OP_FP, .data = .{ .fmt = .{ .funct5 = 0b00000, .fmt = .S, .rm = 0b111 } } }, // NOTE(review): rm = 0b111 should be the "dynamic" rounding mode per the ISA manual - confirm
+            .faddd  => .{ .opcode = .OP_FP, .data = .{ .fmt = .{ .funct5 = 0b00000, .fmt = .D, .rm = 0b111 } } },
+
+            .feqs   => .{ .opcode = .OP_FP, .data = .{ .fmt = .{ .funct5 = 0b10100, .fmt = .S, .rm = 0b010 } } }, // NOTE(review): for compares the rm slot presumably acts as funct3 selecting FEQ (0b010) - confirm
+            .feqd   => .{ .opcode = .OP_FP, .data = .{ .fmt = .{ .funct5 = 0b10100, .fmt = .D, .rm = 0b010 } } },
+
+            // LOAD
+
+            .ld     => .{ .opcode = .LOAD, .data = .{ .fo = .{ .funct3 = 0b011 } } },
+            .lw     => .{ .opcode = .LOAD, .data = .{ .fo = .{ .funct3 = 0b010 } } },
+            .lwu    => .{ .opcode = .LOAD, .data = .{ .fo = .{ .funct3 = 0b110 } } },
+            .lh     => .{ .opcode = .LOAD, .data = .{ .fo = .{ .funct3 = 0b001 } } },
+            .lhu    => .{ .opcode = .LOAD, .data = .{ .fo = .{ .funct3 = 0b101 } } },
+            .lb     => .{ .opcode = .LOAD, .data = .{ .fo = .{ .funct3 = 0b000 } } },
+            .lbu    => .{ .opcode = .LOAD, .data = .{ .fo = .{ .funct3 = 0b100 } } },
+
+
+            // STORE
+
+            .sd     => .{ .opcode = .STORE, .data = .{ .fo = .{ .funct3 = 0b011 } } },
+            .sw     => .{ .opcode = .STORE, .data = .{ .fo = .{ .funct3 = 0b010 } } },
+            .sh     => .{ .opcode = .STORE, .data = .{ .fo = .{ .funct3 = 0b001 } } },
+            .sb     => .{ .opcode = .STORE, .data = .{ .fo = .{ .funct3 = 0b000 } } },
+
+
+            // LOAD_FP
+
+            .fld    => .{ .opcode = .LOAD_FP, .data = .{ .fo = .{ .funct3 = 0b011 } } },
+            .flw    => .{ .opcode = .LOAD_FP, .data = .{ .fo = .{ .funct3 = 0b010 } } },
+
+            // STORE_FP
+
+            .fsd    => .{ .opcode = .STORE_FP, .data = .{ .fo = .{ .funct3 = 0b011 } } },
+            .fsw    => .{ .opcode = .STORE_FP, .data = .{ .fo = .{ .funct3 = 0b010 } } },
+
+
+            // JALR
+
+            .jalr   => .{ .opcode = .JALR, .data = .{ .fo = .{ .funct3 = 0b000 } } },
+
+
+            // OP_32
+
+            .sllw   => .{ .opcode = .OP_32, .data = .{ .ff = .{ .funct3 = 0b001, .funct7 = 0b0000000 } } },
 
-            .lui    => .{ .opcode = 0b0110111, .funct3 = null,  .funct7 = null      },
-            .auipc  => .{ .opcode = 0b0010111, .funct3 = null,  .funct7 = null      },
 
-            .sd     => .{ .opcode = 0b0100011, .funct3 = 0b011, .funct7 = null      },
-            .sw     => .{ .opcode = 0b0100011, .funct3 = 0b010, .funct7 = null      },
-            .sh     => .{ .opcode = 0b0100011, .funct3 = 0b001, .funct7 = null      },
-            .sb     => .{ .opcode = 0b0100011, .funct3 = 0b000, .funct7 = null      },
+            // LUI
 
-            .jal    => .{ .opcode = 0b1101111, .funct3 = null,  .funct7 = null      },
+            .lui    => .{ .opcode = .LUI, .data = .{ .none = {} } },
 
-            .beq    => .{ .opcode = 0b1100011, .funct3 = 0b000, .funct7 = null      },
 
-            .slt    => .{ .opcode = 0b0110011, .funct3 = 0b010, .funct7 = 0b0000000 },
+            // AUIPC
 
-            .xor    => .{ .opcode = 0b0110011, .funct3 = 0b100, .funct7 = 0b0000000 },
+            .auipc  => .{ .opcode = .AUIPC, .data = .{ .none = {} } },
+
+
+            // JAL
+
+            .jal    => .{ .opcode = .JAL, .data = .{ .none = {} } },
+
+
+            // BRANCH
+
+            .beq    => .{ .opcode = .BRANCH, .data = .{ .fo = .{ .funct3 = 0b000 } } },
+
+
+            // SYSTEM
+
+            .ecall  => .{ .opcode = .SYSTEM, .data = .{ .fo = .{ .funct3 = 0b000 } } },
+            .ebreak => .{ .opcode = .SYSTEM, .data = .{ .fo = .{ .funct3 = 0b000 } } }, // NOTE(review): identical fields to ecall; the ISA separates them by the immediate (ecall = 0, ebreak = 1) - confirm the encoder sets it
+           
+
+            // NONE
+            
+            .unimp  => .{ .opcode = .NONE, .data = .{ .fo = .{ .funct3 = 0b000 } } },
 
-            .mul    => .{ .opcode = 0b0110011, .funct3 = 0b000, .funct7 = 0b0000001 },
 
-            .ecall  => .{ .opcode = 0b1110011, .funct3 = 0b000, .funct7 = null      },
-            .ebreak => .{ .opcode = 0b1110011, .funct3 = 0b000, .funct7 = null      },
-            .unimp  => .{ .opcode = 0b0000000, .funct3 = 0b000, .funct7 = null      },
             // zig fmt: on
         };
     }
@@ -109,6 +242,7 @@ pub const Mnemonic = enum {
 
 pub const InstEnc = enum {
     R,
+    R4,
     I,
     S,
     B,
@@ -121,13 +255,6 @@ pub const InstEnc = enum {
     pub fn fromMnemonic(mnem: Mnemonic) InstEnc {
         return switch (mnem) {
             .addi,
-            .ld,
-            .lw,
-            .lwu,
-            .lh,
-            .lhu,
-            .lb,
-            .lbu,
             .jalr,
             .sltiu,
             .xori,
@@ -135,6 +262,17 @@ pub const InstEnc = enum {
             .slli,
             .srli,
             .srai,
+
+            .ld,
+            .lw,
+            .lwu,
+            .lh,
+            .lhu,
+            .lb,
+            .lbu,
+
+            .flw,
+            .fld,
             => .I,
 
             .lui,
@@ -145,6 +283,9 @@ pub const InstEnc = enum {
             .sw,
             .sh,
             .sb,
+
+            .fsd,
+            .fsw,
             => .S,
 
             .jal,
@@ -162,6 +303,11 @@ pub const InstEnc = enum {
             .sub,
             .@"and",
             .@"or",
+
+            .fadds,
+            .faddd,
+            .feqs,
+            .feqd,
             => .R,
 
             .ecall,
@@ -171,16 +317,17 @@ pub const InstEnc = enum {
         };
     }
 
-    pub fn opsList(enc: InstEnc) [3]std.meta.FieldEnum(Operand) {
+    pub fn opsList(enc: InstEnc) [4]std.meta.FieldEnum(Operand) {
         return switch (enc) {
             // zig fmt: off
-            .R =>      .{ .reg,  .reg,  .reg,  },
-            .I =>      .{ .reg,  .reg,  .imm,  },
-            .S =>      .{ .reg,  .reg,  .imm,  },
-            .B =>      .{ .reg,  .reg,  .imm,  },
-            .U =>      .{ .reg,  .imm,  .none, },
-            .J =>      .{ .reg,  .imm,  .none, },
-            .system => .{ .none, .none, .none, },
+            .R      => .{ .reg,  .reg,  .reg,  .none },
+            .R4     => .{ .reg,  .reg,  .reg,  .reg  },  
+            .I      => .{ .reg,  .reg,  .imm,  .none },
+            .S      => .{ .reg,  .reg,  .imm,  .none },
+            .B      => .{ .reg,  .reg,  .imm,  .none },
+            .U      => .{ .reg,  .imm,  .none, .none },
+            .J      => .{ .reg,  .imm,  .none, .none },
+            .system => .{ .none, .none, .none, .none },
             // zig fmt: on
         };
     }
@@ -195,6 +342,15 @@ pub const Data = union(InstEnc) {
         rs2: u5,
         funct7: u7,
     },
+    R4: packed struct {
+        opcode: u7,
+        rd: u5,
+        funct3: u3,
+        rs1: u5,
+        rs2: u5,
+        funct2: u2,
+        rs3: u5,
+    },
     I: packed struct {
         opcode: u7,
         rd: u5,
@@ -237,19 +393,21 @@ pub const Data = union(InstEnc) {
 
     pub fn toU32(self: Data) u32 {
         return switch (self) {
-            .R => |v| @as(u32, @bitCast(v)),
-            .I => |v| @as(u32, @bitCast(v)),
-            .S => |v| @as(u32, @bitCast(v)),
-            .B => |v| @as(u32, @intCast(v.opcode)) + (@as(u32, @intCast(v.imm11)) << 7) + (@as(u32, @intCast(v.imm1_4)) << 8) + (@as(u32, @intCast(v.funct3)) << 12) + (@as(u32, @intCast(v.rs1)) << 15) + (@as(u32, @intCast(v.rs2)) << 20) + (@as(u32, @intCast(v.imm5_10)) << 25) + (@as(u32, @intCast(v.imm12)) << 31),
-            .U => |v| @as(u32, @bitCast(v)),
-            .J => |v| @as(u32, @bitCast(v)),
+            // zig fmt: off
+            .R  => |v| @bitCast(v),
+            .R4 => |v| @bitCast(v),
+            .I  => |v| @bitCast(v),
+            .S  => |v| @bitCast(v),
+            .B  => |v| @as(u32, @intCast(v.opcode)) + (@as(u32, @intCast(v.imm11)) << 7) + (@as(u32, @intCast(v.imm1_4)) << 8) + (@as(u32, @intCast(v.funct3)) << 12) + (@as(u32, @intCast(v.rs1)) << 15) + (@as(u32, @intCast(v.rs2)) << 20) + (@as(u32, @intCast(v.imm5_10)) << 25) + (@as(u32, @intCast(v.imm12)) << 31),
+            .U  => |v| @bitCast(v),
+            .J  => |v| @bitCast(v),
             .system => unreachable,
+            // zig fmt: on
         };
     }
 
     pub fn construct(mnem: Mnemonic, ops: []const Operand) !Data {
         const inst_enc = InstEnc.fromMnemonic(mnem);
-
         const enc = mnem.encoding();
 
         // special mnemonics
@@ -261,8 +419,8 @@ pub const Data = union(InstEnc) {
                 assert(ops.len == 0);
                 return .{
                     .I = .{
-                        .rd = Register.zero.id(),
-                        .rs1 = Register.zero.id(),
+                        .rd = Register.zero.encodeId(),
+                        .rs1 = Register.zero.encodeId(),
                         .imm0_11 = switch (mnem) {
                             .ecall => 0x000,
                             .ebreak => 0x001,
@@ -270,8 +428,8 @@ pub const Data = union(InstEnc) {
                             else => unreachable,
                         },
 
-                        .opcode = enc.opcode,
-                        .funct3 = enc.funct3.?,
+                        .opcode = @intFromEnum(enc.opcode),
+                        .funct3 = enc.data.fo.funct3,
                     },
                 };
             },
@@ -282,14 +440,26 @@ pub const Data = union(InstEnc) {
             .R => {
                 assert(ops.len == 3);
                 return .{
-                    .R = .{
-                        .rd = ops[0].reg.id(),
-                        .rs1 = ops[1].reg.id(),
-                        .rs2 = ops[2].reg.id(),
-
-                        .opcode = enc.opcode,
-                        .funct3 = enc.funct3.?,
-                        .funct7 = enc.funct7.?,
+                    .R = switch (enc.data) {
+                        .ff => |ff| .{
+                            .rd = ops[0].reg.encodeId(),
+                            .rs1 = ops[1].reg.encodeId(),
+                            .rs2 = ops[2].reg.encodeId(),
+
+                            .opcode = @intFromEnum(enc.opcode),
+                            .funct3 = ff.funct3,
+                            .funct7 = ff.funct7,
+                        },
+                        .fmt => |fmt| .{
+                            .rd = ops[0].reg.encodeId(),
+                            .rs1 = ops[1].reg.encodeId(),
+                            .rs2 = ops[2].reg.encodeId(),
+
+                            .opcode = @intFromEnum(enc.opcode),
+                            .funct3 = fmt.rm,
+                            .funct7 = (@as(u7, fmt.funct5) << 2) | @intFromEnum(fmt.fmt),
+                        },
+                        else => unreachable,
                     },
                 };
             },
@@ -300,12 +470,12 @@ pub const Data = union(InstEnc) {
                 return .{
                     .S = .{
                         .imm0_4 = @truncate(umm),
-                        .rs1 = ops[0].reg.id(),
-                        .rs2 = ops[1].reg.id(),
+                        .rs1 = ops[0].reg.encodeId(),
+                        .rs2 = ops[1].reg.encodeId(),
                         .imm5_11 = @truncate(umm >> 5),
 
-                        .opcode = enc.opcode,
-                        .funct3 = enc.funct3.?,
+                        .opcode = @intFromEnum(enc.opcode),
+                        .funct3 = enc.data.fo.funct3,
                     },
                 };
             },
@@ -313,12 +483,12 @@ pub const Data = union(InstEnc) {
                 assert(ops.len == 3);
                 return .{
                     .I = .{
-                        .rd = ops[0].reg.id(),
-                        .rs1 = ops[1].reg.id(),
-                        .imm0_11 = ops[2].imm.asBits(u12) + enc.offset,
+                        .rd = ops[0].reg.encodeId(),
+                        .rs1 = ops[1].reg.encodeId(),
+                        .imm0_11 = ops[2].imm.asBits(u12) + enc.data.fo.offset,
 
-                        .opcode = enc.opcode,
-                        .funct3 = enc.funct3.?,
+                        .opcode = @intFromEnum(enc.opcode),
+                        .funct3 = enc.data.fo.funct3,
                     },
                 };
             },
@@ -326,10 +496,10 @@ pub const Data = union(InstEnc) {
                 assert(ops.len == 2);
                 return .{
                     .U = .{
-                        .rd = ops[0].reg.id(),
+                        .rd = ops[0].reg.encodeId(),
                         .imm12_31 = ops[1].imm.asBits(u20),
 
-                        .opcode = enc.opcode,
+                        .opcode = @intFromEnum(enc.opcode),
                     },
                 };
             },
@@ -341,13 +511,13 @@ pub const Data = union(InstEnc) {
 
                 return .{
                     .J = .{
-                        .rd = ops[0].reg.id(),
+                        .rd = ops[0].reg.encodeId(),
                         .imm1_10 = @truncate(umm >> 1),
                         .imm11 = @truncate(umm >> 11),
                         .imm12_19 = @truncate(umm >> 12),
                         .imm20 = @truncate(umm >> 20),
 
-                        .opcode = enc.opcode,
+                        .opcode = @intFromEnum(enc.opcode),
                     },
                 };
             },
@@ -359,15 +529,15 @@ pub const Data = union(InstEnc) {
 
                 return .{
                     .B = .{
-                        .rs1 = ops[0].reg.id(),
-                        .rs2 = ops[1].reg.id(),
+                        .rs1 = ops[0].reg.encodeId(),
+                        .rs2 = ops[1].reg.encodeId(),
                         .imm1_4 = @truncate(umm >> 1),
                         .imm5_10 = @truncate(umm >> 5),
                         .imm11 = @truncate(umm >> 11),
                         .imm12 = @truncate(umm >> 12),
 
-                        .opcode = enc.opcode,
-                        .funct3 = enc.funct3.?,
+                        .opcode = @intFromEnum(enc.opcode),
+                        .funct3 = enc.data.fo.funct3,
                     },
                 };
             },
@@ -386,13 +556,6 @@ pub fn findByMnemonic(mnem: Mnemonic, ops: []const Operand) !?Encoding {
     };
 }
 
-const Enc = struct {
-    opcode: u7,
-    funct3: ?u3,
-    funct7: ?u7,
-    offset: u12 = 0,
-};
-
 fn verifyOps(mnem: Mnemonic, ops: []const Operand) bool {
     const inst_enc = InstEnc.fromMnemonic(mnem);
     const list = std.mem.sliceTo(&inst_enc.opsList(), .none);
src/arch/riscv64/Lower.zig
@@ -14,7 +14,7 @@ result_relocs_len: u8 = undefined,
 result_insts: [
     @max(
         1, // non-pseudo instruction
-        abi.callee_preserved_regs.len, // spill / restore regs,
+        abi.Registers.all_preserved.len, // spill / restore regs,
     )
 ]Instruction = undefined,
 result_relocs: [1]Reloc = undefined,
@@ -71,11 +71,24 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
 
                 switch (inst.ops) {
                     .pseudo_load_rm => {
-                        const tag: Encoding.Mnemonic = switch (rm.m.mod.size()) {
-                            .byte => .lb,
-                            .hword => .lh,
-                            .word => .lw,
-                            .dword => .ld,
+                        const dest_reg = rm.r;
+                        const dest_reg_class = dest_reg.class();
+                        const float = dest_reg_class == .float;
+
+                        const src_size = rm.m.mod.size();
+
+                        const tag: Encoding.Mnemonic = if (!float)
+                            switch (src_size) {
+                                .byte => .lb,
+                                .hword => .lh,
+                                .word => .lw,
+                                .dword => .ld,
+                            }
+                        else switch (src_size) {
+                            .byte => unreachable, // Zig does not support 8-bit floats
+                            .hword => return lower.fail("TODO: lowerMir pseudo_load_rm support 16-bit floats", .{}),
+                            .word => .flw,
+                            .dword => .fld,
                         };
 
                         try lower.emit(tag, &.{
@@ -85,11 +98,25 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
                         });
                     },
                     .pseudo_store_rm => {
-                        const tag: Encoding.Mnemonic = switch (rm.m.mod.size()) {
-                            .byte => .sb,
-                            .hword => .sh,
-                            .word => .sw,
-                            .dword => .sd,
+                        const src_reg = rm.r;
+                        const src_reg_class = src_reg.class();
+                        const float = src_reg_class == .float;
+
+                        // TODO: do we actually need this? are all stores not usize?
+                        const dest_size = rm.m.mod.size();
+
+                        const tag: Encoding.Mnemonic = if (!float)
+                            switch (dest_size) {
+                                .byte => .sb,
+                                .hword => .sh,
+                                .word => .sw,
+                                .dword => .sd,
+                            }
+                        else switch (dest_size) {
+                            .byte => unreachable, // Zig does not support 8-bit floats
+                            .hword => return lower.fail("TODO: lowerMir pseudo_store_rm support 16-bit floats", .{}),
+                            .word => .fsw, // F extension store
+                            .dword => .fsd, // D extension store
                         };
 
                         try lower.emit(tag, &.{
@@ -336,16 +363,19 @@ fn pushPopRegList(lower: *Lower, comptime spilling: bool, reg_list: Mir.Register
     var reg_i: u31 = 0;
     while (it.next()) |i| {
         const frame = lower.mir.frame_locs.get(@intFromEnum(bits.FrameIndex.spill_frame));
+        const reg = abi.Registers.all_preserved[i]; // looked up once; reused for the class check and the operand
+        const reg_class = reg.class();
+        const is_float_reg = reg_class == .float; // float registers need fsd/fld instead of sd/ld
 
         if (spilling) {
-            try lower.emit(.sd, &.{
+            try lower.emit(if (is_float_reg) .fsd else .sd, &.{
                 .{ .reg = frame.base },
-                .{ .reg = abi.callee_preserved_regs[i] },
+                .{ .reg = reg },
                 .{ .imm = Immediate.s(frame.disp + reg_i) },
             });
         } else {
-            try lower.emit(.ld, &.{
-                .{ .reg = abi.callee_preserved_regs[i] },
+            try lower.emit(if (is_float_reg) .fld else .ld, &.{
+                .{ .reg = reg },
                 .{ .reg = frame.base },
                 .{ .imm = Immediate.s(frame.disp + reg_i) },
             });
src/arch/riscv64/Mir.zig
@@ -20,87 +20,68 @@ pub const Inst = struct {
     pub const Index = u32;
 
     pub const Tag = enum(u16) {
-        /// Add immediate. Uses i_type payload.
-        addi,
 
-        /// Add immediate and produce a sign-extended result.
-        ///
-        /// Uses i-type payload.
+        // base extension
+        addi,
         addiw,
 
         jalr,
         lui,
-        mv,
 
         @"and",
+        andi,
+
         xor,
+        @"or",
 
         ebreak,
         ecall,
         unimp,
 
-        /// OR instruction. Uses r_type payload.
-        @"or",
-
-        /// Addition
         add,
-        /// Subtraction
         sub,
-        /// Multiply, uses r_type. Needs the M extension.
-        mul,
-
-        /// Absolute Value, uses i_type payload.
-        abs,
 
         sltu,
         slt,
 
-        /// Immediate Logical Right Shift, uses i_type payload
         srli,
-        /// Immediate Logical Left Shift, uses i_type payload
         slli,
-        /// Immediate Arithmetic Right Shift, uses i_type payload.
         srai,
-        /// Register Logical Left Shift, uses r_type payload
         sllw,
-        /// Register Logical Right Shit, uses r_type payload
         srlw,
 
-        /// Jumps, but stores the address of the instruction following the
-        /// jump in `rd`.
-        ///
-        /// Uses j_type payload.
         jal,
 
-        /// Immediate AND, uses i_type payload
-        andi,
-
-        /// Branch if equal, Uses b_type
         beq,
-        /// Branch if not equal, Uses b_type
         bne,
 
-        /// Generates a NO-OP, uses nop payload
         nop,
 
-        /// Load double (64 bits), uses i_type payload
         ld,
-        /// Load word (32 bits), uses i_type payload
         lw,
-        /// Load half (16 bits), uses i_type payload
         lh,
-        /// Load byte (8 bits), uses i_type payload
         lb,
 
-        /// Store double (64 bits), uses s_type payload
         sd,
-        /// Store word (32 bits), uses s_type payload
         sw,
-        /// Store half (16 bits), uses s_type payload
         sh,
-        /// Store byte (8 bits), uses s_type payload
         sb,
 
+        // M extension
+        mul,
+
+        // F extension (32-bit float)
+        fadds,
+        flw,
+        fsw,
+        feqs,
+
+        // D extension (64-bit float)
+        faddd,
+        fld,
+        fsd,
+        feqd,
+
         /// A pseudo-instruction. Used for anything that isn't 1:1 with an
         /// assembly instruction.
         pseudo,
test/behavior/byteswap.zig
@@ -100,6 +100,7 @@ test "@byteSwap vectors u8" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     try comptime vector8();
     try vector8();
test/behavior/fn.zig
@@ -349,7 +349,6 @@ test "function call with anon list literal" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
@@ -370,7 +369,6 @@ test "function call with anon list literal - 2D" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
test/behavior/globals.zig
@@ -7,7 +7,6 @@ test "store to global array" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     try expect(pos[1] == 0.0);
     pos = [2]f32{ 0.0, 1.0 };
test/behavior/slice.zig
@@ -1001,6 +1001,7 @@ test "sentinel-terminated 0-length slices" {
     if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     const u32s: [4]u32 = [_]u32{ 0, 1, 2, 3 };
 
test/behavior/vector.zig
@@ -434,6 +434,7 @@ test "load vector elements via runtime index" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {