Commit 09e9812086
Changed files (5)
src/arch/riscv64/CodeGen.zig
@@ -995,7 +995,6 @@ fn addInst(func: *Func, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index {
.pseudo_dbg_prologue_end,
.pseudo_dbg_line_column,
.pseudo_dbg_epilogue_begin,
- .pseudo_mv,
.pseudo_dead,
=> false,
}) wip_mir_log.debug("{}", .{func.fmtWipMir(result_index)});
@@ -2445,6 +2444,7 @@ fn genBinOp(
.Vector => {
const mir_tag: Mir.Inst.Tag = switch (tag) {
.add => .vaddvv,
+ .sub => .vsubvv,
else => return func.fail("TODO: genBinOp {s} Vector", .{@tagName(tag)}),
};
@@ -2454,7 +2454,6 @@ fn genBinOp(
const elem_size = lhs_ty.childType(zcu).bitSize(pt);
try func.setVl(.zero, num_elem, .{
- .vlmul = .mf2,
.vsew = switch (elem_size) {
8 => .@"8",
16 => .@"16",
@@ -2462,6 +2461,7 @@ fn genBinOp(
64 => .@"64",
else => unreachable,
},
+ .vlmul = .m1,
.vma = true,
.vta = true,
});
@@ -2472,8 +2472,8 @@ fn genBinOp(
.data = .{
.r_type = .{
.rd = dst_reg,
- .rs1 = lhs_reg,
- .rs2 = rhs_reg,
+ .rs1 = rhs_reg,
+ .rs2 = lhs_reg,
},
},
});
@@ -3576,20 +3576,54 @@ fn airArrayElemVal(func: *Func, inst: Air.Inst.Index) !void {
else => try func.genSetReg(Type.usize, addr_reg, array_mcv.address()),
}
+ const dst_mcv = try func.allocRegOrMem(result_ty, inst, false);
+
+ if (array_ty.isVector(zcu)) {
+ // we need to load the vector, vslidedown to get the element we want
+ // and store that element in a load frame.
+
+ const src_reg, const src_lock = try func.allocReg(.vector);
+ defer func.register_manager.unlockReg(src_lock);
+
+ // load the vector into a temporary register
+ try func.genCopy(array_ty, .{ .register = src_reg }, .{ .indirect = .{ .reg = addr_reg } });
+
+ // we need to construct a 1xbitSize vector because of how lane splitting works in RISC-V
+ const single_ty = try pt.vectorType(.{ .child = elem_ty.toIntern(), .len = 1 });
+
+ // we can take a shortcut here where we don't need a vslidedown
+ // and can just copy to the frame index.
+ if (!(index_mcv == .immediate and index_mcv.immediate == 0)) {
+ const index_reg = try func.copyToTmpRegister(Type.usize, index_mcv);
+
+ _ = try func.addInst(.{
+ .tag = .vslidedownvx,
+ .ops = .rrr,
+ .data = .{ .r_type = .{
+ .rd = src_reg,
+ .rs1 = index_reg,
+ .rs2 = src_reg,
+ } },
+ });
+ }
+
+ try func.genCopy(single_ty, dst_mcv, .{ .register = src_reg });
+ break :result dst_mcv;
+ }
+
const offset_reg = try func.elemOffset(index_ty, index_mcv, elem_abi_size);
const offset_lock = func.register_manager.lockRegAssumeUnused(offset_reg);
defer func.register_manager.unlockReg(offset_lock);
-
- const dst_mcv = try func.allocRegOrMem(result_ty, inst, false);
_ = try func.addInst(.{
.tag = .add,
.ops = .rrr,
.data = .{ .r_type = .{
.rd = addr_reg,
- .rs1 = offset_reg,
- .rs2 = addr_reg,
+ .rs1 = addr_reg,
+ .rs2 = offset_reg,
} },
});
+
try func.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } });
break :result dst_mcv;
};
@@ -5965,6 +5999,27 @@ fn genSetReg(func: *Func, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
if (src_reg.id() == reg.id())
return;
+ // there is no instruction for loading the contents of a vector register
+ // into an integer register; however, we can cheat a bit by setting the element
+ // size to the total size of the vector, and vmv.x.s will work then
+ if (src_reg.class() == .vector) {
+ try func.setVl(.zero, 0, .{
+ .vsew = switch (ty.totalVectorBits(pt)) {
+ 8 => .@"8",
+ 16 => .@"16",
+ 32 => .@"32",
+ 64 => .@"64",
+ else => |vec_bits| return func.fail("TODO: genSetReg vec -> {s} bits {d}", .{
+ @tagName(reg.class()),
+ vec_bits,
+ }),
+ },
+ .vlmul = .m1,
+ .vta = true,
+ .vma = true,
+ });
+ }
+
// mv reg, src_reg
_ = try func.addInst(.{
.tag = .pseudo,
@@ -5978,57 +6033,28 @@ fn genSetReg(func: *Func, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
.register_pair => return func.fail("genSetReg should we allow reg -> reg_pair?", .{}),
.load_frame => |frame| {
if (reg.class() == .vector) {
- if (abi_size > 8)
- return func.fail("TODO: genSetReg vectors > 8", .{});
-
- const temp_reg = try func.register_manager.allocReg(null, abi.Registers.Integer.temporary);
- const temp_lock = func.register_manager.lockRegAssumeUnused(temp_reg);
- defer func.register_manager.unlockReg(temp_lock);
-
- try func.setVl(.zero, 1, .{
- .vsew = switch (abi_size) {
- 1 => .@"8",
- 2 => .@"16",
- 4 => .@"32",
- 8 => .@"64",
- else => unreachable,
- },
- .vlmul = .m1,
- .vma = true,
- .vta = true,
- });
-
- try func.genCopy(ty, .{ .register = temp_reg }, .{ .load_frame = frame });
+ const addr_reg, const addr_lock = try func.allocReg(.int);
+ defer func.register_manager.unlockReg(addr_lock);
+ try func.genCopy(ty, .{ .register = addr_reg }, src_mcv.address());
+ try func.genCopy(ty, .{ .register = reg }, .{ .indirect = .{ .reg = addr_reg } });
+ } else {
_ = try func.addInst(.{
.tag = .pseudo,
- .ops = .pseudo_mv,
- .data = .{
- .rr = .{
- .rd = reg,
- .rs = temp_reg,
+ .ops = .pseudo_load_rm,
+ .data = .{ .rm = .{
+ .r = reg,
+ .m = .{
+ .base = .{ .frame = frame.index },
+ .mod = .{
+ .size = func.memSize(ty),
+ .unsigned = ty.isUnsignedInt(zcu),
+ .disp = frame.off,
+ },
},
- },
+ } },
});
-
- return;
}
-
- _ = try func.addInst(.{
- .tag = .pseudo,
- .ops = .pseudo_load_rm,
- .data = .{ .rm = .{
- .r = reg,
- .m = .{
- .base = .{ .frame = frame.index },
- .mod = .{
- .size = func.memSize(ty),
- .unsigned = ty.isUnsignedInt(zcu),
- .disp = frame.off,
- },
- },
- } },
- });
},
.memory => |addr| {
try func.genSetReg(ty, reg, .{ .immediate = addr });
@@ -6072,20 +6098,64 @@ fn genSetReg(func: *Func, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
});
},
.indirect => |reg_off| {
- const float_class = dst_reg_class == .float;
+ const load_tag: Mir.Inst.Tag = switch (reg.class()) {
+ .float => switch (abi_size) {
+ 1 => unreachable, // Zig does not support 8-bit floats
+ 2 => return func.fail("TODO: genSetReg indirect 16-bit float", .{}),
+ 4 => .flw,
+ 8 => .fld,
+ else => return std.debug.panic("TODO: genSetReg for float size {d}", .{abi_size}),
+ },
+ .int => switch (abi_size) {
+ 1 => .lb,
+ 2 => .lh,
+ 4 => .lw,
+ 8 => .ld,
+ else => return std.debug.panic("TODO: genSetReg for int size {d}", .{abi_size}),
+ },
+ .vector => {
+ assert(reg_off.off == 0);
+
+ // There is no vector instruction for loading with an offset to a base register,
+ // so we need to get an offset register containing the address of the vector first
+ // and load from it.
+ const len: u5 = math.cast(u5, ty.vectorLen(zcu)) orelse {
+ return func.fail("TODO: genSetReg load_frame -> vec reg, vector length doesn't fit into imm avl", .{});
+ };
+ const elem_ty = ty.childType(zcu);
+ const elem_size = elem_ty.abiSize(pt);
- const load_tag: Mir.Inst.Tag = switch (abi_size) {
- 1 => if (float_class)
- unreachable // Zig does not support 8-bit floats
- else
- .lb,
- 2 => if (float_class)
- return func.fail("TODO: genSetReg indirect 16-bit float", .{})
- else
- .lh,
- 4 => if (float_class) .flw else .lw,
- 8 => if (float_class) .fld else .ld,
- else => return std.debug.panic("TODO: genSetReg for size {d}", .{abi_size}),
+ try func.setVl(.zero, len, .{
+ .vsew = switch (elem_size) {
+ 1 => .@"8",
+ 2 => .@"16",
+ 4 => .@"32",
+ 8 => .@"64",
+ else => unreachable,
+ },
+ .vlmul = .m1,
+ .vma = true,
+ .vta = true,
+ });
+
+ _ = try func.addInst(.{
+ .tag = .pseudo,
+ .ops = .pseudo_load_rm,
+ .data = .{ .rm = .{
+ .r = reg,
+ .m = .{
+ .base = .{ .reg = reg_off.reg },
+ .mod = .{
+ .size = func.memSize(elem_ty),
+ .unsigned = false,
+ .disp = 0,
+ },
+ },
+ } },
+ });
+
+ return;
+ },
};
_ = try func.addInst(.{
@@ -6100,7 +6170,6 @@ fn genSetReg(func: *Func, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
},
.lea_symbol => |sym_off| {
assert(sym_off.off == 0);
-
const atom_index = try func.symbolIndex();
_ = try func.addInst(.{
@@ -6166,12 +6235,12 @@ fn genSetMem(
=> switch (abi_size) {
0 => {},
1, 2, 4, 8 => {
- // no matter what type, it should use an integer register
- const src_reg = try func.copyToTmpRegister(ty, src_mcv);
- const src_lock = func.register_manager.lockRegAssumeUnused(src_reg);
+ const reg = try func.register_manager.allocReg(null, abi.Registers.Integer.temporary);
+ const src_lock = func.register_manager.lockRegAssumeUnused(reg);
defer func.register_manager.unlockReg(src_lock);
- try func.genSetMem(base, disp, ty, .{ .register = src_reg });
+ try func.genSetReg(ty, reg, src_mcv);
+ try func.genSetMem(base, disp, ty, .{ .register = reg });
},
else => try func.genInlineMemcpy(
dst_ptr_mcv,
@@ -6180,6 +6249,46 @@ fn genSetMem(
),
},
.register => |reg| {
+ if (reg.class() == .vector) {
+ const addr_reg = try func.copyToTmpRegister(Type.usize, dst_ptr_mcv);
+
+ const num_elem: u5 = math.cast(u5, ty.vectorLen(pt.zcu)) orelse {
+ return func.fail("TODO: genBinOp use vsetvli for larger avl sizes", .{});
+ };
+ const elem_size = ty.childType(pt.zcu).bitSize(pt);
+
+ try func.setVl(.zero, num_elem, .{
+ .vsew = switch (elem_size) {
+ 8 => .@"8",
+ 16 => .@"16",
+ 32 => .@"32",
+ 64 => .@"64",
+ else => unreachable,
+ },
+ .vlmul = .m1,
+ .vma = true,
+ .vta = true,
+ });
+
+ _ = try func.addInst(.{
+ .tag = .pseudo,
+ .ops = .pseudo_store_rm,
+ .data = .{ .rm = .{
+ .r = reg,
+ .m = .{
+ .base = .{ .reg = addr_reg },
+ .mod = .{
+ .disp = 0,
+ .size = func.memSize(ty.childType(pt.zcu)),
+ .unsigned = false,
+ },
+ },
+ } },
+ });
+
+ return;
+ }
+
const mem_size = switch (base) {
.frame => |base_fi| mem_size: {
assert(disp >= 0);
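
The CodeGen.zig changes above can be exercised from ordinary Zig vector code. The test below is not part of the commit; it is a minimal sketch of the source-level operations the new paths handle: `a - b` on a `@Vector` goes through genBinOp's new `.sub => .vsubvv` mapping, and indexing the result goes through the new airArrayElemVal vector branch (vslidedown.vx to bring the wanted lane to position 0, then a copy out of the vector register).

const std = @import("std");

test "vector sub and lane extract" {
    const a: @Vector(4, u32) = .{ 10, 20, 30, 40 };
    const b: @Vector(4, u32) = .{ 1, 2, 3, 4 };
    const d = a - b; // genBinOp: .sub lowers to vsub.vv
    const lane = d[2]; // airArrayElemVal: vslidedown.vx by the index, then copy lane 0 out
    try std.testing.expectEqual(@as(u32, 27), lane);
}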
src/arch/riscv64/Emit.zig
@@ -75,7 +75,7 @@ pub fn emitMir(emit: *Emit) Error!void {
.r_info = (@as(u64, @intCast(symbol.sym_index)) << 32) | lo_r_type,
.r_addend = 0,
});
- } else return emit.fail("TODO: load_symbol_reloc non-ELF", .{});
+ } else unreachable;
},
.call_extern_fn_reloc => |symbol| {
if (emit.bin_file.cast(link.File.Elf)) |elf_file| {
src/arch/riscv64/Encoding.zig
@@ -116,8 +116,12 @@ const Enc = struct {
};
const VecWidth = enum(u3) {
+ // zig fmt: off
+ @"8" = 0b000,
+ @"16" = 0b101,
@"32" = 0b110,
@"64" = 0b111,
+ // zig fmt: on
};
const VecType = enum(u3) {
@@ -266,15 +270,26 @@ pub const Mnemonic = enum {
fsgnjxd,
// V Extension
+ vle8v,
+ vle16v,
vle32v,
vle64v,
+ vse8v,
+ vse16v,
vse32v,
vse64v,
+ vsoxei8v,
+
vaddvv,
- vadcxv,
- vadcvx,
+ vsubvv,
+
+ vadcvv,
+
+ vmvvx,
+
+ vslidedownvx,
// MISC
fence,
@@ -431,19 +446,25 @@ pub const Mnemonic = enum {
// LOAD_FP
.flw => .{ .opcode = .LOAD_FP, .data = .{ .f = .{ .funct3 = 0b010 } } },
- .fld => .{ .opcode = .LOAD_FP, .data = .{ .f = .{ .funct3 = 0b011 } } },
-
- .vle32v => .{ .opcode = .LOAD_FP, .data = .{ .vecls = .{ .width = .@"32", .umop = .unit, .vm = true, .mop = .unit, .mew = true, .nf = 0b000 } } },
- .vle64v => .{ .opcode = .LOAD_FP, .data = .{ .vecls = .{ .width = .@"64", .umop = .unit, .vm = true, .mop = .unit, .mew = true, .nf = 0b000 } } },
+ .fld => .{ .opcode = .LOAD_FP, .data = .{ .f = .{ .funct3 = 0b011 } } },
+
+ .vle8v => .{ .opcode = .LOAD_FP, .data = .{ .vecls = .{ .width = .@"8", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } },
+ .vle16v => .{ .opcode = .LOAD_FP, .data = .{ .vecls = .{ .width = .@"16", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } },
+ .vle32v => .{ .opcode = .LOAD_FP, .data = .{ .vecls = .{ .width = .@"32", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } },
+ .vle64v => .{ .opcode = .LOAD_FP, .data = .{ .vecls = .{ .width = .@"64", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } },
// STORE_FP
- .fsw => .{ .opcode = .STORE_FP, .data = .{ .f = .{ .funct3 = 0b010 } } },
- .fsd => .{ .opcode = .STORE_FP, .data = .{ .f = .{ .funct3 = 0b011 } } },
+ .fsw => .{ .opcode = .STORE_FP, .data = .{ .f = .{ .funct3 = 0b010 } } },
+ .fsd => .{ .opcode = .STORE_FP, .data = .{ .f = .{ .funct3 = 0b011 } } },
+
+ .vse8v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"8", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } },
+ .vse16v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"16", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } },
+ .vse32v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"32", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } },
+ .vse64v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"64", .umop = .unit, .vm = true, .mop = .unit, .mew = false, .nf = 0b000 } } },
- .vse32v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"32", .umop = .unit, .vm = true, .mop = .unit, .mew = true, .nf = 0b000 } } },
- .vse64v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"64", .umop = .unit, .vm = true, .mop = .unit, .mew = true, .nf = 0b000 } } },
+ .vsoxei8v => .{ .opcode = .STORE_FP, .data = .{ .vecls = .{ .width = .@"8", .umop = .unit, .vm = true, .mop = .ord, .mew = false, .nf = 0b000 } } },
// JALR
@@ -516,11 +537,15 @@ pub const Mnemonic = enum {
.amomaxud => .{ .opcode = .AMO, .data = .{ .amo = .{ .width = .D, .funct5 = 0b11100 } } },
// OP_V
- .vsetivli => .{ .opcode = .OP_V, .data = .{ .f = .{ .funct3 = 0b111 } } },
- .vsetvli => .{ .opcode = .OP_V, .data = .{ .f = .{ .funct3 = 0b111 } } },
- .vaddvv => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b000000, .funct3 = .OPIVV } } },
- .vadcxv => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b010000, .funct3 = .OPMVX } } },
- .vadcvx => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b010000, .funct3 = .OPMVV } } },
+ .vsetivli => .{ .opcode = .OP_V, .data = .{ .f = .{ .funct3 = 0b111 } } },
+ .vsetvli => .{ .opcode = .OP_V, .data = .{ .f = .{ .funct3 = 0b111 } } },
+ .vaddvv => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b000000, .funct3 = .OPIVV } } },
+ .vsubvv => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b000010, .funct3 = .OPIVV } } },
+
+ .vadcvv => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b010000, .funct3 = .OPMVV } } },
+ .vmvvx => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b010111, .funct3 = .OPIVX } } },
+
+ .vslidedownvx => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b001111, .funct3 = .OPIVX } } },
// zig fmt: on
};
@@ -663,13 +688,23 @@ pub const InstEnc = enum {
.fsgnjxs,
.fsgnjxd,
+ .vle8v,
+ .vle16v,
.vle32v,
.vle64v,
+
+ .vse8v,
+ .vse16v,
.vse32v,
.vse64v,
+
+ .vsoxei8v,
+
.vaddvv,
- .vadcxv,
- .vadcvx,
+ .vsubvv,
+ .vadcvv,
+ .vmvvx,
+ .vslidedownvx,
=> .R,
.ecall,
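
For reference, the new vsubvv entry (funct6 = 0b000010, OPIVV, OP_V) composes into a standard 32-bit register-register vector word. The snippet below is not part of the commit; it is a hand-rolled sketch of that bit layout, with the helper name chosen for illustration, checked against the unmasked encoding of vsub.vv v1, v2, v3.

const std = @import("std");

// Illustrative field layout for OP_V register-register instructions:
// funct6[31:26] | vm[25] | vs2[24:20] | vs1[19:15] | funct3[14:12] | vd[11:7] | opcode[6:0]
fn encodeOpV(funct6: u6, vm: u1, vs2: u5, vs1: u5, funct3: u3, vd: u5, opcode: u7) u32 {
    return (@as(u32, funct6) << 26) | (@as(u32, vm) << 25) | (@as(u32, vs2) << 20) |
        (@as(u32, vs1) << 15) | (@as(u32, funct3) << 12) | (@as(u32, vd) << 7) | opcode;
}

test "vsub.vv v1, v2, v3 (unmasked)" {
    // OP_V opcode = 0b1010111, OPIVV funct3 = 0b000, vsub funct6 = 0b000010
    const word = encodeOpV(0b000010, 1, 2, 3, 0b000, 1, 0b1010111);
    try std.testing.expectEqual(@as(u32, 0x0A2180D7), word);
}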
src/arch/riscv64/Lower.zig
@@ -98,12 +98,8 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index, options: struct {
.dword => .fld,
},
.vector => switch (src_size) {
- .byte,
- .hword,
- => return lower.fail(
- "TODO: lowerMir pseudo_load_rm support {s} vector",
- .{@tagName(src_size)},
- ),
+ .byte => .vle8v,
+ .hword => .vle16v,
.word => .vle32v,
.dword => .vle64v,
},
@@ -118,10 +114,11 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index, options: struct {
});
},
.vector => {
+ assert(frame_loc.disp == 0);
try lower.emit(tag, &.{
.{ .reg = rm.r },
.{ .reg = frame_loc.base },
- .{ .reg = .x0 },
+ .{ .reg = .zero },
});
},
}
@@ -146,12 +143,8 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index, options: struct {
.dword => .fsd,
},
.vector => switch (dest_size) {
- .byte,
- .hword,
- => return lower.fail(
- "TODO: lowerMir pseudo_load_rm support {s} vector",
- .{@tagName(dest_size)},
- ),
+ .byte => .vse8v,
+ .hword => .vse16v,
.word => .vse32v,
.dword => .vse64v,
},
@@ -166,10 +159,11 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index, options: struct {
});
},
.vector => {
+ assert(frame_loc.disp == 0);
try lower.emit(tag, &.{
- .{ .reg = frame_loc.base },
.{ .reg = rm.r },
- .{ .reg = .x0 },
+ .{ .reg = frame_loc.base },
+ .{ .reg = .zero },
});
},
}
@@ -204,20 +198,20 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index, options: struct {
});
},
.vector => {
- try lower.emit(.vadcxv, &.{
+ try lower.emit(.vmvvx, &.{
.{ .reg = rr.rd },
.{ .reg = rr.rs },
- .{ .reg = .zero },
+ .{ .reg = .x0 },
});
},
.float => return lower.fail("TODO: lowerMir pseudo_mv int -> {s}", .{@tagName(dst_class)}),
},
.vector => switch (dst_class) {
.int => {
- try lower.emit(.vadcvx, &.{
+ try lower.emit(.vadcvv, &.{
.{ .reg = rr.rd },
- .{ .reg = rr.rs },
.{ .reg = .zero },
+ .{ .reg = rr.rs },
});
},
.float, .vector => return lower.fail("TODO: lowerMir pseudo_mv vector -> {s}", .{@tagName(dst_class)}),
src/arch/riscv64/Mir.zig
@@ -142,6 +142,8 @@ pub const Inst = struct {
vsetivli,
vsetvl,
vaddvv,
+ vsubvv,
+ vslidedownvx,
// A Extension Instructions
amo,