Commit add2976a9b
Changed files (18)
src
arch
aarch64
arm
riscv64
sparc64
wasm
x86_64
test
cases
compile_errors
src/Air/Liveness/Verify.zig
@@ -1,6 +1,7 @@
//! Verifies that Liveness information is valid.
gpa: std.mem.Allocator,
+zcu: *Zcu,
air: Air,
liveness: Liveness,
live: LiveMap = .{},
@@ -287,10 +288,13 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
try self.verifyInstOperands(inst, .{ extra.lhs, extra.rhs, .none });
},
- .shuffle => {
- const ty_pl = data[@intFromEnum(inst)].ty_pl;
- const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
- try self.verifyInstOperands(inst, .{ extra.a, extra.b, .none });
+ .shuffle_one => {
+ const unwrapped = self.air.unwrapShuffleOne(self.zcu, inst);
+ try self.verifyInstOperands(inst, .{ unwrapped.operand, .none, .none });
+ },
+ .shuffle_two => {
+ const unwrapped = self.air.unwrapShuffleTwo(self.zcu, inst);
+ try self.verifyInstOperands(inst, .{ unwrapped.operand_a, unwrapped.operand_b, .none });
},
.cmp_vector,
.cmp_vector_optimized,
@@ -639,4 +643,5 @@ const log = std.log.scoped(.liveness_verify);
const Air = @import("../../Air.zig");
const Liveness = @import("../Liveness.zig");
const InternPool = @import("../../InternPool.zig");
+const Zcu = @import("../../Zcu.zig");
const Verify = @This();
src/Air/Legalize.zig
@@ -521,7 +521,8 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
}
},
.splat,
- .shuffle,
+ .shuffle_one,
+ .shuffle_two,
=> {},
.select,
=> if (l.features.contains(.scalarize_select)) continue :inst try l.scalarize(inst, .select_pl_op_bin),
src/Air/Liveness.zig
@@ -15,6 +15,7 @@ const Liveness = @This();
const trace = @import("../tracy.zig").trace;
const Air = @import("../Air.zig");
const InternPool = @import("../InternPool.zig");
+const Zcu = @import("../Zcu.zig");
pub const Verify = @import("Liveness/Verify.zig");
@@ -136,12 +137,15 @@ fn LivenessPassData(comptime pass: LivenessPass) type {
};
}
-pub fn analyze(gpa: Allocator, air: Air, intern_pool: *InternPool) Allocator.Error!Liveness {
+pub fn analyze(zcu: *Zcu, air: Air, intern_pool: *InternPool) Allocator.Error!Liveness {
const tracy = trace(@src());
defer tracy.end();
+ const gpa = zcu.gpa;
+
var a: Analysis = .{
.gpa = gpa,
+ .zcu = zcu,
.air = air,
.tomb_bits = try gpa.alloc(
usize,
@@ -220,6 +224,7 @@ const OperandCategory = enum {
pub fn categorizeOperand(
l: Liveness,
air: Air,
+ zcu: *Zcu,
inst: Air.Inst.Index,
operand: Air.Inst.Index,
ip: *const InternPool,
@@ -511,10 +516,15 @@ pub fn categorizeOperand(
if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 2, .none);
return .none;
},
- .shuffle => {
- const extra = air.extraData(Air.Shuffle, air_datas[@intFromEnum(inst)].ty_pl.payload).data;
- if (extra.a == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
- if (extra.b == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
+ .shuffle_one => {
+ const unwrapped = air.unwrapShuffleOne(zcu, inst);
+ if (unwrapped.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+ return .none;
+ },
+ .shuffle_two => {
+ const unwrapped = air.unwrapShuffleTwo(zcu, inst);
+ if (unwrapped.operand_a == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+ if (unwrapped.operand_b == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
return .none;
},
.reduce, .reduce_optimized => {
@@ -639,7 +649,7 @@ pub fn categorizeOperand(
var operand_live: bool = true;
for (&[_]Air.Inst.Index{ then_body[0], else_body[0] }) |cond_inst| {
- if (l.categorizeOperand(air, cond_inst, operand, ip) == .tomb)
+ if (l.categorizeOperand(air, zcu, cond_inst, operand, ip) == .tomb)
operand_live = false;
switch (air_tags[@intFromEnum(cond_inst)]) {
@@ -824,6 +834,7 @@ pub const BigTomb = struct {
/// In-progress data; on successful analysis converted into `Liveness`.
const Analysis = struct {
gpa: Allocator,
+ zcu: *Zcu,
air: Air,
intern_pool: *InternPool,
tomb_bits: []usize,
@@ -1119,9 +1130,13 @@ fn analyzeInst(
const extra = a.air.extraData(Air.Bin, pl_op.payload).data;
return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, extra.lhs, extra.rhs });
},
- .shuffle => {
- const extra = a.air.extraData(Air.Shuffle, inst_datas[@intFromEnum(inst)].ty_pl.payload).data;
- return analyzeOperands(a, pass, data, inst, .{ extra.a, extra.b, .none });
+ .shuffle_one => {
+ const unwrapped = a.air.unwrapShuffleOne(a.zcu, inst);
+ return analyzeOperands(a, pass, data, inst, .{ unwrapped.operand, .none, .none });
+ },
+ .shuffle_two => {
+ const unwrapped = a.air.unwrapShuffleTwo(a.zcu, inst);
+ return analyzeOperands(a, pass, data, inst, .{ unwrapped.operand_a, unwrapped.operand_b, .none });
},
.reduce, .reduce_optimized => {
const reduce = inst_datas[@intFromEnum(inst)].reduce;
src/Air/types_resolved.zig
@@ -249,12 +249,22 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool {
if (!checkRef(extra.struct_operand, zcu)) return false;
},
- .shuffle => {
- const extra = air.extraData(Air.Shuffle, data.ty_pl.payload).data;
- if (!checkType(data.ty_pl.ty.toType(), zcu)) return false;
- if (!checkRef(extra.a, zcu)) return false;
- if (!checkRef(extra.b, zcu)) return false;
- if (!checkVal(Value.fromInterned(extra.mask), zcu)) return false;
+ .shuffle_one => {
+ const unwrapped = air.unwrapShuffleOne(zcu, inst);
+ if (!checkType(unwrapped.result_ty, zcu)) return false;
+ if (!checkRef(unwrapped.operand, zcu)) return false;
+ for (unwrapped.mask) |m| switch (m.unwrap()) {
+ .elem => {},
+ .value => |val| if (!checkVal(.fromInterned(val), zcu)) return false,
+ };
+ },
+
+ .shuffle_two => {
+ const unwrapped = air.unwrapShuffleTwo(zcu, inst);
+ if (!checkType(unwrapped.result_ty, zcu)) return false;
+ if (!checkRef(unwrapped.operand_a, zcu)) return false;
+ if (!checkRef(unwrapped.operand_b, zcu)) return false;
+ // No values to check because there are no comptime-known values other than undef
},
.cmpxchg_weak,
src/arch/aarch64/CodeGen.zig
@@ -778,7 +778,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.error_name => try self.airErrorName(inst),
.splat => try self.airSplat(inst),
.select => try self.airSelect(inst),
- .shuffle => try self.airShuffle(inst),
+ .shuffle_one => try self.airShuffleOne(inst),
+ .shuffle_two => try self.airShuffleTwo(inst),
.reduce => try self.airReduce(inst),
.aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst),
@@ -6049,11 +6050,14 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) InnerError!void {
return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs });
}
-fn airShuffle(self: *Self, inst: Air.Inst.Index) InnerError!void {
- const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
- const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
- const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airShuffle for {}", .{self.target.cpu.arch});
- return self.finishAir(inst, result, .{ extra.a, extra.b, .none });
+fn airShuffleOne(self: *Self, inst: Air.Inst.Index) InnerError!void { // Placeholder: `shuffle_one` has no lowering in this backend yet.
+ _ = inst; // Nothing about the instruction is inspected; discard the parameter explicitly.
+ return self.fail("TODO implement airShuffleOne for {}", .{self.target.cpu.arch}); // Always takes the `self.fail` path; the message is formatted with the target CPU architecture.
+}
+
+fn airShuffleTwo(self: *Self, inst: Air.Inst.Index) InnerError!void { // Placeholder: `shuffle_two` has no lowering in this backend yet.
+ _ = inst; // Nothing about the instruction is inspected; discard the parameter explicitly.
+ return self.fail("TODO implement airShuffleTwo for {}", .{self.target.cpu.arch}); // Always takes the `self.fail` path; the message is formatted with the target CPU architecture.
}
fn airReduce(self: *Self, inst: Air.Inst.Index) InnerError!void {
src/arch/arm/CodeGen.zig
@@ -767,7 +767,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.error_name => try self.airErrorName(inst),
.splat => try self.airSplat(inst),
.select => try self.airSelect(inst),
- .shuffle => try self.airShuffle(inst),
+ .shuffle_one => try self.airShuffleOne(inst),
+ .shuffle_two => try self.airShuffleTwo(inst),
.reduce => try self.airReduce(inst),
.aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst),
@@ -6021,10 +6022,14 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void {
return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs });
}
-fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
- const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airShuffle for arm", .{});
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+fn airShuffleOne(self: *Self, inst: Air.Inst.Index) !void { // Placeholder: `shuffle_one` has no lowering in this backend yet.
+ _ = inst; // Nothing about the instruction is inspected; discard the parameter explicitly.
+ return self.fail("TODO implement airShuffleOne for arm", .{}); // Always takes the `self.fail` path with a fixed TODO message.
+}
+
+fn airShuffleTwo(self: *Self, inst: Air.Inst.Index) !void { // Placeholder: `shuffle_two` has no lowering in this backend yet.
+ _ = inst; // Nothing about the instruction is inspected; discard the parameter explicitly.
+ return self.fail("TODO implement airShuffleTwo for arm", .{}); // Always takes the `self.fail` path with a fixed TODO message.
}
fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
src/arch/riscv64/CodeGen.zig
@@ -1586,7 +1586,8 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void {
.error_name => try func.airErrorName(inst),
.splat => try func.airSplat(inst),
.select => try func.airSelect(inst),
- .shuffle => try func.airShuffle(inst),
+ .shuffle_one => try func.airShuffleOne(inst),
+ .shuffle_two => try func.airShuffleTwo(inst),
.reduce => try func.airReduce(inst),
.aggregate_init => try func.airAggregateInit(inst),
.union_init => try func.airUnionInit(inst),
@@ -8030,10 +8031,14 @@ fn airSelect(func: *Func, inst: Air.Inst.Index) !void {
return func.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs });
}
-fn airShuffle(func: *Func, inst: Air.Inst.Index) !void {
- const ty_op = func.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const result: MCValue = if (func.liveness.isUnused(inst)) .unreach else return func.fail("TODO implement airShuffle for riscv64", .{});
- return func.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+fn airShuffleOne(func: *Func, inst: Air.Inst.Index) !void { // Placeholder: `shuffle_one` has no lowering in this backend yet.
+ _ = inst; // Nothing about the instruction is inspected; discard the parameter explicitly.
+ return func.fail("TODO implement airShuffleOne for riscv64", .{}); // Always takes the `func.fail` path with a fixed TODO message.
+}
+
+fn airShuffleTwo(func: *Func, inst: Air.Inst.Index) !void { // Placeholder: `shuffle_two` has no lowering in this backend yet.
+ _ = inst; // Nothing about the instruction is inspected; discard the parameter explicitly.
+ return func.fail("TODO implement airShuffleTwo for riscv64", .{}); // Always takes the `func.fail` path with a fixed TODO message.
}
fn airReduce(func: *Func, inst: Air.Inst.Index) !void {
src/arch/sparc64/CodeGen.zig
@@ -621,7 +621,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.error_name => try self.airErrorName(inst),
.splat => try self.airSplat(inst),
.select => @panic("TODO try self.airSelect(inst)"),
- .shuffle => @panic("TODO try self.airShuffle(inst)"),
+ .shuffle_one => @panic("TODO try self.airShuffleOne(inst)"),
+ .shuffle_two => @panic("TODO try self.airShuffleTwo(inst)"),
.reduce => @panic("TODO try self.airReduce(inst)"),
.aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst),
src/arch/wasm/CodeGen.zig
@@ -2004,7 +2004,8 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
.ret_load => cg.airRetLoad(inst),
.splat => cg.airSplat(inst),
.select => cg.airSelect(inst),
- .shuffle => cg.airShuffle(inst),
+ .shuffle_one => cg.airShuffleOne(inst),
+ .shuffle_two => cg.airShuffleTwo(inst),
.reduce => cg.airReduce(inst),
.aggregate_init => cg.airAggregateInit(inst),
.union_init => cg.airUnionInit(inst),
@@ -5177,66 +5178,100 @@ fn airSelect(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
return cg.fail("TODO: Implement wasm airSelect", .{});
}
-fn airShuffle(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
+fn airShuffleOne(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { // Lowers `shuffle_one` by assembling the result one element at a time in a stack allocation.
const pt = cg.pt;
const zcu = pt.zcu;
- const inst_ty = cg.typeOfIndex(inst);
- const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
- const extra = cg.air.extraData(Air.Shuffle, ty_pl.payload).data;
-
- const a = try cg.resolveInst(extra.a);
- const b = try cg.resolveInst(extra.b);
- const mask = Value.fromInterned(extra.mask);
- const mask_len = extra.mask_len;
- const child_ty = inst_ty.childType(zcu);
- const elem_size = child_ty.abiSize(zcu);
+ const unwrapped = cg.air.unwrapShuffleOne(zcu, inst); // Decoded form: result type, the runtime operand, and the per-element mask.
+ const result_ty = unwrapped.result_ty;
+ const mask = unwrapped.mask;
+ const operand = try cg.resolveInst(unwrapped.operand);
- // TODO: One of them could be by ref; handle in loop
- if (isByRef(cg.typeOf(extra.a), zcu, cg.target) or isByRef(inst_ty, zcu, cg.target)) {
- const result = try cg.allocStack(inst_ty);
+ const elem_ty = result_ty.childType(zcu);
+ const elem_size = elem_ty.abiSize(zcu); // Byte size of one element; used as the stride for the loads and stores below.
- for (0..mask_len) |index| {
- const value = (try mask.elemValue(pt, index)).toSignedInt(zcu);
+ // TODO: this function could have an `i8x16_shuffle` fast path like `airShuffleTwo` if we were
+ // to lower the comptime-known operands to a non-by-ref vector value.
- try cg.emitWValue(result);
+ // TODO: this is incorrect if the operand or the result is *not* by-ref, which is possible.
+ // I tried to fix it, but I couldn't make much sense of how this backend handles memory.
- const loaded = if (value >= 0)
- try cg.load(a, child_ty, @as(u32, @intCast(@as(i64, @intCast(elem_size)) * value)))
- else
- try cg.load(b, child_ty, @as(u32, @intCast(@as(i64, @intCast(elem_size)) * ~value)));
+ const dest_alloc = try cg.allocStack(result_ty); // Destination for the shuffled vector.
+ for (mask, 0..) |mask_elem, out_idx| {
+ try cg.emitWValue(dest_alloc); // NOTE(review): presumably supplies the address operand for the `.stack` store below; confirm.
+ const elem_val = switch (mask_elem.unwrap()) {
+ .elem => |idx| try cg.load(operand, elem_ty, @intCast(elem_size * idx)), // Runtime element: load element `idx` of the operand, at byte offset `elem_size * idx`.
+ .value => |val| try cg.lowerConstant(.fromInterned(val), elem_ty), // Comptime-known element: lower the interned value as a constant of the element type.
+ };
+ try cg.store(.stack, elem_val, elem_ty, @intCast(dest_alloc.offset() + elem_size * out_idx)); // Write element `out_idx`, at byte offset `elem_size * out_idx` past `dest_alloc.offset()`.
+ }
+ return cg.finishAir(inst, dest_alloc, &.{unwrapped.operand}); // The single runtime operand is the only one handed to `finishAir`.
+}
- try cg.store(.stack, loaded, child_ty, result.stack_offset.value + @as(u32, @intCast(elem_size)) * @as(u32, @intCast(index)));
- }
+fn airShuffleTwo(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { // Lowers `shuffle_two`: one `i8x16_shuffle` when the fast-path checks below pass, otherwise an element-by-element copy through a stack allocation.
+ const pt = cg.pt;
+ const zcu = pt.zcu;
- return cg.finishAir(inst, result, &.{ extra.a, extra.b });
- } else {
- var operands = [_]u32{
- @intFromEnum(std.wasm.SimdOpcode.i8x16_shuffle),
- } ++ [1]u32{undefined} ** 4;
+ const unwrapped = cg.air.unwrapShuffleTwo(zcu, inst); // Decoded form: result type, both runtime operands, and the per-element mask.
+ const result_ty = unwrapped.result_ty;
+ const mask = unwrapped.mask;
+ const operand_a = try cg.resolveInst(unwrapped.operand_a);
+ const operand_b = try cg.resolveInst(unwrapped.operand_b);
- var lanes = mem.asBytes(operands[1..]);
- for (0..@as(usize, @intCast(mask_len))) |index| {
- const mask_elem = (try mask.elemValue(pt, index)).toSignedInt(zcu);
- const base_index = if (mask_elem >= 0)
- @as(u8, @intCast(@as(i64, @intCast(elem_size)) * mask_elem))
- else
- 16 + @as(u8, @intCast(@as(i64, @intCast(elem_size)) * ~mask_elem));
+ const a_ty = cg.typeOf(unwrapped.operand_a);
+ const b_ty = cg.typeOf(unwrapped.operand_b);
+ const elem_ty = result_ty.childType(zcu);
+ const elem_size = elem_ty.abiSize(zcu); // Byte size of one element; used as the stride in the fallback loop.
- for (0..@as(usize, @intCast(elem_size))) |byte_offset| {
- lanes[index * @as(usize, @intCast(elem_size)) + byte_offset] = base_index + @as(u8, @intCast(byte_offset));
+ // WASM has `i8x16_shuffle`, which we can apply if the element type bit size is a multiple of 8
+ // and the input and output vectors have a bit size of 128 (and are hence not by-ref). Otherwise,
+ // we fall back to a naive loop lowering.
+ if (!isByRef(a_ty, zcu, cg.target) and
+ !isByRef(b_ty, zcu, cg.target) and
+ !isByRef(result_ty, zcu, cg.target) and
+ elem_ty.bitSize(zcu) % 8 == 0)
+ {
+ var lane_map: [16]u8 align(4) = undefined; // One source byte-lane index per output byte lane.
+ const lanes_per_elem = elem_ty.bitSize(zcu) / 8; // Number of byte lanes covered by each element.
+ for (mask, 0..) |mask_elem, out_idx| {
+ const out_first_lane = out_idx * lanes_per_elem;
+ const in_first_lane = switch (mask_elem.unwrap()) {
+ .a_elem => |i| i * lanes_per_elem, // Byte lanes of `operand_a` start at `i * lanes_per_elem`.
+ .b_elem => |i| i * lanes_per_elem + 16, // Byte lanes of `operand_b` are offset by 16.
+ .undef => 0, // doesn't matter
+ };
+ for (lane_map[out_first_lane..][0..lanes_per_elem], in_first_lane..) |*out, in| { // Fill this element's output lanes with consecutive source lanes.
+ out.* = @intCast(in);
}
}
-
- try cg.emitWValue(a);
- try cg.emitWValue(b);
-
+ try cg.emitWValue(operand_a);
+ try cg.emitWValue(operand_b);
const extra_index = cg.extraLen();
- try cg.mir_extra.appendSlice(cg.gpa, &operands);
+ try cg.mir_extra.appendSlice(cg.gpa, &.{ // The opcode followed by the 16 lane indices, bit-cast in four 4-byte groups.
+ @intFromEnum(std.wasm.SimdOpcode.i8x16_shuffle),
+ @bitCast(lane_map[0..4].*),
+ @bitCast(lane_map[4..8].*),
+ @bitCast(lane_map[8..12].*),
+ @bitCast(lane_map[12..].*),
+ });
try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } });
+ return cg.finishAir(inst, .stack, &.{ unwrapped.operand_a, unwrapped.operand_b });
+ }
+
+ // TODO: this is incorrect if either operand or the result is *not* by-ref, which is possible.
+ // I tried to fix it, but I couldn't make much sense of how this backend handles memory.
- return cg.finishAir(inst, .stack, &.{ extra.a, extra.b });
+ const dest_alloc = try cg.allocStack(result_ty); // Fallback path: destination for the shuffled vector.
+ for (mask, 0..) |mask_elem, out_idx| {
+ try cg.emitWValue(dest_alloc); // NOTE(review): presumably supplies the address operand for the `.stack` store below; confirm.
+ const elem_val = switch (mask_elem.unwrap()) {
+ .a_elem => |idx| try cg.load(operand_a, elem_ty, @intCast(elem_size * idx)), // Load element `idx` of `operand_a`, at byte offset `elem_size * idx`.
+ .b_elem => |idx| try cg.load(operand_b, elem_ty, @intCast(elem_size * idx)), // Load element `idx` of `operand_b`, at byte offset `elem_size * idx`.
+ .undef => try cg.emitUndefined(elem_ty), // Undefined element: emit an undefined value of the element type.
+ };
+ try cg.store(.stack, elem_val, elem_ty, @intCast(dest_alloc.offset() + elem_size * out_idx)); // Write element `out_idx`, at byte offset `elem_size * out_idx` past `dest_alloc.offset()`.
}
+ return cg.finishAir(inst, dest_alloc, &.{ unwrapped.operand_a, unwrapped.operand_b });
}
fn airReduce(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
src/arch/x86_64/CodeGen.zig
@@ -2490,7 +2490,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
switch (air_tags[@intFromEnum(inst)]) {
// zig fmt: off
.select => try cg.airSelect(inst),
- .shuffle => try cg.airShuffle(inst),
+ .shuffle_one, .shuffle_two => @panic("x86_64 TODO: shuffle_one/shuffle_two"),
// zig fmt: on
.arg => if (cg.debug_output != .none) {
src/codegen/c.zig
@@ -3374,7 +3374,8 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail,
.error_name => try airErrorName(f, inst),
.splat => try airSplat(f, inst),
.select => try airSelect(f, inst),
- .shuffle => try airShuffle(f, inst),
+ .shuffle_one => try airShuffleOne(f, inst),
+ .shuffle_two => try airShuffleTwo(f, inst),
.reduce => try airReduce(f, inst),
.aggregate_init => try airAggregateInit(f, inst),
.union_init => try airUnionInit(f, inst),
@@ -7163,34 +7164,73 @@ fn airSelect(f: *Function, inst: Air.Inst.Index) !CValue {
return local;
}
-fn airShuffle(f: *Function, inst: Air.Inst.Index) !CValue {
+fn airShuffleOne(f: *Function, inst: Air.Inst.Index) !CValue { // Renders `shuffle_one` as one C assignment per output element into a newly allocated local, which is returned.
const pt = f.object.dg.pt;
const zcu = pt.zcu;
- const ty_pl = f.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
- const extra = f.air.extraData(Air.Shuffle, ty_pl.payload).data;
-
- const mask = Value.fromInterned(extra.mask);
- const lhs = try f.resolveInst(extra.a);
- const rhs = try f.resolveInst(extra.b);
- const inst_ty = f.typeOfIndex(inst);
+ const unwrapped = f.air.unwrapShuffleOne(zcu, inst); // Decoded form: result type, the runtime operand, and the per-element mask.
+ const mask = unwrapped.mask;
+ const operand = try f.resolveInst(unwrapped.operand);
+ const inst_ty = unwrapped.result_ty;
const writer = f.object.writer();
const local = try f.allocLocal(inst, inst_ty);
- try reap(f, inst, &.{ extra.a, extra.b }); // local cannot alias operands
- for (0..extra.mask_len) |index| {
+ try reap(f, inst, &.{unwrapped.operand}); // local cannot alias operand
+ for (mask, 0..) |mask_elem, out_idx| { // Each iteration writes `local[out_idx] = <source>;`.
try f.writeCValue(writer, local, .Other);
try writer.writeByte('[');
- try f.object.dg.renderValue(writer, try pt.intValue(.usize, index), .Other);
+ try f.object.dg.renderValue(writer, try pt.intValue(.usize, out_idx), .Other);
try writer.writeAll("] = ");
+ switch (mask_elem.unwrap()) {
+ .elem => |src_idx| { // Runtime element: write `operand[src_idx]`.
+ try f.writeCValue(writer, operand, .Other);
+ try writer.writeByte('[');
+ try f.object.dg.renderValue(writer, try pt.intValue(.usize, src_idx), .Other);
+ try writer.writeByte(']');
+ },
+ .value => |val| try f.object.dg.renderValue(writer, .fromInterned(val), .Other), // Comptime-known element: render the value itself.
+ }
+ try writer.writeAll(";\n");
+ }
- const mask_elem = (try mask.elemValue(pt, index)).toSignedInt(zcu);
- const src_val = try pt.intValue(.usize, @as(u64, @intCast(mask_elem ^ mask_elem >> 63)));
+ return local;
+}
- try f.writeCValue(writer, if (mask_elem >= 0) lhs else rhs, .Other);
+fn airShuffleTwo(f: *Function, inst: Air.Inst.Index) !CValue {
+ const pt = f.object.dg.pt;
+ const zcu = pt.zcu;
+
+ const unwrapped = f.air.unwrapShuffleTwo(zcu, inst);
+ const mask = unwrapped.mask;
+ const operand_a = try f.resolveInst(unwrapped.operand_a);
+ const operand_b = try f.resolveInst(unwrapped.operand_b);
+ const inst_ty = unwrapped.result_ty;
+ const elem_ty = inst_ty.childType(zcu);
+
+ const writer = f.object.writer();
+ const local = try f.allocLocal(inst, inst_ty);
+ try reap(f, inst, &.{ unwrapped.operand_a, unwrapped.operand_b }); // local cannot alias operands
+ for (mask, 0..) |mask_elem, out_idx| {
+ try f.writeCValue(writer, local, .Other);
try writer.writeByte('[');
- try f.object.dg.renderValue(writer, src_val, .Other);
- try writer.writeAll("];\n");
+ try f.object.dg.renderValue(writer, try pt.intValue(.usize, out_idx), .Other);
+ try writer.writeAll("] = ");
+ switch (mask_elem.unwrap()) {
+ .a_elem => |src_idx| {
+ try f.writeCValue(writer, operand_a, .Other);
+ try writer.writeByte('[');
+ try f.object.dg.renderValue(writer, try pt.intValue(.usize, src_idx), .Other);
+ try writer.writeByte(']');
+ },
+ .b_elem => |src_idx| {
+ try f.writeCValue(writer, operand_b, .Other);
+ try writer.writeByte('[');
+ try f.object.dg.renderValue(writer, try pt.intValue(.usize, src_idx), .Other);
+ try writer.writeByte(']');
+ },
+ .undef => try f.object.dg.renderUndefValue(writer, elem_ty, .Other),
+ }
+ try writer.writeAll(";\n");
}
return local;
src/codegen/llvm.zig
@@ -4969,7 +4969,8 @@ pub const FuncGen = struct {
.error_name => try self.airErrorName(inst),
.splat => try self.airSplat(inst),
.select => try self.airSelect(inst),
- .shuffle => try self.airShuffle(inst),
+ .shuffle_one => try self.airShuffleOne(inst),
+ .shuffle_two => try self.airShuffleTwo(inst),
.aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst),
.prefetch => try self.airPrefetch(inst),
@@ -9666,7 +9667,7 @@ pub const FuncGen = struct {
const zcu = o.pt.zcu;
const ip = &zcu.intern_pool;
for (body_tail[1..]) |body_inst| {
- switch (fg.liveness.categorizeOperand(fg.air, body_inst, body_tail[0], ip)) {
+ switch (fg.liveness.categorizeOperand(fg.air, zcu, body_inst, body_tail[0], ip)) {
.none => continue,
.write, .noret, .complex => return false,
.tomb => return true,
@@ -10421,42 +10422,192 @@ pub const FuncGen = struct {
return self.wip.select(.normal, pred, a, b, "");
}
- fn airShuffle(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
- const o = self.ng.object;
+ fn airShuffleOne(fg: *FuncGen, inst: Air.Inst.Index) !Builder.Value { // Lowers `shuffle_one` to one or two `shufflevector` instructions (strategy described below).
+ const o = fg.ng.object;
const pt = o.pt;
const zcu = pt.zcu;
- const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
- const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
- const a = try self.resolveInst(extra.a);
- const b = try self.resolveInst(extra.b);
- const mask = Value.fromInterned(extra.mask);
- const mask_len = extra.mask_len;
- const a_len = self.typeOf(extra.a).vectorLen(zcu);
-
- // LLVM uses integers larger than the length of the first array to
- // index into the second array. This was deemed unnecessarily fragile
- // when changing code, so Zig uses negative numbers to index the
- // second vector. These start at -1 and go down, and are easiest to use
- // with the ~ operator. Here we convert between the two formats.
- const values = try self.gpa.alloc(Builder.Constant, mask_len);
- defer self.gpa.free(values);
-
- for (values, 0..) |*val, i| {
- const elem = try mask.elemValue(pt, i);
- if (elem.isUndef(zcu)) {
- val.* = try o.builder.undefConst(.i32);
- } else {
- const int = elem.toSignedInt(zcu);
- const unsigned: u32 = @intCast(if (int >= 0) int else ~int + a_len);
- val.* = try o.builder.intConst(.i32, unsigned);
+ const gpa = zcu.gpa;
+
+ const unwrapped = fg.air.unwrapShuffleOne(zcu, inst);
+
+ const operand = try fg.resolveInst(unwrapped.operand);
+ const mask = unwrapped.mask;
+ const operand_ty = fg.typeOf(unwrapped.operand);
+ const llvm_operand_ty = try o.lowerType(operand_ty);
+ const llvm_result_ty = try o.lowerType(unwrapped.result_ty);
+ const llvm_elem_ty = try o.lowerType(unwrapped.result_ty.childType(zcu));
+ const llvm_poison_elem = try o.builder.poisonConst(llvm_elem_ty);
+ const llvm_poison_mask_elem = try o.builder.poisonConst(.i32);
+ const llvm_mask_ty = try o.builder.vectorType(.normal, @intCast(mask.len), .i32);
+
+ // LLVM requires that the two input vectors have the same length, so lowering isn't trivial.
+ // And, in the words of jacobly0: "llvm sucks at shuffles so we do have to hold its hand at
+ // least a bit". So, there are two cases here.
+ //
+ // If the operand length equals the mask length, we do just the one `shufflevector`, where
+ // the second operand is a constant vector with comptime-known elements at the right indices
+ // and poison values elsewhere (in the indices which won't be selected).
+ //
+ // Otherwise, we lower to *two* `shufflevector` instructions. The first shuffles the runtime
+ // operand with an all-poison vector to extract and correctly position all of the runtime
+ // elements. We also make a constant vector with all of the comptime elements correctly
+ // positioned. Then, our second instruction selects elements from those "runtime-or-poison"
+ // and "comptime-or-poison" vectors to compute the result.
+
+ // This buffer is used primarily for the mask constants.
+ const llvm_elem_buf = try gpa.alloc(Builder.Constant, mask.len);
+ defer gpa.free(llvm_elem_buf);
+
+ // ...but first, we'll collect all of the comptime-known values.
+ var any_defined_comptime_value = false; // Set once a mask element is a comptime value that is not undef.
+ for (mask, llvm_elem_buf) |mask_elem, *llvm_elem| {
+ llvm_elem.* = switch (mask_elem.unwrap()) {
+ .elem => llvm_poison_elem,
+ .value => |val| if (!Value.fromInterned(val).isUndef(zcu)) elem: {
+ any_defined_comptime_value = true;
+ break :elem try o.lowerValue(val);
+ } else llvm_poison_elem,
+ };
+ }
+ // This vector is like the result, but runtime elements (and undef comptime elements) are
+ // replaced with poison.
+ const comptime_and_poison: Builder.Value = if (any_defined_comptime_value) vec: {
+ break :vec try o.builder.vectorValue(llvm_result_ty, llvm_elem_buf);
+ } else try o.builder.poisonValue(llvm_result_ty);
+
+ if (operand_ty.vectorLen(zcu) == mask.len) {
+ // input length equals mask/output length, so we lower to one instruction
+ for (mask, llvm_elem_buf, 0..) |mask_elem, *llvm_elem, elem_idx| {
+ llvm_elem.* = switch (mask_elem.unwrap()) {
+ .elem => |idx| try o.builder.intConst(.i32, idx),
+ .value => |val| if (!Value.fromInterned(val).isUndef(zcu)) mask_val: {
+ break :mask_val try o.builder.intConst(.i32, mask.len + elem_idx);
+ } else llvm_poison_mask_elem,
+ };
}
+ return fg.wip.shuffleVector(
+ operand,
+ comptime_and_poison,
+ try o.builder.vectorValue(llvm_mask_ty, llvm_elem_buf),
+ "",
+ );
+ }
+
+ for (mask, llvm_elem_buf) |mask_elem, *llvm_elem| {
+ llvm_elem.* = switch (mask_elem.unwrap()) {
+ .elem => |idx| try o.builder.intConst(.i32, idx),
+ .value => llvm_poison_mask_elem,
+ };
+ }
+ // This vector is like our result, but all comptime-known elements are poison.
+ const runtime_and_poison = try fg.wip.shuffleVector(
+ operand,
+ try o.builder.poisonValue(llvm_operand_ty),
+ try o.builder.vectorValue(llvm_mask_ty, llvm_elem_buf),
+ "",
+ );
+
+ if (!any_defined_comptime_value) {
+ // `comptime_and_poison` is just poison; a second shuffle would be a nop.
+ return runtime_and_poison;
+ }
+
+ // In this second shuffle, the inputs, the mask, and the output all have the same length.
+ for (mask, llvm_elem_buf, 0..) |mask_elem, *llvm_elem, elem_idx| {
+ llvm_elem.* = switch (mask_elem.unwrap()) {
+ .elem => try o.builder.intConst(.i32, elem_idx),
+ .value => |val| if (!Value.fromInterned(val).isUndef(zcu)) mask_val: {
+ break :mask_val try o.builder.intConst(.i32, mask.len + elem_idx);
+ } else llvm_poison_mask_elem,
+ };
}
+ // Merge the runtime and comptime elements with the mask we just built.
+ return fg.wip.shuffleVector(
+ runtime_and_poison,
+ comptime_and_poison,
+ try o.builder.vectorValue(llvm_mask_ty, llvm_elem_buf),
+ "",
+ );
+ }
+
+ fn airShuffleTwo(fg: *FuncGen, inst: Air.Inst.Index) !Builder.Value { // Lowers `shuffle_two`: equalize the operand lengths if needed, then emit the actual `shufflevector`.
+ const o = fg.ng.object;
+ const pt = o.pt;
+ const zcu = pt.zcu;
+ const gpa = zcu.gpa;
+
+ const unwrapped = fg.air.unwrapShuffleTwo(zcu, inst);
+
+ const mask = unwrapped.mask;
+ const llvm_elem_ty = try o.lowerType(unwrapped.result_ty.childType(zcu));
+ const llvm_mask_ty = try o.builder.vectorType(.normal, @intCast(mask.len), .i32);
+ const llvm_poison_mask_elem = try o.builder.poisonConst(.i32);
+
+ // This is kind of simpler than in `airShuffleOne`. We extend the shorter vector to the
+ // length of the longer one with an initial `shufflevector` if necessary, and then do the
+ // actual computation with a second `shufflevector`.
+
+ const operand_a_len = fg.typeOf(unwrapped.operand_a).vectorLen(zcu);
+ const operand_b_len = fg.typeOf(unwrapped.operand_b).vectorLen(zcu);
+ const operand_len: u32 = @max(operand_a_len, operand_b_len); // Common length both operands are brought to.
+
+ // If we need to extend an operand, this is the type the extension mask will have.
+ const llvm_operand_mask_ty = try o.builder.vectorType(.normal, operand_len, .i32);
+
+ const llvm_elem_buf = try gpa.alloc(Builder.Constant, @max(mask.len, operand_len)); // Shared scratch buffer: large enough for an extension mask and for the final mask.
+ defer gpa.free(llvm_elem_buf);
- const llvm_mask_value = try o.builder.vectorValue(
- try o.builder.vectorType(.normal, mask_len, .i32),
- values,
+ const operand_a: Builder.Value = extend: {
+ const raw = try fg.resolveInst(unwrapped.operand_a);
+ if (operand_a_len == operand_len) break :extend raw;
+ // Extend with a `shufflevector`, with a mask `<0, 1, ..., operand_a_len - 1, poison, ..., poison>`
+ const mask_elems = llvm_elem_buf[0..operand_len];
+ for (mask_elems[0..operand_a_len], 0..) |*llvm_elem, elem_idx| {
+ llvm_elem.* = try o.builder.intConst(.i32, elem_idx);
+ }
+ @memset(mask_elems[operand_a_len..], llvm_poison_mask_elem);
+ const llvm_this_operand_ty = try o.builder.vectorType(.normal, operand_a_len, llvm_elem_ty);
+ break :extend try fg.wip.shuffleVector(
+ raw,
+ try o.builder.poisonValue(llvm_this_operand_ty),
+ try o.builder.vectorValue(llvm_operand_mask_ty, mask_elems),
+ "",
+ );
+ };
+ const operand_b: Builder.Value = extend: {
+ const raw = try fg.resolveInst(unwrapped.operand_b);
+ if (operand_b_len == operand_len) break :extend raw;
+ // Extend with a `shufflevector`, with a mask `<0, 1, ..., operand_b_len - 1, poison, ..., poison>`
+ const mask_elems = llvm_elem_buf[0..operand_len];
+ for (mask_elems[0..operand_b_len], 0..) |*llvm_elem, elem_idx| {
+ llvm_elem.* = try o.builder.intConst(.i32, elem_idx);
+ }
+ @memset(mask_elems[operand_b_len..], llvm_poison_mask_elem);
+ const llvm_this_operand_ty = try o.builder.vectorType(.normal, operand_b_len, llvm_elem_ty);
+ break :extend try fg.wip.shuffleVector(
+ raw,
+ try o.builder.poisonValue(llvm_this_operand_ty),
+ try o.builder.vectorValue(llvm_operand_mask_ty, mask_elems),
+ "",
+ );
+ };
+
+ // `operand_a` and `operand_b` now have the same length (we've extended the shorter one with
+ // an initial shuffle if necessary). Now for the easy bit.
+
+ const mask_elems = llvm_elem_buf[0..mask.len];
+ for (mask, mask_elems) |mask_elem, *llvm_mask_elem| {
+ llvm_mask_elem.* = switch (mask_elem.unwrap()) {
+ .a_elem => |idx| try o.builder.intConst(.i32, idx),
+ .b_elem => |idx| try o.builder.intConst(.i32, operand_len + idx), // Indices into `operand_b` are offset by the common operand length.
+ .undef => llvm_poison_mask_elem,
+ };
+ }
+ return fg.wip.shuffleVector(
+ operand_a,
+ operand_b,
+ try o.builder.vectorValue(llvm_mask_ty, mask_elems),
+ "",
);
- return self.wip.shuffleVector(a, b, llvm_mask_value, "");
}
/// Reduce a vector by repeatedly applying `llvm_fn` to produce an accumulated result.
src/codegen/spirv.zig
@@ -3252,7 +3252,8 @@ const NavGen = struct {
.splat => try self.airSplat(inst),
.reduce, .reduce_optimized => try self.airReduce(inst),
- .shuffle => try self.airShuffle(inst),
+ .shuffle_one => try self.airShuffleOne(inst),
+ .shuffle_two => try self.airShuffleTwo(inst),
.ptr_add => try self.airPtrAdd(inst),
.ptr_sub => try self.airPtrSub(inst),
@@ -4047,40 +4048,57 @@ const NavGen = struct {
return result_id;
}
- fn airShuffle(self: *NavGen, inst: Air.Inst.Index) !?IdRef {
- const pt = self.pt;
+ fn airShuffleOne(ng: *NavGen, inst: Air.Inst.Index) !?IdRef {
+ const pt = ng.pt;
const zcu = pt.zcu;
- const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
- const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
- const a = try self.resolve(extra.a);
- const b = try self.resolve(extra.b);
- const mask = Value.fromInterned(extra.mask);
+ const gpa = zcu.gpa;
- // Note: number of components in the result, a, and b may differ.
- const result_ty = self.typeOfIndex(inst);
- const scalar_ty = result_ty.scalarType(zcu);
- const scalar_ty_id = try self.resolveType(scalar_ty, .direct);
+ const unwrapped = ng.air.unwrapShuffleOne(zcu, inst);
+ const mask = unwrapped.mask;
+ const result_ty = unwrapped.result_ty;
+ const elem_ty = result_ty.childType(zcu);
+ const operand = try ng.resolve(unwrapped.operand);
- const constituents = try self.gpa.alloc(IdRef, result_ty.vectorLen(zcu));
- defer self.gpa.free(constituents);
+ const constituents = try gpa.alloc(IdRef, mask.len);
+ defer gpa.free(constituents);
- for (constituents, 0..) |*id, i| {
- const elem = try mask.elemValue(pt, i);
- if (elem.isUndef(zcu)) {
- id.* = try self.spv.constUndef(scalar_ty_id);
- continue;
- }
+ for (constituents, mask) |*id, mask_elem| {
+ id.* = switch (mask_elem.unwrap()) {
+ .elem => |idx| try ng.extractVectorComponent(elem_ty, operand, idx),
+ .value => |val| try ng.constant(elem_ty, .fromInterned(val), .direct),
+ };
+ }
- const index = elem.toSignedInt(zcu);
- if (index >= 0) {
- id.* = try self.extractVectorComponent(scalar_ty, a, @intCast(index));
- } else {
- id.* = try self.extractVectorComponent(scalar_ty, b, @intCast(~index));
- }
+ const result_ty_id = try ng.resolveType(result_ty, .direct);
+ return try ng.constructComposite(result_ty_id, constituents);
+ }
+
+ fn airShuffleTwo(ng: *NavGen, inst: Air.Inst.Index) !?IdRef {
+ const pt = ng.pt;
+ const zcu = pt.zcu;
+ const gpa = zcu.gpa;
+
+ const unwrapped = ng.air.unwrapShuffleTwo(zcu, inst);
+ const mask = unwrapped.mask;
+ const result_ty = unwrapped.result_ty;
+ const elem_ty = result_ty.childType(zcu);
+ const elem_ty_id = try ng.resolveType(elem_ty, .direct);
+ const operand_a = try ng.resolve(unwrapped.operand_a);
+ const operand_b = try ng.resolve(unwrapped.operand_b);
+
+ const constituents = try gpa.alloc(IdRef, mask.len);
+ defer gpa.free(constituents);
+
+ for (constituents, mask) |*id, mask_elem| {
+ id.* = switch (mask_elem.unwrap()) {
+ .a_elem => |idx| try ng.extractVectorComponent(elem_ty, operand_a, idx),
+ .b_elem => |idx| try ng.extractVectorComponent(elem_ty, operand_b, idx),
+ .undef => try ng.spv.constUndef(elem_ty_id),
+ };
}
- const result_ty_id = try self.resolveType(result_ty, .direct);
- return try self.constructComposite(result_ty_id, constituents);
+ const result_ty_id = try ng.resolveType(result_ty, .direct);
+ return try ng.constructComposite(result_ty_id, constituents);
}
fn indicesToIds(self: *NavGen, indices: []const u32) ![]IdRef {
src/Zcu/PerThread.zig
@@ -1745,7 +1745,7 @@ pub fn linkerUpdateFunc(pt: Zcu.PerThread, func_index: InternPool.Index, air: *A
try air.legalize(pt, @import("../codegen.zig").legalizeFeatures(pt, nav_index) orelse break :legalize);
}
- var liveness = try Air.Liveness.analyze(gpa, air.*, ip);
+ var liveness = try Air.Liveness.analyze(zcu, air.*, ip);
defer liveness.deinit(gpa);
if (build_options.enable_debug_extensions and comp.verbose_air) {
@@ -1757,6 +1757,7 @@ pub fn linkerUpdateFunc(pt: Zcu.PerThread, func_index: InternPool.Index, air: *A
if (std.debug.runtime_safety) {
var verify: Air.Liveness.Verify = .{
.gpa = gpa,
+ .zcu = zcu,
.air = air.*,
.liveness = liveness,
.intern_pool = ip,
src/Air.zig
@@ -699,9 +699,21 @@ pub const Inst = struct {
/// equal to the scalar value.
/// Uses the `ty_op` field.
splat,
- /// Constructs a vector by selecting elements from `a` and `b` based on `mask`.
- /// Uses the `ty_pl` field with payload `Shuffle`.
- shuffle,
+ /// Constructs a vector by selecting elements from a single vector based on a mask. Each
+ /// mask element is either an index into the vector, or a comptime-known value (which may be `undefined`).
+ /// Uses the `ty_pl` field, where the payload index points to:
+ /// 1. mask_elem: ShuffleOneMask // for each `mask_len`, which comes from `ty_pl.ty`
+ /// 2. operand: Ref // guaranteed not to be an interned value
+ /// See `unwrapShuffleOne`.
+ shuffle_one,
+ /// Constructs a vector by selecting elements from two vectors based on a mask. Each mask
+ /// element is either an index into one of the vectors, or "undef".
+ /// Uses the `ty_pl` field, where the payload index points to:
+ /// 1. mask_elem: ShuffleTwoMask // for each `mask_len`, which comes from `ty_pl.ty`
+ /// 2. operand_a: Ref // guaranteed not to be an interned value
+ /// 3. operand_b: Ref // guaranteed not to be an interned value
+ /// See `unwrapShuffleTwo`.
+ shuffle_two,
/// Constructs a vector element-wise from `a` or `b` based on `pred`.
/// Uses the `pl_op` field with `pred` as operand, and payload `Bin`.
select,
@@ -1299,13 +1311,6 @@ pub const FieldParentPtr = struct {
field_index: u32,
};
-pub const Shuffle = struct {
- a: Inst.Ref,
- b: Inst.Ref,
- mask: InternPool.Index,
- mask_len: u32,
-};
-
pub const VectorCmp = struct {
lhs: Inst.Ref,
rhs: Inst.Ref,
@@ -1320,6 +1325,64 @@ pub const VectorCmp = struct {
}
};
+/// Used by `Inst.Tag.shuffle_one`. Represents a mask element which either indexes into a
+/// runtime-known vector, or is a comptime-known value.
+pub const ShuffleOneMask = packed struct(u32) {
+ index: u31,
+ kind: enum(u1) { elem, value },
+ pub fn elem(idx: u32) ShuffleOneMask {
+ return .{ .index = @intCast(idx), .kind = .elem };
+ }
+ pub fn value(val: Value) ShuffleOneMask {
+ return .{ .index = @intCast(@intFromEnum(val.toIntern())), .kind = .value };
+ }
+ pub const Unwrapped = union(enum) {
+ /// The resulting element is this index into the runtime vector.
+ elem: u32,
+ /// The resulting element is this comptime-known value.
+ /// It is correctly typed. It might be `undefined`.
+ value: InternPool.Index,
+ };
+ pub fn unwrap(raw: ShuffleOneMask) Unwrapped {
+ return switch (raw.kind) {
+ .elem => .{ .elem = raw.index },
+ .value => .{ .value = @enumFromInt(raw.index) },
+ };
+ }
+};
+
+/// Used by `Inst.Tag.shuffle_two`. Represents a mask element which either indexes into one
+/// of two runtime-known vectors, or is undefined.
+pub const ShuffleTwoMask = enum(u32) {
+ undef = std.math.maxInt(u32),
+ _,
+ pub fn aElem(idx: u32) ShuffleTwoMask {
+ return @enumFromInt(idx << 1);
+ }
+ pub fn bElem(idx: u32) ShuffleTwoMask {
+ return @enumFromInt(idx << 1 | 1);
+ }
+ pub const Unwrapped = union(enum) {
+ /// The resulting element is this index into the first runtime vector.
+ a_elem: u32,
+ /// The resulting element is this index into the second runtime vector.
+ b_elem: u32,
+ /// The resulting element is `undefined`.
+ undef,
+ };
+ pub fn unwrap(raw: ShuffleTwoMask) Unwrapped {
+ switch (raw) {
+ .undef => return .undef,
+ _ => {},
+ }
+ const x = @intFromEnum(raw);
+ return switch (@as(u1, @truncate(x))) {
+ 0 => .{ .a_elem = x >> 1 },
+ 1 => .{ .b_elem = x >> 1 },
+ };
+ }
+};
+
/// Trailing:
/// 0. `Inst.Ref` for every outputs_len
/// 1. `Inst.Ref` for every inputs_len
@@ -1503,7 +1566,6 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
.cmpxchg_weak,
.cmpxchg_strong,
.slice,
- .shuffle,
.aggregate_init,
.union_init,
.field_parent_ptr,
@@ -1517,6 +1579,8 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
.ptr_sub,
.try_ptr,
.try_ptr_cold,
+ .shuffle_one,
+ .shuffle_two,
=> return datas[@intFromEnum(inst)].ty_pl.ty.toType(),
.not,
@@ -1903,7 +1967,8 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool {
.reduce,
.reduce_optimized,
.splat,
- .shuffle,
+ .shuffle_one,
+ .shuffle_two,
.select,
.is_named_enum_value,
.tag_name,
@@ -2030,6 +2095,48 @@ pub fn unwrapSwitch(air: *const Air, switch_inst: Inst.Index) UnwrappedSwitch {
};
}
+pub fn unwrapShuffleOne(air: *const Air, zcu: *const Zcu, inst_index: Inst.Index) struct {
+ result_ty: Type,
+ operand: Inst.Ref,
+ mask: []const ShuffleOneMask,
+} {
+ const inst = air.instructions.get(@intFromEnum(inst_index));
+ switch (inst.tag) {
+ .shuffle_one => {},
+ else => unreachable, // assertion failure
+ }
+ const result_ty: Type = .fromInterned(inst.data.ty_pl.ty.toInterned().?);
+ const mask_len: u32 = result_ty.vectorLen(zcu);
+ const extra_idx = inst.data.ty_pl.payload;
+ return .{
+ .result_ty = result_ty,
+ .operand = @enumFromInt(air.extra.items[extra_idx + mask_len]),
+ .mask = @ptrCast(air.extra.items[extra_idx..][0..mask_len]),
+ };
+}
+
+pub fn unwrapShuffleTwo(air: *const Air, zcu: *const Zcu, inst_index: Inst.Index) struct {
+ result_ty: Type,
+ operand_a: Inst.Ref,
+ operand_b: Inst.Ref,
+ mask: []const ShuffleTwoMask,
+} {
+ const inst = air.instructions.get(@intFromEnum(inst_index));
+ switch (inst.tag) {
+ .shuffle_two => {},
+ else => unreachable, // assertion failure
+ }
+ const result_ty: Type = .fromInterned(inst.data.ty_pl.ty.toInterned().?);
+ const mask_len: u32 = result_ty.vectorLen(zcu);
+ const extra_idx = inst.data.ty_pl.payload;
+ return .{
+ .result_ty = result_ty,
+ .operand_a = @enumFromInt(air.extra.items[extra_idx + mask_len + 0]),
+ .operand_b = @enumFromInt(air.extra.items[extra_idx + mask_len + 1]),
+ .mask = @ptrCast(air.extra.items[extra_idx..][0..mask_len]),
+ };
+}
+
pub const typesFullyResolved = types_resolved.typesFullyResolved;
pub const typeFullyResolved = types_resolved.checkType;
pub const valFullyResolved = types_resolved.checkVal;
src/print_air.zig
@@ -315,7 +315,8 @@ const Writer = struct {
.wasm_memory_grow => try w.writeWasmMemoryGrow(s, inst),
.mul_add => try w.writeMulAdd(s, inst),
.select => try w.writeSelect(s, inst),
- .shuffle => try w.writeShuffle(s, inst),
+ .shuffle_one => try w.writeShuffleOne(s, inst),
+ .shuffle_two => try w.writeShuffleTwo(s, inst),
.reduce, .reduce_optimized => try w.writeReduce(s, inst),
.cmp_vector, .cmp_vector_optimized => try w.writeCmpVector(s, inst),
.vector_store_elem => try w.writeVectorStoreElem(s, inst),
@@ -499,14 +500,39 @@ const Writer = struct {
try w.writeOperand(s, inst, 2, pl_op.operand);
}
- fn writeShuffle(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
- const ty_pl = w.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
- const extra = w.air.extraData(Air.Shuffle, ty_pl.payload).data;
+ fn writeShuffleOne(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
+ const unwrapped = w.air.unwrapShuffleOne(w.pt.zcu, inst);
+ try w.writeType(s, unwrapped.result_ty);
+ try s.writeAll(", ");
+ try w.writeOperand(s, inst, 0, unwrapped.operand);
+ try s.writeAll(", [");
+ for (unwrapped.mask, 0..) |mask_elem, mask_idx| {
+ if (mask_idx > 0) try s.writeAll(", ");
+ switch (mask_elem.unwrap()) {
+ .elem => |idx| try s.print("elem {d}", .{idx}),
+ .value => |val| try s.print("val {}", .{Value.fromInterned(val).fmtValue(w.pt)}),
+ }
+ }
+ try s.writeByte(']');
+ }
- try w.writeOperand(s, inst, 0, extra.a);
+ fn writeShuffleTwo(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
+ const unwrapped = w.air.unwrapShuffleTwo(w.pt.zcu, inst);
+ try w.writeType(s, unwrapped.result_ty);
+ try s.writeAll(", ");
+ try w.writeOperand(s, inst, 0, unwrapped.operand_a);
try s.writeAll(", ");
- try w.writeOperand(s, inst, 1, extra.b);
- try s.print(", mask {d}, len {d}", .{ extra.mask, extra.mask_len });
+ try w.writeOperand(s, inst, 1, unwrapped.operand_b);
+ try s.writeAll(", [");
+ for (unwrapped.mask, 0..) |mask_elem, mask_idx| {
+ if (mask_idx > 0) try s.writeAll(", ");
+ switch (mask_elem.unwrap()) {
+ .a_elem => |idx| try s.print("a_elem {d}", .{idx}),
+ .b_elem => |idx| try s.print("b_elem {d}", .{idx}),
+ .undef => try s.writeAll("undef"),
+ }
+ }
+ try s.writeByte(']');
}
fn writeSelect(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
src/Sema.zig
@@ -24256,8 +24256,8 @@ fn analyzeShuffle(
block: *Block,
src_node: std.zig.Ast.Node.Offset,
elem_ty: Type,
- a_arg: Air.Inst.Ref,
- b_arg: Air.Inst.Ref,
+ a_uncoerced: Air.Inst.Ref,
+ b_uncoerced: Air.Inst.Ref,
mask: Value,
mask_len: u32,
) CompileError!Air.Inst.Ref {
@@ -24266,150 +24266,154 @@ fn analyzeShuffle(
const a_src = block.builtinCallArgSrc(src_node, 1);
const b_src = block.builtinCallArgSrc(src_node, 2);
const mask_src = block.builtinCallArgSrc(src_node, 3);
- var a = a_arg;
- var b = b_arg;
- const res_ty = try pt.vectorType(.{
- .len = mask_len,
- .child = elem_ty.toIntern(),
- });
-
- const maybe_a_len = switch (sema.typeOf(a).zigTypeTag(zcu)) {
- .array, .vector => sema.typeOf(a).arrayLen(zcu),
- .undefined => null,
- else => return sema.fail(block, a_src, "expected vector or array with element type '{}', found '{}'", .{
- elem_ty.fmt(pt),
- sema.typeOf(a).fmt(pt),
- }),
- };
- const maybe_b_len = switch (sema.typeOf(b).zigTypeTag(zcu)) {
- .array, .vector => sema.typeOf(b).arrayLen(zcu),
- .undefined => null,
- else => return sema.fail(block, b_src, "expected vector or array with element type '{}', found '{}'", .{
- elem_ty.fmt(pt),
- sema.typeOf(b).fmt(pt),
- }),
- };
- if (maybe_a_len == null and maybe_b_len == null) {
- return pt.undefRef(res_ty);
- }
- const a_len: u32 = @intCast(maybe_a_len orelse maybe_b_len.?);
- const b_len: u32 = @intCast(maybe_b_len orelse a_len);
-
- const a_ty = try pt.vectorType(.{
- .len = a_len,
- .child = elem_ty.toIntern(),
- });
- const b_ty = try pt.vectorType(.{
- .len = b_len,
- .child = elem_ty.toIntern(),
- });
-
- if (maybe_a_len == null) a = try pt.undefRef(a_ty) else a = try sema.coerce(block, a_ty, a, a_src);
- if (maybe_b_len == null) b = try pt.undefRef(b_ty) else b = try sema.coerce(block, b_ty, b, b_src);
-
- const operand_info = [2]std.meta.Tuple(&.{ u64, LazySrcLoc, Type }){
- .{ a_len, a_src, a_ty },
- .{ b_len, b_src, b_ty },
- };
-
- for (0..@intCast(mask_len)) |i| {
- const elem = try mask.elemValue(pt, i);
- if (elem.isUndef(zcu)) continue;
- const elem_resolved = try sema.resolveLazyValue(elem);
- const int = elem_resolved.toSignedInt(zcu);
- var unsigned: u32 = undefined;
- var chosen: u32 = undefined;
- if (int >= 0) {
- unsigned = @intCast(int);
- chosen = 0;
- } else {
- unsigned = @intCast(~int);
- chosen = 1;
+ // If the type of `a` is `@Type(.undefined)`, i.e. the argument is untyped, this is 0, because it is an error to index into this vector.
+ const a_len: u32 = switch (sema.typeOf(a_uncoerced).zigTypeTag(zcu)) {
+ .array, .vector => @intCast(sema.typeOf(a_uncoerced).arrayLen(zcu)),
+ .undefined => 0,
+ else => return sema.fail(block, a_src, "expected vector of '{}', found '{}'", .{ elem_ty.fmt(pt), sema.typeOf(a_uncoerced).fmt(pt) }),
+ };
+ const a_ty = try pt.vectorType(.{ .len = a_len, .child = elem_ty.toIntern() });
+ const a_coerced = try sema.coerce(block, a_ty, a_uncoerced, a_src);
+
+ // If the type of `b` is `@Type(.undefined)`, i.e. the argument is untyped, this is 0, because it is an error to index into this vector.
+ const b_len: u32 = switch (sema.typeOf(b_uncoerced).zigTypeTag(zcu)) {
+ .array, .vector => @intCast(sema.typeOf(b_uncoerced).arrayLen(zcu)),
+ .undefined => 0,
+ else => return sema.fail(block, b_src, "expected vector of '{}', found '{}'", .{ elem_ty.fmt(pt), sema.typeOf(b_uncoerced).fmt(pt) }),
+ };
+ const b_ty = try pt.vectorType(.{ .len = b_len, .child = elem_ty.toIntern() });
+ const b_coerced = try sema.coerce(block, b_ty, b_uncoerced, b_src);
+
+ const result_ty = try pt.vectorType(.{ .len = mask_len, .child = elem_ty.toIntern() });
+
+ // We're going to pre-emptively reserve space in `sema.air_extra`. The reason for this is we need
+ // a `u32` buffer of length `mask_len` anyway, and putting it in `sema.air_extra` avoids a copy
+ // in the runtime case. If the result is comptime-known, we'll shrink `air_extra` back.
+ const air_extra_idx: u32 = @intCast(sema.air_extra.items.len);
+ const air_mask_buf = try sema.air_extra.addManyAsSlice(sema.gpa, mask_len);
+
+ // We want to interpret that buffer in `air_extra` in a few ways. Initially, we'll consider its
+ // elements as `Air.ShuffleTwoMask`, essentially representing the raw mask values; then, we'll
+ // convert it to `InternPool.Index` or `Air.ShuffleOneMask` if there are comptime-known operands.
+ const mask_ip_index: []InternPool.Index = @ptrCast(air_mask_buf);
+ const mask_shuffle_one: []Air.ShuffleOneMask = @ptrCast(air_mask_buf);
+ const mask_shuffle_two: []Air.ShuffleTwoMask = @ptrCast(air_mask_buf);
+
+ // Initial loop: check mask elements, populate `mask_shuffle_two`.
+ var a_used = false;
+ var b_used = false;
+ for (mask_shuffle_two, 0..mask_len) |*out, mask_idx| {
+ const mask_val = try mask.elemValue(pt, mask_idx);
+ if (mask_val.isUndef(zcu)) {
+ out.* = .undef;
+ continue;
}
- if (unsigned >= operand_info[chosen][0]) {
- const msg = msg: {
- const msg = try sema.errMsg(mask_src, "mask index '{d}' has out-of-bounds selection", .{i});
+ // Safe because mask elements are `i32` and we already checked for undef:
+ const raw = (try sema.resolveLazyValue(mask_val)).toSignedInt(zcu);
+ if (raw >= 0) {
+ const idx: u32 = @intCast(raw);
+ a_used = true;
+ out.* = .aElem(idx);
+ if (idx >= a_len) return sema.failWithOwnedErrorMsg(block, msg: {
+ const msg = try sema.errMsg(mask_src, "mask element at index '{d}' selects out-of-bounds index", .{mask_idx});
errdefer msg.destroy(sema.gpa);
-
- try sema.errNote(operand_info[chosen][1], msg, "selected index '{d}' out of bounds of '{}'", .{
- unsigned,
- operand_info[chosen][2].fmt(pt),
- });
-
- if (chosen == 0) {
- try sema.errNote(b_src, msg, "selections from the second vector are specified with negative numbers", .{});
+ try sema.errNote(a_src, msg, "index '{d}' exceeds bounds of '{}' given here", .{ idx, a_ty.fmt(pt) });
+ if (idx < b_len) {
+ try sema.errNote(b_src, msg, "use '~@as(u32, {d})' to index into second vector given here", .{idx});
}
-
break :msg msg;
- };
- return sema.failWithOwnedErrorMsg(block, msg);
+ });
+ } else {
+ const idx: u32 = @intCast(~raw);
+ b_used = true;
+ out.* = .bElem(idx);
+ if (idx >= b_len) return sema.failWithOwnedErrorMsg(block, msg: {
+ const msg = try sema.errMsg(mask_src, "mask element at index '{d}' selects out-of-bounds index", .{mask_idx});
+ errdefer msg.destroy(sema.gpa);
+ try sema.errNote(b_src, msg, "index '{d}' exceeds bounds of '{}' given here", .{ idx, b_ty.fmt(pt) });
+ break :msg msg;
+ });
}
}
- if (try sema.resolveValue(a)) |a_val| {
- if (try sema.resolveValue(b)) |b_val| {
- const values = try sema.arena.alloc(InternPool.Index, mask_len);
- for (values, 0..) |*value, i| {
- const mask_elem_val = try mask.elemValue(pt, i);
- if (mask_elem_val.isUndef(zcu)) {
- value.* = try pt.intern(.{ .undef = elem_ty.toIntern() });
- continue;
- }
- const int = mask_elem_val.toSignedInt(zcu);
- const unsigned: u32 = @intCast(if (int >= 0) int else ~int);
- values[i] = (try (if (int >= 0) a_val else b_val).elemValue(pt, unsigned)).toIntern();
- }
- return Air.internedToRef((try pt.intern(.{ .aggregate = .{
- .ty = res_ty.toIntern(),
- .storage = .{ .elems = values },
- } })));
- }
- }
+ const maybe_a_val = try sema.resolveValue(a_coerced);
+ const maybe_b_val = try sema.resolveValue(b_coerced);
- // All static analysis passed, and not comptime.
- // For runtime codegen, vectors a and b must be the same length. Here we
- // recursively @shuffle the smaller vector to append undefined elements
- // to it up to the length of the longer vector. This recursion terminates
- // in 1 call because these calls to analyzeShuffle guarantee a_len == b_len.
- if (a_len != b_len) {
- const min_len = @min(a_len, b_len);
- const max_src = if (a_len > b_len) a_src else b_src;
- const max_len = try sema.usizeCast(block, max_src, @max(a_len, b_len));
+ const a_rt = a_used and maybe_a_val == null;
+ const b_rt = b_used and maybe_b_val == null;
- const expand_mask_values = try sema.arena.alloc(InternPool.Index, max_len);
- for (@intCast(0)..@intCast(min_len)) |i| {
- expand_mask_values[i] = (try pt.intValue(.comptime_int, i)).toIntern();
+ if (a_rt and b_rt) {
+ // Both operands are needed and runtime-known. We need a `[]ShuffleTwoMask`... which is
+ // exactly what we already have in `mask_shuffle_two`! So, we're basically done already.
+ // We just need to append the two operands.
+ try sema.air_extra.ensureUnusedCapacity(sema.gpa, 2);
+ sema.appendRefsAssumeCapacity(&.{ a_coerced, b_coerced });
+ return block.addInst(.{
+ .tag = .shuffle_two,
+ .data = .{ .ty_pl = .{
+ .ty = Air.internedToRef(result_ty.toIntern()),
+ .payload = air_extra_idx,
+ } },
+ });
+ } else if (a_rt) {
+ // We need to convert the `ShuffleTwoMask` values to `ShuffleOneMask`.
+ for (mask_shuffle_two, mask_shuffle_one) |in, *out| {
+ out.* = switch (in.unwrap()) {
+ .undef => .value(try pt.undefValue(elem_ty)),
+ .a_elem => |idx| .elem(idx),
+ .b_elem => |idx| .value(try maybe_b_val.?.elemValue(pt, idx)),
+ };
}
- for (@intCast(min_len)..@intCast(max_len)) |i| {
- expand_mask_values[i] = .negative_one;
+ // Now just append our single runtime operand, and we're done.
+ try sema.air_extra.ensureUnusedCapacity(sema.gpa, 1);
+ sema.appendRefsAssumeCapacity(&.{a_coerced});
+ return block.addInst(.{
+ .tag = .shuffle_one,
+ .data = .{ .ty_pl = .{
+ .ty = Air.internedToRef(result_ty.toIntern()),
+ .payload = air_extra_idx,
+ } },
+ });
+ } else if (b_rt) {
+ // We need to convert the `ShuffleTwoMask` values to `ShuffleOneMask`.
+ for (mask_shuffle_two, mask_shuffle_one) |in, *out| {
+ out.* = switch (in.unwrap()) {
+ .undef => .value(try pt.undefValue(elem_ty)),
+ .a_elem => |idx| .value(try maybe_a_val.?.elemValue(pt, idx)),
+ .b_elem => |idx| .elem(idx),
+ };
}
- const expand_mask = try pt.intern(.{ .aggregate = .{
- .ty = (try pt.vectorType(.{ .len = @intCast(max_len), .child = .comptime_int_type })).toIntern(),
- .storage = .{ .elems = expand_mask_values },
- } });
-
- if (a_len < b_len) {
- const undef = try pt.undefRef(a_ty);
- a = try sema.analyzeShuffle(block, src_node, elem_ty, a, undef, Value.fromInterned(expand_mask), @intCast(max_len));
- } else {
- const undef = try pt.undefRef(b_ty);
- b = try sema.analyzeShuffle(block, src_node, elem_ty, b, undef, Value.fromInterned(expand_mask), @intCast(max_len));
+ // Now just append our single runtime operand, and we're done.
+ try sema.air_extra.ensureUnusedCapacity(sema.gpa, 1);
+ sema.appendRefsAssumeCapacity(&.{b_coerced});
+ return block.addInst(.{
+ .tag = .shuffle_one,
+ .data = .{ .ty_pl = .{
+ .ty = Air.internedToRef(result_ty.toIntern()),
+ .payload = air_extra_idx,
+ } },
+ });
+ } else {
+ // The result will be comptime-known. We must convert the `ShuffleTwoMask` values to
+ // `InternPool.Index` values using the known operands.
+ for (mask_shuffle_two, mask_ip_index) |in, *out| {
+ const val: Value = switch (in.unwrap()) {
+ .undef => try pt.undefValue(elem_ty),
+ .a_elem => |idx| try maybe_a_val.?.elemValue(pt, idx),
+ .b_elem => |idx| try maybe_b_val.?.elemValue(pt, idx),
+ };
+ out.* = val.toIntern();
}
+ const res = try pt.intern(.{ .aggregate = .{
+ .ty = result_ty.toIntern(),
+ .storage = .{ .elems = mask_ip_index },
+ } });
+ // We have a comptime-known result, so didn't need `air_mask_buf` -- remove it from `sema.air_extra`.
+ assert(sema.air_extra.items.len == air_extra_idx + air_mask_buf.len);
+ sema.air_extra.shrinkRetainingCapacity(air_extra_idx);
+ return Air.internedToRef(res);
}
-
- return block.addInst(.{
- .tag = .shuffle,
- .data = .{ .ty_pl = .{
- .ty = Air.internedToRef(res_ty.toIntern()),
- .payload = try block.sema.addExtra(Air.Shuffle{
- .a = a,
- .b = b,
- .mask = mask.toIntern(),
- .mask_len = mask_len,
- }),
- } },
- });
}
fn zirSelect(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!Air.Inst.Ref {
test/cases/compile_errors/shuffle_with_selected_index_past_first_vector_length.zig
@@ -1,14 +1,20 @@
-export fn entry() void {
- const v: @Vector(4, u32) = [4]u32{ 10, 11, 12, 13 };
- const x: @Vector(4, u32) = [4]u32{ 14, 15, 16, 17 };
- const z = @shuffle(u32, v, x, [8]i32{ 0, 1, 2, 3, 7, 6, 5, 4 });
- _ = z;
+export fn foo() void {
+ // Here, the bad index ('7') is not less than 'b.len', so the error shouldn't have a note suggesting a negative index.
+ const a: @Vector(4, u32) = .{ 10, 11, 12, 13 };
+ const b: @Vector(4, u32) = .{ 14, 15, 16, 17 };
+ _ = @shuffle(u32, a, b, [8]i32{ 0, 1, 2, 3, 7, 6, 5, 4 });
+}
+export fn bar() void {
+ // Here, the bad index ('7') *is* less than 'b.len', so the error *should* have a note suggesting a negative index.
+ const a: @Vector(4, u32) = .{ 10, 11, 12, 13 };
+ const b: @Vector(9, u32) = .{ 14, 15, 16, 17, 18, 19, 20, 21, 22 };
+ _ = @shuffle(u32, a, b, [8]i32{ 0, 1, 2, 3, 7, 6, 5, 4 });
}
// error
-// backend=stage2
-// target=native
//
-// :4:41: error: mask index '4' has out-of-bounds selection
-// :4:29: note: selected index '7' out of bounds of '@Vector(4, u32)'
-// :4:32: note: selections from the second vector are specified with negative numbers
+// :5:35: error: mask element at index '4' selects out-of-bounds index
+// :5:23: note: index '7' exceeds bounds of '@Vector(4, u32)' given here
+// :11:35: error: mask element at index '4' selects out-of-bounds index
+// :11:23: note: index '7' exceeds bounds of '@Vector(4, u32)' given here
+// :11:26: note: use '~@as(u32, 7)' to index into second vector given here