zig/src/codegen/spirv/Assembler.zig at master

   1const std = @import("std");
   2const Allocator = std.mem.Allocator;
   3const assert = std.debug.assert;
   4
   5const CodeGen = @import("CodeGen.zig");
   6const Decl = @import("Module.zig").Decl;
   7
   8const spec = @import("spec.zig");
   9const Opcode = spec.Opcode;
  10const Word = spec.Word;
  11const Id = spec.Id;
  12const StorageClass = spec.StorageClass;
  13
  14const Assembler = @This();
  15
  16cg: *CodeGen,
  17errors: std.ArrayList(ErrorMsg) = .empty,
  18src: []const u8 = undefined,
  19/// `ass.src` tokenized.
  20tokens: std.ArrayList(Token) = .empty,
  21current_token: u32 = 0,
  22/// The instruction that is currently being parsed or has just been parsed.
  23inst: struct {
  24    opcode: Opcode = undefined,
  25    operands: std.ArrayList(Operand) = .empty,
  26    string_bytes: std.ArrayList(u8) = .empty,
  27
  28    fn result(ass: @This()) ?AsmValue.Ref {
  29        for (ass.operands.items[0..@min(ass.operands.items.len, 2)]) |op| {
  30            switch (op) {
  31                .result_id => |index| return index,
  32                else => {},
  33            }
  34        }
  35        return null;
  36    }
  37} = .{},
  38value_map: std.StringArrayHashMapUnmanaged(AsmValue) = .{},
  39inst_map: std.StringArrayHashMapUnmanaged(void) = .empty,
  40
  41const Operand = union(enum) {
  42    /// Any 'simple' 32-bit value. This could be a mask or
  43    /// enumerant, etc, depending on the operands.
  44    value: u32,
  45    /// An int- or float literal encoded as 1 word.
  46    literal32: u32,
  47    /// An int- or float literal encoded as 2 words.
  48    literal64: u64,
  49    /// A result-id which is assigned to in this instruction.
  50    /// If present, this is the first operand of the instruction.
  51    result_id: AsmValue.Ref,
  52    /// A result-id which referred to (not assigned to) in this instruction.
  53    ref_id: AsmValue.Ref,
  54    /// Offset into `inst.string_bytes`. The string ends at the next zero-terminator.
  55    string: u32,
  56};
  57
  58pub fn deinit(ass: *Assembler) void {
  59    const gpa = ass.cg.module.gpa;
  60    for (ass.errors.items) |err| gpa.free(err.msg);
  61    ass.tokens.deinit(gpa);
  62    ass.errors.deinit(gpa);
  63    ass.inst.operands.deinit(gpa);
  64    ass.inst.string_bytes.deinit(gpa);
  65    ass.value_map.deinit(gpa);
  66    ass.inst_map.deinit(gpa);
  67}
  68
  69const Error = error{ AssembleFail, OutOfMemory };
  70
  71pub fn assemble(ass: *Assembler, src: []const u8) Error!void {
  72    const gpa = ass.cg.module.gpa;
  73
  74    ass.src = src;
  75    ass.errors.clearRetainingCapacity();
  76
  77    // Populate the opcode map if it isn't already
  78    if (ass.inst_map.count() == 0) {
  79        const instructions = spec.InstructionSet.core.instructions();
  80        try ass.inst_map.ensureUnusedCapacity(gpa, @intCast(instructions.len));
  81        for (spec.InstructionSet.core.instructions(), 0..) |inst, i| {
  82            const entry = try ass.inst_map.getOrPut(gpa, inst.name);
  83            assert(entry.index == i);
  84        }
  85    }
  86
  87    try ass.tokenize();
  88    while (!ass.testToken(.eof)) {
  89        try ass.parseInstruction();
  90        try ass.processInstruction();
  91    }
  92
  93    if (ass.errors.items.len > 0) return error.AssembleFail;
  94}
  95
  96const ErrorMsg = struct {
  97    /// The offset in bytes from the start of `src` that this error occured.
  98    byte_offset: u32,
  99    msg: []const u8,
 100};
 101
 102fn addError(ass: *Assembler, offset: u32, comptime fmt: []const u8, args: anytype) !void {
 103    const gpa = ass.cg.module.gpa;
 104    const msg = try std.fmt.allocPrint(gpa, fmt, args);
 105    errdefer gpa.free(msg);
 106    try ass.errors.append(gpa, .{
 107        .byte_offset = offset,
 108        .msg = msg,
 109    });
 110}
 111
 112fn fail(ass: *Assembler, offset: u32, comptime fmt: []const u8, args: anytype) Error {
 113    try ass.addError(offset, fmt, args);
 114    return error.AssembleFail;
 115}
 116
 117fn todo(ass: *Assembler, comptime fmt: []const u8, args: anytype) Error {
 118    return ass.fail(0, "todo: " ++ fmt, args);
 119}
 120
 121const AsmValue = union(enum) {
 122    /// The results are stored in an array hash map, and can be referred
 123    /// to either by name (without the %), or by values of this index type.
 124    pub const Ref = u32;
 125
 126    /// The RHS of the current instruction.
 127    just_declared,
 128    /// A placeholder for ref-ids of which the result-id is not yet known.
 129    /// It will be further resolved at a later stage to a more concrete forward reference.
 130    unresolved_forward_reference,
 131    /// A normal result produced by a different instruction.
 132    value: Id,
 133    /// A type registered into the module's type system.
 134    ty: Id,
 135    /// A pre-supplied constant integer value.
 136    constant: u32,
 137    string: []const u8,
 138
 139    /// Retrieve the result-id of this AsmValue. Asserts that this AsmValue
 140    /// is of a variant that allows the result to be obtained (not an unresolved
 141    /// forward declaration, not in the process of being declared, etc).
 142    pub fn resultId(value: AsmValue) Id {
 143        return switch (value) {
 144            .just_declared,
 145            .unresolved_forward_reference,
 146            // TODO: Lower this value as constant?
 147            .constant,
 148            .string,
 149            => unreachable,
 150            .value => |result| result,
 151            .ty => |result| result,
 152        };
 153    }
 154};
 155
 156/// Attempt to process the instruction currently in `ass.inst`.
 157/// This for example emits the instruction in the module or function, or
 158/// records type definitions.
 159/// If this function returns `error.AssembleFail`, an explanatory
 160/// error message has already been emitted into `ass.errors`.
 161fn processInstruction(ass: *Assembler) !void {
 162    const module = ass.cg.module;
 163    const result: AsmValue = switch (ass.inst.opcode) {
 164        .OpEntryPoint => {
 165            return ass.fail(ass.currentToken().start, "cannot export entry points in assembly", .{});
 166        },
 167        .OpExecutionMode, .OpExecutionModeId => {
 168            return ass.fail(ass.currentToken().start, "cannot set execution mode in assembly", .{});
 169        },
 170        .OpCapability => {
 171            try module.addCapability(@enumFromInt(ass.inst.operands.items[0].value));
 172            return;
 173        },
 174        .OpExtension => {
 175            const ext_name_offset = ass.inst.operands.items[0].string;
 176            const ext_name = std.mem.sliceTo(ass.inst.string_bytes.items[ext_name_offset..], 0);
 177            try module.addExtension(ext_name);
 178            return;
 179        },
 180        .OpExtInstImport => blk: {
 181            const set_name_offset = ass.inst.operands.items[1].string;
 182            const set_name = std.mem.sliceTo(ass.inst.string_bytes.items[set_name_offset..], 0);
 183            const set_tag = std.meta.stringToEnum(spec.InstructionSet, set_name) orelse {
 184                return ass.fail(set_name_offset, "unknown instruction set: {s}", .{set_name});
 185            };
 186            break :blk .{ .value = try module.importInstructionSet(set_tag) };
 187        },
 188        else => switch (ass.inst.opcode.class()) {
 189            .type_declaration => try ass.processTypeInstruction(),
 190            else => (try ass.processGenericInstruction()) orelse return,
 191        },
 192    };
 193
 194    const result_ref = ass.inst.result().?;
 195    switch (ass.value_map.values()[result_ref]) {
 196        .just_declared => ass.value_map.values()[result_ref] = result,
 197        else => {
 198            // TODO: Improve source location.
 199            const name = ass.value_map.keys()[result_ref];
 200            return ass.fail(0, "duplicate definition of %{s}", .{name});
 201        },
 202    }
 203}
 204
 205fn processTypeInstruction(ass: *Assembler) !AsmValue {
 206    const cg = ass.cg;
 207    const gpa = cg.module.gpa;
 208    const module = cg.module;
 209    const operands = ass.inst.operands.items;
 210    const section = &module.sections.globals;
 211    const id = switch (ass.inst.opcode) {
 212        .OpTypeVoid => try module.voidType(),
 213        .OpTypeBool => try module.boolType(),
 214        .OpTypeInt => blk: {
 215            const signedness: std.builtin.Signedness = switch (operands[2].literal32) {
 216                0 => .unsigned,
 217                1 => .signed,
 218                else => {
 219                    // TODO: Improve source location.
 220                    return ass.fail(0, "{} is not a valid signedness (expected 0 or 1)", .{operands[2].literal32});
 221                },
 222            };
 223            const width = std.math.cast(u16, operands[1].literal32) orelse {
 224                return ass.fail(0, "int type of {} bits is too large", .{operands[1].literal32});
 225            };
 226            break :blk try module.intType(signedness, width);
 227        },
 228        .OpTypeFloat => blk: {
 229            const bits = operands[1].literal32;
 230            switch (bits) {
 231                16, 32, 64 => {},
 232                else => {
 233                    return ass.fail(0, "{} is not a valid bit count for floats (expected 16, 32 or 64)", .{bits});
 234                },
 235            }
 236            break :blk try module.floatType(@intCast(bits));
 237        },
 238        .OpTypeVector => blk: {
 239            const child_type = try ass.resolveRefId(operands[1].ref_id);
 240            break :blk try module.vectorType(operands[2].literal32, child_type);
 241        },
 242        .OpTypeArray => {
 243            // TODO: The length of an OpTypeArray is determined by a constant (which may be a spec constant),
 244            // and so some consideration must be taken when entering this in the type system.
 245            return ass.todo("process OpTypeArray", .{});
 246        },
 247        .OpTypeRuntimeArray => blk: {
 248            const element_type = try ass.resolveRefId(operands[1].ref_id);
 249            const result_id = module.allocId();
 250            try section.emit(module.gpa, .OpTypeRuntimeArray, .{
 251                .id_result = result_id,
 252                .element_type = element_type,
 253            });
 254            break :blk result_id;
 255        },
 256        .OpTypePointer => blk: {
 257            const storage_class: StorageClass = @enumFromInt(operands[1].value);
 258            const child_type = try ass.resolveRefId(operands[2].ref_id);
 259            const result_id = module.allocId();
 260            try section.emit(module.gpa, .OpTypePointer, .{
 261                .id_result = result_id,
 262                .storage_class = storage_class,
 263                .type = child_type,
 264            });
 265            break :blk result_id;
 266        },
 267        .OpTypeStruct => blk: {
 268            const scratch_top = cg.id_scratch.items.len;
 269            defer cg.id_scratch.shrinkRetainingCapacity(scratch_top);
 270            const ids = try cg.id_scratch.addManyAsSlice(gpa, operands[1..].len);
 271            for (operands[1..], ids) |op, *id| id.* = try ass.resolveRefId(op.ref_id);
 272            break :blk try module.structType(ids, null, null, .none);
 273        },
 274        .OpTypeImage => blk: {
 275            const sampled_type = try ass.resolveRefId(operands[1].ref_id);
 276            const result_id = module.allocId();
 277            try section.emit(gpa, .OpTypeImage, .{
 278                .id_result = result_id,
 279                .sampled_type = sampled_type,
 280                .dim = @enumFromInt(operands[2].value),
 281                .depth = operands[3].literal32,
 282                .arrayed = operands[4].literal32,
 283                .ms = operands[5].literal32,
 284                .sampled = operands[6].literal32,
 285                .image_format = @enumFromInt(operands[7].value),
 286            });
 287            break :blk result_id;
 288        },
 289        .OpTypeSampler => blk: {
 290            const result_id = module.allocId();
 291            try section.emit(gpa, .OpTypeSampler, .{ .id_result = result_id });
 292            break :blk result_id;
 293        },
 294        .OpTypeSampledImage => blk: {
 295            const image_type = try ass.resolveRefId(operands[1].ref_id);
 296            const result_id = module.allocId();
 297            try section.emit(gpa, .OpTypeSampledImage, .{ .id_result = result_id, .image_type = image_type });
 298            break :blk result_id;
 299        },
 300        .OpTypeFunction => blk: {
 301            const param_operands = operands[2..];
 302            const return_type = try ass.resolveRefId(operands[1].ref_id);
 303
 304            const scratch_top = cg.id_scratch.items.len;
 305            defer cg.id_scratch.shrinkRetainingCapacity(scratch_top);
 306            const param_types = try cg.id_scratch.addManyAsSlice(gpa, param_operands.len);
 307
 308            for (param_types, param_operands) |*param, operand| {
 309                param.* = try ass.resolveRefId(operand.ref_id);
 310            }
 311            const result_id = module.allocId();
 312            try section.emit(module.gpa, .OpTypeFunction, .{
 313                .id_result = result_id,
 314                .return_type = return_type,
 315                .id_ref_2 = param_types,
 316            });
 317            break :blk result_id;
 318        },
 319        else => return ass.todo("process type instruction {s}", .{@tagName(ass.inst.opcode)}),
 320    };
 321
 322    return .{ .ty = id };
 323}
 324
 325/// - No forward references are allowed in operands.
 326/// - Target section is determined from instruction type.
 327fn processGenericInstruction(ass: *Assembler) !?AsmValue {
 328    const module = ass.cg.module;
 329    const target = module.zcu.getTarget();
 330    const operands = ass.inst.operands.items;
 331    var maybe_spv_decl_index: ?Decl.Index = null;
 332    const section = switch (ass.inst.opcode.class()) {
 333        .constant_creation => &module.sections.globals,
 334        .annotation => &module.sections.annotations,
 335        .type_declaration => unreachable, // Handled elsewhere.
 336        else => switch (ass.inst.opcode) {
 337            .OpEntryPoint => unreachable,
 338            .OpExecutionMode, .OpExecutionModeId => &module.sections.execution_modes,
 339            .OpVariable => section: {
 340                const storage_class: spec.StorageClass = @enumFromInt(operands[2].value);
 341                if (storage_class == .function) break :section &ass.cg.prologue;
 342                maybe_spv_decl_index = try module.allocDecl(.global);
 343                if (!target.cpu.has(.spirv, .v1_4) and storage_class != .input and storage_class != .output) {
 344                    // Before version 1.4, the interface’s storage classes are limited to the Input and Output
 345                    break :section &module.sections.globals;
 346                }
 347                try ass.cg.module.decl_deps.append(module.gpa, maybe_spv_decl_index.?);
 348                break :section &module.sections.globals;
 349            },
 350            else => &ass.cg.body,
 351        },
 352    };
 353
 354    var maybe_result_id: ?Id = null;
 355    const first_word = section.instructions.items.len;
 356    // At this point we're not quite sure how many operands this instruction is
 357    // going to have, so insert 0 and patch up the actual opcode word later.
 358    try section.ensureUnusedCapacity(module.gpa, 1);
 359    section.writeWord(0);
 360
 361    for (operands) |operand| {
 362        switch (operand) {
 363            .value, .literal32 => |word| {
 364                try section.ensureUnusedCapacity(module.gpa, 1);
 365                section.writeWord(word);
 366            },
 367            .literal64 => |dword| {
 368                try section.ensureUnusedCapacity(module.gpa, 2);
 369                section.writeDoubleWord(dword);
 370            },
 371            .result_id => {
 372                maybe_result_id = if (maybe_spv_decl_index) |spv_decl_index|
 373                    module.declPtr(spv_decl_index).result_id
 374                else
 375                    module.allocId();
 376                try section.ensureUnusedCapacity(module.gpa, 1);
 377                section.writeOperand(Id, maybe_result_id.?);
 378            },
 379            .ref_id => |index| {
 380                const result = try ass.resolveRef(index);
 381                try section.ensureUnusedCapacity(module.gpa, 1);
 382                section.writeOperand(spec.Id, result.resultId());
 383            },
 384            .string => |offset| {
 385                const text = std.mem.sliceTo(ass.inst.string_bytes.items[offset..], 0);
 386                const size = std.math.divCeil(usize, text.len + 1, @sizeOf(Word)) catch unreachable;
 387                try section.ensureUnusedCapacity(module.gpa, size);
 388                section.writeOperand(spec.LiteralString, text);
 389            },
 390        }
 391    }
 392
 393    const actual_word_count = section.instructions.items.len - first_word;
 394    section.instructions.items[first_word] |= @as(u32, @as(u16, @intCast(actual_word_count))) << 16 | @intFromEnum(ass.inst.opcode);
 395
 396    if (maybe_result_id) |result| return .{ .value = result };
 397    return null;
 398}
 399
 400fn resolveMaybeForwardRef(ass: *Assembler, ref: AsmValue.Ref) !AsmValue {
 401    const value = ass.value_map.values()[ref];
 402    switch (value) {
 403        .just_declared => {
 404            const name = ass.value_map.keys()[ref];
 405            // TODO: Improve source location.
 406            return ass.fail(0, "ass-referential parameter %{s}", .{name});
 407        },
 408        else => return value,
 409    }
 410}
 411
 412fn resolveRef(ass: *Assembler, ref: AsmValue.Ref) !AsmValue {
 413    const value = try ass.resolveMaybeForwardRef(ref);
 414    switch (value) {
 415        .just_declared => unreachable,
 416        .unresolved_forward_reference => {
 417            const name = ass.value_map.keys()[ref];
 418            // TODO: Improve source location.
 419            return ass.fail(0, "reference to undeclared result-id %{s}", .{name});
 420        },
 421        else => return value,
 422    }
 423}
 424
 425fn resolveRefId(ass: *Assembler, ref: AsmValue.Ref) !Id {
 426    const value = try ass.resolveRef(ref);
 427    return value.resultId();
 428}
 429
 430fn parseInstruction(ass: *Assembler) !void {
 431    const gpa = ass.cg.module.gpa;
 432
 433    ass.inst.opcode = undefined;
 434    ass.inst.operands.clearRetainingCapacity();
 435    ass.inst.string_bytes.clearRetainingCapacity();
 436
 437    const lhs_result_tok = ass.currentToken();
 438    const maybe_lhs_result: ?AsmValue.Ref = if (ass.eatToken(.result_id_assign)) blk: {
 439        const name = ass.tokenText(lhs_result_tok)[1..];
 440        const entry = try ass.value_map.getOrPut(gpa, name);
 441        try ass.expectToken(.equals);
 442        if (!entry.found_existing) {
 443            entry.value_ptr.* = .just_declared;
 444        }
 445        break :blk @intCast(entry.index);
 446    } else null;
 447
 448    const opcode_tok = ass.currentToken();
 449    if (maybe_lhs_result != null) {
 450        try ass.expectToken(.opcode);
 451    } else if (!ass.eatToken(.opcode)) {
 452        return ass.fail(opcode_tok.start, "expected start of instruction, found {s}", .{opcode_tok.tag.name()});
 453    }
 454
 455    const opcode_text = ass.tokenText(opcode_tok);
 456    const index = ass.inst_map.getIndex(opcode_text) orelse {
 457        return ass.fail(opcode_tok.start, "invalid opcode '{s}'", .{opcode_text});
 458    };
 459
 460    const inst = spec.InstructionSet.core.instructions()[index];
 461    ass.inst.opcode = @enumFromInt(inst.opcode);
 462
 463    const expected_operands = inst.operands;
 464    // This is a loop because the result-id is not always the first operand.
 465    const requires_lhs_result = for (expected_operands) |op| {
 466        if (op.kind == .id_result) break true;
 467    } else false;
 468
 469    if (requires_lhs_result and maybe_lhs_result == null) {
 470        return ass.fail(opcode_tok.start, "opcode '{s}' expects result on left-hand side", .{@tagName(ass.inst.opcode)});
 471    } else if (!requires_lhs_result and maybe_lhs_result != null) {
 472        return ass.fail(
 473            lhs_result_tok.start,
 474            "opcode '{s}' does not expect a result-id on the left-hand side",
 475            .{@tagName(ass.inst.opcode)},
 476        );
 477    }
 478
 479    for (expected_operands) |operand| {
 480        if (operand.kind == .id_result) {
 481            try ass.inst.operands.append(gpa, .{ .result_id = maybe_lhs_result.? });
 482            continue;
 483        }
 484
 485        switch (operand.quantifier) {
 486            .required => if (ass.isAtInstructionBoundary()) {
 487                return ass.fail(
 488                    ass.currentToken().start,
 489                    "missing required operand", // TODO: Operand name?
 490                    .{},
 491                );
 492            } else {
 493                try ass.parseOperand(operand.kind);
 494            },
 495            .optional => if (!ass.isAtInstructionBoundary()) {
 496                try ass.parseOperand(operand.kind);
 497            },
 498            .variadic => while (!ass.isAtInstructionBoundary()) {
 499                try ass.parseOperand(operand.kind);
 500            },
 501        }
 502    }
 503}
 504
 505fn parseOperand(ass: *Assembler, kind: spec.OperandKind) Error!void {
 506    switch (kind.category()) {
 507        .bit_enum => try ass.parseBitEnum(kind),
 508        .value_enum => try ass.parseValueEnum(kind),
 509        .id => try ass.parseRefId(),
 510        else => switch (kind) {
 511            .literal_integer => try ass.parseLiteralInteger(),
 512            .literal_string => try ass.parseString(),
 513            .literal_context_dependent_number => try ass.parseContextDependentNumber(),
 514            .literal_ext_inst_integer => try ass.parseLiteralExtInstInteger(),
 515            .pair_id_ref_id_ref => try ass.parsePhiSource(),
 516            else => return ass.todo("parse operand of type {s}", .{@tagName(kind)}),
 517        },
 518    }
 519}
 520
 521/// Also handles parsing any required extra operands.
 522fn parseBitEnum(ass: *Assembler, kind: spec.OperandKind) !void {
 523    const gpa = ass.cg.module.gpa;
 524
 525    var tok = ass.currentToken();
 526    try ass.expectToken(.value);
 527
 528    var text = ass.tokenText(tok);
 529    if (std.mem.eql(u8, text, "None")) {
 530        try ass.inst.operands.append(gpa, .{ .value = 0 });
 531        return;
 532    }
 533
 534    const enumerants = kind.enumerants();
 535    var mask: u32 = 0;
 536    while (true) {
 537        const enumerant = for (enumerants) |enumerant| {
 538            if (std.mem.eql(u8, enumerant.name, text))
 539                break enumerant;
 540        } else {
 541            return ass.fail(tok.start, "'{s}' is not a valid flag for bitmask {s}", .{ text, @tagName(kind) });
 542        };
 543        mask |= enumerant.value;
 544        if (!ass.eatToken(.pipe))
 545            break;
 546
 547        tok = ass.currentToken();
 548        try ass.expectToken(.value);
 549        text = ass.tokenText(tok);
 550    }
 551
 552    try ass.inst.operands.append(gpa, .{ .value = mask });
 553
 554    // Assume values are sorted.
 555    // TODO: ensure in generator.
 556    for (enumerants) |enumerant| {
 557        if ((mask & enumerant.value) == 0)
 558            continue;
 559
 560        for (enumerant.parameters) |param_kind| {
 561            if (ass.isAtInstructionBoundary()) {
 562                return ass.fail(ass.currentToken().start, "missing required parameter for bit flag '{s}'", .{enumerant.name});
 563            }
 564
 565            try ass.parseOperand(param_kind);
 566        }
 567    }
 568}
 569
 570/// Also handles parsing any required extra operands.
 571fn parseValueEnum(ass: *Assembler, kind: spec.OperandKind) !void {
 572    const gpa = ass.cg.module.gpa;
 573
 574    const tok = ass.currentToken();
 575    if (ass.eatToken(.placeholder)) {
 576        const name = ass.tokenText(tok)[1..];
 577        const value = ass.value_map.get(name) orelse {
 578            return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});
 579        };
 580        switch (value) {
 581            .constant => |literal32| {
 582                try ass.inst.operands.append(gpa, .{ .value = literal32 });
 583            },
 584            .string => |str| {
 585                const enumerant = for (kind.enumerants()) |enumerant| {
 586                    if (std.mem.eql(u8, enumerant.name, str)) break enumerant;
 587                } else {
 588                    return ass.fail(tok.start, "'{s}' is not a valid value for enumeration {s}", .{ str, @tagName(kind) });
 589                };
 590                try ass.inst.operands.append(gpa, .{ .value = enumerant.value });
 591            },
 592            else => return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name}),
 593        }
 594        return;
 595    }
 596
 597    try ass.expectToken(.value);
 598
 599    const text = ass.tokenText(tok);
 600    const int_value = std.fmt.parseInt(u32, text, 0) catch null;
 601    const enumerant = for (kind.enumerants()) |enumerant| {
 602        if (int_value) |v| {
 603            if (v == enumerant.value) break enumerant;
 604        } else {
 605            if (std.mem.eql(u8, enumerant.name, text)) break enumerant;
 606        }
 607    } else {
 608        return ass.fail(tok.start, "'{s}' is not a valid value for enumeration {s}", .{ text, @tagName(kind) });
 609    };
 610
 611    try ass.inst.operands.append(gpa, .{ .value = enumerant.value });
 612
 613    for (enumerant.parameters) |param_kind| {
 614        if (ass.isAtInstructionBoundary()) {
 615            return ass.fail(ass.currentToken().start, "missing required parameter for enum variant '{s}'", .{enumerant.name});
 616        }
 617
 618        try ass.parseOperand(param_kind);
 619    }
 620}
 621
 622fn parseRefId(ass: *Assembler) !void {
 623    const gpa = ass.cg.module.gpa;
 624
 625    const tok = ass.currentToken();
 626    try ass.expectToken(.result_id);
 627
 628    const name = ass.tokenText(tok)[1..];
 629    const entry = try ass.value_map.getOrPut(gpa, name);
 630    if (!entry.found_existing) {
 631        entry.value_ptr.* = .unresolved_forward_reference;
 632    }
 633
 634    const index: AsmValue.Ref = @intCast(entry.index);
 635    try ass.inst.operands.append(gpa, .{ .ref_id = index });
 636}
 637
 638fn parseLiteralInteger(ass: *Assembler) !void {
 639    const gpa = ass.cg.module.gpa;
 640
 641    const tok = ass.currentToken();
 642    if (ass.eatToken(.placeholder)) {
 643        const name = ass.tokenText(tok)[1..];
 644        const value = ass.value_map.get(name) orelse {
 645            return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});
 646        };
 647        switch (value) {
 648            .constant => |literal32| {
 649                try ass.inst.operands.append(gpa, .{ .literal32 = literal32 });
 650            },
 651            else => {
 652                return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name});
 653            },
 654        }
 655        return;
 656    }
 657
 658    try ass.expectToken(.value);
 659    // According to the SPIR-V machine readable grammar, a LiteralInteger
 660    // may consist of one or more words. From the SPIR-V docs it seems like there
 661    // only one instruction where multiple words are allowed, the literals that make up the
 662    // switch cases of OpSwitch. This case is handled separately, and so we just assume
 663    // everything is a 32-bit integer in this function.
 664    const text = ass.tokenText(tok);
 665    const value = std.fmt.parseInt(u32, text, 0) catch {
 666        return ass.fail(tok.start, "'{s}' is not a valid 32-bit integer literal", .{text});
 667    };
 668    try ass.inst.operands.append(gpa, .{ .literal32 = value });
 669}
 670
 671fn parseLiteralExtInstInteger(ass: *Assembler) !void {
 672    const gpa = ass.cg.module.gpa;
 673
 674    const tok = ass.currentToken();
 675    if (ass.eatToken(.placeholder)) {
 676        const name = ass.tokenText(tok)[1..];
 677        const value = ass.value_map.get(name) orelse {
 678            return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});
 679        };
 680        switch (value) {
 681            .constant => |literal32| {
 682                try ass.inst.operands.append(gpa, .{ .literal32 = literal32 });
 683            },
 684            else => {
 685                return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name});
 686            },
 687        }
 688        return;
 689    }
 690
 691    try ass.expectToken(.value);
 692    const text = ass.tokenText(tok);
 693    const value = std.fmt.parseInt(u32, text, 0) catch {
 694        return ass.fail(tok.start, "'{s}' is not a valid 32-bit integer literal", .{text});
 695    };
 696    try ass.inst.operands.append(gpa, .{ .literal32 = value });
 697}
 698
 699fn parseString(ass: *Assembler) !void {
 700    const gpa = ass.cg.module.gpa;
 701
 702    const tok = ass.currentToken();
 703    try ass.expectToken(.string);
 704    // Note, the string might not have a closing quote. In this case,
 705    // an error is already emitted but we are trying to continue processing
 706    // anyway, so in this function we have to deal with that situation.
 707    const text = ass.tokenText(tok);
 708    assert(text.len > 0 and text[0] == '"');
 709    const literal = if (text.len != 1 and text[text.len - 1] == '"')
 710        text[1 .. text.len - 1]
 711    else
 712        text[1..];
 713
 714    const string_offset: u32 = @intCast(ass.inst.string_bytes.items.len);
 715    try ass.inst.string_bytes.ensureUnusedCapacity(gpa, literal.len + 1);
 716    ass.inst.string_bytes.appendSliceAssumeCapacity(literal);
 717    ass.inst.string_bytes.appendAssumeCapacity(0);
 718
 719    try ass.inst.operands.append(gpa, .{ .string = string_offset });
 720}
 721
 722fn parseContextDependentNumber(ass: *Assembler) !void {
 723    const module = ass.cg.module;
 724
 725    // For context dependent numbers, the actual type to parse is determined by the instruction.
 726    // Currently, this operand appears in OpConstant and OpSpecConstant, where the too-be-parsed type
 727    // is determined by the result type. That means that in this instructions we have to resolve the
 728    // operand type early and look at the result to see how we need to proceed.
 729    assert(ass.inst.opcode == .OpConstant or ass.inst.opcode == .OpSpecConstant);
 730
 731    const tok = ass.currentToken();
 732    const result = try ass.resolveRef(ass.inst.operands.items[0].ref_id);
 733    const result_id = result.resultId();
 734    // We are going to cheat a little bit: The types we are interested in, int and float,
 735    // are added to the module and cached via module.intType and module.floatType. Therefore,
 736    // we can determine the width of these types by directly checking the cache.
 737    // This only works if the Assembler and codegen both use spv.intType and spv.floatType though.
 738    // We don't expect there to be many of these types, so just look it up every time.
 739    // TODO: Count be improved to be a little bit more efficent.
 740
 741    {
 742        var it = module.cache.int_types.iterator();
 743        while (it.next()) |entry| {
 744            const id = entry.value_ptr.*;
 745            if (id != result_id) continue;
 746            const info = entry.key_ptr.*;
 747            return try ass.parseContextDependentInt(info.signedness, info.bits);
 748        }
 749    }
 750
 751    {
 752        var it = module.cache.float_types.iterator();
 753        while (it.next()) |entry| {
 754            const id = entry.value_ptr.*;
 755            if (id != result_id) continue;
 756            const info = entry.key_ptr.*;
 757            switch (info.bits) {
 758                16 => try ass.parseContextDependentFloat(16),
 759                32 => try ass.parseContextDependentFloat(32),
 760                64 => try ass.parseContextDependentFloat(64),
 761                else => return ass.fail(tok.start, "cannot parse {}-bit info literal", .{info.bits}),
 762            }
 763        }
 764    }
 765
 766    return ass.fail(tok.start, "cannot parse literal constant", .{});
 767}
 768
 769fn parseContextDependentInt(ass: *Assembler, signedness: std.builtin.Signedness, width: u32) !void {
 770    const gpa = ass.cg.module.gpa;
 771
 772    const tok = ass.currentToken();
 773    if (ass.eatToken(.placeholder)) {
 774        const name = ass.tokenText(tok)[1..];
 775        const value = ass.value_map.get(name) orelse {
 776            return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});
 777        };
 778        switch (value) {
 779            .constant => |literal32| {
 780                try ass.inst.operands.append(gpa, .{ .literal32 = literal32 });
 781            },
 782            else => {
 783                return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name});
 784            },
 785        }
 786        return;
 787    }
 788
 789    try ass.expectToken(.value);
 790
 791    if (width == 0 or width > 2 * @bitSizeOf(spec.Word)) {
 792        return ass.fail(tok.start, "cannot parse {}-bit integer literal", .{width});
 793    }
 794
 795    const text = ass.tokenText(tok);
 796    invalid: {
 797        // Just parse the integer as the next larger integer type, and check if it overflows afterwards.
 798        const int = std.fmt.parseInt(i128, text, 0) catch break :invalid;
 799        const min = switch (signedness) {
 800            .unsigned => 0,
 801            .signed => -(@as(i128, 1) << (@as(u7, @intCast(width)) - 1)),
 802        };
 803        const max = (@as(i128, 1) << (@as(u7, @intCast(width)) - @intFromBool(signedness == .signed))) - 1;
 804        if (int < min or int > max) {
 805            break :invalid;
 806        }
 807
 808        // Note, we store the sign-extended version here.
 809        if (width <= @bitSizeOf(spec.Word)) {
 810            try ass.inst.operands.append(gpa, .{ .literal32 = @truncate(@as(u128, @bitCast(int))) });
 811        } else {
 812            try ass.inst.operands.append(gpa, .{ .literal64 = @truncate(@as(u128, @bitCast(int))) });
 813        }
 814        return;
 815    }
 816
 817    return ass.fail(tok.start, "'{s}' is not a valid {s} {}-bit int literal", .{ text, @tagName(signedness), width });
 818}
 819
 820fn parseContextDependentFloat(ass: *Assembler, comptime width: u16) !void {
 821    const gpa = ass.cg.module.gpa;
 822
 823    const Float = std.meta.Float(width);
 824    const Int = std.meta.Int(.unsigned, width);
 825
 826    const tok = ass.currentToken();
 827    try ass.expectToken(.value);
 828
 829    const text = ass.tokenText(tok);
 830
 831    const value = std.fmt.parseFloat(Float, text) catch {
 832        return ass.fail(tok.start, "'{s}' is not a valid {}-bit float literal", .{ text, width });
 833    };
 834
 835    const float_bits: Int = @bitCast(value);
 836    if (width <= @bitSizeOf(spec.Word)) {
 837        try ass.inst.operands.append(gpa, .{ .literal32 = float_bits });
 838    } else {
 839        assert(width <= 2 * @bitSizeOf(spec.Word));
 840        try ass.inst.operands.append(gpa, .{ .literal64 = float_bits });
 841    }
 842}
 843
 844fn parsePhiSource(ass: *Assembler) !void {
 845    try ass.parseRefId();
 846    if (ass.isAtInstructionBoundary()) {
 847        return ass.fail(ass.currentToken().start, "missing phi block parent", .{});
 848    }
 849    try ass.parseRefId();
 850}
 851
 852/// Returns whether the `current_token` cursor
 853/// is currently pointing at the start of a new instruction.
 854fn isAtInstructionBoundary(ass: Assembler) bool {
 855    return switch (ass.currentToken().tag) {
 856        .opcode, .result_id_assign, .eof => true,
 857        else => false,
 858    };
 859}
 860
 861fn expectToken(ass: *Assembler, tag: Token.Tag) !void {
 862    if (ass.eatToken(tag))
 863        return;
 864
 865    return ass.fail(ass.currentToken().start, "unexpected {s}, expected {s}", .{
 866        ass.currentToken().tag.name(),
 867        tag.name(),
 868    });
 869}
 870
 871fn eatToken(ass: *Assembler, tag: Token.Tag) bool {
 872    if (ass.testToken(tag)) {
 873        ass.current_token += 1;
 874        return true;
 875    }
 876    return false;
 877}
 878
 879fn testToken(ass: Assembler, tag: Token.Tag) bool {
 880    return ass.currentToken().tag == tag;
 881}
 882
 883fn currentToken(ass: Assembler) Token {
 884    return ass.tokens.items[ass.current_token];
 885}
 886
 887fn tokenText(ass: Assembler, tok: Token) []const u8 {
 888    return ass.src[tok.start..tok.end];
 889}
 890
 891/// Tokenize `ass.src` and put the tokens in `ass.tokens`.
 892/// Any errors encountered are appended to `ass.errors`.
 893fn tokenize(ass: *Assembler) !void {
 894    const gpa = ass.cg.module.gpa;
 895
 896    ass.tokens.clearRetainingCapacity();
 897
 898    var offset: u32 = 0;
 899    while (true) {
 900        const tok = try ass.nextToken(offset);
 901        // Resolve result-id assignment now.
 902        // NOTE: If the previous token wasn't a result-id, just ignore it,
 903        // we will catch it while parsing.
 904        if (tok.tag == .equals and ass.tokens.items[ass.tokens.items.len - 1].tag == .result_id) {
 905            ass.tokens.items[ass.tokens.items.len - 1].tag = .result_id_assign;
 906        }
 907        try ass.tokens.append(gpa, tok);
 908        if (tok.tag == .eof)
 909            break;
 910        offset = tok.end;
 911    }
 912}
 913
 914const Token = struct {
 915    tag: Tag,
 916    start: u32,
 917    end: u32,
 918
 919    const Tag = enum {
 920        /// Returned when there was no more input to match.
 921        eof,
 922        /// %identifier
 923        result_id,
 924        /// %identifier when appearing on the LHS of an equals sign.
 925        /// While not technically a token, its relatively easy to resolve
 926        /// this during lexical analysis and relieves a bunch of headaches
 927        /// during parsing.
 928        result_id_assign,
 929        /// Mask, int, or float. These are grouped together as some
 930        /// SPIR-V enumerants look a bit like integers as well (for example
 931        /// "3D"), and so it is easier to just interpret them as the expected
 932        /// type when resolving an instruction's operands.
 933        value,
 934        /// An enumerant that looks like an opcode, that is, OpXxxx.
 935        /// Not necessarily a *valid* opcode.
 936        opcode,
 937        /// String literals.
 938        /// Note, this token is also returned for unterminated
 939        /// strings. In this case the closing " is not present.
 940        string,
 941        /// |.
 942        pipe,
 943        /// =.
 944        equals,
 945        /// $identifier. This is used (for now) for constant values, like integers.
 946        /// These can be used in place of a normal `value`.
 947        placeholder,
 948
 949        fn name(tag: Tag) []const u8 {
 950            return switch (tag) {
 951                .eof => "<end of input>",
 952                .result_id => "<result-id>",
 953                .result_id_assign => "<assigned result-id>",
 954                .value => "<value>",
 955                .opcode => "<opcode>",
 956                .string => "<string literal>",
 957                .pipe => "'|'",
 958                .equals => "'='",
 959                .placeholder => "<placeholder>",
 960            };
 961        }
 962    };
 963};
 964
 965/// Retrieve the next token from the input. This function will assert
 966/// that the token is surrounded by whitespace if required, but will not
 967/// interpret the token yet.
 968/// NOTE: This function doesn't handle .result_id_assign - this is handled in tokenize().
 969fn nextToken(ass: *Assembler, start_offset: u32) !Token {
 970    // We generally separate the input into the following types:
 971    // - Whitespace. Generally ignored, but also used as delimiter for some
 972    //   tokens.
 973    // - Values. This entails integers, floats, enums - anything that
 974    //   consists of alphanumeric characters, delimited by whitespace.
 975    // - Result-IDs. This entails anything that consists of alphanumeric characters and _, and
 976    //   starts with a %. In contrast to values, this entity can be checked for complete correctness
 977    //   relatively easily here.
 978    // - Strings. This entails quote-delimited text such as "abc".
 979    //   SPIR-V strings have only two escapes, \" and \\.
 980    // - Sigils, = and |. In this assembler, these are not required to have whitespace
 981    //   around them (they act as delimiters) as they do in SPIRV-Tools.
 982
 983    var state: enum {
 984        start,
 985        value,
 986        result_id,
 987        string,
 988        string_end,
 989        escape,
 990        placeholder,
 991    } = .start;
 992    var token_start = start_offset;
 993    var offset = start_offset;
 994    var tag = Token.Tag.eof;
 995    while (offset < ass.src.len) : (offset += 1) {
 996        const c = ass.src[offset];
 997        switch (state) {
 998            .start => switch (c) {
 999                ' ', '\t', '\r', '\n' => token_start = offset + 1,
1000                '"' => {
1001                    state = .string;
1002                    tag = .string;
1003                },
1004                '%' => {
1005                    state = .result_id;
1006                    tag = .result_id;
1007                },
1008                '|' => {
1009                    tag = .pipe;
1010                    offset += 1;
1011                    break;
1012                },
1013                '=' => {
1014                    tag = .equals;
1015                    offset += 1;
1016                    break;
1017                },
1018                '$' => {
1019                    state = .placeholder;
1020                    tag = .placeholder;
1021                },
1022                else => {
1023                    state = .value;
1024                    tag = .value;
1025                },
1026            },
1027            .value => switch (c) {
1028                '"' => {
1029                    try ass.addError(offset, "unexpected string literal", .{});
1030                    // The user most likely just forgot a delimiter here - keep
1031                    // the tag as value.
1032                    break;
1033                },
1034                ' ', '\t', '\r', '\n', '=', '|' => break,
1035                else => {},
1036            },
1037            .result_id, .placeholder => switch (c) {
1038                '_', 'a'...'z', 'A'...'Z', '0'...'9' => {},
1039                ' ', '\t', '\r', '\n', '=', '|' => break,
1040                else => {
1041                    try ass.addError(offset, "illegal character in result-id or placeholder", .{});
1042                    // Again, probably a forgotten delimiter here.
1043                    break;
1044                },
1045            },
1046            .string => switch (c) {
1047                '\\' => state = .escape,
1048                '"' => state = .string_end,
1049                else => {}, // Note, strings may include newlines
1050            },
1051            .string_end => switch (c) {
1052                ' ', '\t', '\r', '\n', '=', '|' => break,
1053                else => {
1054                    try ass.addError(offset, "unexpected character after string literal", .{});
1055                    // The token is still unmistakibly a string.
1056                    break;
1057                },
1058            },
1059            // Escapes simply skip the next char.
1060            .escape => state = .string,
1061        }
1062    }
1063
1064    var tok: Token = .{
1065        .tag = tag,
1066        .start = token_start,
1067        .end = offset,
1068    };
1069
1070    switch (state) {
1071        .string, .escape => {
1072            try ass.addError(token_start, "unterminated string", .{});
1073        },
1074        .result_id => if (offset - token_start == 1) {
1075            try ass.addError(token_start, "result-id must have at least one name character", .{});
1076        },
1077        .value => {
1078            const text = ass.tokenText(tok);
1079            const prefix = "Op";
1080            const looks_like_opcode = text.len > prefix.len and
1081                std.mem.startsWith(u8, text, prefix) and
1082                std.ascii.isUpper(text[prefix.len]);
1083            if (looks_like_opcode)
1084                tok.tag = .opcode;
1085        },
1086        else => {},
1087    }
1088
1089    return tok;
1090}