master
  1const std = @import("std");
  2const assert = std.debug.assert;
  3const Allocator = std.mem.Allocator;
  4const log = std.log.scoped(.spirv_parse);
  5
  6const spec = @import("../../codegen/spirv/spec.zig");
  7const Opcode = spec.Opcode;
  8const Word = spec.Word;
  9const InstructionSet = spec.InstructionSet;
 10const ResultId = spec.Id;
 11
 12const BinaryModule = @This();
 13
/// Number of words in the SPIR-V module header. `finalize` writes them as:
/// magic number, version, generator magic, id bound, schema.
pub const header_words = 5;

/// The module SPIR-V version.
version: spec.Version,

/// The generator magic number.
generator_magic: u32,

/// The result-id bound of this SPIR-V module.
id_bound: u32,

/// The instructions of this module. This does not contain the header.
/// `Parser.parse` sets this to a sub-slice of its input; `deinit` does not free it.
instructions: []const Word,

/// Maps OpExtInstImport result-ids to their InstructionSet.
ext_inst_map: std.AutoHashMapUnmanaged(ResultId, InstructionSet),

/// This map contains the width of arithmetic types (OpTypeInt and
/// OpTypeFloat). We need this information to correctly parse the operands
/// of Op(Spec)Constant and OpSwitch.
/// `Parser.parse` also records the width for the result-id of every instruction
/// whose result type is already in this map, so values can be looked up too.
arith_type_width: std.AutoHashMapUnmanaged(ResultId, u16),

/// The starting offsets (in words, relative to `instructions`) of some sections.
sections: struct {
    /// Offset of the first OpFunction, or `instructions.len` if there is none.
    functions: usize,
},
 40
 41pub fn deinit(self: *BinaryModule, a: Allocator) void {
 42    self.ext_inst_map.deinit(a);
 43    self.arith_type_width.deinit(a);
 44    self.* = undefined;
 45}
 46
 47pub fn iterateInstructions(self: BinaryModule) Instruction.Iterator {
 48    return Instruction.Iterator.init(self.instructions, 0);
 49}
 50
 51pub fn iterateInstructionsFrom(self: BinaryModule, offset: usize) Instruction.Iterator {
 52    return Instruction.Iterator.init(self.instructions, offset);
 53}
 54
 55pub fn instructionAt(self: BinaryModule, offset: usize) Instruction {
 56    var it = self.iterateInstructionsFrom(offset);
 57    return it.next().?;
 58}
 59
 60pub fn finalize(self: BinaryModule, a: Allocator) ![]Word {
 61    const result = try a.alloc(Word, 5 + self.instructions.len);
 62    errdefer a.free(result);
 63
 64    result[0] = spec.magic_number;
 65    result[1] = @bitCast(self.version);
 66    result[2] = @bitCast(self.generator_magic);
 67    result[3] = self.id_bound;
 68    result[4] = 0; // Schema
 69
 70    @memcpy(result[5..], self.instructions);
 71    return result;
 72}
 73
/// Errors that can be raised when the module is not correct.
/// Note that the parser is far from a full SPIR-V validator:
/// it only yields errors that critically prevent further
/// analysis of the module.
pub const ParseError = error{
    /// Raised when the module doesn't start with the SPIR-V magic.
    /// This usually means that the module isn't actually SPIR-V.
    InvalidMagic,
    /// Raised when the module has an invalid "physical" format:
    /// For example when the header is incomplete, or an instruction
    /// has an illegal format (zero length, or extending past the end of the module).
    InvalidPhysicalFormat,
    /// OpExtInstImport was used with an unknown extension string,
    /// or with the name of the core instruction set.
    InvalidExtInstImport,
    /// The module had an instruction with an invalid (unknown) opcode.
    InvalidOpcode,
    /// An instruction's operands did not conform to the SPIR-V specification
    /// for that instruction.
    InvalidOperands,
    /// A result-id was declared more than once.
    DuplicateId,
    /// Some ID did not resolve.
    InvalidId,
    /// This opcode or instruction is not supported yet.
    UnsupportedOperation,
    /// Parser ran out of memory.
    OutOfMemory,
};
102
/// A single instruction, viewed in place inside a module's word stream.
pub const Instruction = struct {
    /// The opcode for this instruction.
    opcode: Opcode,
    /// The instruction's index.
    index: usize,
    /// The instruction's word offset in the module.
    offset: usize,
    /// The raw (unparsed) operands for this instruction.
    operands: []const Word,

    /// Walks a word stream one instruction at a time. The stream is expected
    /// to be well-formed already; malformed lengths are not reported as errors.
    pub const Iterator = struct {
        words: []const Word,
        index: usize = 0,
        offset: usize = 0,

        /// Create an iterator over `words` that begins at word `start_offset`.
        pub fn init(words: []const Word, start_offset: usize) Iterator {
            return .{ .words = words, .offset = start_offset };
        }

        /// Return the instruction at the current position and advance past
        /// it, or return null once the end of the stream has been reached.
        pub fn next(self: *Iterator) ?Instruction {
            const start = self.offset;
            if (start >= self.words.len) return null;

            // The first word packs the word count (high 16 bits, including
            // this word) and the opcode (low 16 bits).
            const first_word = self.words[start];
            const word_count = first_word >> 16;
            assert(word_count != 0);

            const inst: Instruction = .{
                .opcode = @enumFromInt(first_word & 0xFFFF),
                .index = self.index,
                .offset = start,
                .operands = self.words[start..][1..word_count],
            };

            self.index += 1;
            self.offset += word_count;
            return inst;
        }
    };
};
140
141/// This parser contains information (acceleration tables)
142/// that can be persisted across different modules. This is
143/// used to initialize the module, and is also used when
144/// further analyzing it.
145pub const Parser = struct {
146    /// The allocator used to allocate this parser's structures,
147    /// and also the structures of any parsed module.
148    a: Allocator,
149
150    /// Maps (instruction set, opcode) => instruction index (for instruction set)
151    opcode_table: std.AutoHashMapUnmanaged(u32, u16) = .empty,
152
153    pub fn init(a: Allocator) !Parser {
154        var self = Parser{
155            .a = a,
156        };
157        errdefer self.deinit();
158
159        inline for (std.meta.tags(InstructionSet)) |set| {
160            const instructions = set.instructions();
161            try self.opcode_table.ensureUnusedCapacity(a, @intCast(instructions.len));
162            for (instructions, 0..) |inst, i| {
163                // Note: Some instructions may alias another. In this case we don't really care
164                // which one is first: they all (should) have the same operands anyway. Just pick
165                // the first, which is usually the core, KHR or EXT variant.
166                const entry = self.opcode_table.getOrPutAssumeCapacity(mapSetAndOpcode(set, @intCast(inst.opcode)));
167                if (!entry.found_existing) {
168                    entry.value_ptr.* = @intCast(i);
169                }
170            }
171        }
172
173        return self;
174    }
175
176    pub fn deinit(self: *Parser) void {
177        self.opcode_table.deinit(self.a);
178    }
179
180    fn mapSetAndOpcode(set: InstructionSet, opcode: u16) u32 {
181        return (@as(u32, @intFromEnum(set)) << 16) | opcode;
182    }
183
184    pub fn getInstSpec(self: Parser, opcode: Opcode) ?spec.Instruction {
185        const index = self.opcode_table.get(mapSetAndOpcode(.core, @intFromEnum(opcode))) orelse return null;
186        return InstructionSet.core.instructions()[index];
187    }
188
189    pub fn parse(self: *Parser, module: []const u32) ParseError!BinaryModule {
190        if (module[0] != spec.magic_number) {
191            return error.InvalidMagic;
192        } else if (module.len < header_words) {
193            log.err("module only has {}/{} header words", .{ module.len, header_words });
194            return error.InvalidPhysicalFormat;
195        }
196
197        var binary = BinaryModule{
198            .version = @bitCast(module[1]),
199            .generator_magic = @bitCast(module[2]),
200            .id_bound = module[3],
201            .instructions = module[header_words..],
202            .ext_inst_map = .{},
203            .arith_type_width = .{},
204            .sections = undefined,
205        };
206
207        var maybe_function_section: ?usize = null;
208
209        // First pass through the module to verify basic structure and
210        // to gather some initial stuff for more detailed analysis.
211        // We want to check some stuff that Instruction.Iterator is no good for,
212        // so just iterate manually.
213        var offset: usize = 0;
214        while (offset < binary.instructions.len) {
215            const len = binary.instructions[offset] >> 16;
216            if (len == 0 or len + offset > binary.instructions.len) {
217                log.err("invalid instruction format: len={}, end={}, module len={}", .{ len, len + offset, binary.instructions.len });
218                return error.InvalidPhysicalFormat;
219            }
220            defer offset += len;
221
222            // We can't really efficiently use non-exhaustive enums here, because we would
223            // need to manually write out all valid cases. Since we have this map anyway, just
224            // use that.
225            const opcode: Opcode = @enumFromInt(@as(u16, @truncate(binary.instructions[offset])));
226            const inst_spec = self.getInstSpec(opcode) orelse {
227                log.err("invalid opcode for core set: {}", .{@intFromEnum(opcode)});
228                return error.InvalidOpcode;
229            };
230
231            const operands = binary.instructions[offset..][1..len];
232            switch (opcode) {
233                .OpExtInstImport => {
234                    const set_name = std.mem.sliceTo(std.mem.sliceAsBytes(operands[1..]), 0);
235                    const set = std.meta.stringToEnum(InstructionSet, set_name) orelse {
236                        log.err("invalid instruction set '{s}'", .{set_name});
237                        return error.InvalidExtInstImport;
238                    };
239                    if (set == .core) return error.InvalidExtInstImport;
240                    try binary.ext_inst_map.put(self.a, @enumFromInt(operands[0]), set);
241                },
242                .OpTypeInt, .OpTypeFloat => {
243                    const entry = try binary.arith_type_width.getOrPut(self.a, @enumFromInt(operands[0]));
244                    if (entry.found_existing) return error.DuplicateId;
245                    entry.value_ptr.* = std.math.cast(u16, operands[1]) orelse return error.InvalidOperands;
246                },
247                .OpFunction => if (maybe_function_section == null) {
248                    maybe_function_section = offset;
249                },
250                else => {},
251            }
252
253            // OpSwitch takes a value as argument, not an OpType... hence we need to populate arith_type_width
254            // with ALL operations that return an int or float.
255            const spec_operands = inst_spec.operands;
256            if (spec_operands.len >= 2 and
257                spec_operands[0].kind == .id_result_type and
258                spec_operands[1].kind == .id_result)
259            {
260                if (operands.len < 2) return error.InvalidOperands;
261                if (binary.arith_type_width.get(@enumFromInt(operands[0]))) |width| {
262                    const entry = try binary.arith_type_width.getOrPut(self.a, @enumFromInt(operands[1]));
263                    if (entry.found_existing) return error.DuplicateId;
264                    entry.value_ptr.* = width;
265                }
266            }
267        }
268
269        binary.sections = .{
270            .functions = maybe_function_section orelse binary.instructions.len,
271        };
272
273        return binary;
274    }
275
276    /// Parse offsets in the instruction that contain result-ids.
277    /// Returned offsets are relative to inst.operands.
278    /// Returns in an arraylist to armortize allocations.
279    pub fn parseInstructionResultIds(
280        self: *Parser,
281        binary: BinaryModule,
282        inst: Instruction,
283        offsets: *std.array_list.Managed(u16),
284    ) !void {
285        const index = self.opcode_table.get(mapSetAndOpcode(.core, @intFromEnum(inst.opcode))).?;
286        const operands = InstructionSet.core.instructions()[index].operands;
287
288        var offset: usize = 0;
289        switch (inst.opcode) {
290            .OpSpecConstantOp => {
291                assert(operands[0].kind == .id_result_type);
292                assert(operands[1].kind == .id_result);
293                offset = try self.parseOperandsResultIds(binary, inst, operands[0..2], offset, offsets);
294
295                if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;
296                const spec_opcode = std.math.cast(u16, inst.operands[offset]) orelse return error.InvalidPhysicalFormat;
297                const spec_index = self.opcode_table.get(mapSetAndOpcode(.core, spec_opcode)) orelse
298                    return error.InvalidPhysicalFormat;
299                const spec_operands = InstructionSet.core.instructions()[spec_index].operands;
300                assert(spec_operands[0].kind == .id_result_type);
301                assert(spec_operands[1].kind == .id_result);
302                offset = try self.parseOperandsResultIds(binary, inst, spec_operands[2..], offset + 1, offsets);
303            },
304            .OpExtInst => {
305                assert(operands[0].kind == .id_result_type);
306                assert(operands[1].kind == .id_result);
307                offset = try self.parseOperandsResultIds(binary, inst, operands[0..2], offset, offsets);
308
309                if (offset + 1 >= inst.operands.len) return error.InvalidPhysicalFormat;
310                const set_id: ResultId = @enumFromInt(inst.operands[offset]);
311                try offsets.append(@intCast(offset));
312                const set = binary.ext_inst_map.get(set_id) orelse {
313                    log.err("invalid instruction set {}", .{@intFromEnum(set_id)});
314                    return error.InvalidId;
315                };
316                const ext_opcode = std.math.cast(u16, inst.operands[offset + 1]) orelse return error.InvalidPhysicalFormat;
317                const ext_index = self.opcode_table.get(mapSetAndOpcode(set, ext_opcode)) orelse
318                    return error.InvalidPhysicalFormat;
319                const ext_operands = set.instructions()[ext_index].operands;
320                offset = try self.parseOperandsResultIds(binary, inst, ext_operands, offset + 2, offsets);
321            },
322            else => {
323                offset = try self.parseOperandsResultIds(binary, inst, operands, offset, offsets);
324            },
325        }
326
327        if (offset != inst.operands.len) return error.InvalidPhysicalFormat;
328    }
329
330    fn parseOperandsResultIds(
331        self: *Parser,
332        binary: BinaryModule,
333        inst: Instruction,
334        operands: []const spec.Operand,
335        start_offset: usize,
336        offsets: *std.array_list.Managed(u16),
337    ) !usize {
338        var offset = start_offset;
339        for (operands) |operand| {
340            offset = try self.parseOperandResultIds(binary, inst, operand, offset, offsets);
341        }
342        return offset;
343    }
344
345    fn parseOperandResultIds(
346        self: *Parser,
347        binary: BinaryModule,
348        inst: Instruction,
349        operand: spec.Operand,
350        start_offset: usize,
351        offsets: *std.array_list.Managed(u16),
352    ) !usize {
353        var offset = start_offset;
354        switch (operand.quantifier) {
355            .variadic => while (offset < inst.operands.len) {
356                offset = try self.parseOperandKindResultIds(binary, inst, operand.kind, offset, offsets);
357            },
358            .optional => if (offset < inst.operands.len) {
359                offset = try self.parseOperandKindResultIds(binary, inst, operand.kind, offset, offsets);
360            },
361            .required => {
362                offset = try self.parseOperandKindResultIds(binary, inst, operand.kind, offset, offsets);
363            },
364        }
365        return offset;
366    }
367
368    fn parseOperandKindResultIds(
369        self: *Parser,
370        binary: BinaryModule,
371        inst: Instruction,
372        kind: spec.OperandKind,
373        start_offset: usize,
374        offsets: *std.array_list.Managed(u16),
375    ) !usize {
376        var offset = start_offset;
377        if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;
378
379        switch (kind.category()) {
380            .bit_enum => {
381                const mask = inst.operands[offset];
382                offset += 1;
383                for (kind.enumerants()) |enumerant| {
384                    if ((mask & enumerant.value) != 0) {
385                        for (enumerant.parameters) |param_kind| {
386                            offset = try self.parseOperandKindResultIds(binary, inst, param_kind, offset, offsets);
387                        }
388                    }
389                }
390            },
391            .value_enum => {
392                const value = inst.operands[offset];
393                offset += 1;
394                for (kind.enumerants()) |enumerant| {
395                    if (value == enumerant.value) {
396                        for (enumerant.parameters) |param_kind| {
397                            offset = try self.parseOperandKindResultIds(binary, inst, param_kind, offset, offsets);
398                        }
399                        break;
400                    }
401                }
402            },
403            .id => {
404                try offsets.append(@intCast(offset));
405                offset += 1;
406            },
407            else => switch (kind) {
408                .literal_integer, .literal_float => offset += 1,
409                .literal_string => while (true) {
410                    if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;
411                    const word = inst.operands[offset];
412                    offset += 1;
413
414                    if (word & 0xFF000000 == 0 or
415                        word & 0x00FF0000 == 0 or
416                        word & 0x0000FF00 == 0 or
417                        word & 0x000000FF == 0)
418                    {
419                        break;
420                    }
421                },
422                .literal_context_dependent_number => {
423                    assert(inst.opcode == .OpConstant or inst.opcode == .OpSpecConstantOp);
424                    const bit_width = binary.arith_type_width.get(@enumFromInt(inst.operands[0])) orelse {
425                        log.err("invalid LiteralContextDependentNumber type {}", .{inst.operands[0]});
426                        return error.InvalidId;
427                    };
428                    offset += switch (bit_width) {
429                        1...32 => 1,
430                        33...64 => 2,
431                        else => unreachable,
432                    };
433                },
434                .literal_ext_inst_integer => unreachable,
435                .literal_spec_constant_op_integer => unreachable,
436                .pair_literal_integer_id_ref => { // Switch case
437                    assert(inst.opcode == .OpSwitch);
438                    const bit_width = binary.arith_type_width.get(@enumFromInt(inst.operands[0])) orelse {
439                        log.err("invalid OpSwitch type {}", .{inst.operands[0]});
440                        return error.InvalidId;
441                    };
442                    offset += switch (bit_width) {
443                        1...32 => 1,
444                        33...64 => 2,
445                        else => unreachable,
446                    };
447                    try offsets.append(@intCast(offset));
448                    offset += 1;
449                },
450                .pair_id_ref_literal_integer => {
451                    try offsets.append(@intCast(offset));
452                    offset += 2;
453                },
454                .pair_id_ref_id_ref => {
455                    try offsets.append(@intCast(offset));
456                    try offsets.append(@intCast(offset + 1));
457                    offset += 2;
458                },
459                else => unreachable,
460            },
461        }
462        return offset;
463    }
464};