master
1const std = @import("std");
2const assert = std.debug.assert;
3const Allocator = std.mem.Allocator;
4const log = std.log.scoped(.spirv_parse);
5
6const spec = @import("../../codegen/spirv/spec.zig");
7const Opcode = spec.Opcode;
8const Word = spec.Word;
9const InstructionSet = spec.InstructionSet;
10const ResultId = spec.Id;
11
12const BinaryModule = @This();
13
14pub const header_words = 5;
15
16/// The module SPIR-V version.
17version: spec.Version,
18
19/// The generator magic number.
20generator_magic: u32,
21
22/// The result-id bound of this SPIR-V module.
23id_bound: u32,
24
25/// The instructions of this module. This does not contain the header.
26instructions: []const Word,
27
28/// Maps OpExtInstImport result-ids to their InstructionSet.
29ext_inst_map: std.AutoHashMapUnmanaged(ResultId, InstructionSet),
30
31/// This map contains the width of arithmetic types (OpTypeInt and
32/// OpTypeFloat). We need this information to correctly parse the operands
33/// of Op(Spec)Constant and OpSwitch.
34arith_type_width: std.AutoHashMapUnmanaged(ResultId, u16),
35
36/// The starting offsets of some sections
37sections: struct {
38 functions: usize,
39},
40
/// Release the two lookup tables owned by this module.
///
/// `a` is handed to the `deinit` of both `ext_inst_map` and `arith_type_width`.
/// `instructions` is not freed here. After the call the whole struct is
/// overwritten with `undefined` and must not be used again.
pub fn deinit(self: *BinaryModule, a: Allocator) void {
    // Registered first so that it runs last, once both maps are released.
    defer self.* = undefined;
    self.arith_type_width.deinit(a);
    self.ext_inst_map.deinit(a);
}
46
/// Create an iterator positioned at the first word of `self.instructions`.
pub fn iterateInstructions(self: BinaryModule) Instruction.Iterator {
    return .{ .words = self.instructions, .offset = 0 };
}
50
/// Create an iterator that starts decoding at word `offset` of `self.instructions`.
/// The iterator's instruction index still starts counting from zero.
pub fn iterateInstructionsFrom(self: BinaryModule, offset: usize) Instruction.Iterator {
    const words = self.instructions;
    return .init(words, offset);
}
54
/// Decode the single instruction that begins at word `offset`.
///
/// The iterator result is force-unwrapped, so `offset` must be smaller than
/// `self.instructions.len`.
pub fn instructionAt(self: BinaryModule, offset: usize) Instruction {
    var cursor = Instruction.Iterator.init(self.instructions, offset);
    const inst = cursor.next();
    return inst.?;
}
59
/// Serialize this module back into a flat word array: five header words
/// followed by a copy of `self.instructions`.
///
/// The result is allocated with `a` and returned to the caller.
/// Fails only if that allocation fails.
pub fn finalize(self: BinaryModule, a: Allocator) ![]Word {
    // Number of header words written below (magic, version, generator, bound, schema).
    const header_len = 5;

    const words = try a.alloc(Word, header_len + self.instructions.len);
    errdefer a.free(words);

    const header: [header_len]Word = .{
        spec.magic_number,
        @as(Word, @bitCast(self.version)),
        @as(Word, @bitCast(self.generator_magic)),
        self.id_bound,
        0, // Schema
    };
    @memcpy(words[0..header_len], &header);
    @memcpy(words[header_len..], self.instructions);

    return words;
}
73
/// Everything the parser can report about a module it cannot work with.
/// The parser is far from a full SPIR-V validator: it only raises
/// errors for problems that would make further analysis of the
/// module impossible.
pub const ParseError = error{
    /// The first word is not the SPIR-V magic number, which usually
    /// means the input is not SPIR-V at all.
    InvalidMagic,
    /// The "physical" layout of the module is broken, for example
    /// the header is incomplete or an instruction is laid out
    /// illegally.
    InvalidPhysicalFormat,
    /// An OpExtInstImport named an extension string that is not known.
    InvalidExtInstImport,
    /// An instruction carried an opcode that is not known.
    InvalidOpcode,
    /// The operands of an instruction do not match what the SPIR-V
    /// specification prescribes for it.
    InvalidOperands,
    /// The same result-id was declared more than once.
    DuplicateId,
    /// An ID could not be resolved.
    InvalidId,
    /// The opcode or instruction is not supported yet.
    UnsupportedOperation,
    /// The parser could not allocate memory.
    OutOfMemory,
};
102
/// A decoded view of one instruction inside a module's word stream.
pub const Instruction = struct {
    /// The opcode for this instruction.
    opcode: Opcode,
    /// How many instructions the iterator had yielded before this one.
    index: usize,
    /// Word offset of the instruction's first word within the iterated slice.
    offset: usize,
    /// The raw (unparsed) operand words, i.e. everything after the first word.
    operands: []const Word,

    /// Walks a word slice one instruction at a time.
    pub const Iterator = struct {
        words: []const Word,
        index: usize = 0,
        offset: usize = 0,

        /// Start iterating `words` at word `start_offset`; `index` starts at zero.
        pub fn init(words: []const Word, start_offset: usize) Iterator {
            return .{ .words = words, .offset = start_offset };
        }

        /// Decode the instruction at the current offset and step past it.
        /// Returns null once the offset has reached the end of `words`.
        ///
        /// The upper 16 bits of the first word are the instruction's length in
        /// words, the lower 16 bits its opcode. A zero length is only caught
        /// by an assertion.
        pub fn next(self: *Iterator) ?Instruction {
            const start = self.offset;
            if (start >= self.words.len) return null;

            const word_count = self.words[start] >> 16;
            assert(word_count != 0);
            assert(start < self.words.len);

            const inst: Instruction = .{
                .opcode = @enumFromInt(self.words[start] & 0xFFFF),
                .index = self.index,
                .offset = start,
                .operands = self.words[start..][1..word_count],
            };

            self.index += 1;
            self.offset += word_count;
            return inst;
        }
    };
};
140
141/// This parser contains information (acceleration tables)
142/// that can be persisted across different modules. This is
143/// used to initialize the module, and is also used when
144/// further analyzing it.
145pub const Parser = struct {
146 /// The allocator used to allocate this parser's structures,
147 /// and also the structures of any parsed module.
148 a: Allocator,
149
150 /// Maps (instruction set, opcode) => instruction index (for instruction set)
151 opcode_table: std.AutoHashMapUnmanaged(u32, u16) = .empty,
152
153 pub fn init(a: Allocator) !Parser {
154 var self = Parser{
155 .a = a,
156 };
157 errdefer self.deinit();
158
159 inline for (std.meta.tags(InstructionSet)) |set| {
160 const instructions = set.instructions();
161 try self.opcode_table.ensureUnusedCapacity(a, @intCast(instructions.len));
162 for (instructions, 0..) |inst, i| {
163 // Note: Some instructions may alias another. In this case we don't really care
164 // which one is first: they all (should) have the same operands anyway. Just pick
165 // the first, which is usually the core, KHR or EXT variant.
166 const entry = self.opcode_table.getOrPutAssumeCapacity(mapSetAndOpcode(set, @intCast(inst.opcode)));
167 if (!entry.found_existing) {
168 entry.value_ptr.* = @intCast(i);
169 }
170 }
171 }
172
173 return self;
174 }
175
176 pub fn deinit(self: *Parser) void {
177 self.opcode_table.deinit(self.a);
178 }
179
180 fn mapSetAndOpcode(set: InstructionSet, opcode: u16) u32 {
181 return (@as(u32, @intFromEnum(set)) << 16) | opcode;
182 }
183
184 pub fn getInstSpec(self: Parser, opcode: Opcode) ?spec.Instruction {
185 const index = self.opcode_table.get(mapSetAndOpcode(.core, @intFromEnum(opcode))) orelse return null;
186 return InstructionSet.core.instructions()[index];
187 }
188
189 pub fn parse(self: *Parser, module: []const u32) ParseError!BinaryModule {
190 if (module[0] != spec.magic_number) {
191 return error.InvalidMagic;
192 } else if (module.len < header_words) {
193 log.err("module only has {}/{} header words", .{ module.len, header_words });
194 return error.InvalidPhysicalFormat;
195 }
196
197 var binary = BinaryModule{
198 .version = @bitCast(module[1]),
199 .generator_magic = @bitCast(module[2]),
200 .id_bound = module[3],
201 .instructions = module[header_words..],
202 .ext_inst_map = .{},
203 .arith_type_width = .{},
204 .sections = undefined,
205 };
206
207 var maybe_function_section: ?usize = null;
208
209 // First pass through the module to verify basic structure and
210 // to gather some initial stuff for more detailed analysis.
211 // We want to check some stuff that Instruction.Iterator is no good for,
212 // so just iterate manually.
213 var offset: usize = 0;
214 while (offset < binary.instructions.len) {
215 const len = binary.instructions[offset] >> 16;
216 if (len == 0 or len + offset > binary.instructions.len) {
217 log.err("invalid instruction format: len={}, end={}, module len={}", .{ len, len + offset, binary.instructions.len });
218 return error.InvalidPhysicalFormat;
219 }
220 defer offset += len;
221
222 // We can't really efficiently use non-exhaustive enums here, because we would
223 // need to manually write out all valid cases. Since we have this map anyway, just
224 // use that.
225 const opcode: Opcode = @enumFromInt(@as(u16, @truncate(binary.instructions[offset])));
226 const inst_spec = self.getInstSpec(opcode) orelse {
227 log.err("invalid opcode for core set: {}", .{@intFromEnum(opcode)});
228 return error.InvalidOpcode;
229 };
230
231 const operands = binary.instructions[offset..][1..len];
232 switch (opcode) {
233 .OpExtInstImport => {
234 const set_name = std.mem.sliceTo(std.mem.sliceAsBytes(operands[1..]), 0);
235 const set = std.meta.stringToEnum(InstructionSet, set_name) orelse {
236 log.err("invalid instruction set '{s}'", .{set_name});
237 return error.InvalidExtInstImport;
238 };
239 if (set == .core) return error.InvalidExtInstImport;
240 try binary.ext_inst_map.put(self.a, @enumFromInt(operands[0]), set);
241 },
242 .OpTypeInt, .OpTypeFloat => {
243 const entry = try binary.arith_type_width.getOrPut(self.a, @enumFromInt(operands[0]));
244 if (entry.found_existing) return error.DuplicateId;
245 entry.value_ptr.* = std.math.cast(u16, operands[1]) orelse return error.InvalidOperands;
246 },
247 .OpFunction => if (maybe_function_section == null) {
248 maybe_function_section = offset;
249 },
250 else => {},
251 }
252
253 // OpSwitch takes a value as argument, not an OpType... hence we need to populate arith_type_width
254 // with ALL operations that return an int or float.
255 const spec_operands = inst_spec.operands;
256 if (spec_operands.len >= 2 and
257 spec_operands[0].kind == .id_result_type and
258 spec_operands[1].kind == .id_result)
259 {
260 if (operands.len < 2) return error.InvalidOperands;
261 if (binary.arith_type_width.get(@enumFromInt(operands[0]))) |width| {
262 const entry = try binary.arith_type_width.getOrPut(self.a, @enumFromInt(operands[1]));
263 if (entry.found_existing) return error.DuplicateId;
264 entry.value_ptr.* = width;
265 }
266 }
267 }
268
269 binary.sections = .{
270 .functions = maybe_function_section orelse binary.instructions.len,
271 };
272
273 return binary;
274 }
275
276 /// Parse offsets in the instruction that contain result-ids.
277 /// Returned offsets are relative to inst.operands.
278 /// Returns in an arraylist to armortize allocations.
279 pub fn parseInstructionResultIds(
280 self: *Parser,
281 binary: BinaryModule,
282 inst: Instruction,
283 offsets: *std.array_list.Managed(u16),
284 ) !void {
285 const index = self.opcode_table.get(mapSetAndOpcode(.core, @intFromEnum(inst.opcode))).?;
286 const operands = InstructionSet.core.instructions()[index].operands;
287
288 var offset: usize = 0;
289 switch (inst.opcode) {
290 .OpSpecConstantOp => {
291 assert(operands[0].kind == .id_result_type);
292 assert(operands[1].kind == .id_result);
293 offset = try self.parseOperandsResultIds(binary, inst, operands[0..2], offset, offsets);
294
295 if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;
296 const spec_opcode = std.math.cast(u16, inst.operands[offset]) orelse return error.InvalidPhysicalFormat;
297 const spec_index = self.opcode_table.get(mapSetAndOpcode(.core, spec_opcode)) orelse
298 return error.InvalidPhysicalFormat;
299 const spec_operands = InstructionSet.core.instructions()[spec_index].operands;
300 assert(spec_operands[0].kind == .id_result_type);
301 assert(spec_operands[1].kind == .id_result);
302 offset = try self.parseOperandsResultIds(binary, inst, spec_operands[2..], offset + 1, offsets);
303 },
304 .OpExtInst => {
305 assert(operands[0].kind == .id_result_type);
306 assert(operands[1].kind == .id_result);
307 offset = try self.parseOperandsResultIds(binary, inst, operands[0..2], offset, offsets);
308
309 if (offset + 1 >= inst.operands.len) return error.InvalidPhysicalFormat;
310 const set_id: ResultId = @enumFromInt(inst.operands[offset]);
311 try offsets.append(@intCast(offset));
312 const set = binary.ext_inst_map.get(set_id) orelse {
313 log.err("invalid instruction set {}", .{@intFromEnum(set_id)});
314 return error.InvalidId;
315 };
316 const ext_opcode = std.math.cast(u16, inst.operands[offset + 1]) orelse return error.InvalidPhysicalFormat;
317 const ext_index = self.opcode_table.get(mapSetAndOpcode(set, ext_opcode)) orelse
318 return error.InvalidPhysicalFormat;
319 const ext_operands = set.instructions()[ext_index].operands;
320 offset = try self.parseOperandsResultIds(binary, inst, ext_operands, offset + 2, offsets);
321 },
322 else => {
323 offset = try self.parseOperandsResultIds(binary, inst, operands, offset, offsets);
324 },
325 }
326
327 if (offset != inst.operands.len) return error.InvalidPhysicalFormat;
328 }
329
330 fn parseOperandsResultIds(
331 self: *Parser,
332 binary: BinaryModule,
333 inst: Instruction,
334 operands: []const spec.Operand,
335 start_offset: usize,
336 offsets: *std.array_list.Managed(u16),
337 ) !usize {
338 var offset = start_offset;
339 for (operands) |operand| {
340 offset = try self.parseOperandResultIds(binary, inst, operand, offset, offsets);
341 }
342 return offset;
343 }
344
345 fn parseOperandResultIds(
346 self: *Parser,
347 binary: BinaryModule,
348 inst: Instruction,
349 operand: spec.Operand,
350 start_offset: usize,
351 offsets: *std.array_list.Managed(u16),
352 ) !usize {
353 var offset = start_offset;
354 switch (operand.quantifier) {
355 .variadic => while (offset < inst.operands.len) {
356 offset = try self.parseOperandKindResultIds(binary, inst, operand.kind, offset, offsets);
357 },
358 .optional => if (offset < inst.operands.len) {
359 offset = try self.parseOperandKindResultIds(binary, inst, operand.kind, offset, offsets);
360 },
361 .required => {
362 offset = try self.parseOperandKindResultIds(binary, inst, operand.kind, offset, offsets);
363 },
364 }
365 return offset;
366 }
367
368 fn parseOperandKindResultIds(
369 self: *Parser,
370 binary: BinaryModule,
371 inst: Instruction,
372 kind: spec.OperandKind,
373 start_offset: usize,
374 offsets: *std.array_list.Managed(u16),
375 ) !usize {
376 var offset = start_offset;
377 if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;
378
379 switch (kind.category()) {
380 .bit_enum => {
381 const mask = inst.operands[offset];
382 offset += 1;
383 for (kind.enumerants()) |enumerant| {
384 if ((mask & enumerant.value) != 0) {
385 for (enumerant.parameters) |param_kind| {
386 offset = try self.parseOperandKindResultIds(binary, inst, param_kind, offset, offsets);
387 }
388 }
389 }
390 },
391 .value_enum => {
392 const value = inst.operands[offset];
393 offset += 1;
394 for (kind.enumerants()) |enumerant| {
395 if (value == enumerant.value) {
396 for (enumerant.parameters) |param_kind| {
397 offset = try self.parseOperandKindResultIds(binary, inst, param_kind, offset, offsets);
398 }
399 break;
400 }
401 }
402 },
403 .id => {
404 try offsets.append(@intCast(offset));
405 offset += 1;
406 },
407 else => switch (kind) {
408 .literal_integer, .literal_float => offset += 1,
409 .literal_string => while (true) {
410 if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;
411 const word = inst.operands[offset];
412 offset += 1;
413
414 if (word & 0xFF000000 == 0 or
415 word & 0x00FF0000 == 0 or
416 word & 0x0000FF00 == 0 or
417 word & 0x000000FF == 0)
418 {
419 break;
420 }
421 },
422 .literal_context_dependent_number => {
423 assert(inst.opcode == .OpConstant or inst.opcode == .OpSpecConstantOp);
424 const bit_width = binary.arith_type_width.get(@enumFromInt(inst.operands[0])) orelse {
425 log.err("invalid LiteralContextDependentNumber type {}", .{inst.operands[0]});
426 return error.InvalidId;
427 };
428 offset += switch (bit_width) {
429 1...32 => 1,
430 33...64 => 2,
431 else => unreachable,
432 };
433 },
434 .literal_ext_inst_integer => unreachable,
435 .literal_spec_constant_op_integer => unreachable,
436 .pair_literal_integer_id_ref => { // Switch case
437 assert(inst.opcode == .OpSwitch);
438 const bit_width = binary.arith_type_width.get(@enumFromInt(inst.operands[0])) orelse {
439 log.err("invalid OpSwitch type {}", .{inst.operands[0]});
440 return error.InvalidId;
441 };
442 offset += switch (bit_width) {
443 1...32 => 1,
444 33...64 => 2,
445 else => unreachable,
446 };
447 try offsets.append(@intCast(offset));
448 offset += 1;
449 },
450 .pair_id_ref_literal_integer => {
451 try offsets.append(@intCast(offset));
452 offset += 2;
453 },
454 .pair_id_ref_id_ref => {
455 try offsets.append(@intCast(offset));
456 try offsets.append(@intCast(offset + 1));
457 offset += 2;
458 },
459 else => unreachable,
460 },
461 }
462 return offset;
463 }
464};