master
1const std = @import("std");
2const Allocator = std.mem.Allocator;
3const assert = std.debug.assert;
4
5const CodeGen = @import("CodeGen.zig");
6const Decl = @import("Module.zig").Decl;
7
8const spec = @import("spec.zig");
9const Opcode = spec.Opcode;
10const Word = spec.Word;
11const Id = spec.Id;
12const StorageClass = spec.StorageClass;
13
14const Assembler = @This();
15
/// Code generator whose module receives the assembled instructions.
cg: *CodeGen,
/// Diagnostics recorded so far. Each `msg` is allocated with the module's gpa
/// (see `addError`) and is released in `deinit`.
errors: std.ArrayList(ErrorMsg) = .empty,
/// The assembly text currently being processed; set by `assemble`.
src: []const u8 = undefined,
/// `ass.src` tokenized.
tokens: std.ArrayList(Token) = .empty,
/// Index into `tokens` of the token the parser is currently looking at.
current_token: u32 = 0,
/// The instruction that is currently being parsed or has just been parsed.
inst: struct {
    /// Opcode of the instruction; undefined until the opcode token is parsed.
    opcode: Opcode = undefined,
    /// Operands in the order in which they appear in the instruction.
    operands: std.ArrayList(Operand) = .empty,
    /// Backing storage for `Operand.string`; every string is zero-terminated.
    string_bytes: std.ArrayList(u8) = .empty,

    /// Returns the reference assigned to by this instruction, or null if
    /// there is none. Only the first two operands are inspected.
    fn result(inst: @This()) ?AsmValue.Ref {
        const candidates = inst.operands.items[0..@min(inst.operands.items.len, 2)];
        for (candidates) |op| switch (op) {
            .result_id => |index| return index,
            else => {},
        };
        return null;
    }
} = .{},
/// Maps names of result-ids and placeholders (without the leading sigil) to
/// their current value. An `AsmValue.Ref` is an index into this map.
value_map: std.StringArrayHashMapUnmanaged(AsmValue) = .empty,
/// Opcode names, stored in the same order as
/// `spec.InstructionSet.core.instructions()` so that an index into this map
/// is also an index into that table.
inst_map: std.StringArrayHashMapUnmanaged(void) = .empty,
40
/// A single parsed operand of the instruction held in `ass.inst`.
const Operand = union(enum) {
    /// Any 'simple' 32-bit value. This could be a mask or
    /// enumerant, etc, depending on the operands.
    value: u32,
    /// An int- or float literal encoded as 1 word.
    literal32: u32,
    /// An int- or float literal encoded as 2 words.
    literal64: u64,
    /// A result-id which is assigned to in this instruction.
    /// If present, this is the first operand of the instruction.
    result_id: AsmValue.Ref,
    /// A result-id which is referred to (not assigned to) in this instruction.
    ref_id: AsmValue.Ref,
    /// Offset into `inst.string_bytes`. The string ends at the next zero-terminator.
    string: u32,
};
57
/// Frees everything owned by the assembler: the text of every recorded error
/// message, and the backing storage of all lists and maps.
pub fn deinit(ass: *Assembler) void {
    const gpa = ass.cg.module.gpa;

    // Error messages are allocated one by one; release them before the list itself.
    for (ass.errors.items) |err| gpa.free(err.msg);
    ass.errors.deinit(gpa);

    ass.tokens.deinit(gpa);

    ass.inst.string_bytes.deinit(gpa);
    ass.inst.operands.deinit(gpa);

    ass.inst_map.deinit(gpa);
    ass.value_map.deinit(gpa);
}
68
/// Errors produced while assembling. `fail` returns `error.AssembleFail` after
/// recording an explanatory message in `ass.errors`.
const Error = error{ AssembleFail, OutOfMemory };
70
/// Tokenizes `src` and then parses and processes its instructions one by one.
///
/// Returns `error.AssembleFail` if any error was recorded; the messages can be
/// found in `ass.errors`. Entries already present in `ass.value_map` are left
/// untouched, and the opcode map is only populated on first use.
pub fn assemble(ass: *Assembler, src: []const u8) Error!void {
    const gpa = ass.cg.module.gpa;

    ass.src = src;

    // Messages are individually allocated by `addError`. Free the ones left
    // over from a previous run before dropping the entries, otherwise they
    // become unreachable and leak.
    for (ass.errors.items) |err| gpa.free(err.msg);
    ass.errors.clearRetainingCapacity();

    // `tokenize` rebuilds the token list from scratch, so the cursor has to
    // restart as well; a stale value could point past the new list.
    ass.current_token = 0;

    // Populate the opcode map if it isn't already
    if (ass.inst_map.count() == 0) {
        const instructions = spec.InstructionSet.core.instructions();
        try ass.inst_map.ensureUnusedCapacity(gpa, @intCast(instructions.len));
        for (instructions, 0..) |inst, i| {
            const entry = ass.inst_map.getOrPutAssumeCapacity(inst.name);
            // The map index doubles as the index into `instructions`.
            assert(entry.index == i);
        }
    }

    try ass.tokenize();
    while (!ass.testToken(.eof)) {
        try ass.parseInstruction();
        try ass.processInstruction();
    }

    if (ass.errors.items.len > 0) return error.AssembleFail;
}
95
/// A diagnostic produced while assembling.
const ErrorMsg = struct {
    /// The offset in bytes from the start of `src` at which this error occurred.
    byte_offset: u32,
    /// Human-readable description, allocated with the module's gpa.
    msg: []const u8,
};
101
/// Formats a message and records it in `ass.errors` together with the byte
/// offset it refers to. The message is allocated with the module's gpa and is
/// freed again if it cannot be stored.
fn addError(ass: *Assembler, offset: u32, comptime fmt: []const u8, args: anytype) !void {
    const gpa = ass.cg.module.gpa;

    const text = try std.fmt.allocPrint(gpa, fmt, args);
    errdefer gpa.free(text);

    const entry: ErrorMsg = .{ .msg = text, .byte_offset = offset };
    try ass.errors.append(gpa, entry);
}
111
/// Records an error message at `offset` and yields `error.AssembleFail`, so
/// that callers can write `return ass.fail(...)`. If the message itself cannot
/// be recorded, that error is returned instead.
fn fail(ass: *Assembler, offset: u32, comptime fmt: []const u8, args: anytype) Error {
    ass.addError(offset, fmt, args) catch |err| return err;
    return error.AssembleFail;
}
116
/// Like `fail`, but for functionality that is not implemented yet: the message
/// is prefixed with "todo: " and reported at offset 0.
fn todo(ass: *Assembler, comptime fmt: []const u8, args: anytype) Error {
    const prefixed = "todo: " ++ fmt;
    return ass.fail(0, prefixed, args);
}
120
/// The value associated with a name in `value_map`.
const AsmValue = union(enum) {
    /// The results are stored in an array hash map, and can be referred
    /// to either by name (without the %), or by values of this index type.
    pub const Ref = u32;

    /// The RHS of the current instruction.
    just_declared,
    /// A placeholder for ref-ids of which the result-id is not yet known.
    /// It will be further resolved at a later stage to a more concrete forward reference.
    unresolved_forward_reference,
    /// A normal result produced by a different instruction.
    value: Id,
    /// A type registered into the module's type system.
    ty: Id,
    /// A pre-supplied constant integer value.
    constant: u32,
    /// A string value. When substituted for an enumeration operand it is
    /// matched against the names of that enumeration's enumerants.
    string: []const u8,

    /// Returns the result-id carried by this value.
    ///
    /// Only `value` and `ty` carry one; calling this on any other variant
    /// (a value still being declared, an unresolved forward reference, a
    /// constant, or a string) is illegal and asserted against.
    pub fn resultId(value: AsmValue) Id {
        switch (value) {
            .value, .ty => |id| return id,
            // TODO: Lower `constant` as a constant?
            .constant,
            .string,
            .just_declared,
            .unresolved_forward_reference,
            => unreachable,
        }
    }
};
155
/// Process the instruction that was just parsed into `ass.inst`.
///
/// Depending on the opcode this registers a capability or extension with the
/// module, imports an extended instruction set, records a type, or emits the
/// instruction into a section. If the instruction produces a result, that
/// result is stored in `value_map` under the name on its left-hand side.
///
/// When `error.AssembleFail` is returned, an explanatory message has already
/// been added to `ass.errors`.
fn processInstruction(ass: *Assembler) !void {
    const module = ass.cg.module;
    const opcode = ass.inst.opcode;
    const operands = ass.inst.operands.items;

    const result: AsmValue = switch (opcode) {
        .OpEntryPoint => return ass.fail(
            ass.currentToken().start,
            "cannot export entry points in assembly",
            .{},
        ),
        .OpExecutionMode, .OpExecutionModeId => return ass.fail(
            ass.currentToken().start,
            "cannot set execution mode in assembly",
            .{},
        ),
        .OpCapability => {
            try module.addCapability(@enumFromInt(operands[0].value));
            return;
        },
        .OpExtension => {
            const name_start = operands[0].string;
            const name = std.mem.sliceTo(ass.inst.string_bytes.items[name_start..], 0);
            try module.addExtension(name);
            return;
        },
        .OpExtInstImport => import: {
            // Operand 0 is the result-id, the set name follows it.
            const name_start = operands[1].string;
            const name = std.mem.sliceTo(ass.inst.string_bytes.items[name_start..], 0);
            const set = std.meta.stringToEnum(spec.InstructionSet, name) orelse
                return ass.fail(name_start, "unknown instruction set: {s}", .{name});
            break :import .{ .value = try module.importInstructionSet(set) };
        },
        else => if (opcode.class() == .type_declaration)
            try ass.processTypeInstruction()
        else
            // Generic instructions without a result have nothing left to record.
            (try ass.processGenericInstruction()) orelse return,
    };

    const result_ref = ass.inst.result().?;
    const slot = &ass.value_map.values()[result_ref];
    if (slot.* != .just_declared) {
        // The name already had a value before this instruction was parsed.
        // TODO: Improve source location.
        const name = ass.value_map.keys()[result_ref];
        return ass.fail(0, "duplicate definition of %{s}", .{name});
    }
    slot.* = result;
}
204
/// Process the type-declaring instruction in `ass.inst` and return the
/// resulting type as an `AsmValue`.
///
/// Void, bool, int, float, vector and struct types are obtained through the
/// module's type helpers; the remaining supported types are emitted directly
/// into the globals section under a freshly allocated id. Unsupported type
/// instructions are reported through `todo`.
fn processTypeInstruction(ass: *Assembler) !AsmValue {
    const cg = ass.cg;
    const module = cg.module;
    const gpa = module.gpa;
    const globals = &module.sections.globals;
    const operands = ass.inst.operands.items;

    const type_id = switch (ass.inst.opcode) {
        .OpTypeVoid => try module.voidType(),
        .OpTypeBool => try module.boolType(),
        .OpTypeInt => int: {
            const raw_signedness = operands[2].literal32;
            const signedness: std.builtin.Signedness = switch (raw_signedness) {
                0 => .unsigned,
                1 => .signed,
                // TODO: Improve source location.
                else => return ass.fail(0, "{} is not a valid signedness (expected 0 or 1)", .{raw_signedness}),
            };
            const raw_width = operands[1].literal32;
            const width = std.math.cast(u16, raw_width) orelse
                return ass.fail(0, "int type of {} bits is too large", .{raw_width});
            break :int try module.intType(signedness, width);
        },
        .OpTypeFloat => float: {
            const bits = operands[1].literal32;
            if (bits != 16 and bits != 32 and bits != 64) {
                return ass.fail(0, "{} is not a valid bit count for floats (expected 16, 32 or 64)", .{bits});
            }
            break :float try module.floatType(@intCast(bits));
        },
        .OpTypeVector => vector: {
            const component = try ass.resolveRefId(operands[1].ref_id);
            const count = operands[2].literal32;
            break :vector try module.vectorType(count, component);
        },
        .OpTypeArray => {
            // TODO: The length of an OpTypeArray is determined by a constant (which may be a spec constant),
            // and so some consideration must be taken when entering this in the type system.
            return ass.todo("process OpTypeArray", .{});
        },
        .OpTypeRuntimeArray => array: {
            const element = try ass.resolveRefId(operands[1].ref_id);
            const new_id = module.allocId();
            try globals.emit(gpa, .OpTypeRuntimeArray, .{
                .id_result = new_id,
                .element_type = element,
            });
            break :array new_id;
        },
        .OpTypePointer => pointer: {
            const class: StorageClass = @enumFromInt(operands[1].value);
            const pointee = try ass.resolveRefId(operands[2].ref_id);
            const new_id = module.allocId();
            try globals.emit(gpa, .OpTypePointer, .{
                .id_result = new_id,
                .storage_class = class,
                .type = pointee,
            });
            break :pointer new_id;
        },
        .OpTypeStruct => strukt: {
            // The member ids only need to live until the type is registered.
            const scratch_top = cg.id_scratch.items.len;
            defer cg.id_scratch.shrinkRetainingCapacity(scratch_top);

            const member_operands = operands[1..];
            const members = try cg.id_scratch.addManyAsSlice(gpa, member_operands.len);
            for (member_operands, members) |operand, *member| {
                member.* = try ass.resolveRefId(operand.ref_id);
            }
            break :strukt try module.structType(members, null, null, .none);
        },
        .OpTypeImage => image: {
            const sampled_type = try ass.resolveRefId(operands[1].ref_id);
            const new_id = module.allocId();
            try globals.emit(gpa, .OpTypeImage, .{
                .id_result = new_id,
                .sampled_type = sampled_type,
                .dim = @enumFromInt(operands[2].value),
                .depth = operands[3].literal32,
                .arrayed = operands[4].literal32,
                .ms = operands[5].literal32,
                .sampled = operands[6].literal32,
                .image_format = @enumFromInt(operands[7].value),
            });
            break :image new_id;
        },
        .OpTypeSampler => sampler: {
            const new_id = module.allocId();
            try globals.emit(gpa, .OpTypeSampler, .{ .id_result = new_id });
            break :sampler new_id;
        },
        .OpTypeSampledImage => sampled: {
            const image = try ass.resolveRefId(operands[1].ref_id);
            const new_id = module.allocId();
            try globals.emit(gpa, .OpTypeSampledImage, .{
                .id_result = new_id,
                .image_type = image,
            });
            break :sampled new_id;
        },
        .OpTypeFunction => function: {
            const param_operands = operands[2..];
            const return_type = try ass.resolveRefId(operands[1].ref_id);

            // The parameter ids only need to live until the type is emitted.
            const scratch_top = cg.id_scratch.items.len;
            defer cg.id_scratch.shrinkRetainingCapacity(scratch_top);

            const params = try cg.id_scratch.addManyAsSlice(gpa, param_operands.len);
            for (param_operands, params) |operand, *param| {
                param.* = try ass.resolveRefId(operand.ref_id);
            }

            const new_id = module.allocId();
            try globals.emit(gpa, .OpTypeFunction, .{
                .id_result = new_id,
                .return_type = return_type,
                .id_ref_2 = params,
            });
            break :function new_id;
        },
        else => return ass.todo("process type instruction {s}", .{@tagName(ass.inst.opcode)}),
    };

    return .{ .ty = type_id };
}
324
/// Emit the instruction in `ass.inst` operand by operand into a section.
/// - No forward references are allowed in operands.
/// - Target section is determined from instruction type.
///
/// Returns the value of the instruction's result-id, or null if the
/// instruction does not have a result.
fn processGenericInstruction(ass: *Assembler) !?AsmValue {
    const module = ass.cg.module;
    const gpa = module.gpa;
    const target = module.zcu.getTarget();
    const opcode = ass.inst.opcode;
    const operands = ass.inst.operands.items;

    // Set for non-function-local variables, whose result-id comes from a decl.
    var maybe_spv_decl_index: ?Decl.Index = null;
    const section = switch (opcode.class()) {
        .type_declaration => unreachable, // Handled elsewhere.
        .annotation => &module.sections.annotations,
        .constant_creation => &module.sections.globals,
        else => switch (opcode) {
            .OpEntryPoint => unreachable,
            .OpExecutionMode, .OpExecutionModeId => &module.sections.execution_modes,
            .OpVariable => dest: {
                const storage_class: spec.StorageClass = @enumFromInt(operands[2].value);
                if (storage_class == .function) break :dest &ass.cg.prologue;

                maybe_spv_decl_index = try module.allocDecl(.global);
                // Before version 1.4, the interface’s storage classes are limited to the Input and Output
                const is_interface = target.cpu.has(.spirv, .v1_4) or
                    storage_class == .input or
                    storage_class == .output;
                if (is_interface) {
                    try module.decl_deps.append(gpa, maybe_spv_decl_index.?);
                }
                break :dest &module.sections.globals;
            },
            else => &ass.cg.body,
        },
    };

    var maybe_result_id: ?Id = null;

    // At this point we're not quite sure how many operands this instruction is
    // going to have, so insert 0 and patch up the actual opcode word later.
    const header_index = section.instructions.items.len;
    try section.ensureUnusedCapacity(gpa, 1);
    section.writeWord(0);

    for (operands) |operand| switch (operand) {
        .value, .literal32 => |word| {
            try section.ensureUnusedCapacity(gpa, 1);
            section.writeWord(word);
        },
        .literal64 => |dword| {
            try section.ensureUnusedCapacity(gpa, 2);
            section.writeDoubleWord(dword);
        },
        .result_id => {
            const id = if (maybe_spv_decl_index) |decl_index|
                module.declPtr(decl_index).result_id
            else
                module.allocId();
            maybe_result_id = id;
            try section.ensureUnusedCapacity(gpa, 1);
            section.writeOperand(Id, id);
        },
        .ref_id => |ref| {
            const referenced = try ass.resolveRef(ref);
            try section.ensureUnusedCapacity(gpa, 1);
            section.writeOperand(Id, referenced.resultId());
        },
        .string => |offset| {
            const text = std.mem.sliceTo(ass.inst.string_bytes.items[offset..], 0);
            // The string is written including its zero-terminator, rounded up to whole words.
            const word_len = std.math.divCeil(usize, text.len + 1, @sizeOf(Word)) catch unreachable;
            try section.ensureUnusedCapacity(gpa, word_len);
            section.writeOperand(spec.LiteralString, text);
        },
    };

    // Patch the placeholder: word count in the high half, opcode in the low half.
    const word_count: u16 = @intCast(section.instructions.items.len - header_index);
    const header = (@as(u32, word_count) << 16) | @intFromEnum(opcode);
    section.instructions.items[header_index] |= header;

    const result_id = maybe_result_id orelse return null;
    return .{ .value = result_id };
}
399
/// Look up the value that `ref` currently refers to.
///
/// Fails if `ref` names the result of the instruction that is being parsed
/// right now, i.e. the instruction refers to its own result. Unresolved
/// forward references are returned as-is; use `resolveRef` to reject those too.
fn resolveMaybeForwardRef(ass: *Assembler, ref: AsmValue.Ref) !AsmValue {
    const value = ass.value_map.values()[ref];
    if (value == .just_declared) {
        const name = ass.value_map.keys()[ref];
        // TODO: Improve source location.
        return ass.fail(0, "self-referential parameter %{s}", .{name});
    }
    return value;
}
411
/// Look up the value that `ref` refers to, requiring that it has already been
/// declared. Fails for self-references and for names that have only been
/// referred to but never defined.
fn resolveRef(ass: *Assembler, ref: AsmValue.Ref) !AsmValue {
    const resolved = try ass.resolveMaybeForwardRef(ref);
    switch (resolved) {
        // Already rejected by `resolveMaybeForwardRef`.
        .just_declared => unreachable,
        .unresolved_forward_reference => {
            // TODO: Improve source location.
            return ass.fail(0, "reference to undeclared result-id %{s}", .{ass.value_map.keys()[ref]});
        },
        else => {},
    }
    return resolved;
}
424
/// Resolve `ref` with `resolveRef` and return the result-id of the value.
/// The value must be of a variant accepted by `AsmValue.resultId`.
fn resolveRefId(ass: *Assembler, ref: AsmValue.Ref) !Id {
    return (try ass.resolveRef(ref)).resultId();
}
429
/// Parse one instruction starting at the current token into `ass.inst`.
///
/// An instruction has the form `[%result =] OpName operands...`. The opcode
/// is looked up in `inst_map`, and the operands are parsed according to the
/// operand list of the matching entry in the core instruction table.
fn parseInstruction(ass: *Assembler) !void {
    const gpa = ass.cg.module.gpa;

    // Drop whatever the previous instruction left behind.
    ass.inst.string_bytes.clearRetainingCapacity();
    ass.inst.operands.clearRetainingCapacity();
    ass.inst.opcode = undefined;

    const lhs_tok = ass.currentToken();
    const lhs_ref: ?AsmValue.Ref = if (!ass.eatToken(.result_id_assign)) null else ref: {
        // Strip the leading '%'.
        const gop = try ass.value_map.getOrPut(gpa, ass.tokenText(lhs_tok)[1..]);
        try ass.expectToken(.equals);
        // An existing entry is kept, so that redefinitions can be detected later.
        if (!gop.found_existing) gop.value_ptr.* = .just_declared;
        break :ref @intCast(gop.index);
    };

    const opcode_tok = ass.currentToken();
    if (lhs_ref == null) {
        if (!ass.eatToken(.opcode)) {
            return ass.fail(opcode_tok.start, "expected start of instruction, found {s}", .{opcode_tok.tag.name()});
        }
    } else {
        try ass.expectToken(.opcode);
    }

    const opcode_text = ass.tokenText(opcode_tok);
    const inst_index = ass.inst_map.getIndex(opcode_text) orelse
        return ass.fail(opcode_tok.start, "invalid opcode '{s}'", .{opcode_text});

    const inst_info = spec.InstructionSet.core.instructions()[inst_index];
    ass.inst.opcode = @enumFromInt(inst_info.opcode);

    const expected_operands = inst_info.operands;
    // This is a loop because the result-id is not always the first operand.
    const wants_result = for (expected_operands) |expected| {
        if (expected.kind == .id_result) break true;
    } else false;

    if (wants_result and lhs_ref == null) {
        return ass.fail(opcode_tok.start, "opcode '{s}' expects result on left-hand side", .{@tagName(ass.inst.opcode)});
    }
    if (lhs_ref != null and !wants_result) {
        return ass.fail(
            lhs_tok.start,
            "opcode '{s}' does not expect a result-id on the left-hand side",
            .{@tagName(ass.inst.opcode)},
        );
    }

    for (expected_operands) |expected| {
        if (expected.kind == .id_result) {
            // The result-id was already parsed from the left-hand side.
            try ass.inst.operands.append(gpa, .{ .result_id = lhs_ref.? });
            continue;
        }

        switch (expected.quantifier) {
            .required => {
                if (ass.isAtInstructionBoundary()) {
                    return ass.fail(
                        ass.currentToken().start,
                        "missing required operand", // TODO: Operand name?
                        .{},
                    );
                }
                try ass.parseOperand(expected.kind);
            },
            .optional => if (!ass.isAtInstructionBoundary()) try ass.parseOperand(expected.kind),
            .variadic => while (!ass.isAtInstructionBoundary()) try ass.parseOperand(expected.kind),
        }
    }
}
504
/// Parse a single operand of the given kind at the current token and append
/// it (and any extra operands it requires) to `ass.inst.operands`.
/// Operand kinds that are not supported yet are reported through `todo`.
fn parseOperand(ass: *Assembler, kind: spec.OperandKind) Error!void {
    // Whole categories are dispatched first, individual kinds after.
    const category = kind.category();
    if (category == .bit_enum) return ass.parseBitEnum(kind);
    if (category == .value_enum) return ass.parseValueEnum(kind);
    if (category == .id) return ass.parseRefId();

    switch (kind) {
        .literal_integer => try ass.parseLiteralInteger(),
        .literal_string => try ass.parseString(),
        .literal_context_dependent_number => try ass.parseContextDependentNumber(),
        .literal_ext_inst_integer => try ass.parseLiteralExtInstInteger(),
        .pair_id_ref_id_ref => try ass.parsePhiSource(),
        else => return ass.todo("parse operand of type {s}", .{@tagName(kind)}),
    }
}
520
/// Parse a bitmask operand: either `None`, or one or more flag names
/// separated by `|`. The combined mask is appended as a single operand.
/// Also handles parsing any required extra operands.
fn parseBitEnum(ass: *Assembler, kind: spec.OperandKind) !void {
    const gpa = ass.cg.module.gpa;

    var flag_tok = ass.currentToken();
    try ass.expectToken(.value);

    // `None` is only recognized on its own, as the first token.
    if (std.mem.eql(u8, ass.tokenText(flag_tok), "None")) {
        return ass.inst.operands.append(gpa, .{ .value = 0 });
    }

    const enumerants = kind.enumerants();
    var mask: u32 = 0;
    while (true) {
        const flag_name = ass.tokenText(flag_tok);
        const flag = for (enumerants) |candidate| {
            if (std.mem.eql(u8, candidate.name, flag_name)) break candidate;
        } else return ass.fail(
            flag_tok.start,
            "'{s}' is not a valid flag for bitmask {s}",
            .{ flag_name, @tagName(kind) },
        );
        mask |= flag.value;

        if (!ass.eatToken(.pipe)) break;
        flag_tok = ass.currentToken();
        try ass.expectToken(.value);
    }

    try ass.inst.operands.append(gpa, .{ .value = mask });

    // Parameters follow the mask in the order of the enumerant table.
    // Assume values are sorted.
    // TODO: ensure in generator.
    for (enumerants) |enumerant| {
        if ((mask & enumerant.value) != 0) {
            for (enumerant.parameters) |param_kind| {
                if (ass.isAtInstructionBoundary()) {
                    return ass.fail(
                        ass.currentToken().start,
                        "missing required parameter for bit flag '{s}'",
                        .{enumerant.name},
                    );
                }
                try ass.parseOperand(param_kind);
            }
        }
    }
}
569
/// Parse an enumeration operand. The operand is either a `$placeholder`
/// bound to a constant or to an enumerant name, or a value token that holds
/// an enumerant's name or its numeric value.
/// Also handles parsing any required extra operands (not for placeholders).
fn parseValueEnum(ass: *Assembler, kind: spec.OperandKind) !void {
    const gpa = ass.cg.module.gpa;
    const tok = ass.currentToken();

    if (!ass.eatToken(.placeholder)) {
        try ass.expectToken(.value);

        const text = ass.tokenText(tok);
        // A token that parses as an integer is matched by value, anything else by name.
        const maybe_int = std.fmt.parseInt(u32, text, 0) catch null;
        const enumerant = for (kind.enumerants()) |candidate| {
            const matches = if (maybe_int) |int|
                int == candidate.value
            else
                std.mem.eql(u8, candidate.name, text);
            if (matches) break candidate;
        } else return ass.fail(
            tok.start,
            "'{s}' is not a valid value for enumeration {s}",
            .{ text, @tagName(kind) },
        );

        try ass.inst.operands.append(gpa, .{ .value = enumerant.value });

        for (enumerant.parameters) |param_kind| {
            if (ass.isAtInstructionBoundary()) {
                return ass.fail(
                    ass.currentToken().start,
                    "missing required parameter for enum variant '{s}'",
                    .{enumerant.name},
                );
            }
            try ass.parseOperand(param_kind);
        }
        return;
    }

    // Strip the leading '$'.
    const name = ass.tokenText(tok)[1..];
    const bound = ass.value_map.get(name) orelse
        return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});

    const word: u32 = switch (bound) {
        .constant => |literal32| literal32,
        .string => |str| for (kind.enumerants()) |candidate| {
            if (std.mem.eql(u8, candidate.name, str)) break candidate.value;
        } else return ass.fail(
            tok.start,
            "'{s}' is not a valid value for enumeration {s}",
            .{ str, @tagName(kind) },
        ),
        else => return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name}),
    };
    try ass.inst.operands.append(gpa, .{ .value = word });
}
621
/// Parse a `%name` reference and append it as a `ref_id` operand. A name
/// that is not in `value_map` yet is entered as an unresolved forward
/// reference.
fn parseRefId(ass: *Assembler) !void {
    const gpa = ass.cg.module.gpa;

    const tok = ass.currentToken();
    try ass.expectToken(.result_id);

    // Strip the leading '%'.
    const gop = try ass.value_map.getOrPut(gpa, ass.tokenText(tok)[1..]);
    if (!gop.found_existing) gop.value_ptr.* = .unresolved_forward_reference;

    const ref: AsmValue.Ref = @intCast(gop.index);
    try ass.inst.operands.append(gpa, .{ .ref_id = ref });
}
637
/// Parse a 32-bit integer literal and append it as a `literal32` operand.
/// A `$placeholder` is accepted instead if it is bound to a constant.
fn parseLiteralInteger(ass: *Assembler) !void {
    const gpa = ass.cg.module.gpa;
    const tok = ass.currentToken();

    if (!ass.eatToken(.placeholder)) {
        try ass.expectToken(.value);
        // According to the SPIR-V machine readable grammar, a LiteralInteger
        // may consist of one or more words. From the SPIR-V docs it seems like there
        // is only one instruction where multiple words are allowed, the literals that make up the
        // switch cases of OpSwitch. This case is handled separately, and so we just assume
        // everything is a 32-bit integer in this function.
        const text = ass.tokenText(tok);
        const parsed = std.fmt.parseInt(u32, text, 0) catch
            return ass.fail(tok.start, "'{s}' is not a valid 32-bit integer literal", .{text});
        return ass.inst.operands.append(gpa, .{ .literal32 = parsed });
    }

    // Strip the leading '$'.
    const name = ass.tokenText(tok)[1..];
    const bound = ass.value_map.get(name) orelse
        return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});
    switch (bound) {
        .constant => |literal32| try ass.inst.operands.append(gpa, .{ .literal32 = literal32 }),
        else => return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name}),
    }
}
670
/// Parse the integer that selects an instruction of an extended instruction
/// set and append it as a `literal32` operand. A `$placeholder` is accepted
/// instead if it is bound to a constant.
fn parseLiteralExtInstInteger(ass: *Assembler) !void {
    const gpa = ass.cg.module.gpa;
    const tok = ass.currentToken();

    if (!ass.eatToken(.placeholder)) {
        try ass.expectToken(.value);
        const text = ass.tokenText(tok);
        const parsed = std.fmt.parseInt(u32, text, 0) catch
            return ass.fail(tok.start, "'{s}' is not a valid 32-bit integer literal", .{text});
        return ass.inst.operands.append(gpa, .{ .literal32 = parsed });
    }

    // Strip the leading '$'.
    const name = ass.tokenText(tok)[1..];
    const bound = ass.value_map.get(name) orelse
        return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});
    switch (bound) {
        .constant => |literal32| try ass.inst.operands.append(gpa, .{ .literal32 = literal32 }),
        else => return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name}),
    }
}
698
/// Parse a string literal. Its contents (without the quotes) are copied,
/// zero-terminated, into `inst.string_bytes`, and a `string` operand holding
/// the offset of the copy is appended.
fn parseString(ass: *Assembler) !void {
    const gpa = ass.cg.module.gpa;

    const tok = ass.currentToken();
    try ass.expectToken(.string);

    const text = ass.tokenText(tok);
    assert(text.len > 0 and text[0] == '"');

    // Note, the string might not have a closing quote. In this case,
    // an error is already emitted but we are trying to continue processing
    // anyway, so in this function we have to deal with that situation.
    const has_closing_quote = text.len != 1 and text[text.len - 1] == '"';
    const content_end = if (has_closing_quote) text.len - 1 else text.len;
    const content = text[1..content_end];

    const bytes = &ass.inst.string_bytes;
    const offset: u32 = @intCast(bytes.items.len);
    try bytes.ensureUnusedCapacity(gpa, content.len + 1);
    bytes.appendSliceAssumeCapacity(content);
    bytes.appendAssumeCapacity(0);

    try ass.inst.operands.append(gpa, .{ .string = offset });
}
721
/// Parse a literal whose encoding depends on the result type of the
/// instruction, and append it as a `literal32` or `literal64` operand.
///
/// The result type is taken from the first operand, which must already be
/// resolved. Fails if that type is not an int or float type known to the
/// module's type cache.
fn parseContextDependentNumber(ass: *Assembler) !void {
    const module = ass.cg.module;

    // For context dependent numbers, the actual type to parse is determined by the instruction.
    // Currently, this operand appears in OpConstant and OpSpecConstant, where the to-be-parsed type
    // is determined by the result type. That means that in these instructions we have to resolve the
    // operand type early and look at the result to see how we need to proceed.
    assert(ass.inst.opcode == .OpConstant or ass.inst.opcode == .OpSpecConstant);

    const tok = ass.currentToken();
    const result = try ass.resolveRef(ass.inst.operands.items[0].ref_id);
    const result_id = result.resultId();
    // We are going to cheat a little bit: The types we are interested in, int and float,
    // are added to the module and cached via module.intType and module.floatType. Therefore,
    // we can determine the width of these types by directly checking the cache.
    // This only works if the Assembler and codegen both use spv.intType and spv.floatType though.
    // We don't expect there to be many of these types, so just look it up every time.
    // TODO: Could be improved to be a little bit more efficient.

    {
        var it = module.cache.int_types.iterator();
        while (it.next()) |entry| {
            const id = entry.value_ptr.*;
            if (id != result_id) continue;
            const info = entry.key_ptr.*;
            return try ass.parseContextDependentInt(info.signedness, info.bits);
        }
    }

    {
        var it = module.cache.float_types.iterator();
        while (it.next()) |entry| {
            const id = entry.value_ptr.*;
            if (id != result_id) continue;
            const info = entry.key_ptr.*;
            switch (info.bits) {
                16 => try ass.parseContextDependentFloat(16),
                32 => try ass.parseContextDependentFloat(32),
                64 => try ass.parseContextDependentFloat(64),
                else => return ass.fail(tok.start, "cannot parse {}-bit float literal", .{info.bits}),
            }
            // The literal has been parsed. Without this return, control would
            // reach the failure below and every float constant would be rejected.
            return;
        }
    }

    return ass.fail(tok.start, "cannot parse literal constant", .{});
}
768
/// Parse an integer literal for an int type of the given signedness and
/// width (1 to 64 bits). Values of up to 32 bits are appended as `literal32`,
/// wider ones as `literal64`. A `$placeholder` bound to a constant is accepted
/// instead and is always appended as `literal32`.
fn parseContextDependentInt(ass: *Assembler, signedness: std.builtin.Signedness, width: u32) !void {
    const gpa = ass.cg.module.gpa;
    const tok = ass.currentToken();

    if (ass.eatToken(.placeholder)) {
        // Strip the leading '$'.
        const name = ass.tokenText(tok)[1..];
        const bound = ass.value_map.get(name) orelse
            return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});
        return switch (bound) {
            .constant => |literal32| ass.inst.operands.append(gpa, .{ .literal32 = literal32 }),
            else => ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name}),
        };
    }

    try ass.expectToken(.value);

    const word_bits = @bitSizeOf(spec.Word);
    if (width == 0 or width > 2 * word_bits) {
        return ass.fail(tok.start, "cannot parse {}-bit integer literal", .{width});
    }

    // The representable range of the target type, expressed in i128.
    const is_signed = signedness == .signed;
    const magnitude_bits = @as(u7, @intCast(width)) - @intFromBool(is_signed);
    const limit = @as(i128, 1) << magnitude_bits;
    const min: i128 = if (is_signed) -limit else 0;
    const max: i128 = limit - 1;

    const text = ass.tokenText(tok);
    // Just parse the integer as a much larger integer type, and check the range afterwards.
    const maybe_int: ?i128 = std.fmt.parseInt(i128, text, 0) catch null;
    if (maybe_int) |int| {
        if (int >= min and int <= max) {
            // Note, we store the sign-extended version here.
            const raw: u128 = @bitCast(int);
            if (width > word_bits) {
                try ass.inst.operands.append(gpa, .{ .literal64 = @truncate(raw) });
            } else {
                try ass.inst.operands.append(gpa, .{ .literal32 = @truncate(raw) });
            }
            return;
        }
    }

    return ass.fail(tok.start, "'{s}' is not a valid {s} {}-bit int literal", .{ text, @tagName(signedness), width });
}
819
/// Parse a float literal of `width` bits and append its bit pattern as a
/// `literal32` operand (widths up to one word) or a `literal64` operand.
fn parseContextDependentFloat(ass: *Assembler, comptime width: u16) !void {
    const gpa = ass.cg.module.gpa;

    const Float = std.meta.Float(width);
    const Bits = std.meta.Int(.unsigned, width);

    const tok = ass.currentToken();
    try ass.expectToken(.value);
    const text = ass.tokenText(tok);

    const parsed = std.fmt.parseFloat(Float, text) catch
        return ass.fail(tok.start, "'{s}' is not a valid {}-bit float literal", .{ text, width });
    const pattern: Bits = @bitCast(parsed);

    // `width` is comptime-known, so only the matching branch is compiled.
    if (width > @bitSizeOf(spec.Word)) {
        assert(width <= 2 * @bitSizeOf(spec.Word));
        try ass.inst.operands.append(gpa, .{ .literal64 = pattern });
    } else {
        try ass.inst.operands.append(gpa, .{ .literal32 = pattern });
    }
}
843
/// Parse a pair of references, as used for phi sources: a first reference
/// followed by the reference of its parent block.
fn parsePhiSource(ass: *Assembler) !void {
    try ass.parseRefId();

    // The second half of the pair is mandatory.
    const pair_incomplete = ass.isAtInstructionBoundary();
    if (pair_incomplete) {
        return ass.fail(ass.currentToken().start, "missing phi block parent", .{});
    }

    try ass.parseRefId();
}
851
/// Returns whether the `current_token` cursor points at something that can
/// only begin a new instruction (an opcode or an assigned result-id), or at
/// the end of the input.
fn isAtInstructionBoundary(ass: Assembler) bool {
    const tag = ass.currentToken().tag;
    return tag == .opcode or tag == .result_id_assign or tag == .eof;
}
860
/// Consume the current token if it has the given tag; otherwise record an
/// "unexpected token" error at the current token and fail.
fn expectToken(ass: *Assembler, tag: Token.Tag) !void {
    if (!ass.eatToken(tag)) {
        const found = ass.currentToken();
        return ass.fail(found.start, "unexpected {s}, expected {s}", .{
            found.tag.name(),
            tag.name(),
        });
    }
}
870
/// Advance past the current token if it has the given tag.
/// Returns whether the token was consumed.
fn eatToken(ass: *Assembler, tag: Token.Tag) bool {
    const matched = ass.testToken(tag);
    if (matched) ass.current_token += 1;
    return matched;
}
878
/// Returns whether the current token has the given tag, without consuming it.
fn testToken(ass: Assembler, tag: Token.Tag) bool {
    const tok = ass.currentToken();
    return tok.tag == tag;
}
882
/// Returns the token at the `current_token` cursor.
fn currentToken(ass: Assembler) Token {
    const cursor = ass.current_token;
    return ass.tokens.items[cursor];
}
886
/// Returns the slice of `ass.src` that the token covers.
fn tokenText(ass: Assembler, tok: Token) []const u8 {
    const first = tok.start;
    const last = tok.end;
    return ass.src[first..last];
}
890
/// Tokenize `ass.src` and put the tokens in `ass.tokens`.
/// Any errors encountered are appended to `ass.errors`.
///
/// The token list always ends with an `.eof` token. A `%name` that is
/// directly followed by `=` is retagged as `.result_id_assign`.
fn tokenize(ass: *Assembler) !void {
    const gpa = ass.cg.module.gpa;

    ass.tokens.clearRetainingCapacity();

    var offset: u32 = 0;
    while (true) {
        const tok = try ass.nextToken(offset);
        // Resolve result-id assignment now.
        // NOTE: If the previous token wasn't a result-id, just ignore it,
        // we will catch it while parsing.
        // The length check matters: input that starts with '=' has no
        // previous token, and indexing `len - 1` would underflow.
        if (tok.tag == .equals and ass.tokens.items.len > 0) {
            const prev = &ass.tokens.items[ass.tokens.items.len - 1];
            if (prev.tag == .result_id) prev.tag = .result_id_assign;
        }
        try ass.tokens.append(gpa, tok);
        if (tok.tag == .eof)
            break;
        offset = tok.end;
    }
}
913
/// A lexical token: its kind, plus the range `start..end` of the source
/// text that it covers.
const Token = struct {
    tag: Tag,
    start: u32,
    end: u32,

    const Tag = enum {
        /// Produced when there is no more input left to match.
        eof,
        /// %identifier
        result_id,
        /// %identifier appearing on the left-hand side of an equals sign.
        /// Strictly speaking this is not a token of its own, but it's
        /// relatively easy to resolve during lexical analysis, and doing so
        /// relieves a bunch of headaches during parsing.
        result_id_assign,
        /// Mask, int, or float. These share one tag because some SPIR-V
        /// enumerants look a bit like integers too (for example "3D"), so
        /// it is easier to interpret them as the expected type when
        /// resolving an instruction's operands.
        value,
        /// An enumerant that looks like an opcode, that is, OpXxxx.
        /// Not necessarily a *valid* opcode.
        opcode,
        /// String literals.
        /// Note that this tag is also used for unterminated strings,
        /// in which case the closing " is not present.
        string,
        /// |.
        pipe,
        /// =.
        equals,
        /// $identifier. This is used (for now) for constant values, like integers.
        /// These can be used in place of a normal `value`.
        placeholder,

        /// Returns a human-readable description of the tag, used when
        /// formatting error messages.
        fn name(tag: Tag) []const u8 {
            switch (tag) {
                .eof => return "<end of input>",
                .result_id => return "<result-id>",
                .result_id_assign => return "<assigned result-id>",
                .value => return "<value>",
                .opcode => return "<opcode>",
                .string => return "<string literal>",
                .pipe => return "'|'",
                .equals => return "'='",
                .placeholder => return "<placeholder>",
            }
        }
    };
};
964
/// Scans `ass.src` starting at `start_offset` and returns the first token
/// found there. Leading whitespace is skipped and is not part of the token's
/// range. The token text is not interpreted here, except that a value of the
/// form `OpXxxx` is tagged `.opcode`. Missing delimiters, illegal characters
/// and unterminated strings are reported through `ass.addError`; the token
/// scanned so far is still returned. At the end of the input an `.eof`
/// token is returned.
/// NOTE: `.result_id_assign` is never produced here - this is handled in tokenize().
fn nextToken(ass: *Assembler, start_offset: u32) !Token {
    // The input is split into the following categories:
    // - Whitespace. Generally ignored, but it also delimits some tokens.
    // - Values. Integers, floats, enums - anything made of alphanumeric
    //   characters, delimited by whitespace.
    // - Result-IDs. A % followed by alphanumeric characters and _. Unlike
    //   values, these can be checked for complete correctness right here.
    // - Strings. Quote-delimited text such as "abc".
    //   SPIR-V strings have only two escapes, \" and \\.
    // - Sigils, = and |. In this assembler they act as delimiters and need
    //   no surrounding whitespace, unlike in SPIRV-Tools.
    const State = enum {
        start,
        value,
        result_id,
        string,
        string_end,
        escape,
        placeholder,
    };

    var state: State = .start;
    var tag: Token.Tag = .eof;
    var begin = start_offset;
    var cursor = start_offset;

    scan: while (cursor < ass.src.len) : (cursor += 1) {
        const byte = ass.src[cursor];
        switch (state) {
            .start => switch (byte) {
                // Still in leading whitespace: the token starts after it.
                ' ', '\t', '\r', '\n' => begin = cursor + 1,
                '|', '=' => {
                    // Sigils are complete single-character tokens.
                    tag = if (byte == '|') .pipe else .equals;
                    cursor += 1;
                    break :scan;
                },
                '"' => {
                    tag = .string;
                    state = .string;
                },
                '%' => {
                    tag = .result_id;
                    state = .result_id;
                },
                '$' => {
                    tag = .placeholder;
                    state = .placeholder;
                },
                else => {
                    tag = .value;
                    state = .value;
                },
            },
            .value => switch (byte) {
                ' ', '\t', '\r', '\n', '=', '|' => break :scan,
                '"' => {
                    try ass.addError(cursor, "unexpected string literal", .{});
                    // Most likely a forgotten delimiter - the tag stays a value.
                    break :scan;
                },
                else => {},
            },
            .result_id, .placeholder => switch (byte) {
                '_', 'a'...'z', 'A'...'Z', '0'...'9' => {},
                else => {
                    // Anything that is neither a name character nor a
                    // delimiter is an error; either way the token ends here.
                    switch (byte) {
                        ' ', '\t', '\r', '\n', '=', '|' => {},
                        // Probably a forgotten delimiter again.
                        else => try ass.addError(cursor, "illegal character in result-id or placeholder", .{}),
                    }
                    break :scan;
                },
            },
            // Note, strings may include newlines.
            .string => {
                if (byte == '\\') {
                    state = .escape;
                } else if (byte == '"') {
                    state = .string_end;
                }
            },
            .string_end => {
                switch (byte) {
                    ' ', '\t', '\r', '\n', '=', '|' => {},
                    // The token is still unmistakably a string.
                    else => try ass.addError(cursor, "unexpected character after string literal", .{}),
                }
                break :scan;
            },
            // An escape simply skips the character that follows it.
            .escape => state = .string,
        }
    }

    switch (state) {
        // The input ended before the closing quote was seen.
        .string, .escape => try ass.addError(begin, "unterminated string", .{}),
        .result_id => {
            // A length of one means there is only the '%' itself.
            if (cursor - begin == 1) {
                try ass.addError(begin, "result-id must have at least one name character", .{});
            }
        },
        .value => {
            // Values of the form OpXxxx are re-tagged as opcodes.
            const opcode_prefix = "Op";
            const text = ass.src[begin..cursor];
            if (text.len > opcode_prefix.len and
                std.mem.startsWith(u8, text, opcode_prefix) and
                std.ascii.isUpper(text[opcode_prefix.len]))
            {
                tag = .opcode;
            }
        },
        .start, .string_end, .placeholder => {},
    }

    return Token{
        .tag = tag,
        .start = begin,
        .end = cursor,
    };
}