master
1const std = @import("std");
2const Allocator = std.mem.Allocator;
3const utils = @import("utils.zig");
4const UncheckedSliceWriter = utils.UncheckedSliceWriter;
5
/// Output of `parseAndRemoveLineCommands`.
pub const ParseLineCommandsResult = struct {
    /// The source text with `#line` commands (and ignored file contents) removed.
    /// This is a slice of the `buf` passed to `parseAndRemoveLineCommands`.
    result: []u8,
    /// Maps each line of `result` back to a line in one of the original files.
    mappings: SourceMappings,
};
10
/// Tracks the file/line that the line currently being scanned corresponds to.
const CurrentMapping = struct {
    /// Line number within `filename` that the next recorded line maps to (1-based).
    line_num: usize = 1,
    /// Name of the file the current lines originate from.
    filename: std.ArrayList(u8) = .empty,
    /// Set to `true` by `handleLineCommand` and cleared by `handleLineEnd`.
    pending: bool = true,
    /// When `true`, non-preprocessor contents are dropped from the output.
    /// `handleLineCommand` sets this for filenames ending in `.c` or `.h`.
    ignore_contents: bool = false,
};
17
/// Options accepted by `parseAndRemoveLineCommands`.
pub const ParseAndRemoveLineCommandsOptions = struct {
    /// If non-null, used as the filename of the lines that precede the first
    /// `#line` command, and registered as the root file of the mappings.
    initial_filename: ?[]const u8 = null,
};
21
/// Parses and removes #line commands as well as all source code that is within a file
/// with .c or .h extensions.
///
/// > RC treats files with the .c and .h extensions in a special manner. It
/// > assumes that a file with one of these extensions does not contain
/// > resources. If a file has the .c or .h file name extension, RC ignores all
/// > lines in the file except the preprocessor directives. Therefore, to
/// > include a file that contains resources in another resource script, give
/// > the file to be included an extension other than .c or .h.
/// from https://learn.microsoft.com/en-us/windows/win32/menurc/preprocessor-directives
///
/// Returns a slice of `buf` with the aforementioned stuff removed as well as a mapping
/// between the lines and their corresponding lines in their original files.
/// Trailing whitespace is trimmed from the returned slice.
///
/// `buf` must be at least as long as `source`
/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
///
/// If `options.initial_filename` is provided, that filename is guaranteed to be
/// within the `mappings.files` table and `root_filename_offset` will be set appropriately.
///
/// Errors:
/// - `error.InvalidLineCommand` if a line starting with `#line` is malformed.
/// - `error.LineNumberOverflow` if a corresponding line number would exceed `usize`.
/// - `error.OutOfMemory` if an allocation fails.
///
/// On success the caller owns `mappings` and must `deinit` it with `allocator`.
pub fn parseAndRemoveLineCommands(allocator: Allocator, source: []const u8, buf: []u8, options: ParseAndRemoveLineCommandsOptions) error{ OutOfMemory, InvalidLineCommand, LineNumberOverflow }!ParseLineCommandsResult {
    var parse_result = ParseLineCommandsResult{
        .result = undefined,
        .mappings = .{},
    };
    errdefer parse_result.mappings.deinit(allocator);

    var current_mapping: CurrentMapping = .{};
    defer current_mapping.filename.deinit(allocator);

    if (options.initial_filename) |initial_filename| {
        try current_mapping.filename.appendSlice(allocator, initial_filename);
        parse_result.mappings.root_filename_offset = try parse_result.mappings.files.put(allocator, initial_filename);
    }

    // This implementation attempts to be comment and string aware in order
    // to avoid errant #line <num> "<filename>" within multiline comments
    // leading to problems in the source mapping after comments are removed,
    // but it is not a perfect implementation (intentionally).
    //
    // The current implementation does not handle cases like
    // /* foo */ #line ...
    // #line ... // foo
    // #line ... /* foo ...
    // etc
    //
    // (the first example will not be recognized as a #line command, the second
    // and third will error with InvalidLineCommand)
    //
    // This is fine, though, since #line commands are generated by the
    // preprocessor so in normal circumstances they will be well-formed and
    // consistent. The only realistic way the imperfect implementation could
    // affect a 'real' use-case would be someone taking the output of a
    // preprocessor, editing it manually to add comments before/after #line
    // commands, and then running it through resinator with /:no-preprocess.

    std.debug.assert(buf.len >= source.len);
    var result = UncheckedSliceWriter{ .slice = buf };
    const State = enum {
        line_start,
        preprocessor,
        non_preprocessor,
        forward_slash,
        line_comment,
        multiline_comment,
        multiline_comment_end,
        single_quoted,
        single_quoted_escape,
        double_quoted,
        double_quoted_escape,
    };
    var state: State = .line_start;
    var index: usize = 0;
    // Start of the leading whitespace/`#` run on the current line that has
    // been seen but not yet written to the output.
    var pending_start: ?usize = null;
    var preprocessor_start: usize = 0;
    // Line number within the output (post-removal) text.
    var line_number: usize = 1;
    while (index < source.len) : (index += 1) {
        var c = source[index];
        // `ignore_contents` only changes when a #line command is handled, and
        // nothing is written during that iteration, so this stays valid for
        // the whole iteration.
        const keep = !current_mapping.ignore_contents;

        // Every state except `.preprocessor` treats a line ending the same way:
        // record the mapping for the finished line, copy the line ending
        // through (unless contents are being ignored), and reset per-line state.
        if ((c == '\r' or c == '\n') and state != .preprocessor) {
            const is_crlf = formsLineEndingPair(source, c, index + 1);
            if (keep) {
                try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                result.write(c);
                if (is_crlf) result.write(source[index + 1]);
                line_number += 1;
            }
            if (is_crlf) index += 1;
            pending_start = null;
            state = switch (state) {
                // Multiline comments continue across line endings.
                .multiline_comment, .multiline_comment_end => .multiline_comment,
                else => .line_start,
            };
            continue;
        }

        state: switch (state) {
            .line_start => switch (c) {
                '#' => {
                    preprocessor_start = index;
                    state = .preprocessor;
                    if (pending_start == null) {
                        pending_start = index;
                    }
                },
                ' ', '\t', '\x0b', '\x0c' => {
                    if (pending_start == null) {
                        pending_start = index;
                    }
                },
                '/', '\'', '"' => {
                    if (keep) {
                        result.writeSlice(source[pending_start orelse index .. index + 1]);
                        pending_start = null;
                    }
                    state = switch (c) {
                        '/' => .forward_slash,
                        '\'' => .single_quoted,
                        else => .double_quoted,
                    };
                },
                else => {
                    state = .non_preprocessor;
                    if (pending_start) |start| {
                        if (keep) {
                            result.writeSlice(source[start .. index + 1]);
                        }
                        pending_start = null;
                        continue;
                    }
                    if (keep) {
                        result.write(c);
                    }
                },
            },
            .forward_slash => {
                if (keep) result.write(c);
                state = switch (c) {
                    '/' => .line_comment,
                    '*' => .multiline_comment,
                    else => .non_preprocessor,
                };
            },
            .line_comment => {
                if (keep) result.write(c);
            },
            .multiline_comment => {
                if (keep) result.write(c);
                if (c == '*') state = .multiline_comment_end;
            },
            .multiline_comment_end => {
                if (keep) result.write(c);
                state = switch (c) {
                    '/' => .non_preprocessor,
                    // A run of '*' keeps waiting for the closing '/'.
                    '*' => .multiline_comment_end,
                    else => .multiline_comment,
                };
            },
            .single_quoted => {
                if (keep) result.write(c);
                state = switch (c) {
                    '\\' => .single_quoted_escape,
                    '\'' => .non_preprocessor,
                    else => .single_quoted,
                };
            },
            .single_quoted_escape => {
                if (keep) result.write(c);
                state = .single_quoted;
            },
            .double_quoted => {
                if (keep) result.write(c);
                state = switch (c) {
                    '\\' => .double_quoted_escape,
                    '"' => .non_preprocessor,
                    else => .double_quoted,
                };
            },
            .double_quoted_escape => {
                if (keep) result.write(c);
                state = .double_quoted;
            },
            .preprocessor => switch (c) {
                '\r', '\n' => {
                    // Now that we have the full line we can decide what to do with it
                    const preprocessor_str = source[preprocessor_start..index];
                    if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
                        try handleLineCommand(allocator, preprocessor_str, &current_mapping);
                        const is_crlf = formsLineEndingPair(source, c, index + 1);
                        if (is_crlf) index += 1;
                        state = .line_start;
                        pending_start = null;
                    } else {
                        // Backtrack and reparse the line in the non_preprocessor state,
                        // since it's possible that this line contains a multiline comment
                        // start, etc.
                        state = .non_preprocessor;
                        index = pending_start.?;
                        pending_start = null;
                        // `c` must be refreshed because the labeled continue below
                        // re-enters the switch without going back through the top
                        // of the loop. The byte at `pending_start` is never a line
                        // ending, so skipping the line-ending check above is fine.
                        c = source[index];
                        continue :state .non_preprocessor;
                    }
                },
                else => {},
            },
            .non_preprocessor => {
                if (keep) result.write(c);
                state = switch (c) {
                    '/' => .forward_slash,
                    '\'' => .single_quoted,
                    '"' => .double_quoted,
                    else => .non_preprocessor,
                };
            },
        }
    } else {
        // End of input: finish whatever line was in progress.
        switch (state) {
            .line_start => {},
            .forward_slash,
            .line_comment,
            .multiline_comment,
            .multiline_comment_end,
            .single_quoted,
            .single_quoted_escape,
            .double_quoted,
            .double_quoted_escape,
            .non_preprocessor,
            => {
                try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
            },
            .preprocessor => {
                // Now that we have the full line we can decide what to do with it
                const preprocessor_str = source[preprocessor_start..index];
                if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
                    try handleLineCommand(allocator, preprocessor_str, &current_mapping);
                } else {
                    try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
                    if (!current_mapping.ignore_contents) {
                        result.writeSlice(source[pending_start.?..index]);
                    }
                }
            },
        }
    }

    parse_result.result = result.getWritten();

    // Remove whitespace from the end of the result. This avoids issues when the
    // preprocessor adds a newline to the end of the file, since then the
    // post-preprocessed source could have more lines than the corresponding input source and
    // the inserted line can't be mapped to any lines in the original file.
    // There's no way that whitespace at the end of a file can affect the parsing
    // of the RC script so this is okay to do unconditionally.
    // TODO: There might be a better way around this
    while (parse_result.result.len > 0 and std.ascii.isWhitespace(parse_result.result[parse_result.result.len - 1])) {
        parse_result.result.len -= 1;
    }

    // If there have been no line mappings at all, then we're dealing with an empty file.
    // In this case, we want to fake a line mapping just so that we return something
    // that is useable in the same way that a non-empty mapping would be.
    if (parse_result.mappings.sources.root == null) {
        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
    }

    return parse_result;
}
494
/// Returns true if the byte at `next_index` exists and, together with
/// `line_ending`, is a line ending pair according to `utils.isLineEndingPair`.
///
/// Note: This should function the same as lex.LineHandler.currentIndexFormsLineEndingPair
pub fn formsLineEndingPair(source: []const u8, line_ending: u8, next_index: usize) bool {
    return next_index < source.len and utils.isLineEndingPair(line_ending, source[next_index]);
}
502
/// Records that output line `post_processed_line_number` corresponds to the
/// current line/file of `current_mapping`, then advances `current_mapping`
/// to the next line and clears its `pending` flag.
///
/// Returns `error.LineNumberOverflow` if the line number cannot be advanced;
/// allocation failures from the mapping tables are propagated.
pub fn handleLineEnd(allocator: Allocator, post_processed_line_number: usize, mapping: *SourceMappings, current_mapping: *CurrentMapping) !void {
    const file_offset = try mapping.files.put(allocator, current_mapping.filename.items);
    try mapping.set(post_processed_line_number, current_mapping.line_num, file_offset);

    if (current_mapping.line_num == std.math.maxInt(usize)) return error.LineNumberOverflow;
    current_mapping.line_num += 1;
    current_mapping.pending = false;
}
511
// TODO: Might want to provide diagnostics on invalid line commands instead of just returning
/// Parses a `#line <num> "<filename>"` command (without its line ending) and
/// updates `current_mapping` to the line number and filename it names.
///
/// Commands naming `<builtin>` or `<command line>` are accepted but ignored.
/// Returns `error.InvalidLineCommand` if the command is malformed: wrong
/// directive, missing/zero/non-decimal line number, filename not wrapped in
/// double quotes, invalid escape in the filename, or a NUL byte in the
/// decoded filename.
pub fn handleLineCommand(allocator: Allocator, line_command: []const u8, current_mapping: *CurrentMapping) error{ OutOfMemory, InvalidLineCommand }!void {
    // TODO: Are there other whitespace characters that should be included?
    var tokens = std.mem.tokenizeAny(u8, line_command, " \t");

    const directive = tokens.next() orelse return error.InvalidLineCommand; // #line
    if (!std.mem.eql(u8, directive, "#line")) return error.InvalidLineCommand;

    const number_token = tokens.next() orelse return error.InvalidLineCommand;
    const line_num = std.fmt.parseUnsigned(usize, number_token, 10) catch return error.InvalidLineCommand;
    if (line_num == 0) return error.InvalidLineCommand;

    // Everything after the line number is the filename literal, minus any
    // trailing whitespace.
    const remainder = tokens.rest();
    var literal_end = remainder.len;
    while (literal_end > 0 and std.ascii.isWhitespace(remainder[literal_end - 1])) {
        literal_end -= 1;
    }
    const literal = remainder[0..literal_end];

    if (literal.len < 2) return error.InvalidLineCommand;
    if (literal[0] != '"' or literal[literal.len - 1] != '"') return error.InvalidLineCommand;
    const raw_filename = literal[1 .. literal.len - 1];

    // Ignore <builtin> and <command line>
    for ([_][]const u8{ "<builtin>", "<command line>" }) |ignored_name| {
        if (std.mem.eql(u8, raw_filename, ignored_name)) return;
    }

    const decoded = parseFilename(allocator, raw_filename) catch |err| switch (err) {
        error.OutOfMemory => return error.OutOfMemory,
        else => return error.InvalidLineCommand,
    };
    defer allocator.free(decoded);

    // \x00 bytes in the filename is incompatible with how StringTable works
    if (std.mem.indexOfScalar(u8, decoded, '\x00')) |_| return error.InvalidLineCommand;

    current_mapping.line_num = line_num;
    current_mapping.filename.clearRetainingCapacity();
    try current_mapping.filename.appendSlice(allocator, decoded);
    current_mapping.pending = true;

    const is_c_file = std.ascii.endsWithIgnoreCase(decoded, ".c");
    const is_h_file = std.ascii.endsWithIgnoreCase(decoded, ".h");
    current_mapping.ignore_contents = is_c_file or is_h_file;
}
549
/// Allocating variant of `parseAndRemoveLineCommands`.
///
/// Allocates the output buffer with `allocator`, runs the transformation, and
/// resizes the buffer to the length of the result. On success the caller owns
/// both `result` (free with `allocator.free`) and `mappings` (release with
/// `mappings.deinit(allocator)`). On error nothing is leaked.
pub fn parseAndRemoveLineCommandsAlloc(allocator: Allocator, source: []const u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
    const buf = try allocator.alloc(u8, source.len);
    errdefer allocator.free(buf);
    var result = try parseAndRemoveLineCommands(allocator, source, buf, options);
    // From here on the mappings are owned by this function, so they must be
    // released if the final resize fails.
    errdefer result.mappings.deinit(allocator);
    result.result = try allocator.realloc(buf, result.result.len);
    return result;
}
557
/// Decodes the contents of a C-style string literal, with a few caveats:
/// - The str cannot contain newlines or carriage returns
/// - Hex and octal escapes are limited to u8
/// - No handling/support for L, u, or U prefixed strings
/// - The start and end double quotes should be omitted from the `str`
/// - Other than the above, does not assume any validity of the strings (i.e. there
///   may be unescaped double quotes within the str) and will return error.InvalidString
///   on any problems found.
///
/// The result is a UTF-8 encoded string owned by the caller (free with `allocator`).
fn parseFilename(allocator: Allocator, str: []const u8) error{ OutOfMemory, InvalidString }![]u8 {
    // Every escape sequence decodes to no more bytes than it occupies in
    // `str`, so a buffer of `str.len` bytes never needs to grow.
    var decoded = try std.ArrayList(u8).initCapacity(allocator, str.len);
    errdefer decoded.deinit(allocator);

    var pos: usize = 0;
    while (pos < str.len) {
        const byte = str[pos];
        pos += 1;

        // Unescaped double quotes are not allowed inside the literal.
        if (byte == '"') return error.InvalidString;
        if (byte != '\\') {
            decoded.appendAssumeCapacity(byte);
            continue;
        }

        // A backslash must be followed by at least one more byte.
        if (pos == str.len) return error.InvalidString;
        const selector = str[pos];
        pos += 1;

        switch (selector) {
            '\'', '"', '\\', '?' => decoded.appendAssumeCapacity(selector),
            'n' => decoded.appendAssumeCapacity('\n'),
            'r' => decoded.appendAssumeCapacity('\r'),
            't' => decoded.appendAssumeCapacity('\t'),
            'a' => decoded.appendAssumeCapacity('\x07'),
            'b' => decoded.appendAssumeCapacity('\x08'),
            'e' => decoded.appendAssumeCapacity('\x1b'), // non-standard
            'f' => decoded.appendAssumeCapacity('\x0c'),
            'v' => decoded.appendAssumeCapacity('\x0b'),
            'x' => {
                // \x consumes every following hex digit; the value must fit in a u8.
                var value: u8 = 0;
                var digit_count: usize = 0;
                while (pos < str.len) : (pos += 1) {
                    const digit = std.fmt.charToDigit(str[pos], 16) catch break;
                    value = std.math.mul(u8, value, 16) catch return error.InvalidString;
                    value = std.math.add(u8, value, digit) catch return error.InvalidString;
                    digit_count += 1;
                }
                if (digit_count == 0) return error.InvalidString;
                decoded.appendAssumeCapacity(value);
            },
            '0'...'7' => {
                // Octal escapes are 1 to 3 digits long; the value must fit in a u8.
                var value: u8 = selector - '0';
                var extra_digits: usize = 0;
                while (extra_digits < 2 and pos < str.len) {
                    const digit = std.fmt.charToDigit(str[pos], 8) catch break;
                    value = std.math.mul(u8, value, 8) catch return error.InvalidString;
                    value = std.math.add(u8, value, digit) catch return error.InvalidString;
                    extra_digits += 1;
                    pos += 1;
                }
                decoded.appendAssumeCapacity(value);
            },
            'u', 'U' => {
                // \u requires exactly 4 hex digits, \U exactly 8.
                const digits_needed: usize = if (selector == 'u') 4 else 8;
                if (str.len - pos < digits_needed) return error.InvalidString;

                var codepoint: u21 = 0;
                for (str[pos..][0..digits_needed]) |hex_char| {
                    const digit = std.fmt.charToDigit(hex_char, 16) catch return error.InvalidString;
                    codepoint = std.math.mul(u21, codepoint, 16) catch return error.InvalidString;
                    codepoint = std.math.add(u21, codepoint, digit) catch return error.InvalidString;
                }
                pos += digits_needed;

                var utf8_buf: [4]u8 = undefined;
                const utf8_len = std.unicode.utf8Encode(codepoint, &utf8_buf) catch return error.InvalidString;
                decoded.appendSliceAssumeCapacity(utf8_buf[0..utf8_len]);
            },
            else => return error.InvalidString,
        }
    }

    return decoded.toOwnedSlice(allocator);
}
698
/// Test helper: decodes `input` with `parseFilename` and checks that the
/// result is byte-for-byte equal to `expected`.
fn testParseFilename(expected: []const u8, input: []const u8) !void {
    const gpa = std.testing.allocator;
    const actual = try parseFilename(gpa, input);
    defer gpa.free(actual);

    try std.testing.expectEqualSlices(u8, expected, actual);
}
705
test parseFilename {
    // Inputs that must decode successfully.
    try testParseFilename("'\"?\\\t\n\r\x11", "\\'\\\"\\?\\\\\\t\\n\\r\\x11");
    try testParseFilename("\xABz\x53", "\\xABz\\123");
    try testParseFilename("⚡⚡", "\\u26A1\\U000026A1");

    // Inputs that must be rejected.
    const invalid_inputs = [_][]const u8{
        "\"",
        "\\",
        "\\u",
        "\\U",
        "\\x",
        "\\xZZ",
        "\\xABCDEF",
        "\\777",
    };
    for (invalid_inputs) |input| {
        try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, input));
    }
}
719
/// Maps line numbers of the post-processed text to lines in the original files.
///
/// Each `Source` node covers the run of lines starting at its `start_line`
/// up to (but not including) the `start_line` of the next node.
pub const SourceMappings = struct {
    /// Nodes ordered by `Source.start_line`.
    sources: Sources = .{},
    /// Table of all filenames referenced by the nodes.
    files: StringTable = .{},
    /// The default assumes that the first filename added is the root file.
    /// The value should be set to the correct offset if that assumption does not hold.
    root_filename_offset: u32 = 0,
    /// Backing storage for the nodes in `sources`; always used with `std.heap.page_allocator`.
    source_node_pool: std.heap.MemoryPool(Sources.Node) = .empty,
    /// The largest line number that has been passed to `set` (adjusted by `collapse`).
    end_line: usize = 0,

    /// Orders sources by the line at which they start.
    fn sourceCompare(a: Source, b: Source) std.math.Order {
        return std.math.order(a.start_line, b.start_line);
    }
    const Sources = std.Treap(Source, sourceCompare);

    pub const Source = struct {
        /// First post-processed line covered by this source.
        start_line: usize,
        /// Number of additional original lines that were collapsed into `start_line`.
        span: usize = 0,
        /// Line in the original file that `start_line` corresponds to.
        corresponding_start_line: usize,
        /// Offset of the original file's name within `SourceMappings.files`.
        filename_offset: u32,
    };

    /// Frees the filename table (with `allocator`) and all nodes.
    pub fn deinit(self: *SourceMappings, allocator: Allocator) void {
        self.files.deinit(allocator);
        self.source_node_pool.deinit(std.heap.page_allocator);
    }

    /// Find the node that 'contains' the `line`, i.e. the node with the
    /// largest start_line that is <= `line`. Returns null if every node
    /// starts after `line` (or there are no nodes).
    fn findNode(self: SourceMappings, line: usize) ?*Sources.Node {
        // The most recently visited node that starts before `line`. Whenever
        // the descent runs out of children this is the closest node that is
        // <= `line`. For example, searching for 2 in
        //
        //   1
        //    \
        //     6
        //    /
        //   3
        //
        // walks down to the '3' node but must return the '1' node.
        var closest_before: ?*Sources.Node = null;

        var cursor = self.sources.root;
        while (cursor) |current| {
            switch (std.math.order(line, current.key.start_line)) {
                .eq => return current,
                .gt => {
                    closest_before = current;
                    cursor = current.children[1];
                },
                .lt => cursor = current.children[0],
            }
        }
        return closest_before;
    }

    /// Records that post-processed line `line_num` corresponds to line
    /// `corresponding_line_num` of the file at `filename_offset`. A new node
    /// is only created when the line does not continue the node that
    /// currently contains it.
    ///
    /// Note: `line_num` and `corresponding_line_num` start at 1
    pub fn set(self: *SourceMappings, line_num: usize, corresponding_line_num: usize, filename_offset: u32) !void {
        const containing = self.findNode(line_num);

        const continues_existing = if (containing) |node| same: {
            if (node.key.filename_offset != filename_offset) break :same false;
            // TODO: These use i65 to avoid truncation when any of the line number values
            //       use all 64 bits of the usize. In reality, line numbers can't really
            //       get that large so limiting the line number and using a smaller iX
            //       type here might be a better solution.
            const existing_delta = @as(i65, @intCast(node.key.corresponding_start_line)) - @as(i65, @intCast(node.key.start_line));
            const new_delta = @as(i65, @intCast(corresponding_line_num)) - @as(i65, @intCast(line_num));
            break :same existing_delta == new_delta;
        } else false;

        if (!continues_existing) {
            // spans must not overlap
            if (containing) |node| {
                std.debug.assert(node.key.start_line != line_num);
            }

            const key: Source = .{
                .start_line = line_num,
                .corresponding_start_line = corresponding_line_num,
                .filename_offset = filename_offset,
            };
            var entry = self.sources.getEntryFor(key);
            const inserted = try self.source_node_pool.create(std.heap.page_allocator);
            inserted.key = key;
            entry.set(inserted);
        }

        self.end_line = @max(self.end_line, line_num);
    }

    /// Returns the source that contains `line_num`, if any.
    ///
    /// Note: `line_num` starts at 1
    pub fn get(self: SourceMappings, line_num: usize) ?Source {
        return if (self.findNode(line_num)) |node| node.key else null;
    }

    pub const CorrespondingSpan = struct {
        start_line: usize,
        end_line: usize,
        filename_offset: u32,
    };

    /// Returns the range of original lines that `line_num` corresponds to, or
    /// null if no source contains `line_num`. Only the first line of a source
    /// can correspond to more than one original line (see `collapse`).
    pub fn getCorrespondingSpan(self: SourceMappings, line_num: usize) ?CorrespondingSpan {
        const source = self.get(line_num) orelse return null;

        if (line_num == source.start_line) {
            return .{
                .start_line = source.corresponding_start_line,
                .end_line = source.corresponding_start_line + source.span,
                .filename_offset = source.filename_offset,
            };
        }

        // Lines after the first are shifted by however many lines were
        // collapsed into the first line.
        const lines_into_source = line_num - source.start_line;
        const corresponding = source.corresponding_start_line + (source.span + lines_into_source);
        return .{
            .start_line = corresponding,
            .end_line = corresponding,
            .filename_offset = source.filename_offset,
        };
    }

    /// Merges the `num_following_lines_to_collapse` lines after `line_num`
    /// into `line_num`, shifting the start of every later source up by the
    /// same amount. A source containing `line_num` must already exist.
    pub fn collapse(self: *SourceMappings, line_num: usize, num_following_lines_to_collapse: usize) !void {
        std.debug.assert(num_following_lines_to_collapse > 0);
        const removed_lines = num_following_lines_to_collapse;

        const containing = self.findNode(line_num).?;
        const collapsed: *Sources.Node = if (containing.key.start_line == line_num) existing: {
            containing.key.span += removed_lines;
            break :existing containing;
        } else split: {
            // `line_num` is in the middle of a source, so give it a node of
            // its own that carries the span.
            const lines_into_source = line_num - containing.key.start_line;
            const key: Source = .{
                .start_line = line_num,
                .span = removed_lines,
                .corresponding_start_line = containing.key.corresponding_start_line + containing.key.span + lines_into_source,
                .filename_offset = containing.key.filename_offset,
            };
            var entry = self.sources.getEntryFor(key);
            const inserted = try self.source_node_pool.create(std.heap.page_allocator);
            inserted.key = key;
            entry.set(inserted);
            break :split inserted;
        };

        // now subtract the removed line count from the start line number of
        // all of the following nodes in order
        var it = Sources.InorderIterator{ .current = collapsed };
        // skip past the collapsed node itself, but remember it
        var previous = it.next().?;
        while (it.next()) |following| {
            following.key.start_line -= removed_lines;

            // This can only really happen if there are #line commands within
            // a multiline comment, which should be skipped over.
            std.debug.assert(previous.key.start_line <= following.key.start_line);
            previous = following;
        }
        self.end_line -= removed_lines;
    }

    /// Returns true if the line is from the main/root file (i.e. not a file that has been
    /// `#include`d).
    pub fn isRootFile(self: *const SourceMappings, line_num: usize) bool {
        const source = self.get(line_num) orelse return false;
        return self.root_filename_offset == source.filename_offset;
    }
};
903
test "SourceMappings collapse" {
    const gpa = std.testing.allocator;

    var mappings: SourceMappings = .{};
    defer mappings.deinit(gpa);
    const rc_file = try mappings.files.put(gpa, "test.rc");

    try mappings.set(1, 1, rc_file);
    try mappings.set(5, 5, rc_file);

    // Fold lines 3 and 4 into line 2; the old line 5 becomes line 3.
    try mappings.collapse(2, 2);

    try std.testing.expectEqual(@as(usize, 3), mappings.end_line);

    const ExpectedSpan = struct { line: usize, start: usize, end: usize };
    const expected_spans = [_]ExpectedSpan{
        .{ .line = 1, .start = 1, .end = 1 },
        .{ .line = 2, .start = 2, .end = 4 },
        .{ .line = 3, .start = 5, .end = 5 },
    };
    for (expected_spans) |expected| {
        const span = mappings.getCorrespondingSpan(expected.line).?;
        try std.testing.expectEqual(expected.start, span.start_line);
        try std.testing.expectEqual(expected.end, span.end_line);
    }
}
927
/// String interning table. Same thing as StringTable in Zig's src/Wasm.zig.
///
/// Each distinct string is appended once to `data`, followed by a 0 byte, and is
/// identified by the `u32` offset at which it starts. `map` holds those offsets
/// and looks them up by the string content they point at, so no second copy of
/// the string is stored.
pub const StringTable = struct {
    /// The interned strings, each one terminated by a 0 byte.
    data: std.ArrayList(u8) = .empty,
    /// Offsets into `data` of every interned string.
    map: std.HashMapUnmanaged(u32, void, std.hash_map.StringIndexContext, std.hash_map.default_max_load_percentage) = .empty,

    /// Releases the memory held by `data` and `map` using `allocator`.
    pub fn deinit(self: *StringTable, allocator: Allocator) void {
        self.data.deinit(allocator);
        self.map.deinit(allocator);
    }

    /// Interns `value` and returns the offset that identifies it.
    ///
    /// If an equal string is already present, its existing offset is returned and
    /// nothing is appended. Otherwise `value` plus a terminating 0 byte is appended
    /// to `data`.
    ///
    /// On allocation failure an error is returned and the table is left exactly as
    /// it was before the call.
    pub fn put(self: *StringTable, allocator: Allocator, value: []const u8) !u32 {
        const result = try self.map.getOrPutContextAdapted(
            allocator,
            value,
            std.hash_map.StringIndexAdapter{ .bytes = &self.data },
            .{ .bytes = &self.data },
        );
        if (result.found_existing) {
            return result.key_ptr.*;
        }
        // The slot reserved above does not have its key written yet. If growing
        // `data` fails, the slot must be released again; otherwise the map would
        // keep an entry with an undefined offset that later lookups would read.
        errdefer self.map.removeByPtr(result.key_ptr);

        try self.data.ensureUnusedCapacity(allocator, value.len + 1);
        // NOTE(review): assumes `data` never grows past what fits in a u32.
        const offset: u32 = @intCast(self.data.items.len);

        self.data.appendSliceAssumeCapacity(value);
        self.data.appendAssumeCapacity(0);

        result.key_ptr.* = offset;

        return offset;
    }

    /// Returns the string starting at `offset`, up to (not including) the next
    /// 0 byte. `offset` must be within `data` (asserted).
    pub fn get(self: StringTable, offset: u32) []const u8 {
        std.debug.assert(offset < self.data.items.len);
        return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(self.data.items.ptr + offset)), 0);
    }

    /// Returns the offset of `value` if it has been interned, or `null` otherwise.
    /// Does not modify the table.
    pub fn getOffset(self: *const StringTable, value: []const u8) ?u32 {
        return self.map.getKeyAdapted(
            value,
            std.hash_map.StringIndexAdapter{ .bytes = &self.data },
        );
    }
};
972
/// What the tests expect the source mapping of a single output line to be.
/// Compared field by field against the span reported for that line.
const ExpectedSourceSpan = struct {
    /// Expected `start_line` of the corresponding span.
    start_line: usize,
    /// Expected `end_line` of the corresponding span.
    end_line: usize,
    /// Expected name of the file the span refers to.
    filename: []const u8,
};
978
/// Test helper: runs `parseAndRemoveLineCommandsAlloc` on `source` with `options`,
/// then checks that the resulting text equals `expected` and that the resulting
/// mappings equal `expected_spans` (entry N describes output line N + 1).
///
/// If the mappings differ, both the expected and the actual mappings are printed
/// with `std.debug.print` before the error is returned.
fn testParseAndRemoveLineCommands(
    expected: []const u8,
    comptime expected_spans: []const ExpectedSourceSpan,
    source: []const u8,
    options: ParseAndRemoveLineCommandsOptions,
) !void {
    const gpa = std.testing.allocator;

    var results = try parseAndRemoveLineCommandsAlloc(gpa, source, options);
    defer gpa.free(results.result);
    defer results.mappings.deinit(gpa);

    try std.testing.expectEqualStrings(expected, results.result);

    if (expectEqualMappings(expected_spans, results.mappings)) |_| {
        return;
    } else |err| {
        std.debug.print("\nexpected mappings:\n", .{});
        for (expected_spans, 1..) |want, line_num| {
            std.debug.print("{}: {s}:{}-{}\n", .{ line_num, want.filename, want.start_line, want.end_line });
        }

        std.debug.print("\nactual mappings:\n", .{});
        var line_num: usize = 1;
        while (line_num <= results.mappings.end_line) : (line_num += 1) {
            const got = results.mappings.getCorrespondingSpan(line_num).?;
            const got_filename = results.mappings.files.get(got.filename_offset);
            std.debug.print("{}: {s}:{}-{}\n", .{ line_num, got_filename, got.start_line, got.end_line });
        }
        std.debug.print("\n", .{});

        return err;
    }
}
1008
/// Checks that `mappings.end_line` equals `expected_spans.len` and that, for every
/// 1-based line number N, the span and filename recorded for line N match
/// `expected_spans[N - 1]`.
///
/// Returns `error.MissingLineNum` if a line has no corresponding span; otherwise
/// propagates the error of the first `std.testing` expectation that fails.
fn expectEqualMappings(expected_spans: []const ExpectedSourceSpan, mappings: SourceMappings) !void {
    try std.testing.expectEqual(expected_spans.len, mappings.end_line);

    for (expected_spans, 1..) |want, line_num| {
        const got = mappings.getCorrespondingSpan(line_num) orelse return error.MissingLineNum;
        const got_filename = mappings.files.get(got.filename_offset);

        try std.testing.expectEqual(want.start_line, got.start_line);
        try std.testing.expectEqual(want.end_line, got.end_line);
        try std.testing.expectEqualStrings(want.filename, got_filename);
    }
}
1020
test "basic" {
    // A lone #line command produces empty output with a single mapped line.
    const expected_spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
    };
    try testParseAndRemoveLineCommands("", &expected_spans, "#line 1 \"blah.rc\"", .{});
}
1026
test "only removes line commands" {
    // Other preprocessor directives (here #pragma) must survive untouched.
    const source =
        \\#line 1 "blah.rc"
        \\#pragma code_page(65001)
    ;
    const expected =
        \\#pragma code_page(65001)
    ;
    const expected_spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
    };

    try testParseAndRemoveLineCommands(expected, &expected_spans, source, .{});
}
1037
test "whitespace and line endings" {
    // Tabs/spaces between the tokens and a trailing CRLF are accepted.
    const source = "#line \t 1 \t \"blah.rc\"\r\n";
    const expected_spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
    };

    try testParseAndRemoveLineCommands("", &expected_spans, source, .{});
}
1043
test "example" {
    // Only the lines attributed to ./included.rc are expected in the output;
    // the lines attributed to ./header.h are not.
    const source =
        \\#line 1 "rcdata.rc"
        \\#line 1 "<built-in>"
        \\#line 1 "<built-in>"
        \\#line 355 "<built-in>"
        \\#line 1 "<command line>"
        \\#line 1 "<built-in>"
        \\#line 1 "rcdata.rc"
        \\#line 1 "./header.h"
        \\
        \\
        \\2 RCDATA {"blah"}
        \\
        \\
        \\#line 1 "./included.rc"
        \\
        \\included RCDATA {"hello"}
        \\#line 7 "./header.h"
        \\#line 1 "rcdata.rc"
    ;
    const expected =
        \\
        \\included RCDATA {"hello"}
    ;
    const expected_spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "./included.rc" },
        .{ .start_line = 2, .end_line = 2, .filename = "./included.rc" },
    };

    try testParseAndRemoveLineCommands(expected, &expected_spans, source, .{});
}
1072
test "CRLF and other line endings" {
    // The input mixes \r\n, a lone \r and \n\r after the #line commands.
    const source = "#line 1 \"crlf.rc\"\r\n#line 1 \"<built-in>\"\r#line 1 \"crlf.rc\"\n\rhello\r\n#pragma code_page(65001)\r\nworld\r\n";
    const expected = "hello\r\n#pragma code_page(65001)\r\nworld";
    const expected_spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "crlf.rc" },
        .{ .start_line = 2, .end_line = 2, .filename = "crlf.rc" },
        .{ .start_line = 3, .end_line = 3, .filename = "crlf.rc" },
    };

    try testParseAndRemoveLineCommands(expected, &expected_spans, source, .{});
}
1085
test "no line commands" {
    // Without any #line command the text passes through unchanged and every
    // line is attributed to `initial_filename`.
    const unchanged =
        \\1 RCDATA {"blah"}
        \\2 RCDATA {"blah"}
    ;
    const expected_spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
        .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
    };

    try testParseAndRemoveLineCommands(unchanged, &expected_spans, unchanged, .{ .initial_filename = "blah.rc" });
}
1098
test "in place" {
    const gpa = std.testing.allocator;

    // The same buffer is passed as both `source` and `buf`.
    var buffer = "#line 1 \"blah.rc\"".*;
    var parsed = try parseAndRemoveLineCommands(gpa, &buffer, &buffer, .{});
    defer parsed.mappings.deinit(gpa);

    try std.testing.expectEqualStrings("", parsed.result);
}
1105
test "line command within a multiline comment" {
    const options: ParseAndRemoveLineCommandsOptions = .{ .initial_filename = "blah.rc" };

    // #line inside a multiline comment: the text is expected back unchanged
    {
        const unchanged =
            \\/*
            \\#line 1 "irrelevant.rc"
            \\
            \\
            \\*/
        ;
        const expected_spans = [_]ExpectedSourceSpan{
            .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
            .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
            .{ .start_line = 3, .end_line = 3, .filename = "blah.rc" },
            .{ .start_line = 4, .end_line = 4, .filename = "blah.rc" },
            .{ .start_line = 5, .end_line = 5, .filename = "blah.rc" },
        };
        try testParseAndRemoveLineCommands(unchanged, &expected_spans, unchanged, options);
    }

    // * but without / directly after
    {
        const unchanged =
            \\/** /
            \\#line 1 "irrelevant.rc"
            \\*/
        ;
        const expected_spans = [_]ExpectedSourceSpan{
            .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
            .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
            .{ .start_line = 3, .end_line = 3, .filename = "blah.rc" },
        };
        try testParseAndRemoveLineCommands(unchanged, &expected_spans, unchanged, options);
    }

    // /** and **/
    {
        const source =
            \\/**
            \\#line 1 "irrelevant.rc"
            \\**/
            \\#line 20 "blah.rc"
            \\foo
        ;
        const expected =
            \\/**
            \\#line 1 "irrelevant.rc"
            \\**/
            \\foo
        ;
        const expected_spans = [_]ExpectedSourceSpan{
            .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
            .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
            .{ .start_line = 3, .end_line = 3, .filename = "blah.rc" },
            .{ .start_line = 20, .end_line = 20, .filename = "blah.rc" },
        };
        try testParseAndRemoveLineCommands(expected, &expected_spans, source, options);
    }
}
1161
test "whitespace preservation" {
    // Leading whitespace before a lone `/` must come back unchanged.
    const unchanged =
        \\ /
        \\/
    ;
    const expected_spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
        .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
    };

    try testParseAndRemoveLineCommands(unchanged, &expected_spans, unchanged, .{ .initial_filename = "blah.rc" });
}
1174
test "preprocessor line with a multiline comment after" {
    // A multiline comment that opens on a #pragma line is expected back unchanged.
    const unchanged =
        \\#pragma test /*
        \\
        \\*/
    ;
    const expected_spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
        .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
        .{ .start_line = 3, .end_line = 3, .filename = "blah.rc" },
    };

    try testParseAndRemoveLineCommands(unchanged, &expected_spans, unchanged, .{ .initial_filename = "blah.rc" });
}
1190
test "comment after line command" {
    // A comment opener trailing a #line command is rejected.
    var buffer = "#line 1 \"blah.rc\" /*".*;
    const outcome = parseAndRemoveLineCommands(std.testing.allocator, &buffer, &buffer, .{});
    try std.testing.expectError(error.InvalidLineCommand, outcome);
}
1195
test "line command with 0 as line number" {
    // Line number 0 is rejected as an invalid #line command.
    var buffer = "#line 0 \"blah.rc\"".*;
    const outcome = parseAndRemoveLineCommands(std.testing.allocator, &buffer, &buffer, .{});
    try std.testing.expectError(error.InvalidLineCommand, outcome);
}
1200
test "line number limits" {
    // TODO: Avoid usize for line numbers
    if (@sizeOf(usize) != 8) return error.SkipZigTest;

    const gpa = std.testing.allocator;

    // greater than i64 max
    {
        const source =
            \\#line 11111111111111111111 "blah.rc"
        ;
        const expected =
            \\
        ;
        const expected_spans = [_]ExpectedSourceSpan{
            .{ .start_line = 11111111111111111111, .end_line = 11111111111111111111, .filename = "blah.rc" },
        };
        try testParseAndRemoveLineCommands(expected, &expected_spans, source, .{ .initial_filename = "blah.rc" });
    }

    // equal to u64 max, overflows on line number increment
    {
        var buffer = "#line 18446744073709551615 \"blah.rc\"".*;
        const outcome = parseAndRemoveLineCommands(gpa, &buffer, &buffer, .{});
        try std.testing.expectError(error.LineNumberOverflow, outcome);
    }

    // greater than u64 max
    {
        var buffer = "#line 18446744073709551616 \"blah.rc\"".*;
        const outcome = parseAndRemoveLineCommands(gpa, &buffer, &buffer, .{});
        try std.testing.expectError(error.InvalidLineCommand, outcome);
    }
}