//! Tokenizer for Make-style dependency (.d) files.
const Tokenizer = @This();

// Current read position in `bytes`.
index: usize = 0,
// The depfile contents being tokenized; emitted tokens are slices into this buffer.
bytes: []const u8,
// Persisted state-machine position so context survives across `next` calls; see `State`.
state: State = .lhs,

const std = @import("std");
const testing = std.testing;
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
11
/// Return the next token from `bytes`, or null when input is exhausted.
/// Implemented as a byte-at-a-time state machine; `self.state` lives in the
/// struct so target/prereq context carries over between calls.
pub fn next(self: *Tokenizer) ?Token {
    // First byte of the token currently being accumulated.
    var start = self.index;
    // Set when the token contains escapes ($$, \<space>, \#, \\) that the
    // caller must expand via Token.resolve before using the bytes.
    var must_resolve = false;
    while (self.index < self.bytes.len) {
        const char = self.bytes[self.index];
        switch (self.state) {
            // Between rules: skip whitespace until a target begins.
            .lhs => switch (char) {
                '\t', '\n', '\r', ' ' => {
                    // silently ignore whitespace
                    self.index += 1;
                },
                else => {
                    start = self.index;
                    self.state = .target;
                },
            },
            // Inside a target name, before the ':' separator.
            .target => switch (char) {
                '\n', '\r' => {
                    return errorIllegalChar(.invalid_target, self.index, char);
                },
                '$' => {
                    self.state = .target_dollar_sign;
                    self.index += 1;
                },
                '\\' => {
                    self.state = .target_reverse_solidus;
                    self.index += 1;
                },
                ':' => {
                    self.state = .target_colon;
                    self.index += 1;
                },
                '\t', ' ' => {
                    // Horizontal space ends the target; a ':' must follow
                    // (enforced by .target_space).
                    self.state = .target_space;

                    const bytes = self.bytes[start..self.index];
                    std.debug.assert(bytes.len != 0);
                    self.index += 1;

                    return finishTarget(must_resolve, bytes);
                },
                else => {
                    self.index += 1;
                },
            },
            // Saw '\' inside a target: decide whether it escapes the next byte.
            .target_reverse_solidus => switch (char) {
                '\t', '\n', '\r' => {
                    return errorIllegalChar(.bad_target_escape, self.index, char);
                },
                ' ', '#', '\\' => {
                    // Escaped space/hash/backslash: resolution must strip the '\'.
                    must_resolve = true;
                    self.state = .target;
                    self.index += 1;
                },
                '$' => {
                    self.state = .target_dollar_sign;
                    self.index += 1;
                },
                else => {
                    // Not an escape; the '\' was a literal path byte.
                    self.state = .target;
                    self.index += 1;
                },
            },
            // Saw '$': only "$$" (escaped dollar) is legal in a target.
            .target_dollar_sign => switch (char) {
                '$' => {
                    must_resolve = true;
                    self.state = .target;
                    self.index += 1;
                },
                else => {
                    return errorIllegalChar(.expected_dollar_sign, self.index, char);
                },
            },
            // Saw ':' after target bytes: either the rule separator or the
            // start of a Windows drive spec like "C:\" / "C:/".
            .target_colon => switch (char) {
                '\n', '\r' => {
                    const bytes = self.bytes[start .. self.index - 1];
                    if (bytes.len != 0) {
                        self.state = .lhs;
                        return finishTarget(must_resolve, bytes);
                    }
                    // silently ignore null target
                    self.state = .lhs;
                },
                '/', '\\' => {
                    self.state = .target_colon_reverse_solidus;
                    self.index += 1;
                },
                else => {
                    const bytes = self.bytes[start .. self.index - 1];
                    if (bytes.len != 0) {
                        self.state = .rhs;
                        return finishTarget(must_resolve, bytes);
                    }
                    // silently ignore null target
                    self.state = .lhs;
                },
            },
            // Saw ":\" or ":/": end-of-line ends the target; anything else
            // means the colon was part of a drive spec inside the target.
            .target_colon_reverse_solidus => switch (char) {
                '\n', '\r' => {
                    const bytes = self.bytes[start .. self.index - 2];
                    if (bytes.len != 0) {
                        self.state = .lhs;
                        return finishTarget(must_resolve, bytes);
                    }
                    // silently ignore null target
                    self.state = .lhs;
                },
                else => {
                    self.state = .target;
                },
            },
            // Target already emitted; expect optional horizontal space then ':'.
            .target_space => switch (char) {
                '\t', ' ' => {
                    // silently ignore additional horizontal whitespace
                    self.index += 1;
                },
                ':' => {
                    self.state = .rhs;
                    self.index += 1;
                },
                else => {
                    return errorIllegalChar(.expected_colon, self.index, char);
                },
            },
            // Right-hand side of a rule: between prerequisites.
            .rhs => switch (char) {
                '\t', ' ' => {
                    // silently ignore horizontal whitespace
                    self.index += 1;
                },
                '\n', '\r' => {
                    self.state = .lhs;
                },
                '\\' => {
                    self.state = .rhs_continuation;
                    self.index += 1;
                },
                '"' => {
                    self.state = .prereq_quote;
                    self.index += 1;
                    start = self.index;
                },
                else => {
                    start = self.index;
                    self.state = .prereq;
                },
            },
            // Saw '\' on the RHS between prereqs: must be a line continuation.
            .rhs_continuation => switch (char) {
                '\n' => {
                    self.state = .rhs;
                    self.index += 1;
                },
                '\r' => {
                    self.state = .rhs_continuation_linefeed;
                    self.index += 1;
                },
                else => {
                    return errorIllegalChar(.continuation_eol, self.index, char);
                },
            },
            // Saw "\<CR>": an LF must complete the CRLF continuation.
            .rhs_continuation_linefeed => switch (char) {
                '\n' => {
                    self.state = .rhs;
                    self.index += 1;
                },
                else => {
                    return errorIllegalChar(.continuation_eol, self.index, char);
                },
            },
            // Inside a double-quoted prerequisite; no escapes are recognized.
            .prereq_quote => switch (char) {
                '"' => {
                    self.index += 1;
                    self.state = .rhs;
                    return finishPrereq(must_resolve, self.bytes[start .. self.index - 1]);
                },
                else => {
                    self.index += 1;
                },
            },
            // Inside an unquoted prerequisite.
            .prereq => switch (char) {
                '\t', ' ' => {
                    self.state = .rhs;
                    return finishPrereq(must_resolve, self.bytes[start..self.index]);
                },
                '\n', '\r' => {
                    self.state = .lhs;
                    return finishPrereq(must_resolve, self.bytes[start..self.index]);
                },
                '\\' => {
                    self.state = .prereq_continuation;
                    self.index += 1;
                },
                else => {
                    self.index += 1;
                },
            },
            // Saw '\' in a prereq: continuation, escaped space, or literal byte.
            .prereq_continuation => switch (char) {
                '\n' => {
                    self.index += 1;
                    self.state = .rhs;
                    // Exclude the trailing "\<LF>" from the prereq bytes.
                    return finishPrereq(must_resolve, self.bytes[start .. self.index - 2]);
                },
                '\r' => {
                    self.state = .prereq_continuation_linefeed;
                    self.index += 1;
                },
                '\\' => {
                    // The previous \ wasn't a continuation, but this one might be.
                    self.index += 1;
                },
                ' ' => {
                    // not continuation, but escaped space must be resolved
                    must_resolve = true;
                    self.state = .prereq;
                    self.index += 1;
                },
                else => {
                    // not continuation
                    self.state = .prereq;
                    self.index += 1;
                },
            },
            // Saw "\<CR>" in a prereq: an LF must complete the continuation.
            .prereq_continuation_linefeed => switch (char) {
                '\n' => {
                    self.index += 1;
                    self.state = .rhs;
                    // Exclude the trailing "\<CR><LF>" from the prereq bytes.
                    return finishPrereq(must_resolve, self.bytes[start .. self.index - 3]);
                },
                else => {
                    return errorIllegalChar(.continuation_eol, self.index, char);
                },
            },
        }
    } else {
        // End of input: flush or diagnose whatever state scanning stopped in.
        switch (self.state) {
            .lhs,
            .rhs,
            .rhs_continuation,
            .rhs_continuation_linefeed,
            => return null,
            .target => {
                return errorPosition(.incomplete_target, start, self.bytes[start..]);
            },
            .target_reverse_solidus,
            .target_dollar_sign,
            => {
                const idx = self.index - 1;
                return errorIllegalChar(.incomplete_escape, idx, self.bytes[idx]);
            },
            .target_colon => {
                const bytes = self.bytes[start .. self.index - 1];
                if (bytes.len != 0) {
                    self.index += 1;
                    self.state = .rhs;
                    return finishTarget(must_resolve, bytes);
                }
                // silently ignore null target
                self.state = .lhs;
                return null;
            },
            .target_colon_reverse_solidus => {
                const bytes = self.bytes[start .. self.index - 2];
                if (bytes.len != 0) {
                    self.index += 1;
                    self.state = .rhs;
                    return finishTarget(must_resolve, bytes);
                }
                // silently ignore null target
                self.state = .lhs;
                return null;
            },
            .target_space => {
                const idx = self.index - 1;
                return errorIllegalChar(.expected_colon, idx, self.bytes[idx]);
            },
            .prereq_quote => {
                return errorPosition(.incomplete_quoted_prerequisite, start, self.bytes[start..]);
            },
            .prereq => {
                self.state = .lhs;
                return finishPrereq(must_resolve, self.bytes[start..]);
            },
            .prereq_continuation => {
                self.state = .lhs;
                // Drop the dangling '\'.
                return finishPrereq(must_resolve, self.bytes[start .. self.index - 1]);
            },
            .prereq_continuation_linefeed => {
                self.state = .lhs;
                // Drop the dangling "\<CR>".
                return finishPrereq(must_resolve, self.bytes[start .. self.index - 2]);
            },
        }
    }
    unreachable;
}
305
/// Construct the error token variant `id`, attaching the position at which
/// the problem started and the raw bytes involved.
fn errorPosition(comptime id: std.meta.Tag(Token), index: usize, bytes: []const u8) Token {
    const payload: Token.IndexAndBytes = .{ .index = index, .bytes = bytes };
    return @unionInit(Token, @tagName(id), payload);
}
309
/// Construct the error token variant `id`, attaching the position of the
/// single offending byte and the byte itself.
fn errorIllegalChar(comptime id: std.meta.Tag(Token), index: usize, char: u8) Token {
    const payload: Token.IndexAndChar = .{ .index = index, .char = char };
    return @unionInit(Token, @tagName(id), payload);
}
313
/// Wrap target bytes in the right token variant: plain, or one whose
/// escapes still need expansion via `Token.resolve`.
fn finishTarget(must_resolve: bool, bytes: []const u8) Token {
    if (must_resolve) return .{ .target_must_resolve = bytes };
    return .{ .target = bytes };
}
317
/// Wrap prerequisite bytes in the right token variant: plain, or one whose
/// escapes still need expansion via `Token.resolve`.
fn finishPrereq(must_resolve: bool, bytes: []const u8) Token {
    if (must_resolve) return .{ .prereq_must_resolve = bytes };
    return .{ .prereq = bytes };
}
321
/// Lexer states; stored on the Tokenizer so context persists across `next` calls.
const State = enum {
    lhs, // start of line; skipping whitespace before a target
    target, // inside a target name
    target_reverse_solidus, // saw '\' inside a target
    target_dollar_sign, // saw '$' inside a target
    target_colon, // saw ':' after target bytes
    target_colon_reverse_solidus, // saw ":\" or ":/" (possible Windows drive spec)
    target_space, // target emitted; expecting optional spaces then ':'
    rhs, // right-hand side, between prerequisites
    rhs_continuation, // saw '\' on the RHS (line continuation)
    rhs_continuation_linefeed, // saw "\<CR>" on the RHS; expecting LF
    prereq_quote, // inside a double-quoted prerequisite
    prereq, // inside an unquoted prerequisite
    prereq_continuation, // saw '\' inside a prerequisite
    prereq_continuation_linefeed, // saw "\<CR>" inside a prerequisite
};
338
/// Result of a call to `next`: either a lexed target/prerequisite (payload
/// is a slice into the tokenizer's input buffer) or a diagnostic describing
/// why lexing stopped.
pub const Token = union(enum) {
    target: []const u8,
    // Target containing escapes; expand with `resolve` before use.
    target_must_resolve: []const u8,
    prereq: []const u8,
    // Prerequisite containing escapes; expand with `resolve` before use.
    prereq_must_resolve: []const u8,

    // Errors carrying the offending byte range.
    incomplete_quoted_prerequisite: IndexAndBytes,
    incomplete_target: IndexAndBytes,

    // Errors carrying a single offending byte.
    invalid_target: IndexAndChar,
    bad_target_escape: IndexAndChar,
    expected_dollar_sign: IndexAndChar,
    continuation_eol: IndexAndChar,
    incomplete_escape: IndexAndChar,
    expected_colon: IndexAndChar,

    pub const IndexAndChar = struct {
        index: usize,
        char: u8,
    };

    pub const IndexAndBytes = struct {
        index: usize,
        bytes: []const u8,
    };

    /// Resolve escapes in target or prereq. Only valid with .target_must_resolve or .prereq_must_resolve.
    /// Appends the expanded bytes to `list`; the payload slice is not modified.
    pub fn resolve(self: Token, gpa: Allocator, list: *std.ArrayList(u8)) error{OutOfMemory}!void {
        switch (self) {
            .target_must_resolve => |bytes| {
                // Targets recognize both '\'-escapes (\<space>, \#, \\) and "$$".
                var state: enum { start, escape, dollar } = .start;
                for (bytes) |c| {
                    switch (state) {
                        .start => {
                            switch (c) {
                                '\\' => state = .escape,
                                '$' => state = .dollar,
                                else => try list.append(gpa, c),
                            }
                        },
                        .escape => {
                            switch (c) {
                                // Recognized escape: drop the '\', keep c.
                                ' ', '#', '\\' => {},
                                '$' => {
                                    // "\$" — keep the '\' and treat '$' as a
                                    // possible "$$" escape start.
                                    try list.append(gpa, '\\');
                                    state = .dollar;
                                    continue;
                                },
                                // Unrecognized: the '\' was literal.
                                else => try list.append(gpa, '\\'),
                            }
                            try list.append(gpa, c);
                            state = .start;
                        },
                        .dollar => {
                            // "$$" collapses to a single '$'; "$x" stays "$x".
                            try list.append(gpa, '$');
                            switch (c) {
                                '$' => {},
                                else => try list.append(gpa, c),
                            }
                            state = .start;
                        },
                    }
                }
            },
            .prereq_must_resolve => |bytes| {
                // Prereqs only recognize "\ " (escaped space); "\\" stays as-is.
                var state: enum { start, escape } = .start;
                for (bytes) |c| {
                    switch (state) {
                        .start => {
                            switch (c) {
                                '\\' => state = .escape,
                                else => try list.append(gpa, c),
                            }
                        },
                        .escape => {
                            switch (c) {
                                // "\ " — drop the '\', keep the space.
                                ' ' => {},
                                '\\' => {
                                    // "\\" — keep one '\' and stay in .escape so
                                    // the next byte is re-examined as escaped.
                                    try list.append(gpa, c);
                                    continue;
                                },
                                // Unrecognized: the '\' was literal.
                                else => try list.append(gpa, '\\'),
                            }
                            try list.append(gpa, c);
                            state = .start;
                        },
                    }
                }
            },
            else => unreachable,
        }
    }

    /// Render a human-readable error description into `list`.
    /// Only valid on error variants; token variants are unreachable here.
    pub fn printError(self: Token, gpa: Allocator, list: *std.ArrayList(u8)) error{OutOfMemory}!void {
        switch (self) {
            .target, .target_must_resolve, .prereq, .prereq_must_resolve => unreachable, // not an error
            .incomplete_quoted_prerequisite,
            .incomplete_target,
            => |index_and_bytes| {
                try list.print(gpa, "{s} '", .{self.errStr()});
                if (self == .incomplete_target) {
                    // Show the target as it would look after escape resolution.
                    const tmp = Token{ .target_must_resolve = index_and_bytes.bytes };
                    try tmp.resolve(gpa, list);
                } else {
                    try printCharValues(gpa, list, index_and_bytes.bytes);
                }
                try list.print(gpa, "' at position {d}", .{index_and_bytes.index});
            },
            .invalid_target,
            .bad_target_escape,
            .expected_dollar_sign,
            .continuation_eol,
            .incomplete_escape,
            .expected_colon,
            => |index_and_char| {
                try list.appendSlice(gpa, "illegal char ");
                try printUnderstandableChar(gpa, list, index_and_char.char);
                try list.print(gpa, " at position {d}: {s}", .{ index_and_char.index, self.errStr() });
            },
        }
    }

    // Short English description for each error variant.
    fn errStr(self: Token) []const u8 {
        return switch (self) {
            .target, .target_must_resolve, .prereq, .prereq_must_resolve => unreachable, // not an error
            .incomplete_quoted_prerequisite => "incomplete quoted prerequisite",
            .incomplete_target => "incomplete target",
            .invalid_target => "invalid target",
            .bad_target_escape => "bad target escape",
            .expected_dollar_sign => "expecting '$'",
            .continuation_eol => "continuation expecting end-of-line",
            .incomplete_escape => "incomplete escape",
            .expected_colon => "expecting ':'",
        };
    }
};
475
// Inputs containing no target bytes produce no tokens at all.
test "empty file" {
    try depTokenizer("", "");
}

test "empty whitespace" {
    try depTokenizer("\n", "");
    try depTokenizer("\r", "");
    try depTokenizer("\r\n", "");
    try depTokenizer(" ", "");
}

test "empty colon" {
    try depTokenizer(":", "");
    try depTokenizer("\n:", "");
    try depTokenizer("\r:", "");
    try depTokenizer("\r\n:", "");
    try depTokenizer(" :", "");
}
494
// Targets with no prerequisites, including leading whitespace and escapes.
test "empty target" {
    try depTokenizer("foo.o:", "target = {foo.o}");
    try depTokenizer(
        \\foo.o:
        \\bar.o:
        \\abcd.o:
    ,
        \\target = {foo.o}
        \\target = {bar.o}
        \\target = {abcd.o}
    );
}

test "whitespace empty target" {
    try depTokenizer("\nfoo.o:", "target = {foo.o}");
    try depTokenizer("\rfoo.o:", "target = {foo.o}");
    try depTokenizer("\r\nfoo.o:", "target = {foo.o}");
    try depTokenizer(" foo.o:", "target = {foo.o}");
}

test "escape empty target" {
    try depTokenizer("\\ foo.o:", "target = { foo.o}");
    try depTokenizer("\\#foo.o:", "target = {#foo.o}");
    try depTokenizer("\\\\foo.o:", "target = {\\foo.o}");
    try depTokenizer("$$foo.o:", "target = {$foo.o}");
}
521
// Targets terminated by line endings, '\' continuations, and horizontal space.
// NOTE(review): several adjacent cases below look like exact duplicates;
// presumably they were meant to differ by CRLF endings, which \\-literals
// cannot encode — confirm against upstream history.
test "empty target linefeeds" {
    try depTokenizer("\n", "");
    try depTokenizer("\r\n", "");

    const expect = "target = {foo.o}";
    try depTokenizer(
        \\foo.o:
    , expect);
    try depTokenizer(
        \\foo.o:
        \\
    , expect);
    try depTokenizer(
        \\foo.o:
    , expect);
    try depTokenizer(
        \\foo.o:
        \\
    , expect);
}

test "empty target linefeeds + continuations" {
    const expect = "target = {foo.o}";
    try depTokenizer(
        \\foo.o:\
    , expect);
    try depTokenizer(
        \\foo.o:\
        \\
    , expect);
    try depTokenizer(
        \\foo.o:\
    , expect);
    try depTokenizer(
        \\foo.o:\
        \\
    , expect);
}

test "empty target linefeeds + hspace + continuations" {
    const expect = "target = {foo.o}";
    try depTokenizer(
        \\foo.o: \
    , expect);
    try depTokenizer(
        \\foo.o: \
        \\
    , expect);
    try depTokenizer(
        \\foo.o: \
    , expect);
    try depTokenizer(
        \\foo.o: \
        \\
    , expect);
}

test "empty target + hspace + colon" {
    const expect = "target = {foo.o}";

    try depTokenizer("foo.o :", expect);
    try depTokenizer("foo.o\t\t\t:", expect);
    try depTokenizer("foo.o \t \t :", expect);
    try depTokenizer("\r\nfoo.o :", expect);
    try depTokenizer(" foo.o :", expect);
}
588
// Single prerequisites, including '\' line continuations and CRLF endings.
test "prereq" {
    const expect =
        \\target = {foo.o}
        \\prereq = {foo.c}
    ;
    try depTokenizer("foo.o: foo.c", expect);
    try depTokenizer(
        \\foo.o: \
        \\foo.c
    , expect);
    try depTokenizer(
        \\foo.o: \
        \\ foo.c
    , expect);
    try depTokenizer(
        \\foo.o: \
        \\ foo.c
    , expect);
}

test "prereq continuation" {
    const expect =
        \\target = {foo.o}
        \\prereq = {foo.h}
        \\prereq = {bar.h}
    ;
    try depTokenizer(
        \\foo.o: foo.h\
        \\bar.h
    , expect);
    try depTokenizer(
        \\foo.o: foo.h\
        \\bar.h
    , expect);
}

test "prereq continuation (CRLF)" {
    const expect =
        \\target = {foo.o}
        \\prereq = {foo.h}
        \\prereq = {bar.h}
    ;
    try depTokenizer("foo.o: foo.h\\\r\nbar.h", expect);
}
633
// Multiple prerequisites and multiple rules, including escape-heavy names.
test "multiple prereqs" {
    const expect =
        \\target = {foo.o}
        \\prereq = {foo.c}
        \\prereq = {foo.h}
        \\prereq = {bar.h}
    ;
    try depTokenizer("foo.o: foo.c foo.h bar.h", expect);
    try depTokenizer(
        \\foo.o: \
        \\foo.c foo.h bar.h
    , expect);
    try depTokenizer(
        \\foo.o: foo.c foo.h bar.h\
    , expect);
    try depTokenizer(
        \\foo.o: foo.c foo.h bar.h\
        \\
    , expect);
    try depTokenizer(
        \\foo.o: \
        \\foo.c \
        \\ foo.h\
        \\bar.h
        \\
    , expect);
    try depTokenizer(
        \\foo.o: \
        \\foo.c \
        \\ foo.h\
        \\bar.h\
        \\
    , expect);
    try depTokenizer(
        \\foo.o: \
        \\foo.c \
        \\ foo.h\
        \\bar.h\
    , expect);
}

test "multiple targets and prereqs" {
    try depTokenizer(
        \\foo.o: foo.c
        \\bar.o: bar.c a.h b.h c.h
        \\abc.o: abc.c \
        \\ one.h two.h \
        \\ three.h four.h
    ,
        \\target = {foo.o}
        \\prereq = {foo.c}
        \\target = {bar.o}
        \\prereq = {bar.c}
        \\prereq = {a.h}
        \\prereq = {b.h}
        \\prereq = {c.h}
        \\target = {abc.o}
        \\prereq = {abc.c}
        \\prereq = {one.h}
        \\prereq = {two.h}
        \\prereq = {three.h}
        \\prereq = {four.h}
    );
    try depTokenizer(
        \\ascii.o: ascii.c
        \\base64.o: base64.c stdio.h
        \\elf.o: elf.c a.h b.h c.h
        \\macho.o: \
        \\ macho.c\
        \\ a.h b.h c.h
    ,
        \\target = {ascii.o}
        \\prereq = {ascii.c}
        \\target = {base64.o}
        \\prereq = {base64.c}
        \\prereq = {stdio.h}
        \\target = {elf.o}
        \\prereq = {elf.c}
        \\prereq = {a.h}
        \\prereq = {b.h}
        \\prereq = {c.h}
        \\target = {macho.o}
        \\prereq = {macho.c}
        \\prereq = {a.h}
        \\prereq = {b.h}
        \\prereq = {c.h}
    );
    try depTokenizer(
        \\a$$scii.o: ascii.c
        \\\\base64.o: "\base64.c" "s t#dio.h"
        \\e\\lf.o: "e\lf.c" "a.h$$" "$$b.h c.h$$"
        \\macho.o: \
        \\ "macho!.c" \
        \\ a.h b.h c.h
    ,
        \\target = {a$scii.o}
        \\prereq = {ascii.c}
        \\target = {\base64.o}
        \\prereq = {\base64.c}
        \\prereq = {s t#dio.h}
        \\target = {e\lf.o}
        \\prereq = {e\lf.c}
        \\prereq = {a.h$$}
        \\prereq = {$$b.h c.h$$}
        \\target = {macho.o}
        \\prereq = {macho!.c}
        \\prereq = {a.h}
        \\prereq = {b.h}
        \\prereq = {c.h}
    );
}
745
// Windows-specific cases: quoted paths with spaces, drive specs ("C:\", "C:/"),
// and backslash-escape interactions in targets and prereqs.
test "windows quoted prereqs" {
    try depTokenizer(
        \\c:\foo.o: "C:\Program Files (x86)\Microsoft Visual Studio\foo.c"
        \\c:\foo2.o: "C:\Program Files (x86)\Microsoft Visual Studio\foo2.c" \
        \\ "C:\Program Files (x86)\Microsoft Visual Studio\foo1.h" \
        \\ "C:\Program Files (x86)\Microsoft Visual Studio\foo2.h"
    ,
        \\target = {c:\foo.o}
        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo.c}
        \\target = {c:\foo2.o}
        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo2.c}
        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo1.h}
        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo2.h}
    );
}

test "windows mixed prereqs" {
    try depTokenizer(
        \\cimport.o: \
        \\  C:\msys64\home\anon\project\zig\master\zig-cache\o\qhvhbUo7GU5iKyQ5mpA8TcQpncCYaQu0wwvr3ybiSTj_Dtqi1Nmcb70kfODJ2Qlg\cimport.h \
        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\stdio.h" \
        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt.h" \
        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime.h" \
        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\sal.h" \
        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\concurrencysal.h" \
        \\  C:\msys64\opt\zig\lib\zig\include\vadefs.h \
        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vadefs.h" \
        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstdio.h" \
        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_stdio_config.h" \
        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\string.h" \
        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memory.h" \
        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memcpy_s.h" \
        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\errno.h" \
        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime_string.h" \
        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstring.h"
    ,
        \\target = {cimport.o}
        \\prereq = {C:\msys64\home\anon\project\zig\master\zig-cache\o\qhvhbUo7GU5iKyQ5mpA8TcQpncCYaQu0wwvr3ybiSTj_Dtqi1Nmcb70kfODJ2Qlg\cimport.h}
        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\stdio.h}
        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt.h}
        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime.h}
        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\sal.h}
        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\concurrencysal.h}
        \\prereq = {C:\msys64\opt\zig\lib\zig\include\vadefs.h}
        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vadefs.h}
        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstdio.h}
        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_stdio_config.h}
        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\string.h}
        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memory.h}
        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memcpy_s.h}
        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\errno.h}
        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime_string.h}
        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstring.h}
    );
}

test "windows funky targets" {
    try depTokenizer(
        \\C:\Users\anon\foo.o:
        \\C:\Users\anon\foo\ .o:
        \\C:\Users\anon\foo\#.o:
        \\C:\Users\anon\foo$$.o:
        \\C:\Users\anon\\\ foo.o:
        \\C:\Users\anon\\#foo.o:
        \\C:\Users\anon\$$foo.o:
        \\C:\Users\anon\\\ \ \ \ \ foo.o:
    ,
        \\target = {C:\Users\anon\foo.o}
        \\target = {C:\Users\anon\foo .o}
        \\target = {C:\Users\anon\foo#.o}
        \\target = {C:\Users\anon\foo$.o}
        \\target = {C:\Users\anon\ foo.o}
        \\target = {C:\Users\anon\#foo.o}
        \\target = {C:\Users\anon\$foo.o}
        \\target = {C:\Users\anon\ foo.o}
    );
}

test "windows funky prereqs" {
    // Note we don't support unquoted escaped spaces at the very beginning of a relative path
    // e.g. `\ SpaceAtTheBeginning.c`
    // This typically wouldn't be seen in the wild, since depfiles usually use absolute paths
    // and supporting it would degrade error messages for cases where it was meant to be a
    // continuation, but the line ending is missing.
    try depTokenizer(
        \\cimport.o: \
        \\  trailingbackslash\\
        \\ C:\Users\John\ Smith\AppData\Local\zig\p\1220d14057af1a9d6dde4643293527bd5ee5099517d655251a066666a4320737ea7c\cimport.c \
        \\ somedir\\ a.c\
        \\ somedir/\ a.c\
        \\ somedir\\ \ \ b.c\
        \\ somedir\\ \\ \c.c\
        \\
    ,
        \\target = {cimport.o}
        \\prereq = {trailingbackslash\}
        \\prereq = {C:\Users\John Smith\AppData\Local\zig\p\1220d14057af1a9d6dde4643293527bd5ee5099517d655251a066666a4320737ea7c\cimport.c}
        \\prereq = {somedir\ a.c}
        \\prereq = {somedir/ a.c}
        \\prereq = {somedir\ b.c}
        \\prereq = {somedir\ \ \c.c}
    );
}

test "windows drive and forward slashes" {
    try depTokenizer(
        \\C:/msys64/what/zig-cache\tmp\48ac4d78dd531abd-cxa_thread_atexit.obj: \
        \\ C:/msys64/opt/zig3/lib/zig/libc/mingw/crt/cxa_thread_atexit.c
    ,
        \\target = {C:/msys64/what/zig-cache\tmp\48ac4d78dd531abd-cxa_thread_atexit.obj}
        \\prereq = {C:/msys64/opt/zig3/lib/zig/libc/mingw/crt/cxa_thread_atexit.c}
    );
}
859
// Error reporting for input that ends mid-escape or mid-target.
test "error incomplete escape - reverse_solidus" {
    try depTokenizer("\\",
        \\ERROR: illegal char '\' at position 0: incomplete escape
    );
    try depTokenizer("\t\\",
        \\ERROR: illegal char '\' at position 1: incomplete escape
    );
    try depTokenizer("\n\\",
        \\ERROR: illegal char '\' at position 1: incomplete escape
    );
    try depTokenizer("\r\\",
        \\ERROR: illegal char '\' at position 1: incomplete escape
    );
    try depTokenizer("\r\n\\",
        \\ERROR: illegal char '\' at position 2: incomplete escape
    );
    try depTokenizer(" \\",
        \\ERROR: illegal char '\' at position 1: incomplete escape
    );
}

test "error incomplete escape - dollar_sign" {
    try depTokenizer("$",
        \\ERROR: illegal char '$' at position 0: incomplete escape
    );
    try depTokenizer("\t$",
        \\ERROR: illegal char '$' at position 1: incomplete escape
    );
    try depTokenizer("\n$",
        \\ERROR: illegal char '$' at position 1: incomplete escape
    );
    try depTokenizer("\r$",
        \\ERROR: illegal char '$' at position 1: incomplete escape
    );
    try depTokenizer("\r\n$",
        \\ERROR: illegal char '$' at position 2: incomplete escape
    );
    try depTokenizer(" $",
        \\ERROR: illegal char '$' at position 1: incomplete escape
    );
}

test "error incomplete target" {
    try depTokenizer("foo.o",
        \\ERROR: incomplete target 'foo.o' at position 0
    );
    try depTokenizer("\tfoo.o",
        \\ERROR: incomplete target 'foo.o' at position 1
    );
    try depTokenizer("\nfoo.o",
        \\ERROR: incomplete target 'foo.o' at position 1
    );
    try depTokenizer("\rfoo.o",
        \\ERROR: incomplete target 'foo.o' at position 1
    );
    try depTokenizer("\r\nfoo.o",
        \\ERROR: incomplete target 'foo.o' at position 2
    );
    try depTokenizer(" foo.o",
        \\ERROR: incomplete target 'foo.o' at position 1
    );

    try depTokenizer("\\ foo.o",
        \\ERROR: incomplete target ' foo.o' at position 0
    );
    try depTokenizer("\\#foo.o",
        \\ERROR: incomplete target '#foo.o' at position 0
    );
    try depTokenizer("\\\\foo.o",
        \\ERROR: incomplete target '\foo.o' at position 0
    );
    try depTokenizer("$$foo.o",
        \\ERROR: incomplete target '$foo.o' at position 0
    );
}
935
// Error reporting for illegal characters in each state; non-printable bytes
// render as \xNN in messages.
test "error illegal char at position - bad target escape" {
    try depTokenizer("\\\t",
        \\ERROR: illegal char \x09 at position 1: bad target escape
    );
    try depTokenizer("\\\n",
        \\ERROR: illegal char \x0A at position 1: bad target escape
    );
    try depTokenizer("\\\r",
        \\ERROR: illegal char \x0D at position 1: bad target escape
    );
    try depTokenizer("\\\r\n",
        \\ERROR: illegal char \x0D at position 1: bad target escape
    );
}

test "error illegal char at position - expecting dollar_sign" {
    try depTokenizer("$\t",
        \\ERROR: illegal char \x09 at position 1: expecting '$'
    );
    try depTokenizer("$\n",
        \\ERROR: illegal char \x0A at position 1: expecting '$'
    );
    try depTokenizer("$\r",
        \\ERROR: illegal char \x0D at position 1: expecting '$'
    );
    try depTokenizer("$\r\n",
        \\ERROR: illegal char \x0D at position 1: expecting '$'
    );
}

test "error illegal char at position - invalid target" {
    try depTokenizer("foo\n.o",
        \\ERROR: illegal char \x0A at position 3: invalid target
    );
    try depTokenizer("foo\r.o",
        \\ERROR: illegal char \x0D at position 3: invalid target
    );
    try depTokenizer("foo\r\n.o",
        \\ERROR: illegal char \x0D at position 3: invalid target
    );
}

test "error target - continuation expecting end-of-line" {
    try depTokenizer("foo.o: \\\t",
        \\target = {foo.o}
        \\ERROR: illegal char \x09 at position 8: continuation expecting end-of-line
    );
    try depTokenizer("foo.o: \\ ",
        \\target = {foo.o}
        \\ERROR: illegal char ' ' at position 8: continuation expecting end-of-line
    );
    try depTokenizer("foo.o: \\x",
        \\target = {foo.o}
        \\ERROR: illegal char 'x' at position 8: continuation expecting end-of-line
    );
    try depTokenizer("foo.o: \\\x0dx",
        \\target = {foo.o}
        \\ERROR: illegal char 'x' at position 9: continuation expecting end-of-line
    );
}

test "error prereq - continuation expecting end-of-line" {
    try depTokenizer("foo.o: foo.h\\\x0dx",
        \\target = {foo.o}
        \\ERROR: illegal char 'x' at position 14: continuation expecting end-of-line
    );
}

test "error illegal char at position - expecting colon" {
    try depTokenizer("foo\t.o:",
        \\target = {foo}
        \\ERROR: illegal char '.' at position 4: expecting ':'
    );
    try depTokenizer("foo .o:",
        \\target = {foo}
        \\ERROR: illegal char '.' at position 4: expecting ':'
    );
    try depTokenizer("foo \n.o:",
        \\target = {foo}
        \\ERROR: illegal char \x0A at position 4: expecting ':'
    );
    try depTokenizer("foo.o\t\n:",
        \\target = {foo.o}
        \\ERROR: illegal char \x0A at position 6: expecting ':'
    );
}
1022
// - tokenize input, emit textual representation, and compare to expect
//
// Test helper: runs the tokenizer over `input`, renders every token (or the
// first error) into a "name = {bytes}" transcript, and compares against
// `expect`. All allocations come from a per-call arena.
//
// Improvements over the previous version:
// - Removed the redundant `std.mem.eql` pre-check and the no-op
//   `testing.expect(true)`; `expectEqualStrings` already performs the
//   comparison and reports a diff on mismatch.
// - Merged the duplicated .target_must_resolve / .prereq_must_resolve
//   branches, which were identical except for the label.
// - Reused the existing `printCharValues` helper instead of repeating the
//   `printable_char_tab` loop three times.
fn depTokenizer(input: []const u8, expect: []const u8) !void {
    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    const arena = arena_allocator.allocator();
    defer arena_allocator.deinit();

    var it: Tokenizer = .{ .bytes = input };
    var buffer: std.ArrayList(u8) = .empty;
    var resolve_buf: std.ArrayList(u8) = .empty;
    var i: usize = 0;
    while (it.next()) |token| {
        if (i != 0) try buffer.appendSlice(arena, "\n");
        switch (token) {
            // Plain tokens: render the bytes directly.
            .target, .prereq => |bytes| {
                try buffer.appendSlice(arena, @tagName(token));
                try buffer.appendSlice(arena, " = {");
                try printCharValues(arena, &buffer, bytes);
                try buffer.appendSlice(arena, "}");
            },
            // Escaped tokens: expand escapes first, then render.
            .target_must_resolve, .prereq_must_resolve => {
                const label = if (token == .target_must_resolve) "target" else "prereq";
                try buffer.appendSlice(arena, label);
                try buffer.appendSlice(arena, " = {");
                try token.resolve(arena, &resolve_buf);
                try printCharValues(arena, &buffer, resolve_buf.items);
                resolve_buf.clearRetainingCapacity();
                try buffer.appendSlice(arena, "}");
            },
            // Any error token ends tokenization.
            else => {
                try buffer.appendSlice(arena, "ERROR: ");
                try token.printError(arena, &buffer);
                break;
            },
        }
        i += 1;
    }

    try testing.expectEqualStrings(expect, buffer.items);
}
1078
/// Append a printable rendering of `bytes` to `list`, mapping each byte
/// through `printable_char_tab` (non-printable bytes become '.').
fn printCharValues(gpa: Allocator, list: *std.ArrayList(u8), bytes: []const u8) !void {
    for (bytes) |byte| {
        try list.append(gpa, printable_char_tab[byte]);
    }
}
1082
/// Append a human-readable rendering of `char` to `list`: printable ASCII
/// appears quoted ('x'), everything else as a \xNN hex escape.
fn printUnderstandableChar(gpa: Allocator, list: *std.ArrayList(u8), char: u8) !void {
    if (!std.ascii.isPrint(char)) {
        try list.print(gpa, "\\x{X:0>2}", .{char});
        return;
    }
    try list.print(gpa, "'{c}'", .{char});
}
1090
// zig fmt: off
// Maps every byte to a printable stand-in for test transcripts: printable
// ASCII (0x20..0x7E) maps to itself, every other byte to '.'.
const printable_char_tab: [256]u8 = (
    "................................ !\"#$%&'()*+,-./0123456789:;<=>?" ++
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~." ++
    "................................................................" ++
    "................................................................"
).*;