master
   1const Tokenizer = @This();
   2
/// Offset into `bytes` of the next byte that `next` will examine.
index: usize = 0,
/// The complete input text being tokenized.
bytes: []const u8,
/// State-machine state carried over between calls to `next`.
state: State = .lhs,
   6
   7const std = @import("std");
   8const testing = std.testing;
   9const assert = std.debug.assert;
  10const Allocator = std.mem.Allocator;
  11
/// Scans forward from `self.index` and returns the next token, or `null` once
/// the end of `bytes` is reached with nothing pending.
///
/// Problems are reported as error-variant `Token`s rather than Zig errors.
/// When an error token is returned, neither `index` nor `state` is advanced
/// past the offending byte.
///
/// Returned slices point into `self.bytes`; nothing is allocated here.
pub fn next(self: *Tokenizer) ?Token {
    // Offset at which the token currently being scanned begins.
    var start = self.index;
    // Set when an escape sequence is seen in the current token; selects the
    // `*_must_resolve` token variant. Local to this call.
    var must_resolve = false;
    while (self.index < self.bytes.len) {
        const char = self.bytes[self.index];
        switch (self.state) {
            .lhs => switch (char) {
                '\t', '\n', '\r', ' ' => {
                    // silently ignore whitespace
                    self.index += 1;
                },
                else => {
                    // First byte of a target. It is not consumed here; the
                    // `.target` state examines it on the next iteration.
                    start = self.index;
                    self.state = .target;
                },
            },
            .target => switch (char) {
                '\n', '\r' => {
                    // A line ending before any ':' was seen.
                    return errorIllegalChar(.invalid_target, self.index, char);
                },
                '$' => {
                    self.state = .target_dollar_sign;
                    self.index += 1;
                },
                '\\' => {
                    self.state = .target_reverse_solidus;
                    self.index += 1;
                },
                ':' => {
                    // Whether this colon ends the target is decided by the
                    // byte that follows it.
                    self.state = .target_colon;
                    self.index += 1;
                },
                '\t', ' ' => {
                    // Horizontal whitespace ends the target; the ':' is then
                    // expected by the `.target_space` state.
                    self.state = .target_space;

                    const bytes = self.bytes[start..self.index];
                    std.debug.assert(bytes.len != 0);
                    self.index += 1;

                    return finishTarget(must_resolve, bytes);
                },
                else => {
                    self.index += 1;
                },
            },
            .target_reverse_solidus => switch (char) {
                '\t', '\n', '\r' => {
                    return errorIllegalChar(.bad_target_escape, self.index, char);
                },
                ' ', '#', '\\' => {
                    // A real escape sequence: the token will need resolving.
                    must_resolve = true;
                    self.state = .target;
                    self.index += 1;
                },
                '$' => {
                    self.state = .target_dollar_sign;
                    self.index += 1;
                },
                else => {
                    // Not an escape; the backslash stays in the target as-is.
                    self.state = .target;
                    self.index += 1;
                },
            },
            .target_dollar_sign => switch (char) {
                '$' => {
                    // "$$" is the only accepted use of '$' in a target.
                    must_resolve = true;
                    self.state = .target;
                    self.index += 1;
                },
                else => {
                    return errorIllegalChar(.expected_dollar_sign, self.index, char);
                },
            },
            .target_colon => switch (char) {
                '\n', '\r' => {
                    // `self.index - 1` excludes the ':' that was just consumed.
                    const bytes = self.bytes[start .. self.index - 1];
                    if (bytes.len != 0) {
                        self.state = .lhs;
                        return finishTarget(must_resolve, bytes);
                    }
                    // silently ignore null target
                    self.state = .lhs;
                },
                '/', '\\' => {
                    // ':' followed by a slash may be part of the target
                    // itself (e.g. `C:\...` or `C:/...`); decide on the next byte.
                    self.state = .target_colon_reverse_solidus;
                    self.index += 1;
                },
                else => {
                    // The colon ended the target. The current byte is left
                    // unconsumed for the `.rhs` state.
                    const bytes = self.bytes[start .. self.index - 1];
                    if (bytes.len != 0) {
                        self.state = .rhs;
                        return finishTarget(must_resolve, bytes);
                    }
                    // silently ignore null target
                    self.state = .lhs;
                },
            },
            .target_colon_reverse_solidus => switch (char) {
                '\n', '\r' => {
                    // `self.index - 2` excludes both the ':' and the slash
                    // that followed it.
                    const bytes = self.bytes[start .. self.index - 2];
                    if (bytes.len != 0) {
                        self.state = .lhs;
                        return finishTarget(must_resolve, bytes);
                    }
                    // silently ignore null target
                    self.state = .lhs;
                },
                else => {
                    // The colon and slash belong to the target; keep scanning
                    // it. The current byte is not consumed here.
                    self.state = .target;
                },
            },
            .target_space => switch (char) {
                '\t', ' ' => {
                    // silently ignore additional horizontal whitespace
                    self.index += 1;
                },
                ':' => {
                    self.state = .rhs;
                    self.index += 1;
                },
                else => {
                    return errorIllegalChar(.expected_colon, self.index, char);
                },
            },
            .rhs => switch (char) {
                '\t', ' ' => {
                    // silently ignore horizontal whitespace
                    self.index += 1;
                },
                '\n', '\r' => {
                    // End of the rule. The line ending itself is consumed as
                    // whitespace by the `.lhs` state.
                    self.state = .lhs;
                },
                '\\' => {
                    self.state = .rhs_continuation;
                    self.index += 1;
                },
                '"' => {
                    // A quoted prerequisite starts after the opening quote.
                    self.state = .prereq_quote;
                    self.index += 1;
                    start = self.index;
                },
                else => {
                    start = self.index;
                    self.state = .prereq;
                },
            },
            .rhs_continuation => switch (char) {
                '\n' => {
                    self.state = .rhs;
                    self.index += 1;
                },
                '\r' => {
                    self.state = .rhs_continuation_linefeed;
                    self.index += 1;
                },
                else => {
                    return errorIllegalChar(.continuation_eol, self.index, char);
                },
            },
            .rhs_continuation_linefeed => switch (char) {
                '\n' => {
                    self.state = .rhs;
                    self.index += 1;
                },
                else => {
                    return errorIllegalChar(.continuation_eol, self.index, char);
                },
            },
            .prereq_quote => switch (char) {
                '"' => {
                    // Consume the closing quote, then slice it back off.
                    self.index += 1;
                    self.state = .rhs;
                    return finishPrereq(must_resolve, self.bytes[start .. self.index - 1]);
                },
                else => {
                    // Every other byte inside the quotes is taken verbatim.
                    self.index += 1;
                },
            },
            .prereq => switch (char) {
                '\t', ' ' => {
                    self.state = .rhs;
                    return finishPrereq(must_resolve, self.bytes[start..self.index]);
                },
                '\n', '\r' => {
                    self.state = .lhs;
                    return finishPrereq(must_resolve, self.bytes[start..self.index]);
                },
                '\\' => {
                    self.state = .prereq_continuation;
                    self.index += 1;
                },
                else => {
                    self.index += 1;
                },
            },
            .prereq_continuation => switch (char) {
                '\n' => {
                    // After consuming '\n', `self.index - 2` excludes the
                    // backslash and the newline.
                    self.index += 1;
                    self.state = .rhs;
                    return finishPrereq(must_resolve, self.bytes[start .. self.index - 2]);
                },
                '\r' => {
                    self.state = .prereq_continuation_linefeed;
                    self.index += 1;
                },
                '\\' => {
                    // The previous \ wasn't a continuation, but this one might be.
                    self.index += 1;
                },
                ' ' => {
                    // not continuation, but escaped space must be resolved
                    must_resolve = true;
                    self.state = .prereq;
                    self.index += 1;
                },
                else => {
                    // not continuation
                    self.state = .prereq;
                    self.index += 1;
                },
            },
            .prereq_continuation_linefeed => switch (char) {
                '\n' => {
                    // After consuming '\n', `self.index - 3` excludes the
                    // backslash, '\r' and '\n'.
                    self.index += 1;
                    self.state = .rhs;
                    return finishPrereq(must_resolve, self.bytes[start .. self.index - 3]);
                },
                else => {
                    return errorIllegalChar(.continuation_eol, self.index, char);
                },
            },
        }
    } else {
        // End of input: finish or reject whatever was in progress.
        switch (self.state) {
            .lhs,
            .rhs,
            .rhs_continuation,
            .rhs_continuation_linefeed,
            => return null,
            .target => {
                // Input ended before the target was terminated.
                return errorPosition(.incomplete_target, start, self.bytes[start..]);
            },
            .target_reverse_solidus,
            .target_dollar_sign,
            => {
                // Report the last byte consumed (the '\' or '$').
                const idx = self.index - 1;
                return errorIllegalChar(.incomplete_escape, idx, self.bytes[idx]);
            },
            .target_colon => {
                const bytes = self.bytes[start .. self.index - 1];
                if (bytes.len != 0) {
                    // `index` now exceeds `bytes.len`; the following call
                    // still lands in this end-of-input branch (in state
                    // `.rhs`) and returns null.
                    self.index += 1;
                    self.state = .rhs;
                    return finishTarget(must_resolve, bytes);
                }
                // silently ignore null target
                self.state = .lhs;
                return null;
            },
            .target_colon_reverse_solidus => {
                // Excludes the trailing ':' and the slash after it.
                const bytes = self.bytes[start .. self.index - 2];
                if (bytes.len != 0) {
                    self.index += 1;
                    self.state = .rhs;
                    return finishTarget(must_resolve, bytes);
                }
                // silently ignore null target
                self.state = .lhs;
                return null;
            },
            .target_space => {
                // The ':' never arrived; report the last byte consumed.
                const idx = self.index - 1;
                return errorIllegalChar(.expected_colon, idx, self.bytes[idx]);
            },
            .prereq_quote => {
                // The closing quote never arrived.
                return errorPosition(.incomplete_quoted_prerequisite, start, self.bytes[start..]);
            },
            .prereq => {
                self.state = .lhs;
                return finishPrereq(must_resolve, self.bytes[start..]);
            },
            .prereq_continuation => {
                // Excludes the trailing backslash.
                self.state = .lhs;
                return finishPrereq(must_resolve, self.bytes[start .. self.index - 1]);
            },
            .prereq_continuation_linefeed => {
                // Excludes the trailing backslash and '\r'.
                self.state = .lhs;
                return finishPrereq(must_resolve, self.bytes[start .. self.index - 2]);
            },
        }
    }
    unreachable;
}
 305
 306fn errorPosition(comptime id: std.meta.Tag(Token), index: usize, bytes: []const u8) Token {
 307    return @unionInit(Token, @tagName(id), .{ .index = index, .bytes = bytes });
 308}
 309
 310fn errorIllegalChar(comptime id: std.meta.Tag(Token), index: usize, char: u8) Token {
 311    return @unionInit(Token, @tagName(id), .{ .index = index, .char = char });
 312}
 313
 314fn finishTarget(must_resolve: bool, bytes: []const u8) Token {
 315    return if (must_resolve) .{ .target_must_resolve = bytes } else .{ .target = bytes };
 316}
 317
 318fn finishPrereq(must_resolve: bool, bytes: []const u8) Token {
 319    return if (must_resolve) .{ .prereq_must_resolve = bytes } else .{ .prereq = bytes };
 320}
 321
/// States of the tokenizer's state machine; see `next` for the transitions.
const State = enum {
    /// Before a target: whitespace is skipped until a target begins.
    lhs,
    /// Inside a target.
    target,
    /// A `\` was just consumed inside a target.
    target_reverse_solidus,
    /// A `$` was just consumed inside a target; another `$` must follow.
    target_dollar_sign,
    /// A `:` was just consumed after target bytes; the next byte decides
    /// whether it ends the target.
    target_colon,
    /// A `:` followed by `/` or `\` was consumed; the next byte decides
    /// whether both belong to the target.
    target_colon_reverse_solidus,
    /// A target was ended by a space or tab; a `:` is expected next.
    target_space,
    /// After the `:`, between prerequisites.
    rhs,
    /// A `\` was consumed between prerequisites; a line ending must follow.
    rhs_continuation,
    /// A `\` then `\r` were consumed between prerequisites; `\n` must follow.
    rhs_continuation_linefeed,
    /// Inside a double-quoted prerequisite.
    prereq_quote,
    /// Inside an unquoted prerequisite.
    prereq,
    /// A `\` was just consumed inside an unquoted prerequisite.
    prereq_continuation,
    /// A `\` then `\r` were consumed inside an unquoted prerequisite; `\n`
    /// must follow.
    prereq_continuation_linefeed,
};
 338
 339pub const Token = union(enum) {
 340    target: []const u8,
 341    target_must_resolve: []const u8,
 342    prereq: []const u8,
 343    prereq_must_resolve: []const u8,
 344
 345    incomplete_quoted_prerequisite: IndexAndBytes,
 346    incomplete_target: IndexAndBytes,
 347
 348    invalid_target: IndexAndChar,
 349    bad_target_escape: IndexAndChar,
 350    expected_dollar_sign: IndexAndChar,
 351    continuation_eol: IndexAndChar,
 352    incomplete_escape: IndexAndChar,
 353    expected_colon: IndexAndChar,
 354
 355    pub const IndexAndChar = struct {
 356        index: usize,
 357        char: u8,
 358    };
 359
 360    pub const IndexAndBytes = struct {
 361        index: usize,
 362        bytes: []const u8,
 363    };
 364
 365    /// Resolve escapes in target or prereq. Only valid with .target_must_resolve or .prereq_must_resolve.
 366    pub fn resolve(self: Token, gpa: Allocator, list: *std.ArrayList(u8)) error{OutOfMemory}!void {
 367        switch (self) {
 368            .target_must_resolve => |bytes| {
 369                var state: enum { start, escape, dollar } = .start;
 370                for (bytes) |c| {
 371                    switch (state) {
 372                        .start => {
 373                            switch (c) {
 374                                '\\' => state = .escape,
 375                                '$' => state = .dollar,
 376                                else => try list.append(gpa, c),
 377                            }
 378                        },
 379                        .escape => {
 380                            switch (c) {
 381                                ' ', '#', '\\' => {},
 382                                '$' => {
 383                                    try list.append(gpa, '\\');
 384                                    state = .dollar;
 385                                    continue;
 386                                },
 387                                else => try list.append(gpa, '\\'),
 388                            }
 389                            try list.append(gpa, c);
 390                            state = .start;
 391                        },
 392                        .dollar => {
 393                            try list.append(gpa, '$');
 394                            switch (c) {
 395                                '$' => {},
 396                                else => try list.append(gpa, c),
 397                            }
 398                            state = .start;
 399                        },
 400                    }
 401                }
 402            },
 403            .prereq_must_resolve => |bytes| {
 404                var state: enum { start, escape } = .start;
 405                for (bytes) |c| {
 406                    switch (state) {
 407                        .start => {
 408                            switch (c) {
 409                                '\\' => state = .escape,
 410                                else => try list.append(gpa, c),
 411                            }
 412                        },
 413                        .escape => {
 414                            switch (c) {
 415                                ' ' => {},
 416                                '\\' => {
 417                                    try list.append(gpa, c);
 418                                    continue;
 419                                },
 420                                else => try list.append(gpa, '\\'),
 421                            }
 422                            try list.append(gpa, c);
 423                            state = .start;
 424                        },
 425                    }
 426                }
 427            },
 428            else => unreachable,
 429        }
 430    }
 431
 432    pub fn printError(self: Token, gpa: Allocator, list: *std.ArrayList(u8)) error{OutOfMemory}!void {
 433        switch (self) {
 434            .target, .target_must_resolve, .prereq, .prereq_must_resolve => unreachable, // not an error
 435            .incomplete_quoted_prerequisite,
 436            .incomplete_target,
 437            => |index_and_bytes| {
 438                try list.print(gpa, "{s} '", .{self.errStr()});
 439                if (self == .incomplete_target) {
 440                    const tmp = Token{ .target_must_resolve = index_and_bytes.bytes };
 441                    try tmp.resolve(gpa, list);
 442                } else {
 443                    try printCharValues(gpa, list, index_and_bytes.bytes);
 444                }
 445                try list.print(gpa, "' at position {d}", .{index_and_bytes.index});
 446            },
 447            .invalid_target,
 448            .bad_target_escape,
 449            .expected_dollar_sign,
 450            .continuation_eol,
 451            .incomplete_escape,
 452            .expected_colon,
 453            => |index_and_char| {
 454                try list.appendSlice(gpa, "illegal char ");
 455                try printUnderstandableChar(gpa, list, index_and_char.char);
 456                try list.print(gpa, " at position {d}: {s}", .{ index_and_char.index, self.errStr() });
 457            },
 458        }
 459    }
 460
 461    fn errStr(self: Token) []const u8 {
 462        return switch (self) {
 463            .target, .target_must_resolve, .prereq, .prereq_must_resolve => unreachable, // not an error
 464            .incomplete_quoted_prerequisite => "incomplete quoted prerequisite",
 465            .incomplete_target => "incomplete target",
 466            .invalid_target => "invalid target",
 467            .bad_target_escape => "bad target escape",
 468            .expected_dollar_sign => "expecting '$'",
 469            .continuation_eol => "continuation expecting end-of-line",
 470            .incomplete_escape => "incomplete escape",
 471            .expected_colon => "expecting ':'",
 472        };
 473    }
 474};
 475
 476test "empty file" {
 477    try depTokenizer("", "");
 478}
 479
 480test "empty whitespace" {
 481    try depTokenizer("\n", "");
 482    try depTokenizer("\r", "");
 483    try depTokenizer("\r\n", "");
 484    try depTokenizer(" ", "");
 485}
 486
 487test "empty colon" {
 488    try depTokenizer(":", "");
 489    try depTokenizer("\n:", "");
 490    try depTokenizer("\r:", "");
 491    try depTokenizer("\r\n:", "");
 492    try depTokenizer(" :", "");
 493}
 494
 495test "empty target" {
 496    try depTokenizer("foo.o:", "target = {foo.o}");
 497    try depTokenizer(
 498        \\foo.o:
 499        \\bar.o:
 500        \\abcd.o:
 501    ,
 502        \\target = {foo.o}
 503        \\target = {bar.o}
 504        \\target = {abcd.o}
 505    );
 506}
 507
 508test "whitespace empty target" {
 509    try depTokenizer("\nfoo.o:", "target = {foo.o}");
 510    try depTokenizer("\rfoo.o:", "target = {foo.o}");
 511    try depTokenizer("\r\nfoo.o:", "target = {foo.o}");
 512    try depTokenizer(" foo.o:", "target = {foo.o}");
 513}
 514
 515test "escape empty target" {
 516    try depTokenizer("\\ foo.o:", "target = { foo.o}");
 517    try depTokenizer("\\#foo.o:", "target = {#foo.o}");
 518    try depTokenizer("\\\\foo.o:", "target = {\\foo.o}");
 519    try depTokenizer("$$foo.o:", "target = {$foo.o}");
 520}
 521
 522test "empty target linefeeds" {
 523    try depTokenizer("\n", "");
 524    try depTokenizer("\r\n", "");
 525
 526    const expect = "target = {foo.o}";
 527    try depTokenizer(
 528        \\foo.o:
 529    , expect);
 530    try depTokenizer(
 531        \\foo.o:
 532        \\
 533    , expect);
 534    try depTokenizer(
 535        \\foo.o:
 536    , expect);
 537    try depTokenizer(
 538        \\foo.o:
 539        \\
 540    , expect);
 541}
 542
 543test "empty target linefeeds + continuations" {
 544    const expect = "target = {foo.o}";
 545    try depTokenizer(
 546        \\foo.o:\
 547    , expect);
 548    try depTokenizer(
 549        \\foo.o:\
 550        \\
 551    , expect);
 552    try depTokenizer(
 553        \\foo.o:\
 554    , expect);
 555    try depTokenizer(
 556        \\foo.o:\
 557        \\
 558    , expect);
 559}
 560
 561test "empty target linefeeds + hspace + continuations" {
 562    const expect = "target = {foo.o}";
 563    try depTokenizer(
 564        \\foo.o: \
 565    , expect);
 566    try depTokenizer(
 567        \\foo.o: \
 568        \\
 569    , expect);
 570    try depTokenizer(
 571        \\foo.o: \
 572    , expect);
 573    try depTokenizer(
 574        \\foo.o: \
 575        \\
 576    , expect);
 577}
 578
 579test "empty target + hspace + colon" {
 580    const expect = "target = {foo.o}";
 581
 582    try depTokenizer("foo.o :", expect);
 583    try depTokenizer("foo.o\t\t\t:", expect);
 584    try depTokenizer("foo.o \t \t :", expect);
 585    try depTokenizer("\r\nfoo.o :", expect);
 586    try depTokenizer(" foo.o :", expect);
 587}
 588
 589test "prereq" {
 590    const expect =
 591        \\target = {foo.o}
 592        \\prereq = {foo.c}
 593    ;
 594    try depTokenizer("foo.o: foo.c", expect);
 595    try depTokenizer(
 596        \\foo.o: \
 597        \\foo.c
 598    , expect);
 599    try depTokenizer(
 600        \\foo.o: \
 601        \\ foo.c
 602    , expect);
 603    try depTokenizer(
 604        \\foo.o:    \
 605        \\    foo.c
 606    , expect);
 607}
 608
 609test "prereq continuation" {
 610    const expect =
 611        \\target = {foo.o}
 612        \\prereq = {foo.h}
 613        \\prereq = {bar.h}
 614    ;
 615    try depTokenizer(
 616        \\foo.o: foo.h\
 617        \\bar.h
 618    , expect);
 619    try depTokenizer(
 620        \\foo.o: foo.h\
 621        \\bar.h
 622    , expect);
 623}
 624
 625test "prereq continuation (CRLF)" {
 626    const expect =
 627        \\target = {foo.o}
 628        \\prereq = {foo.h}
 629        \\prereq = {bar.h}
 630    ;
 631    try depTokenizer("foo.o: foo.h\\\r\nbar.h", expect);
 632}
 633
 634test "multiple prereqs" {
 635    const expect =
 636        \\target = {foo.o}
 637        \\prereq = {foo.c}
 638        \\prereq = {foo.h}
 639        \\prereq = {bar.h}
 640    ;
 641    try depTokenizer("foo.o: foo.c foo.h bar.h", expect);
 642    try depTokenizer(
 643        \\foo.o: \
 644        \\foo.c foo.h bar.h
 645    , expect);
 646    try depTokenizer(
 647        \\foo.o: foo.c foo.h bar.h\
 648    , expect);
 649    try depTokenizer(
 650        \\foo.o: foo.c foo.h bar.h\
 651        \\
 652    , expect);
 653    try depTokenizer(
 654        \\foo.o: \
 655        \\foo.c       \
 656        \\     foo.h\
 657        \\bar.h
 658        \\
 659    , expect);
 660    try depTokenizer(
 661        \\foo.o: \
 662        \\foo.c       \
 663        \\     foo.h\
 664        \\bar.h\
 665        \\
 666    , expect);
 667    try depTokenizer(
 668        \\foo.o: \
 669        \\foo.c       \
 670        \\     foo.h\
 671        \\bar.h\
 672    , expect);
 673}
 674
 675test "multiple targets and prereqs" {
 676    try depTokenizer(
 677        \\foo.o: foo.c
 678        \\bar.o: bar.c a.h b.h c.h
 679        \\abc.o: abc.c \
 680        \\  one.h two.h \
 681        \\  three.h four.h
 682    ,
 683        \\target = {foo.o}
 684        \\prereq = {foo.c}
 685        \\target = {bar.o}
 686        \\prereq = {bar.c}
 687        \\prereq = {a.h}
 688        \\prereq = {b.h}
 689        \\prereq = {c.h}
 690        \\target = {abc.o}
 691        \\prereq = {abc.c}
 692        \\prereq = {one.h}
 693        \\prereq = {two.h}
 694        \\prereq = {three.h}
 695        \\prereq = {four.h}
 696    );
 697    try depTokenizer(
 698        \\ascii.o: ascii.c
 699        \\base64.o: base64.c stdio.h
 700        \\elf.o: elf.c a.h b.h c.h
 701        \\macho.o: \
 702        \\  macho.c\
 703        \\  a.h b.h c.h
 704    ,
 705        \\target = {ascii.o}
 706        \\prereq = {ascii.c}
 707        \\target = {base64.o}
 708        \\prereq = {base64.c}
 709        \\prereq = {stdio.h}
 710        \\target = {elf.o}
 711        \\prereq = {elf.c}
 712        \\prereq = {a.h}
 713        \\prereq = {b.h}
 714        \\prereq = {c.h}
 715        \\target = {macho.o}
 716        \\prereq = {macho.c}
 717        \\prereq = {a.h}
 718        \\prereq = {b.h}
 719        \\prereq = {c.h}
 720    );
 721    try depTokenizer(
 722        \\a$$scii.o: ascii.c
 723        \\\\base64.o: "\base64.c" "s t#dio.h"
 724        \\e\\lf.o: "e\lf.c" "a.h$$" "$$b.h c.h$$"
 725        \\macho.o: \
 726        \\  "macho!.c" \
 727        \\  a.h b.h c.h
 728    ,
 729        \\target = {a$scii.o}
 730        \\prereq = {ascii.c}
 731        \\target = {\base64.o}
 732        \\prereq = {\base64.c}
 733        \\prereq = {s t#dio.h}
 734        \\target = {e\lf.o}
 735        \\prereq = {e\lf.c}
 736        \\prereq = {a.h$$}
 737        \\prereq = {$$b.h c.h$$}
 738        \\target = {macho.o}
 739        \\prereq = {macho!.c}
 740        \\prereq = {a.h}
 741        \\prereq = {b.h}
 742        \\prereq = {c.h}
 743    );
 744}
 745
 746test "windows quoted prereqs" {
 747    try depTokenizer(
 748        \\c:\foo.o: "C:\Program Files (x86)\Microsoft Visual Studio\foo.c"
 749        \\c:\foo2.o: "C:\Program Files (x86)\Microsoft Visual Studio\foo2.c" \
 750        \\  "C:\Program Files (x86)\Microsoft Visual Studio\foo1.h" \
 751        \\  "C:\Program Files (x86)\Microsoft Visual Studio\foo2.h"
 752    ,
 753        \\target = {c:\foo.o}
 754        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo.c}
 755        \\target = {c:\foo2.o}
 756        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo2.c}
 757        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo1.h}
 758        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo2.h}
 759    );
 760}
 761
 762test "windows mixed prereqs" {
 763    try depTokenizer(
 764        \\cimport.o: \
 765        \\  C:\msys64\home\anon\project\zig\master\zig-cache\o\qhvhbUo7GU5iKyQ5mpA8TcQpncCYaQu0wwvr3ybiSTj_Dtqi1Nmcb70kfODJ2Qlg\cimport.h \
 766        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\stdio.h" \
 767        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt.h" \
 768        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime.h" \
 769        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\sal.h" \
 770        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\concurrencysal.h" \
 771        \\  C:\msys64\opt\zig\lib\zig\include\vadefs.h \
 772        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vadefs.h" \
 773        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstdio.h" \
 774        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_stdio_config.h" \
 775        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\string.h" \
 776        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memory.h" \
 777        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memcpy_s.h" \
 778        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\errno.h" \
 779        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime_string.h" \
 780        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstring.h"
 781    ,
 782        \\target = {cimport.o}
 783        \\prereq = {C:\msys64\home\anon\project\zig\master\zig-cache\o\qhvhbUo7GU5iKyQ5mpA8TcQpncCYaQu0wwvr3ybiSTj_Dtqi1Nmcb70kfODJ2Qlg\cimport.h}
 784        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\stdio.h}
 785        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt.h}
 786        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime.h}
 787        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\sal.h}
 788        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\concurrencysal.h}
 789        \\prereq = {C:\msys64\opt\zig\lib\zig\include\vadefs.h}
 790        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vadefs.h}
 791        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstdio.h}
 792        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_stdio_config.h}
 793        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\string.h}
 794        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memory.h}
 795        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memcpy_s.h}
 796        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\errno.h}
 797        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime_string.h}
 798        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstring.h}
 799    );
 800}
 801
 802test "windows funky targets" {
 803    try depTokenizer(
 804        \\C:\Users\anon\foo.o:
 805        \\C:\Users\anon\foo\ .o:
 806        \\C:\Users\anon\foo\#.o:
 807        \\C:\Users\anon\foo$$.o:
 808        \\C:\Users\anon\\\ foo.o:
 809        \\C:\Users\anon\\#foo.o:
 810        \\C:\Users\anon\$$foo.o:
 811        \\C:\Users\anon\\\ \ \ \ \ foo.o:
 812    ,
 813        \\target = {C:\Users\anon\foo.o}
 814        \\target = {C:\Users\anon\foo .o}
 815        \\target = {C:\Users\anon\foo#.o}
 816        \\target = {C:\Users\anon\foo$.o}
 817        \\target = {C:\Users\anon\ foo.o}
 818        \\target = {C:\Users\anon\#foo.o}
 819        \\target = {C:\Users\anon\$foo.o}
 820        \\target = {C:\Users\anon\     foo.o}
 821    );
 822}
 823
 824test "windows funky prereqs" {
 825    // Note we don't support unquoted escaped spaces at the very beginning of a relative path
 826    // e.g. `\ SpaceAtTheBeginning.c`
 827    // This typically wouldn't be seen in the wild, since depfiles usually use absolute paths
 828    // and supporting it would degrade error messages for cases where it was meant to be a
 829    // continuation, but the line ending is missing.
 830    try depTokenizer(
 831        \\cimport.o: \
 832        \\  trailingbackslash\\
 833        \\  C:\Users\John\ Smith\AppData\Local\zig\p\1220d14057af1a9d6dde4643293527bd5ee5099517d655251a066666a4320737ea7c\cimport.c \
 834        \\  somedir\\ a.c\
 835        \\  somedir/\ a.c\
 836        \\  somedir\\ \ \ b.c\
 837        \\  somedir\\ \\ \c.c\
 838        \\
 839    ,
 840        \\target = {cimport.o}
 841        \\prereq = {trailingbackslash\}
 842        \\prereq = {C:\Users\John Smith\AppData\Local\zig\p\1220d14057af1a9d6dde4643293527bd5ee5099517d655251a066666a4320737ea7c\cimport.c}
 843        \\prereq = {somedir\ a.c}
 844        \\prereq = {somedir/ a.c}
 845        \\prereq = {somedir\   b.c}
 846        \\prereq = {somedir\ \ \c.c}
 847    );
 848}
 849
 850test "windows drive and forward slashes" {
 851    try depTokenizer(
 852        \\C:/msys64/what/zig-cache\tmp\48ac4d78dd531abd-cxa_thread_atexit.obj: \
 853        \\  C:/msys64/opt/zig3/lib/zig/libc/mingw/crt/cxa_thread_atexit.c
 854    ,
 855        \\target = {C:/msys64/what/zig-cache\tmp\48ac4d78dd531abd-cxa_thread_atexit.obj}
 856        \\prereq = {C:/msys64/opt/zig3/lib/zig/libc/mingw/crt/cxa_thread_atexit.c}
 857    );
 858}
 859
 860test "error incomplete escape - reverse_solidus" {
 861    try depTokenizer("\\",
 862        \\ERROR: illegal char '\' at position 0: incomplete escape
 863    );
 864    try depTokenizer("\t\\",
 865        \\ERROR: illegal char '\' at position 1: incomplete escape
 866    );
 867    try depTokenizer("\n\\",
 868        \\ERROR: illegal char '\' at position 1: incomplete escape
 869    );
 870    try depTokenizer("\r\\",
 871        \\ERROR: illegal char '\' at position 1: incomplete escape
 872    );
 873    try depTokenizer("\r\n\\",
 874        \\ERROR: illegal char '\' at position 2: incomplete escape
 875    );
 876    try depTokenizer(" \\",
 877        \\ERROR: illegal char '\' at position 1: incomplete escape
 878    );
 879}
 880
 881test "error incomplete escape - dollar_sign" {
 882    try depTokenizer("$",
 883        \\ERROR: illegal char '$' at position 0: incomplete escape
 884    );
 885    try depTokenizer("\t$",
 886        \\ERROR: illegal char '$' at position 1: incomplete escape
 887    );
 888    try depTokenizer("\n$",
 889        \\ERROR: illegal char '$' at position 1: incomplete escape
 890    );
 891    try depTokenizer("\r$",
 892        \\ERROR: illegal char '$' at position 1: incomplete escape
 893    );
 894    try depTokenizer("\r\n$",
 895        \\ERROR: illegal char '$' at position 2: incomplete escape
 896    );
 897    try depTokenizer(" $",
 898        \\ERROR: illegal char '$' at position 1: incomplete escape
 899    );
 900}
 901
 902test "error incomplete target" {
 903    try depTokenizer("foo.o",
 904        \\ERROR: incomplete target 'foo.o' at position 0
 905    );
 906    try depTokenizer("\tfoo.o",
 907        \\ERROR: incomplete target 'foo.o' at position 1
 908    );
 909    try depTokenizer("\nfoo.o",
 910        \\ERROR: incomplete target 'foo.o' at position 1
 911    );
 912    try depTokenizer("\rfoo.o",
 913        \\ERROR: incomplete target 'foo.o' at position 1
 914    );
 915    try depTokenizer("\r\nfoo.o",
 916        \\ERROR: incomplete target 'foo.o' at position 2
 917    );
 918    try depTokenizer(" foo.o",
 919        \\ERROR: incomplete target 'foo.o' at position 1
 920    );
 921
 922    try depTokenizer("\\ foo.o",
 923        \\ERROR: incomplete target ' foo.o' at position 0
 924    );
 925    try depTokenizer("\\#foo.o",
 926        \\ERROR: incomplete target '#foo.o' at position 0
 927    );
 928    try depTokenizer("\\\\foo.o",
 929        \\ERROR: incomplete target '\foo.o' at position 0
 930    );
 931    try depTokenizer("$$foo.o",
 932        \\ERROR: incomplete target '$foo.o' at position 0
 933    );
 934}
 935
 936test "error illegal char at position - bad target escape" {
 937    try depTokenizer("\\\t",
 938        \\ERROR: illegal char \x09 at position 1: bad target escape
 939    );
 940    try depTokenizer("\\\n",
 941        \\ERROR: illegal char \x0A at position 1: bad target escape
 942    );
 943    try depTokenizer("\\\r",
 944        \\ERROR: illegal char \x0D at position 1: bad target escape
 945    );
 946    try depTokenizer("\\\r\n",
 947        \\ERROR: illegal char \x0D at position 1: bad target escape
 948    );
 949}
 950
 951test "error illegal char at position - expecting dollar_sign" {
 952    try depTokenizer("$\t",
 953        \\ERROR: illegal char \x09 at position 1: expecting '$'
 954    );
 955    try depTokenizer("$\n",
 956        \\ERROR: illegal char \x0A at position 1: expecting '$'
 957    );
 958    try depTokenizer("$\r",
 959        \\ERROR: illegal char \x0D at position 1: expecting '$'
 960    );
 961    try depTokenizer("$\r\n",
 962        \\ERROR: illegal char \x0D at position 1: expecting '$'
 963    );
 964}
 965
 966test "error illegal char at position - invalid target" {
 967    try depTokenizer("foo\n.o",
 968        \\ERROR: illegal char \x0A at position 3: invalid target
 969    );
 970    try depTokenizer("foo\r.o",
 971        \\ERROR: illegal char \x0D at position 3: invalid target
 972    );
 973    try depTokenizer("foo\r\n.o",
 974        \\ERROR: illegal char \x0D at position 3: invalid target
 975    );
 976}
 977
 978test "error target - continuation expecting end-of-line" {
 979    try depTokenizer("foo.o: \\\t",
 980        \\target = {foo.o}
 981        \\ERROR: illegal char \x09 at position 8: continuation expecting end-of-line
 982    );
 983    try depTokenizer("foo.o: \\ ",
 984        \\target = {foo.o}
 985        \\ERROR: illegal char ' ' at position 8: continuation expecting end-of-line
 986    );
 987    try depTokenizer("foo.o: \\x",
 988        \\target = {foo.o}
 989        \\ERROR: illegal char 'x' at position 8: continuation expecting end-of-line
 990    );
 991    try depTokenizer("foo.o: \\\x0dx",
 992        \\target = {foo.o}
 993        \\ERROR: illegal char 'x' at position 9: continuation expecting end-of-line
 994    );
 995}
 996
 997test "error prereq - continuation expecting end-of-line" {
 998    try depTokenizer("foo.o: foo.h\\\x0dx",
 999        \\target = {foo.o}
1000        \\ERROR: illegal char 'x' at position 14: continuation expecting end-of-line
1001    );
1002}
1003
test "error illegal char at position - expecting colon" {
    // Whitespace ends the target; anything other than ':' after it is an
    // error. The target token is still emitted first.
    const cases = [_]struct { input: []const u8, expect: []const u8 }{
        .{
            .input = "foo\t.o:",
            .expect =
            \\target = {foo}
            \\ERROR: illegal char '.' at position 4: expecting ':'
            ,
        },
        .{
            .input = "foo .o:",
            .expect =
            \\target = {foo}
            \\ERROR: illegal char '.' at position 4: expecting ':'
            ,
        },
        .{
            .input = "foo \n.o:",
            .expect =
            \\target = {foo}
            \\ERROR: illegal char \x0A at position 4: expecting ':'
            ,
        },
        .{
            .input = "foo.o\t\n:",
            .expect =
            \\target = {foo.o}
            \\ERROR: illegal char \x0A at position 6: expecting ':'
            ,
        },
    };
    for (cases) |case| try depTokenizer(case.input, case.expect);
}
1022
/// Test helper: tokenizes `input`, renders the token stream as text and
/// compares that text against `expect`.
///
/// Tokens are rendered one per line, separated by '\n' with no trailing
/// newline:
/// - `.target` / `.prereq` become `target = {...}` / `prereq = {...}`.
/// - `.target_must_resolve` / `.prereq_must_resolve` are rendered the same
///   way, using the bytes produced by `Token.resolve`.
/// - any other token is rendered as `ERROR: ` followed by the output of
///   `Token.printError`, and tokenization stops there.
/// Payload bytes are mapped through `printable_char_tab` before being written.
///
/// Returns the error from `testing.expectEqualStrings` on mismatch, or an
/// allocation error.
fn depTokenizer(input: []const u8, expect: []const u8) !void {
    // All scratch memory lives in an arena that is released on return.
    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    var it: Tokenizer = .{ .bytes = input };
    var buffer: std.ArrayList(u8) = .empty;
    var resolve_buf: std.ArrayList(u8) = .empty;
    var i: usize = 0;
    while (it.next()) |token| : (i += 1) {
        // Newline between tokens, not after the last one.
        if (i != 0) try buffer.append(arena, '\n');
        switch (token) {
            .target, .prereq => |bytes| {
                try buffer.appendSlice(arena, @tagName(token));
                try buffer.appendSlice(arena, " = {");
                try printCharValues(arena, &buffer, bytes);
                try buffer.append(arena, '}');
            },
            .target_must_resolve, .prereq_must_resolve => {
                // Rendered under the same label as the plain variants.
                const label: []const u8 = if (token == .target_must_resolve) "target" else "prereq";
                try buffer.appendSlice(arena, label);
                try buffer.appendSlice(arena, " = {");
                // Reuse the scratch buffer, starting empty for every token.
                resolve_buf.clearRetainingCapacity();
                try token.resolve(arena, &resolve_buf);
                try printCharValues(arena, &buffer, resolve_buf.items);
                try buffer.append(arena, '}');
            },
            else => {
                try buffer.appendSlice(arena, "ERROR: ");
                try token.printError(arena, &buffer);
                break;
            },
        }
    }

    // expectEqualStrings already succeeds on equal input, so no separate
    // equality pre-check is needed.
    try testing.expectEqualStrings(expect, buffer.items);
}
1078
/// Appends one byte to `list` for every byte of `bytes`, each translated
/// through `printable_char_tab`.
fn printCharValues(gpa: Allocator, list: *std.ArrayList(u8), bytes: []const u8) !void {
    for (bytes) |byte| {
        const shown = printable_char_tab[byte];
        try list.append(gpa, shown);
    }
}
1082
/// Appends a human-readable form of `char` to `list`: the character wrapped
/// in single quotes when `std.ascii.isPrint` accepts it, otherwise a
/// `\xNN` escape with two uppercase hex digits.
fn printUnderstandableChar(gpa: Allocator, list: *std.ArrayList(u8), char: u8) !void {
    if (!std.ascii.isPrint(char)) {
        return list.print(gpa, "\\x{X:0>2}", .{char});
    }
    return list.print(gpa, "'{c}'", .{char});
}
1090
1091// zig fmt: off
/// Lookup table indexed by byte value: bytes accepted by
/// `std.ascii.isPrint` map to themselves, every other byte maps to '.'.
const printable_char_tab: [256]u8 = blk: {
    // 256 iterations plus the calls inside isPrint may exceed the default quota.
    @setEvalBranchQuota(4096);
    var table: [256]u8 = undefined;
    for (&table, 0..) |*entry, index| {
        const byte: u8 = @intCast(index);
        entry.* = if (std.ascii.isPrint(byte)) byte else '.';
    }
    break :blk table;
};