Commit d4b241c14e

Michael Dusan <michael.dusan@gmail.com>
2019-05-28 01:47:10
new .d file parser for C compilation
- wip for #2046
- clang .d output must be created with `clang -MV` switch
- implemented in Zig
- hybridized for zig stage0 and stage1
- zig test src-self-hosted/dep_tokenizer.zig
1 parent f68d806
src/cache_hash.cpp
@@ -5,6 +5,7 @@
  * See http://opensource.org/licenses/MIT
  */
 
+#include "userland.h"
 #include "cache_hash.hpp"
 #include "all_types.hpp"
 #include "buffer.hpp"
@@ -473,71 +474,62 @@ Error cache_add_dep_file(CacheHash *ch, Buf *dep_file_path, bool verbose) {
         if (err == ErrorFileNotFound)
             return err;
         if (verbose) {
-            fprintf(stderr, "unable to read .d file: %s\n", err_str(err));
+            fprintf(stderr, "%s: unable to read .d file: %s\n", err_str(err), buf_ptr(dep_file_path));
         }
         return ErrorReadingDepFile;
     }
-    SplitIterator it = memSplit(buf_to_slice(contents), str("\r\n"));
-    // skip first line
-    SplitIterator_next(&it);
-    for (;;) {
-        Optional<Slice<uint8_t>> opt_line = SplitIterator_next(&it);
-        if (!opt_line.is_some)
-            break;
-        if (opt_line.value.len == 0)
-            continue;
-        // skip over indentation
-        while (opt_line.value.len != 0 && (opt_line.value.ptr[0] == ' ' || opt_line.value.ptr[0] == '\t')) {
-            opt_line.value.ptr += 1;
-            opt_line.value.len -= 1;
-        }
-        if (opt_line.value.len == 0)
-            continue;
-
-        if (opt_line.value.ptr[0] == '"') {
-            if (opt_line.value.len < 2) {
+    auto it = stage2_DepTokenizer_init(buf_ptr(contents), buf_len(contents));
+    // skip first token: target
+    {
+        auto result = stage2_DepTokenizer_next(&it);
+        switch (result.ent) {
+            case stage2_DepNextResult::error:
                 if (verbose) {
-                    fprintf(stderr, "unable to process invalid .d file %s: line too short\n", buf_ptr(dep_file_path));
+                    fprintf(stderr, "%s: failed processing .d file: %s\n", result.textz, buf_ptr(dep_file_path));
                 }
-                return ErrorInvalidDepFile;
-            }
-            opt_line.value.ptr += 1;
-            opt_line.value.len -= 2;
-            while (opt_line.value.len != 0 && opt_line.value.ptr[opt_line.value.len] != '"') {
-                opt_line.value.len -= 1;
-            }
-            if (opt_line.value.len == 0) {
-                if (verbose) {
-                    fprintf(stderr, "unable to process invalid .d file %s: missing double quote\n", buf_ptr(dep_file_path));
-                }
-                return ErrorInvalidDepFile;
-            }
-            Buf *filename_buf = buf_create_from_slice(opt_line.value);
-            if ((err = cache_add_file(ch, filename_buf))) {
+                err = ErrorInvalidDepFile;
+                goto finish;
+            case stage2_DepNextResult::null:
+                err = ErrorNone;
+                goto finish;
+            case stage2_DepNextResult::target:
+            case stage2_DepNextResult::prereq:
+                err = ErrorNone;
+                break;
+        }
+    }
+    // Process 0+ prereqs.
+    // clang is invoked in single-source mode so we never get more targets.
+    for (;;) {
+        auto result = stage2_DepTokenizer_next(&it);
+        switch (result.ent) {
+            case stage2_DepNextResult::error:
                 if (verbose) {
-                    fprintf(stderr, "unable to add %s to cache: %s\n", buf_ptr(filename_buf), err_str(err));
-                    fprintf(stderr, "when processing .d file: %s\n", buf_ptr(dep_file_path));
-                }
-                return err;
-            }
-        } else {
-            // sometimes there are multiple files on the same line; we actually need space tokenization.
-            SplitIterator line_it = memSplit(opt_line.value, str(" \t"));
-            Slice<uint8_t> filename;
-            while (SplitIterator_next(&line_it).unwrap(&filename)) {
-                Buf *filename_buf = buf_create_from_slice(filename);
-                if (buf_eql_str(filename_buf, "\\")) continue;
-                if ((err = cache_add_file(ch, filename_buf))) {
-                    if (verbose) {
-                        fprintf(stderr, "unable to add %s to cache: %s\n", buf_ptr(filename_buf), err_str(err));
-                        fprintf(stderr, "when processing .d file: %s\n", buf_ptr(dep_file_path));
-                    }
-                    return err;
+                    fprintf(stderr, "%s: failed processing .d file: %s\n", result.textz, buf_ptr(dep_file_path));
                 }
+                err = ErrorInvalidDepFile;
+                goto finish;
+            case stage2_DepNextResult::null:
+            case stage2_DepNextResult::target:
+                err = ErrorNone;
+                goto finish;
+            case stage2_DepNextResult::prereq:
+                break;
+        }
+        auto textbuf = buf_alloc();
+        buf_init_from_str(textbuf, result.textz);
+        if ((err = cache_add_file(ch, textbuf))) {
+            if (verbose) {
+                fprintf(stderr, "unable to add %s to cache: %s\n", result.textz, err_str(err));
+                fprintf(stderr, "when processing .d file: %s\n", buf_ptr(dep_file_path));
             }
+            goto finish;
         }
     }
-    return ErrorNone;
+
+    finish:
+    stage2_DepTokenizer_deinit(&it);
+    return err;
 }
 
 static Error write_manifest_file(CacheHash *ch) {
src/userland.cpp
@@ -42,3 +42,18 @@ int stage2_fmt(int argc, char **argv) {
     const char *msg = "stage0 called stage2_fmt";
     stage2_panic(msg, strlen(msg));
 }
+
+stage2_DepTokenizer stage2_DepTokenizer_init(const char *input, size_t len) {
+    const char *msg = "stage0 called stage2_DepTokenizer_init";
+    stage2_panic(msg, strlen(msg));
+}
+
+void stage2_DepTokenizer_deinit(stage2_DepTokenizer *self) {
+    const char *msg = "stage0 called stage2_DepTokenizer_deinit";
+    stage2_panic(msg, strlen(msg));
+}
+
+stage2_DepNextResult stage2_DepTokenizer_next(stage2_DepTokenizer *self) {
+    const char *msg = "stage0 called stage2_DepTokenizer_next";
+    stage2_panic(msg, strlen(msg));
+}
src/userland.h
@@ -9,6 +9,7 @@
 #define ZIG_USERLAND_H
 
 #include <stddef.h>
+#include <stdint.h>
 #include <stdio.h>
 
 #ifdef __cplusplus
@@ -118,4 +119,36 @@ ZIG_EXTERN_C ZIG_ATTRIBUTE_NORETURN void stage2_panic(const char *ptr, size_t le
 // ABI warning
 ZIG_EXTERN_C int stage2_fmt(int argc, char **argv);
 
+// ABI warning
+struct stage2_DepTokenizer {
+    void *handle;
+};
+
+// ABI warning
+struct stage2_DepNextResult {
+    enum Ent : uint8_t {
+        error,
+        null,
+        target,
+        prereq,
+    };
+
+    Ent ent;
+
+    // when ent == error --> error text
+    // when ent == null --> undefined
+    // when ent == target --> target pathname
+    // when ent == prereq --> prereq pathname
+    const char *textz;
+};
+
+// ABI warning
+ZIG_EXTERN_C stage2_DepTokenizer stage2_DepTokenizer_init(const char *input, size_t len);
+
+// ABI warning
+ZIG_EXTERN_C void stage2_DepTokenizer_deinit(stage2_DepTokenizer *self);
+
+// ABI warning
+ZIG_EXTERN_C stage2_DepNextResult stage2_DepTokenizer_next(stage2_DepTokenizer *self);
+
 #endif
src-self-hosted/dep_tokenizer.zig
@@ -0,0 +1,1131 @@
+const std = @import("std");
+const testing = std.testing;
+
+pub const Tokenizer = struct {
+    arena: std.heap.ArenaAllocator,
+    index: usize,
+    bytes: []const u8,
+    error_text: []const u8,
+    state: State,
+
+    pub fn init(allocator: *std.mem.Allocator, bytes: []const u8) Tokenizer {
+        return Tokenizer{
+            .arena = std.heap.ArenaAllocator.init(allocator),
+            .index = 0,
+            .bytes = bytes,
+            .error_text = "",
+            .state = State{ .lhs = {} },
+        };
+    }
+
+    pub fn deinit(self: *Tokenizer) void {
+        self.arena.deinit();
+    }
+
+    pub fn next(self: *Tokenizer) Error!?Token {
+        while (self.index < self.bytes.len) {
+            const char = self.bytes[self.index];
+            while (true) {
+                switch (self.state) {
+                    .lhs => switch (char) {
+                        '\t', '\n', '\r', ' ' => {
+                            // silently ignore whitespace
+                            break; // advance
+                        },
+                        else => {
+                            self.state = State{ .target = try std.Buffer.initSize(&self.arena.allocator, 0) };
+                        },
+                    },
+                    .target => |*target| switch (char) {
+                        '\t', '\n', '\r', ' ' => {
+                            return self.errorIllegalChar(self.index, char, "invalid target");
+                        },
+                        '$' => {
+                            self.state = State{ .target_dollar_sign = target.* };
+                            break; // advance
+                        },
+                        '\\' => {
+                            self.state = State{ .target_reverse_solidus = target.* };
+                            break; // advance
+                        },
+                        ':' => {
+                            self.state = State{ .target_colon = target.* };
+                            break; // advance
+                        },
+                        else => {
+                            try target.appendByte(char);
+                            break; // advance
+                        },
+                    },
+                    .target_reverse_solidus => |*target| switch (char) {
+                        '\t', '\n', '\r' => {
+                            return self.errorIllegalChar(self.index, char, "bad target escape");
+                        },
+                        ' ', '#', '\\' => {
+                            try target.appendByte(char);
+                            self.state = State{ .target = target.* };
+                            break; // advance
+                        },
+                        '$' => {
+                            try target.append(self.bytes[self.index - 1 .. self.index]);
+                            self.state = State{ .target_dollar_sign = target.* };
+                            break; // advance
+                        },
+                        else => {
+                            try target.append(self.bytes[self.index - 1 .. self.index + 1]);
+                            self.state = State{ .target = target.* };
+                            break; // advance
+                        },
+                    },
+                    .target_dollar_sign => |*target| switch (char) {
+                        '$' => {
+                            try target.appendByte(char);
+                            self.state = State{ .target = target.* };
+                            break; // advance
+                        },
+                        else => {
+                            return self.errorIllegalChar(self.index, char, "expecting '$'");
+                        },
+                    },
+                    .target_colon => |*target| switch (char) {
+                        '\n', '\r' => {
+                            const bytes = target.toSlice();
+                            if (bytes.len != 0) {
+                                self.state = State{ .lhs = {} };
+                                return Token{ .id = .target, .bytes = bytes };
+                            }
+                            // silently ignore null target
+                            self.state = State{ .lhs = {} };
+                            continue;
+                        },
+                        '\\' => {
+                            self.state = State{ .target_colon_reverse_solidus = target.* };
+                            break; // advance
+                        },
+                        else => {
+                            const bytes = target.toSlice();
+                            if (bytes.len != 0) {
+                                self.state = State{ .rhs = {} };
+                                return Token{ .id = .target, .bytes = bytes };
+                            }
+                            // silently ignore null target
+                            self.state = State{ .lhs = {} };
+                            continue;
+                        },
+                    },
+                    .target_colon_reverse_solidus => |*target| switch (char) {
+                        '\n', '\r' => {
+                            const bytes = target.toSlice();
+                            if (bytes.len != 0) {
+                                self.state = State{ .lhs = {} };
+                                return Token{ .id = .target, .bytes = bytes };
+                            }
+                            // silently ignore null target
+                            self.state = State{ .lhs = {} };
+                            continue;
+                        },
+                        else => {
+                            try target.append(self.bytes[self.index - 2 .. self.index + 1]);
+                            self.state = State{ .target = target.* };
+                            break;
+                        },
+                    },
+                    .rhs => switch (char) {
+                        '\t', ' ' => {
+                            // silently ignore horizontal whitespace
+                            break; // advance
+                        },
+                        '\n', '\r' => {
+                            self.state = State{ .lhs = {} };
+                            continue;
+                        },
+                        '\\' => {
+                            self.state = State{ .rhs_continuation = {} };
+                            break; // advance
+                        },
+                        '"' => {
+                            self.state = State{ .prereq_quote = try std.Buffer.initSize(&self.arena.allocator, 0) };
+                            break; // advance
+                        },
+                        else => {
+                            self.state = State{ .prereq = try std.Buffer.initSize(&self.arena.allocator, 0) };
+                        },
+                    },
+                    .rhs_continuation => switch (char) {
+                        '\n' => {
+                            self.state = State{ .rhs = {} };
+                            break; // advance
+                        },
+                        '\r' => {
+                            self.state = State{ .rhs_continuation_linefeed = {} };
+                            break; // advance
+                        },
+                        else => {
+                            return self.errorIllegalChar(self.index, char, "continuation expecting end-of-line");
+                        },
+                    },
+                    .rhs_continuation_linefeed => switch (char) {
+                        '\n' => {
+                            self.state = State{ .rhs = {} };
+                            break; // advance
+                        },
+                        else => {
+                            return self.errorIllegalChar(self.index, char, "continuation expecting end-of-line");
+                        },
+                    },
+                    .prereq_quote => |*prereq| switch (char) {
+                        '"' => {
+                            const bytes = prereq.toSlice();
+                            self.index += 1;
+                            self.state = State{ .rhs = {} };
+                            return Token{ .id = .prereq, .bytes = bytes };
+                        },
+                        else => {
+                            try prereq.appendByte(char);
+                            break; // advance
+                        },
+                    },
+                    .prereq => |*prereq| switch (char) {
+                        '\t', ' ' => {
+                            const bytes = prereq.toSlice();
+                            self.state = State{ .rhs = {} };
+                            return Token{ .id = .prereq, .bytes = bytes };
+                        },
+                        '\n', '\r' => {
+                            const bytes = prereq.toSlice();
+                            self.state = State{ .lhs = {} };
+                            return Token{ .id = .prereq, .bytes = bytes };
+                        },
+                        '\\' => {
+                            self.state = State{ .prereq_continuation = prereq.* };
+                            break; // advance
+                        },
+                        else => {
+                            try prereq.appendByte(char);
+                            break; // advance
+                        },
+                    },
+                    .prereq_continuation => |*prereq| switch (char) {
+                        '\n' => {
+                            const bytes = prereq.toSlice();
+                            self.index += 1;
+                            self.state = State{ .rhs = {} };
+                            return Token{ .id = .prereq, .bytes = bytes };
+                        },
+                        '\r' => {
+                            self.state = State{ .prereq_continuation_linefeed = prereq.* };
+                            break; // advance
+                        },
+                        else => {
+                            // not continuation
+                            try prereq.append(self.bytes[self.index - 1 .. self.index + 1]);
+                            self.state = State{ .prereq = prereq.* };
+                            break; // advance
+                        },
+                    },
+                    .prereq_continuation_linefeed => |prereq| switch (char) {
+                        '\n' => {
+                            const bytes = prereq.toSlice();
+                            self.index += 1;
+                            self.state = State{ .rhs = {} };
+                            return Token{ .id = .prereq, .bytes = bytes };
+                        },
+                        else => {
+                            return self.errorIllegalChar(self.index, char, "continuation expecting end-of-line");
+                        },
+                    },
+                }
+            }
+            self.index += 1;
+        }
+
+        // eof, handle maybe incomplete token
+        if (self.index == 0) return null;
+        const idx = self.index - 1;
+        switch (self.state) {
+            .lhs,
+            .rhs,
+            .rhs_continuation,
+            .rhs_continuation_linefeed,
+            => {},
+            .target => |target| {
+                return self.errorPosition(idx, target.toSlice(), "incomplete target");
+            },
+            .target_reverse_solidus,
+            .target_dollar_sign,
+            => {
+                const index = self.index - 1;
+                return self.errorIllegalChar(idx, self.bytes[idx], "incomplete escape");
+            },
+            .target_colon => |target| {
+                const bytes = target.toSlice();
+                if (bytes.len != 0) {
+                    self.index += 1;
+                    self.state = State{ .rhs = {} };
+                    return Token{ .id = .target, .bytes = bytes };
+                }
+                // silently ignore null target
+                self.state = State{ .lhs = {} };
+            },
+            .target_colon_reverse_solidus => |target| {
+                const bytes = target.toSlice();
+                if (bytes.len != 0) {
+                    self.index += 1;
+                    self.state = State{ .rhs = {} };
+                    return Token{ .id = .target, .bytes = bytes };
+                }
+                // silently ignore null target
+                self.state = State{ .lhs = {} };
+            },
+            .prereq_quote => |prereq| {
+                return self.errorPosition(idx, prereq.toSlice(), "incomplete quoted prerequisite");
+            },
+            .prereq => |prereq| {
+                const bytes = prereq.toSlice();
+                self.state = State{ .lhs = {} };
+                return Token{ .id = .prereq, .bytes = bytes };
+            },
+            .prereq_continuation => |prereq| {
+                const bytes = prereq.toSlice();
+                self.state = State{ .lhs = {} };
+                return Token{ .id = .prereq, .bytes = bytes };
+            },
+            .prereq_continuation_linefeed => |prereq| {
+                const bytes = prereq.toSlice();
+                self.state = State{ .lhs = {} };
+                return Token{ .id = .prereq, .bytes = bytes };
+            },
+        }
+        return null;
+    }
+
+    fn errorf(self: *Tokenizer, comptime fmt: []const u8, args: ...) Error {
+        self.error_text = (try std.Buffer.allocPrint(&self.arena.allocator, fmt, args)).toSlice();
+        return Error.InvalidInput;
+    }
+
+    fn errorPosition(self: *Tokenizer, position: usize, bytes: []const u8, comptime fmt: []const u8, args: ...) Error {
+        var buffer = try std.Buffer.initSize(&self.arena.allocator, 0);
+        std.fmt.format(&buffer, anyerror, std.Buffer.append, fmt, args) catch {};
+        try buffer.append(" '");
+        var out = makeOutput(std.Buffer.append, &buffer);
+        try printCharValues(&out, bytes);
+        try buffer.append("'");
+        std.fmt.format(&buffer, anyerror, std.Buffer.append, " at position {}", position - (bytes.len - 1)) catch {};
+        self.error_text = buffer.toSlice();
+        return Error.InvalidInput;
+    }
+
+    fn errorIllegalChar(self: *Tokenizer, position: usize, char: u8, comptime fmt: []const u8, args: ...) Error {
+        var buffer = try std.Buffer.initSize(&self.arena.allocator, 0);
+        try buffer.append("illegal char ");
+        var out = makeOutput(std.Buffer.append, &buffer);
+        try printUnderstandableChar(&out, char);
+        std.fmt.format(&buffer, anyerror, std.Buffer.append, " at position {}", position) catch {};
+        if (fmt.len != 0) std.fmt.format(&buffer, anyerror, std.Buffer.append, ": " ++ fmt, args) catch {};
+        self.error_text = buffer.toSlice();
+        return Error.InvalidInput;
+    }
+
+    const Error = error{
+        OutOfMemory,
+        InvalidInput,
+    };
+
+    const State = union(enum) {
+        lhs: void,
+        target: std.Buffer,
+        target_reverse_solidus: std.Buffer,
+        target_dollar_sign: std.Buffer,
+        target_colon: std.Buffer,
+        target_colon_reverse_solidus: std.Buffer,
+        rhs: void,
+        rhs_continuation: void,
+        rhs_continuation_linefeed: void,
+        prereq_quote: std.Buffer,
+        prereq: std.Buffer,
+        prereq_continuation: std.Buffer,
+        prereq_continuation_linefeed: std.Buffer,
+    };
+
+    const Token = struct {
+        id: ID,
+        bytes: []const u8,
+
+        const ID = enum {
+            target,
+            prereq,
+        };
+    };
+};
+
+// stage1 compiler support
+var stage2_da = std.heap.DirectAllocator.init();
+
+export fn stage2_DepTokenizer_init(input: [*]const u8, len: usize) stage2_DepTokenizer {
+    const t = stage2_da.allocator.create(Tokenizer) catch unreachable;
+    t.* = Tokenizer.init(&stage2_da.allocator, input[0..len]);
+    return stage2_DepTokenizer{
+        .handle = t,
+    };
+}
+
+export fn stage2_DepTokenizer_deinit(self: *stage2_DepTokenizer) void {
+    self.handle.deinit();
+}
+
+export fn stage2_DepTokenizer_next(self: *stage2_DepTokenizer) stage2_DepNextResult {
+    const otoken = self.handle.next() catch {
+        return stage2_DepNextResult{
+            .ent = 0,
+            .textz = (std.Buffer.init(&self.handle.arena.allocator, self.handle.error_text) catch unreachable).toSlice().ptr,
+        };
+    };
+    const token = otoken orelse {
+        return stage2_DepNextResult{
+            .ent = 1,
+            .textz = undefined,
+        };
+    };
+    return stage2_DepNextResult{
+        .ent = @enumToInt(token.id) + u8(2),
+        .textz = (std.Buffer.init(&self.handle.arena.allocator, token.bytes) catch unreachable).toSlice().ptr,
+    };
+}
+
+export const stage2_DepTokenizer = extern struct {
+    handle: *Tokenizer,
+};
+
+export const stage2_DepNextResult = extern struct {
+    // 0=error, 1=null, 2=token=target, 3=token=prereq
+    ent: u8,
+    // ent=0 -- error text
+    // ent=1 -- NEVER
+    // ent=2 -- token text value
+    // ent=3 -- token text value
+    textz: [*]const u8,
+};
+
+test "empty file" {
+    try depTokenizer("", "");
+}
+
+test "empty whitespace" {
+    try depTokenizer("\n", "");
+    try depTokenizer("\r", "");
+    try depTokenizer("\r\n", "");
+    try depTokenizer(" ", "");
+}
+
+test "empty colon" {
+    try depTokenizer(":", "");
+    try depTokenizer("\n:", "");
+    try depTokenizer("\r:", "");
+    try depTokenizer("\r\n:", "");
+    try depTokenizer(" :", "");
+}
+
+test "empty target" {
+    try depTokenizer("foo.o:", "target = {foo.o}");
+    try depTokenizer(
+        \\foo.o:
+        \\bar.o:
+        \\abcd.o:
+    ,
+        \\target = {foo.o}
+        \\target = {bar.o}
+        \\target = {abcd.o}
+    );
+}
+
+test "whitespace empty target" {
+    try depTokenizer("\nfoo.o:", "target = {foo.o}");
+    try depTokenizer("\rfoo.o:", "target = {foo.o}");
+    try depTokenizer("\r\nfoo.o:", "target = {foo.o}");
+    try depTokenizer(" foo.o:", "target = {foo.o}");
+}
+
+test "escape empty target" {
+    try depTokenizer("\\ foo.o:", "target = { foo.o}");
+    try depTokenizer("\\#foo.o:", "target = {#foo.o}");
+    try depTokenizer("\\\\foo.o:", "target = {\\foo.o}");
+    try depTokenizer("$$foo.o:", "target = {$foo.o}");
+}
+
+test "empty target linefeeds" {
+    try depTokenizer("\n", "");
+    try depTokenizer("\r\n", "");
+
+    const expect = "target = {foo.o}";
+    try depTokenizer(
+        \\foo.o:
+    ,
+        expect
+    );
+    try depTokenizer(
+        \\foo.o:
+        \\
+    ,
+        expect
+    );
+    try depTokenizer(
+        \\foo.o:
+    ,
+        expect
+    );
+    try depTokenizer(
+        \\foo.o:
+        \\
+    ,
+        expect
+    );
+}
+
+test "empty target linefeeds + continuations" {
+    const expect = "target = {foo.o}";
+    try depTokenizer(
+        \\foo.o:\
+    ,
+        expect
+    );
+    try depTokenizer(
+        \\foo.o:\
+        \\
+    ,
+        expect
+    );
+    try depTokenizer(
+        \\foo.o:\
+    ,
+        expect
+    );
+    try depTokenizer(
+        \\foo.o:\
+        \\
+    ,
+        expect
+    );
+}
+
+test "empty target linefeeds + hspace + continuations" {
+    const expect = "target = {foo.o}";
+    try depTokenizer(
+        \\foo.o: \
+    ,
+        expect
+    );
+    try depTokenizer(
+        \\foo.o: \
+        \\
+    ,
+        expect
+    );
+    try depTokenizer(
+        \\foo.o: \
+    ,
+        expect
+    );
+    try depTokenizer(
+        \\foo.o: \
+        \\
+    ,
+        expect
+    );
+}
+
+test "prereq" {
+    const expect =
+        \\target = {foo.o}
+        \\prereq = {foo.c}
+    ;
+    try depTokenizer("foo.o: foo.c", expect);
+    try depTokenizer(
+        \\foo.o: \
+        \\foo.c
+    , expect);
+    try depTokenizer(
+        \\foo.o: \
+        \\ foo.c
+    , expect);
+    try depTokenizer(
+        \\foo.o:    \
+        \\    foo.c
+    , expect);
+}
+
+test "prereq continuation" {
+    const expect =
+        \\target = {foo.o}
+        \\prereq = {foo.h}
+        \\prereq = {bar.h}
+    ;
+    try depTokenizer(
+        \\foo.o: foo.h\
+        \\bar.h
+    ,
+        expect
+    );
+    try depTokenizer(
+        \\foo.o: foo.h\
+        \\bar.h
+    ,
+        expect
+    );
+}
+
+test "multiple prereqs" {
+    const expect =
+        \\target = {foo.o}
+        \\prereq = {foo.c}
+        \\prereq = {foo.h}
+        \\prereq = {bar.h}
+    ;
+    try depTokenizer("foo.o: foo.c foo.h bar.h", expect);
+    try depTokenizer(
+        \\foo.o: \
+        \\foo.c foo.h bar.h
+    , expect);
+    try depTokenizer(
+        \\foo.o: foo.c foo.h bar.h\
+    , expect);
+    try depTokenizer(
+        \\foo.o: foo.c foo.h bar.h\
+        \\
+    , expect);
+    try depTokenizer(
+        \\foo.o: \
+        \\foo.c       \
+        \\     foo.h\
+        \\bar.h
+        \\
+    , expect);
+    try depTokenizer(
+        \\foo.o: \
+        \\foo.c       \
+        \\     foo.h\
+        \\bar.h\
+        \\
+    , expect);
+    try depTokenizer(
+        \\foo.o: \
+        \\foo.c       \
+        \\     foo.h\
+        \\bar.h\
+    , expect);
+}
+
+test "multiple targets and prereqs" {
+    try depTokenizer(
+        \\foo.o: foo.c
+        \\bar.o: bar.c a.h b.h c.h
+        \\abc.o: abc.c \
+        \\  one.h two.h \
+        \\  three.h four.h
+    ,
+        \\target = {foo.o}
+        \\prereq = {foo.c}
+        \\target = {bar.o}
+        \\prereq = {bar.c}
+        \\prereq = {a.h}
+        \\prereq = {b.h}
+        \\prereq = {c.h}
+        \\target = {abc.o}
+        \\prereq = {abc.c}
+        \\prereq = {one.h}
+        \\prereq = {two.h}
+        \\prereq = {three.h}
+        \\prereq = {four.h}
+    );
+    try depTokenizer(
+        \\ascii.o: ascii.c
+        \\base64.o: base64.c stdio.h
+        \\elf.o: elf.c a.h b.h c.h
+        \\macho.o: \
+        \\  macho.c\
+        \\  a.h b.h c.h
+    ,
+        \\target = {ascii.o}
+        \\prereq = {ascii.c}
+        \\target = {base64.o}
+        \\prereq = {base64.c}
+        \\prereq = {stdio.h}
+        \\target = {elf.o}
+        \\prereq = {elf.c}
+        \\prereq = {a.h}
+        \\prereq = {b.h}
+        \\prereq = {c.h}
+        \\target = {macho.o}
+        \\prereq = {macho.c}
+        \\prereq = {a.h}
+        \\prereq = {b.h}
+        \\prereq = {c.h}
+    );
+    try depTokenizer(
+        \\a$$scii.o: ascii.c
+        \\\\base64.o: "\base64.c" "s t#dio.h"
+        \\e\\lf.o: "e\lf.c" "a.h$$" "$$b.h c.h$$"
+        \\macho.o: \
+        \\  "macho!.c" \
+        \\  a.h b.h c.h
+    ,
+        \\target = {a$scii.o}
+        \\prereq = {ascii.c}
+        \\target = {\base64.o}
+        \\prereq = {\base64.c}
+        \\prereq = {s t#dio.h}
+        \\target = {e\lf.o}
+        \\prereq = {e\lf.c}
+        \\prereq = {a.h$$}
+        \\prereq = {$$b.h c.h$$}
+        \\target = {macho.o}
+        \\prereq = {macho!.c}
+        \\prereq = {a.h}
+        \\prereq = {b.h}
+        \\prereq = {c.h}
+    );
+}
+
+test "windows quoted prereqs" {
+    try depTokenizer(
+        \\c:\foo.o: "C:\Program Files (x86)\Microsoft Visual Studio\foo.c"
+        \\c:\foo2.o: "C:\Program Files (x86)\Microsoft Visual Studio\foo2.c" \
+        \\  "C:\Program Files (x86)\Microsoft Visual Studio\foo1.h" \
+        \\  "C:\Program Files (x86)\Microsoft Visual Studio\foo2.h"
+    ,
+        \\target = {c:\foo.o}
+        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo.c}
+        \\target = {c:\foo2.o}
+        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo2.c}
+        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo1.h}
+        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo2.h}
+    );
+}
+
+// Realistic clang .d output mixing unquoted MSYS-style paths and
+// double-quoted "Program Files (x86)" paths (spaces, doubled backslashes,
+// ..\ segments), joined by backslash line continuations. Quotes are
+// stripped; every other byte of each path is preserved verbatim.
+test "windows mixed prereqs" {
+    try depTokenizer(
+        \\cimport.o: \
+        \\  C:\msys64\home\anon\project\zig\master\zig-cache\o\qhvhbUo7GU5iKyQ5mpA8TcQpncCYaQu0wwvr3ybiSTj_Dtqi1Nmcb70kfODJ2Qlg\cimport.h \
+        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\stdio.h" \
+        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt.h" \
+        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime.h" \
+        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\sal.h" \
+        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\concurrencysal.h" \
+        \\  C:\msys64\opt\zig\lib\zig\include\vadefs.h \
+        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vadefs.h" \
+        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstdio.h" \
+        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_stdio_config.h" \
+        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\string.h" \
+        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memory.h" \
+        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memcpy_s.h" \
+        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\errno.h" \
+        \\  "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime_string.h" \
+        \\  "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstring.h"
+    ,
+        \\target = {cimport.o}
+        \\prereq = {C:\msys64\home\anon\project\zig\master\zig-cache\o\qhvhbUo7GU5iKyQ5mpA8TcQpncCYaQu0wwvr3ybiSTj_Dtqi1Nmcb70kfODJ2Qlg\cimport.h}
+        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\stdio.h}
+        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt.h}
+        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime.h}
+        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\sal.h}
+        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\concurrencysal.h}
+        \\prereq = {C:\msys64\opt\zig\lib\zig\include\vadefs.h}
+        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vadefs.h}
+        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstdio.h}
+        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_stdio_config.h}
+        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\string.h}
+        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memory.h}
+        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memcpy_s.h}
+        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\errno.h}
+        \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime_string.h}
+        \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstring.h}
+    );
+}
+
+// Escape decoding inside targets: "\ " becomes a space, "\#" becomes '#',
+// "$$" becomes '$', while ordinary backslashes pass through unchanged
+// (Windows-style paths stay intact).
+test "funky targets" {
+    try depTokenizer(
+        \\C:\Users\anon\foo.o:
+        \\C:\Users\anon\foo\ .o:
+        \\C:\Users\anon\foo\#.o:
+        \\C:\Users\anon\foo$$.o:
+        \\C:\Users\anon\\\ foo.o:
+        \\C:\Users\anon\\#foo.o:
+        \\C:\Users\anon\$$foo.o:
+        \\C:\Users\anon\\\ \ \ \ \ foo.o:
+    ,
+        \\target = {C:\Users\anon\foo.o}
+        \\target = {C:\Users\anon\foo .o}
+        \\target = {C:\Users\anon\foo#.o}
+        \\target = {C:\Users\anon\foo$.o}
+        \\target = {C:\Users\anon\ foo.o}
+        \\target = {C:\Users\anon\#foo.o}
+        \\target = {C:\Users\anon\$foo.o}
+        \\target = {C:\Users\anon\     foo.o}
+    );
+}
+
+// A lone '\' at end of input is an incomplete escape. The reported
+// position counts every preceding byte, so the "\r\n" case advances the
+// position by two.
+test "error incomplete escape - reverse_solidus" {
+    try depTokenizer("\\",
+        \\ERROR: illegal char '\' at position 0: incomplete escape
+    );
+    try depTokenizer("\t\\",
+        \\ERROR: illegal char '\' at position 1: incomplete escape
+    );
+    try depTokenizer("\n\\",
+        \\ERROR: illegal char '\' at position 1: incomplete escape
+    );
+    try depTokenizer("\r\\",
+        \\ERROR: illegal char '\' at position 1: incomplete escape
+    );
+    try depTokenizer("\r\n\\",
+        \\ERROR: illegal char '\' at position 2: incomplete escape
+    );
+    try depTokenizer(" \\",
+        \\ERROR: illegal char '\' at position 1: incomplete escape
+    );
+}
+
+// Same incomplete-escape rule for a lone '$' at end of input ('$' must be
+// doubled as "$$" to mean a literal dollar sign).
+test "error incomplete escape - dollar_sign" {
+    try depTokenizer("$",
+        \\ERROR: illegal char '$' at position 0: incomplete escape
+    );
+    try depTokenizer("\t$",
+        \\ERROR: illegal char '$' at position 1: incomplete escape
+    );
+    try depTokenizer("\n$",
+        \\ERROR: illegal char '$' at position 1: incomplete escape
+    );
+    try depTokenizer("\r$",
+        \\ERROR: illegal char '$' at position 1: incomplete escape
+    );
+    try depTokenizer("\r\n$",
+        \\ERROR: illegal char '$' at position 1: incomplete escape
+    );
+    try depTokenizer(" $",
+        \\ERROR: illegal char '$' at position 1: incomplete escape
+    );
+}
+
+// A target never terminated by ':' is reported as incomplete. The error
+// text contains the decoded target (escapes "\ ", "\#", "\\", "$$" already
+// resolved) and the position where the target began.
+test "error incomplete target" {
+    try depTokenizer("foo.o",
+        \\ERROR: incomplete target 'foo.o' at position 0
+    );
+    try depTokenizer("\tfoo.o",
+        \\ERROR: incomplete target 'foo.o' at position 1
+    );
+    try depTokenizer("\nfoo.o",
+        \\ERROR: incomplete target 'foo.o' at position 1
+    );
+    try depTokenizer("\rfoo.o",
+        \\ERROR: incomplete target 'foo.o' at position 1
+    );
+    try depTokenizer("\r\nfoo.o",
+        \\ERROR: incomplete target 'foo.o' at position 2
+    );
+    try depTokenizer(" foo.o",
+        \\ERROR: incomplete target 'foo.o' at position 1
+    );
+
+    try depTokenizer("\\ foo.o",
+        \\ERROR: incomplete target ' foo.o' at position 1
+    );
+    try depTokenizer("\\#foo.o",
+        \\ERROR: incomplete target '#foo.o' at position 1
+    );
+    try depTokenizer("\\\\foo.o",
+        \\ERROR: incomplete target '\foo.o' at position 1
+    );
+    try depTokenizer("$$foo.o",
+        \\ERROR: incomplete target '$foo.o' at position 1
+    );
+}
+
+// '\' followed by tab, LF, or CR is not a valid target escape; the
+// offending control character is reported in \xNN form.
+test "error illegal char at position - bad target escape" {
+    try depTokenizer("\\\t",
+        \\ERROR: illegal char \x09 at position 1: bad target escape
+    );
+    try depTokenizer("\\\n",
+        \\ERROR: illegal char \x0A at position 1: bad target escape
+    );
+    try depTokenizer("\\\r",
+        \\ERROR: illegal char \x0D at position 1: bad target escape
+    );
+    try depTokenizer("\\\r\n",
+        \\ERROR: illegal char \x0D at position 1: bad target escape
+    );
+}
+
+// '$' must be followed by another '$' to escape a literal dollar sign;
+// any other character is illegal and reported in \xNN form.
+// NOTE: test name fixed from "execting" to "expecting" (typo).
+test "error illegal char at position - expecting dollar_sign" {
+    try depTokenizer("$\t",
+        \\ERROR: illegal char \x09 at position 1: expecting '$'
+    );
+    try depTokenizer("$\n",
+        \\ERROR: illegal char \x0A at position 1: expecting '$'
+    );
+    try depTokenizer("$\r",
+        \\ERROR: illegal char \x0D at position 1: expecting '$'
+    );
+    try depTokenizer("$\r\n",
+        \\ERROR: illegal char \x0D at position 1: expecting '$'
+    );
+}
+
+// A raw (unescaped) tab, LF, or CR appearing inside a target is illegal.
+test "error illegal char at position - invalid target" {
+    try depTokenizer("foo\t.o",
+        \\ERROR: illegal char \x09 at position 3: invalid target
+    );
+    try depTokenizer("foo\n.o",
+        \\ERROR: illegal char \x0A at position 3: invalid target
+    );
+    try depTokenizer("foo\r.o",
+        \\ERROR: illegal char \x0D at position 3: invalid target
+    );
+    try depTokenizer("foo\r\n.o",
+        \\ERROR: illegal char \x0D at position 3: invalid target
+    );
+}
+
+// After "target: \" only an end-of-line may follow the continuation
+// backslash; the target token is still emitted before the error. The last
+// case embeds a real newline inside the string literal, so 'x' follows a
+// completed continuation and is reported at position 9.
+test "error target - continuation expecting end-of-line" {
+    try depTokenizer("foo.o: \\\t",
+        \\target = {foo.o}
+        \\ERROR: illegal char \x09 at position 8: continuation expecting end-of-line
+    );
+    try depTokenizer("foo.o: \\ ",
+        \\target = {foo.o}
+        \\ERROR: illegal char \x20 at position 8: continuation expecting end-of-line
+    );
+    try depTokenizer("foo.o: \\x",
+        \\target = {foo.o}
+        \\ERROR: illegal char 'x' at position 8: continuation expecting end-of-line
+    );
+    try depTokenizer("foo.o: \\
x",
+        \\target = {foo.o}
+        \\ERROR: illegal char 'x' at position 9: continuation expecting end-of-line
+    );
+}
+
+// Same continuation rule while scanning prereqs: only the target token is
+// emitted, and the incomplete prereq ("foo.h") is not.
+test "error prereq - continuation expecting end-of-line" {
+    try depTokenizer("foo.o: foo.h\\
x",
+        \\target = {foo.o}
+        \\ERROR: illegal char 'x' at position 14: continuation expecting end-of-line
+    );
+}
+
+// - tokenize input, emit textual representation, and compare to expect
+// Each token is rendered as "<tag> = {<bytes>}" with bytes mapped through
+// printable_char_tab, one token per line. If the tokenizer reports
+// Error.InvalidInput, an "ERROR: <it.error_text>" line is appended and
+// tokenizing stops; any other error is propagated. On mismatch, the
+// input, expected, and actual text are hex-dumped to stderr and the test
+// fails via testing.expect(false).
+fn depTokenizer(input: []const u8, expect: []const u8) !void {
+    var direct_allocator = std.heap.DirectAllocator.init();
+    var arena_allocator = std.heap.ArenaAllocator.init(&direct_allocator.allocator);
+    const arena = &arena_allocator.allocator;
+    defer arena_allocator.deinit();
+
+    var it = Tokenizer.init(&direct_allocator.allocator, input);
+    var buffer = try std.Buffer.initSize(arena, 0);
+    var i: usize = 0;
+    while (true) {
+        const r = it.next() catch |err| {
+            switch (err) {
+                Tokenizer.Error.InvalidInput => {
+                    // record the tokenizer's own error text and stop
+                    if (i != 0) try buffer.append("\n");
+                    try buffer.append("ERROR: ");
+                    try buffer.append(it.error_text);
+                },
+                else => return err,
+            }
+            break;
+        };
+        // null result means end of input
+        const token = r orelse break;
+        if (i != 0) try buffer.append("\n");
+        try buffer.append(@tagName(token.id));
+        try buffer.append(" = {");
+        for (token.bytes) |b| {
+            // map non-printable bytes to '.' so the comparison text is stable
+            try buffer.appendByte(printable_char_tab[b]);
+        }
+        try buffer.append("}");
+        i += 1;
+    }
+    const got: []const u8 = buffer.toSlice();
+
+    if (std.mem.eql(u8, expect, got)) {
+        testing.expect(true);
+        return;
+    }
+
+    // mismatch: dump all three byte sequences to stderr before failing
+    var out = makeOutput(std.fs.File.write, try std.io.getStdErr());
+
+    try out.write("\n");
+    try printSection(&out, "<<<< input", input);
+    try printSection(&out, "==== expect", expect);
+    try printSection(&out, ">>>> got", got);
+    try printRuler(&out);
+
+    testing.expect(false);
+}
+
+// Write one labeled section of the failure report: a label line, a hex
+// dump of the bytes, a ruler, then the raw bytes themselves.
+fn printSection(out: var, label: []const u8, bytes: []const u8) !void {
+    try printLabel(out, label, bytes);
+    try hexDump(out, bytes);
+    try printRuler(out);
+    try out.write(bytes);
+    try out.write("\n");
+}
+
+// Write `label` and the byte count of `bytes`, then pad the line out to
+// `end` columns by repeating the label's first character, ending with a
+// newline. (If the text already exceeds `end` columns, no padding is
+// written.)
+fn printLabel(out: var, label: []const u8, bytes: []const u8) !void {
+    var buf: [80]u8 = undefined;
+    var text = try std.fmt.bufPrint(buf[0..], "{} {} bytes ", label, bytes.len);
+    try out.write(text);
+    var i: usize = text.len;
+    const end = 79;
+    // use the named width instead of repeating the magic number 79
+    while (i < end) : (i += 1) {
+        try out.write([]const u8{label[0]});
+    }
+    try out.write("\n");
+}
+
+// Write an `end`-column ruler of '-' characters followed by a newline,
+// separating sections of the failure report.
+fn printRuler(out: var) !void {
+    var i: usize = 0;
+    const end = 79;
+    // use the named width instead of repeating the magic number 79
+    while (i < end) : (i += 1) {
+        try out.write("-");
+    }
+    try out.write("\n");
+}
+
+// Write a hex dump of `bytes`: all complete 16-byte rows first, then one
+// partial row padded so its ASCII column lines up with the full rows, then
+// a closing line holding the final offset.
+fn hexDump(out: var, bytes: []const u8) !void {
+    const n16 = bytes.len >> 4;
+    var line: usize = 0;
+    var offset: usize = 0;
+    while (line < n16) : (line += 1) {
+        try hexDump16(out, offset, bytes[offset .. offset + 16]);
+        offset += 16;
+    }
+
+    // n = bytes remaining for the final, partial row (0..15)
+    const n = bytes.len & 0x0f;
+    if (n > 0) {
+        try printDecValue(out, offset, 8);
+        try out.write(":");
+        try out.write(" ");
+        // first half-group: up to 8 bytes
+        var end1 = std.math.min(offset + n, offset + 8);
+        for (bytes[offset..end1]) |b| {
+            try out.write(" ");
+            try printHexValue(out, b, 2);
+        }
+        // second half-group, only when more than 8 bytes remain
+        var end2 = offset + n;
+        if (end2 > end1) {
+            try out.write(" ");
+            for (bytes[end1..end2]) |b| {
+                try out.write(" ");
+                try printHexValue(out, b, 2);
+            }
+        }
+        // pad the missing hex cells (3 columns each) so the ASCII column aligns
+        const short = 16 - n;
+        var i: usize = 0;
+        while (i < short) : (i += 1) {
+            try out.write("   ");
+        }
+        // the half-group gap is only present when the second group printed
+        if (end2 > end1) {
+            try out.write("  |");
+        } else {
+            try out.write("   |");
+        }
+        try printCharValues(out, bytes[offset..end2]);
+        try out.write("|\n");
+        offset += n;
+    }
+
+    // final line: total length as the closing offset
+    try printDecValue(out, offset, 8);
+    try out.write(":");
+    try out.write("\n");
+}
+
+// Write one full 16-byte dump row: an 8-digit decimal offset, two groups
+// of 8 hex bytes, and the ASCII rendering between '|' delimiters.
+fn hexDump16(out: var, offset: usize, bytes: []const u8) !void {
+    try printDecValue(out, offset, 8);
+    try out.write(":");
+    try out.write(" ");
+    for (bytes[0..8]) |b| {
+        try out.write(" ");
+        try printHexValue(out, b, 2);
+    }
+    try out.write(" ");
+    for (bytes[8..16]) |b| {
+        try out.write(" ");
+        try printHexValue(out, b, 2);
+    }
+    try out.write("  |");
+    try printCharValues(out, bytes);
+    try out.write("|\n");
+}
+
+// Write `value` in decimal, padded to `width` digits.
+fn printDecValue(out: var, value: u64, width: u8) !void {
+    // 20 characters fits the largest u64 in base 10
+    var buffer: [20]u8 = undefined;
+    const len = std.fmt.formatIntBuf(buffer[0..], value, 10, false, width);
+    try out.write(buffer[0..len]);
+}
+
+// Write `value` in hexadecimal, padded to `width` digits (the `false`
+// argument is formatIntBuf's uppercase flag in this Zig version --
+// lowercase digits; confirm against the targeted std).
+fn printHexValue(out: var, value: u64, width: u8) !void {
+    var buffer: [16]u8 = undefined;
+    const len = std.fmt.formatIntBuf(buffer[0..], value, 16, false, width);
+    try out.write(buffer[0..len]);
+}
+
+// Write each byte through printable_char_tab so non-printable bytes show
+// as '.' (the ASCII column of the hex dump).
+fn printCharValues(out: var, bytes: []const u8) !void {
+    for (bytes) |b| {
+        try out.write([]const u8{printable_char_tab[b]});
+    }
+}
+
+// Write `char` in human-readable form: non-printable characters (and the
+// space character) as "\xNN" hex, everything else single-quoted. Format
+// errors are deliberately swallowed (best-effort diagnostics output).
+// NOTE(review): not referenced elsewhere in this chunk -- presumably used
+// by error-text generation; verify against the rest of the file.
+fn printUnderstandableChar(out: var, char: u8) !void {
+    if (!std.ascii.isPrint(char) or char == ' ') {
+        std.fmt.format(out.context, anyerror, out.output, "\\x{X2}", char) catch {};
+    } else {
+        try out.write("'");
+        try out.write([]const u8{printable_char_tab[char]});
+        try out.write("'");
+    }
+}
+
+// zig fmt: off
+// 256-entry map from byte value to a displayable character: printable
+// ASCII (0x20..0x7E) maps to itself, every other byte to '.'. Used when
+// rendering token bytes and hex-dump ASCII columns.
+const printable_char_tab: []const u8 =
+    "................................ !\"#$%&'()*+,-./0123456789:;<=>?" ++
+    "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~." ++
+    "................................................................" ++
+    "................................................................";
+// zig fmt: on
+comptime {
+    // the table must cover every possible u8 value
+    std.debug.assert(printable_char_tab.len == 256);
+}
+
+// Make an output var that wraps a context and output function.
+// output: must be a function that takes a `self` idiom parameter
+// and a bytes parameter
+// context: must be that self
+// (e.g. makeOutput(std.fs.File.write, file) yields a value with a
+// write([]const u8) method bound to that file.)
+fn makeOutput(output: var, context: var) Output(@typeOf(output)) {
+    return Output(@typeOf(output)){
+        .output = output,
+        .context = context,
+    };
+}
+
+// Wrapper type produced by makeOutput. Validates at comptime that T is a
+// two-argument function whose second parameter is a u8 slice, then bundles
+// the function with its context behind a write([]const u8) method.
+fn Output(comptime T: type) type {
+    const args = switch (@typeInfo(T)) {
+        .Fn => |f| f.args,
+        else => @compileError("output parameter is not a function"),
+    };
+    if (args.len != 2) {
+        @compileError("output function must take 2 arguments");
+    }
+    // both parameter types must be explicit (not inferred/var)
+    const at0 = args[0].arg_type orelse @compileError("output arg[0] does not have a type");
+    const at1 = args[1].arg_type orelse @compileError("output arg[1] does not have a type");
+    const arg1p = switch (@typeInfo(at1)) {
+        .Pointer => |p| p,
+        else => @compileError("output arg[1] is not a slice"),
+    };
+    if (arg1p.child != u8) @compileError("output arg[1] is not a u8 slice");
+    return struct {
+        output: T,
+        context: at0,
+
+        // forward bytes to the wrapped function with the stored context
+        fn write(self: *@This(), bytes: []const u8) !void {
+            try self.output(self.context, bytes);
+        }
+    };
+}
src-self-hosted/stage1.zig
@@ -20,6 +20,10 @@ var stderr_file: fs.File = undefined;
 var stderr: *io.OutStream(fs.File.WriteError) = undefined;
 var stdout: *io.OutStream(fs.File.WriteError) = undefined;
 
+comptime {
+    _ =  @import("dep_tokenizer.zig");
+}
+
 // ABI warning
 export fn stage2_zen(ptr: *[*]const u8, len: *usize) void {
     const info_zen = @import("main.zig").info_zen;
CMakeLists.txt
@@ -6726,6 +6726,7 @@ add_custom_command(
         "-Doutput-dir=${CMAKE_BINARY_DIR}"
     WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
     DEPENDS
+        "${CMAKE_SOURCE_DIR}/src-self-hosted/dep_tokenizer.zig"
         "${CMAKE_SOURCE_DIR}/src-self-hosted/stage1.zig"
         "${CMAKE_SOURCE_DIR}/src-self-hosted/translate_c.zig"
         "${CMAKE_SOURCE_DIR}/build.zig"