Commit 3b23929be5

Vexu <git@vexu.eu>
2020-02-02 00:40:46
use std.c.tokenizer in translate-c
1 parent 4f2652d
Changed files (2)
src-self-hosted/c_tokenizer.zig
@@ -1,977 +0,0 @@
-const std = @import("std");
-const expect = std.testing.expect;
-const ZigClangSourceLocation = @import("clang.zig").ZigClangSourceLocation;
-const Context = @import("translate_c.zig").Context;
-const failDecl = @import("translate_c.zig").failDecl;
-
-pub const TokenList = std.SegmentedList(CToken, 32);
-
-pub const CToken = struct {
-    id: Id,
-    bytes: []const u8 = "",
-    num_lit_suffix: NumLitSuffix = .None,
-
-    pub const Id = enum {
-        CharLit,
-        StrLit,
-        NumLitInt,
-        NumLitFloat,
-        Identifier,
-        Plus,
-        Minus,
-        Slash,
-        LParen,
-        RParen,
-        Eof,
-        Dot,
-        Asterisk, // *
-        Ampersand, // &
-        And, // &&
-        Assign, // =
-        Or, // ||
-        Bang, // !
-        Tilde, // ~
-        Shl, // <<
-        Shr, // >>
-        Lt, // <
-        Lte, // <=
-        Gt, // >
-        Gte, // >=
-        Eq, // ==
-        Ne, // !=
-        Increment, // ++
-        Decrement, // --
-        Comma,
-        Fn,
-        Arrow, // ->
-        LBrace,
-        RBrace,
-        Pipe,
-        QuestionMark,
-        Colon,
-    };
-
-    pub const NumLitSuffix = enum {
-        None,
-        F,
-        L,
-        U,
-        LU,
-        LL,
-        LLU,
-    };
-};
-
-pub fn tokenizeCMacro(ctx: *Context, loc: ZigClangSourceLocation, name: []const u8, tl: *TokenList, chars: [*:0]const u8) !void {
-    var index: usize = 0;
-    var first = true;
-    while (true) {
-        const tok = try next(ctx, loc, name, chars, &index);
-        if (tok.id == .StrLit or tok.id == .CharLit)
-            try tl.push(try zigifyEscapeSequences(ctx, loc, name, tl.allocator, tok))
-        else
-            try tl.push(tok);
-        if (tok.id == .Eof)
-            return;
-        if (first) {
-            // distinguish NAME (EXPR) from NAME(ARGS)
-            first = false;
-            if (chars[index] == '(') {
-                try tl.push(.{
-                    .id = .Fn,
-                    .bytes = "",
-                });
-            }
-        }
-    }
-}
-
-fn zigifyEscapeSequences(ctx: *Context, loc: ZigClangSourceLocation, name: []const u8, allocator: *std.mem.Allocator, tok: CToken) !CToken {
-    for (tok.bytes) |c| {
-        if (c == '\\') {
-            break;
-        }
-    } else return tok;
-    var bytes = try allocator.alloc(u8, tok.bytes.len * 2);
-    var state: enum {
-        Start,
-        Escape,
-        Hex,
-        Octal,
-    } = .Start;
-    var i: usize = 0;
-    var count: u8 = 0;
-    var num: u8 = 0;
-    for (tok.bytes) |c| {
-        switch (state) {
-            .Escape => {
-                switch (c) {
-                    'n', 'r', 't', '\\', '\'', '\"' => {
-                        bytes[i] = c;
-                    },
-                    '0'...'7' => {
-                        count += 1;
-                        num += c - '0';
-                        state = .Octal;
-                        bytes[i] = 'x';
-                    },
-                    'x' => {
-                        state = .Hex;
-                        bytes[i] = 'x';
-                    },
-                    'a' => {
-                        bytes[i] = 'x';
-                        i += 1;
-                        bytes[i] = '0';
-                        i += 1;
-                        bytes[i] = '7';
-                    },
-                    'b' => {
-                        bytes[i] = 'x';
-                        i += 1;
-                        bytes[i] = '0';
-                        i += 1;
-                        bytes[i] = '8';
-                    },
-                    'f' => {
-                        bytes[i] = 'x';
-                        i += 1;
-                        bytes[i] = '0';
-                        i += 1;
-                        bytes[i] = 'C';
-                    },
-                    'v' => {
-                        bytes[i] = 'x';
-                        i += 1;
-                        bytes[i] = '0';
-                        i += 1;
-                        bytes[i] = 'B';
-                    },
-                    '?' => {
-                        i -= 1;
-                        bytes[i] = '?';
-                    },
-                    'u', 'U' => {
-                        try failDecl(ctx, loc, name, "macro tokenizing failed: TODO unicode escape sequences", .{});
-                        return error.TokenizingFailed;
-                    },
-                    else => {
-                        try failDecl(ctx, loc, name, "macro tokenizing failed: unknown escape sequence", .{});
-                        return error.TokenizingFailed;
-                    },
-                }
-                i += 1;
-                if (state == .Escape)
-                    state = .Start;
-            },
-            .Start => {
-                if (c == '\\') {
-                    state = .Escape;
-                }
-                bytes[i] = c;
-                i += 1;
-            },
-            .Hex => {
-                switch (c) {
-                    '0'...'9' => {
-                        num = std.math.mul(u8, num, 16) catch {
-                            try failDecl(ctx, loc, name, "macro tokenizing failed: hex literal overflowed", .{});
-                            return error.TokenizingFailed;
-                        };
-                        num += c - '0';
-                    },
-                    'a'...'f' => {
-                        num = std.math.mul(u8, num, 16) catch {
-                            try failDecl(ctx, loc, name, "macro tokenizing failed: hex literal overflowed", .{});
-                            return error.TokenizingFailed;
-                        };
-                        num += c - 'a' + 10;
-                    },
-                    'A'...'F' => {
-                        num = std.math.mul(u8, num, 16) catch {
-                            try failDecl(ctx, loc, name, "macro tokenizing failed: hex literal overflowed", .{});
-                            return error.TokenizingFailed;
-                        };
-                        num += c - 'A' + 10;
-                    },
-                    else => {
-                        i += std.fmt.formatIntBuf(bytes[i..], num, 16, false, std.fmt.FormatOptions{ .fill = '0', .width = 2 });
-                        num = 0;
-                        if (c == '\\')
-                            state = .Escape
-                        else
-                            state = .Start;
-                        bytes[i] = c;
-                        i += 1;
-                    },
-                }
-            },
-            .Octal => {
-                const accept_digit = switch (c) {
-                    // The maximum length of a octal literal is 3 digits
-                    '0'...'7' => count < 3,
-                    else => false,
-                };
-
-                if (accept_digit) {
-                    count += 1;
-                    num = std.math.mul(u8, num, 8) catch {
-                        try failDecl(ctx, loc, name, "macro tokenizing failed: octal literal overflowed", .{});
-                        return error.TokenizingFailed;
-                    };
-                    num += c - '0';
-                } else {
-                    i += std.fmt.formatIntBuf(bytes[i..], num, 16, false, std.fmt.FormatOptions{ .fill = '0', .width = 2 });
-                    num = 0;
-                    count = 0;
-                    if (c == '\\')
-                        state = .Escape
-                    else
-                        state = .Start;
-                    bytes[i] = c;
-                    i += 1;
-                }
-            },
-        }
-    }
-    if (state == .Hex or state == .Octal)
-        i += std.fmt.formatIntBuf(bytes[i..], num, 16, false, std.fmt.FormatOptions{ .fill = '0', .width = 2 });
-    return CToken{
-        .id = tok.id,
-        .bytes = bytes[0..i],
-    };
-}
-
-fn next(ctx: *Context, loc: ZigClangSourceLocation, name: []const u8, chars: [*:0]const u8, i: *usize) !CToken {
-    var state: enum {
-        Start,
-        SawLt,
-        SawGt,
-        SawPlus,
-        SawMinus,
-        SawAmpersand,
-        SawPipe,
-        SawBang,
-        SawEq,
-        CharLit,
-        OpenComment,
-        Comment,
-        CommentStar,
-        Backslash,
-        String,
-        Identifier,
-        Decimal,
-        Octal,
-        SawZero,
-        Hex,
-        Bin,
-        Float,
-        ExpSign,
-        FloatExp,
-        FloatExpFirst,
-        NumLitIntSuffixU,
-        NumLitIntSuffixL,
-        NumLitIntSuffixLL,
-        NumLitIntSuffixUL,
-        Done,
-    } = .Start;
-
-    var result = CToken{
-        .bytes = "",
-        .id = .Eof,
-    };
-    var begin_index: usize = 0;
-    var digits: u8 = 0;
-    var pre_escape = state;
-
-    while (true) {
-        const c = chars[i.*];
-        if (c == 0) {
-            switch (state) {
-                .Identifier,
-                .Decimal,
-                .Hex,
-                .Bin,
-                .Octal,
-                .SawZero,
-                .Float,
-                .FloatExp,
-                => {
-                    result.bytes = chars[begin_index..i.*];
-                    return result;
-                },
-                .Start,
-                .SawMinus,
-                .Done,
-                .NumLitIntSuffixU,
-                .NumLitIntSuffixL,
-                .NumLitIntSuffixUL,
-                .NumLitIntSuffixLL,
-                .SawLt,
-                .SawGt,
-                .SawPlus,
-                .SawAmpersand,
-                .SawPipe,
-                .SawBang,
-                .SawEq,
-                => {
-                    return result;
-                },
-                .CharLit,
-                .OpenComment,
-                .Comment,
-                .CommentStar,
-                .Backslash,
-                .String,
-                .ExpSign,
-                .FloatExpFirst,
-                => {
-                    try failDecl(ctx, loc, name, "macro tokenizing failed: unexpected EOF", .{});
-                    return error.TokenizingFailed;
-                },
-            }
-        }
-        switch (state) {
-            .Start => {
-                switch (c) {
-                    ' ', '\t', '\x0B', '\x0C' => {},
-                    '\'' => {
-                        state = .CharLit;
-                        result.id = .CharLit;
-                        begin_index = i.*;
-                    },
-                    '\"' => {
-                        state = .String;
-                        result.id = .StrLit;
-                        begin_index = i.*;
-                    },
-                    '/' => {
-                        state = .OpenComment;
-                    },
-                    '\\' => {
-                        state = .Backslash;
-                    },
-                    '\n', '\r' => {
-                        return result;
-                    },
-                    'a'...'z', 'A'...'Z', '_' => {
-                        state = .Identifier;
-                        result.id = .Identifier;
-                        begin_index = i.*;
-                    },
-                    '1'...'9' => {
-                        state = .Decimal;
-                        result.id = .NumLitInt;
-                        begin_index = i.*;
-                    },
-                    '0' => {
-                        state = .SawZero;
-                        result.id = .NumLitInt;
-                        begin_index = i.*;
-                    },
-                    '.' => {
-                        result.id = .Dot;
-                        state = .Done;
-                    },
-                    '<' => {
-                        result.id = .Lt;
-                        state = .SawLt;
-                    },
-                    '>' => {
-                        result.id = .Gt;
-                        state = .SawGt;
-                    },
-                    '(' => {
-                        result.id = .LParen;
-                        state = .Done;
-                    },
-                    ')' => {
-                        result.id = .RParen;
-                        state = .Done;
-                    },
-                    '*' => {
-                        result.id = .Asterisk;
-                        state = .Done;
-                    },
-                    '+' => {
-                        result.id = .Plus;
-                        state = .SawPlus;
-                    },
-                    '-' => {
-                        result.id = .Minus;
-                        state = .SawMinus;
-                    },
-                    '!' => {
-                        result.id = .Bang;
-                        state = .SawBang;
-                    },
-                    '~' => {
-                        result.id = .Tilde;
-                        state = .Done;
-                    },
-                    '=' => {
-                        result.id = .Assign;
-                        state = .SawEq;
-                    },
-                    ',' => {
-                        result.id = .Comma;
-                        state = .Done;
-                    },
-                    '[' => {
-                        result.id = .LBrace;
-                        state = .Done;
-                    },
-                    ']' => {
-                        result.id = .RBrace;
-                        state = .Done;
-                    },
-                    '|' => {
-                        result.id = .Pipe;
-                        state = .SawPipe;
-                    },
-                    '&' => {
-                        result.id = .Ampersand;
-                        state = .SawAmpersand;
-                    },
-                    '?' => {
-                        result.id = .QuestionMark;
-                        state = .Done;
-                    },
-                    ':' => {
-                        result.id = .Colon;
-                        state = .Done;
-                    },
-                    else => {
-                        try failDecl(ctx, loc, name, "macro tokenizing failed: unexpected character '{c}'", .{c});
-                        return error.TokenizingFailed;
-                    },
-                }
-            },
-            .Done => return result,
-            .SawMinus => {
-                switch (c) {
-                    '>' => {
-                        result.id = .Arrow;
-                        state = .Done;
-                    },
-                    '-' => {
-                        result.id = .Decrement;
-                        state = .Done;
-                    },
-                    else => return result,
-                }
-            },
-            .SawPlus => {
-                switch (c) {
-                    '+' => {
-                        result.id = .Increment;
-                        state = .Done;
-                    },
-                    else => return result,
-                }
-            },
-            .SawLt => {
-                switch (c) {
-                    '<' => {
-                        result.id = .Shl;
-                        state = .Done;
-                    },
-                    '=' => {
-                        result.id = .Lte;
-                        state = .Done;
-                    },
-                    else => return result,
-                }
-            },
-            .SawGt => {
-                switch (c) {
-                    '>' => {
-                        result.id = .Shr;
-                        state = .Done;
-                    },
-                    '=' => {
-                        result.id = .Gte;
-                        state = .Done;
-                    },
-                    else => return result,
-                }
-            },
-            .SawPipe => {
-                switch (c) {
-                    '|' => {
-                        result.id = .Or;
-                        state = .Done;
-                    },
-                    else => return result,
-                }
-            },
-            .SawAmpersand => {
-                switch (c) {
-                    '&' => {
-                        result.id = .And;
-                        state = .Done;
-                    },
-                    else => return result,
-                }
-            },
-            .SawBang => {
-                switch (c) {
-                    '=' => {
-                        result.id = .Ne;
-                        state = .Done;
-                    },
-                    else => return result,
-                }
-            },
-            .SawEq => {
-                switch (c) {
-                    '=' => {
-                        result.id = .Eq;
-                        state = .Done;
-                    },
-                    else => return result,
-                }
-            },
-            .Float => {
-                switch (c) {
-                    '.', '0'...'9' => {},
-                    'e', 'E' => {
-                        state = .ExpSign;
-                    },
-                    'f',
-                    'F',
-                    => {
-                        result.num_lit_suffix = .F;
-                        result.bytes = chars[begin_index..i.*];
-                        state = .Done;
-                    },
-                    'l', 'L' => {
-                        result.num_lit_suffix = .L;
-                        result.bytes = chars[begin_index..i.*];
-                        state = .Done;
-                    },
-                    else => {
-                        result.bytes = chars[begin_index..i.*];
-                        return result;
-                    },
-                }
-            },
-            .ExpSign => {
-                switch (c) {
-                    '+', '-' => {
-                        state = .FloatExpFirst;
-                    },
-                    '0'...'9' => {
-                        state = .FloatExp;
-                    },
-                    else => {
-                        try failDecl(ctx, loc, name, "macro tokenizing failed: expected a digit or '+' or '-'", .{});
-                        return error.TokenizingFailed;
-                    },
-                }
-            },
-            .FloatExpFirst => {
-                switch (c) {
-                    '0'...'9' => {
-                        state = .FloatExp;
-                    },
-                    else => {
-                        try failDecl(ctx, loc, name, "macro tokenizing failed: expected a digit", .{});
-                        return error.TokenizingFailed;
-                    },
-                }
-            },
-            .FloatExp => {
-                switch (c) {
-                    '0'...'9' => {},
-                    'f', 'F' => {
-                        result.num_lit_suffix = .F;
-                        result.bytes = chars[begin_index..i.*];
-                        state = .Done;
-                    },
-                    'l', 'L' => {
-                        result.num_lit_suffix = .L;
-                        result.bytes = chars[begin_index..i.*];
-                        state = .Done;
-                    },
-                    else => {
-                        result.bytes = chars[begin_index..i.*];
-                        return result;
-                    },
-                }
-            },
-            .Decimal => {
-                switch (c) {
-                    '0'...'9' => {},
-                    '\'' => {},
-                    'u', 'U' => {
-                        state = .NumLitIntSuffixU;
-                        result.num_lit_suffix = .U;
-                        result.bytes = chars[begin_index..i.*];
-                    },
-                    'l', 'L' => {
-                        state = .NumLitIntSuffixL;
-                        result.num_lit_suffix = .L;
-                        result.bytes = chars[begin_index..i.*];
-                    },
-                    '.' => {
-                        result.id = .NumLitFloat;
-                        state = .Float;
-                    },
-                    else => {
-                        result.bytes = chars[begin_index..i.*];
-                        return result;
-                    },
-                }
-            },
-            .SawZero => {
-                switch (c) {
-                    'x', 'X' => {
-                        state = .Hex;
-                    },
-                    'b', 'B' => {
-                        state = .Bin;
-                    },
-                    '.' => {
-                        state = .Float;
-                        result.id = .NumLitFloat;
-                    },
-                    'u', 'U' => {
-                        state = .NumLitIntSuffixU;
-                        result.num_lit_suffix = .U;
-                        result.bytes = chars[begin_index..i.*];
-                    },
-                    'l', 'L' => {
-                        state = .NumLitIntSuffixL;
-                        result.num_lit_suffix = .L;
-                        result.bytes = chars[begin_index..i.*];
-                    },
-                    else => {
-                        i.* -= 1;
-                        state = .Octal;
-                    },
-                }
-            },
-            .Octal => {
-                switch (c) {
-                    '0'...'7' => {},
-                    '8', '9' => {
-                        try failDecl(ctx, loc, name, "macro tokenizing failed: invalid digit '{c}' in octal number", .{c});
-                        return error.TokenizingFailed;
-                    },
-                    'u', 'U' => {
-                        state = .NumLitIntSuffixU;
-                        result.num_lit_suffix = .U;
-                        result.bytes = chars[begin_index..i.*];
-                    },
-                    'l', 'L' => {
-                        state = .NumLitIntSuffixL;
-                        result.num_lit_suffix = .L;
-                        result.bytes = chars[begin_index..i.*];
-                    },
-                    else => {
-                        result.bytes = chars[begin_index..i.*];
-                        return result;
-                    },
-                }
-            },
-            .Hex => {
-                switch (c) {
-                    '0'...'9', 'a'...'f', 'A'...'F' => {},
-                    'u', 'U' => {
-                        // marks the number literal as unsigned
-                        state = .NumLitIntSuffixU;
-                        result.num_lit_suffix = .U;
-                        result.bytes = chars[begin_index..i.*];
-                    },
-                    'l', 'L' => {
-                        // marks the number literal as long
-                        state = .NumLitIntSuffixL;
-                        result.num_lit_suffix = .L;
-                        result.bytes = chars[begin_index..i.*];
-                    },
-                    else => {
-                        result.bytes = chars[begin_index..i.*];
-                        return result;
-                    },
-                }
-            },
-            .Bin => {
-                switch (c) {
-                    '0'...'1' => {},
-                    '2'...'9' => {
-                        try failDecl(ctx, loc, name, "macro tokenizing failed: invalid digit '{c}' in binary number", .{c});
-                        return error.TokenizingFailed;
-                    },
-                    'u', 'U' => {
-                        // marks the number literal as unsigned
-                        state = .NumLitIntSuffixU;
-                        result.num_lit_suffix = .U;
-                        result.bytes = chars[begin_index..i.*];
-                    },
-                    'l', 'L' => {
-                        // marks the number literal as long
-                        state = .NumLitIntSuffixL;
-                        result.num_lit_suffix = .L;
-                        result.bytes = chars[begin_index..i.*];
-                    },
-                    else => {
-                        result.bytes = chars[begin_index..i.*];
-                        return result;
-                    },
-                }
-            },
-            .NumLitIntSuffixU => {
-                switch (c) {
-                    'l', 'L' => {
-                        result.num_lit_suffix = .LU;
-                        state = .NumLitIntSuffixUL;
-                    },
-                    else => {
-                        return result;
-                    },
-                }
-            },
-            .NumLitIntSuffixL => {
-                switch (c) {
-                    'l', 'L' => {
-                        result.num_lit_suffix = .LL;
-                        state = .NumLitIntSuffixLL;
-                    },
-                    'u', 'U' => {
-                        result.num_lit_suffix = .LU;
-                        state = .Done;
-                    },
-                    else => {
-                        return result;
-                    },
-                }
-            },
-            .NumLitIntSuffixLL => {
-                switch (c) {
-                    'u', 'U' => {
-                        result.num_lit_suffix = .LLU;
-                        state = .Done;
-                    },
-                    else => {
-                        return result;
-                    },
-                }
-            },
-            .NumLitIntSuffixUL => {
-                switch (c) {
-                    'l', 'L' => {
-                        result.num_lit_suffix = .LLU;
-                        state = .Done;
-                    },
-                    else => {
-                        return result;
-                    },
-                }
-            },
-            .Identifier => {
-                switch (c) {
-                    '_', 'a'...'z', 'A'...'Z', '0'...'9' => {},
-                    else => {
-                        result.bytes = chars[begin_index..i.*];
-                        return result;
-                    },
-                }
-            },
-            .String => {
-                switch (c) {
-                    '\"' => {
-                        result.bytes = chars[begin_index .. i.* + 1];
-                        state = .Done;
-                    },
-                    else => {},
-                }
-            },
-            .CharLit => {
-                switch (c) {
-                    '\'' => {
-                        result.bytes = chars[begin_index .. i.* + 1];
-                        state = .Done;
-                    },
-                    else => {},
-                }
-            },
-            .OpenComment => {
-                switch (c) {
-                    '/' => {
-                        return result;
-                    },
-                    '*' => {
-                        state = .Comment;
-                    },
-                    else => {
-                        result.id = .Slash;
-                        state = .Done;
-                    },
-                }
-            },
-            .Comment => {
-                switch (c) {
-                    '*' => {
-                        state = .CommentStar;
-                    },
-                    else => {},
-                }
-            },
-            .CommentStar => {
-                switch (c) {
-                    '/' => {
-                        state = .Start;
-                    },
-                    else => {
-                        state = .Comment;
-                    },
-                }
-            },
-            .Backslash => {
-                switch (c) {
-                    ' ', '\t', '\x0B', '\x0C' => {},
-                    '\n', '\r' => {
-                        state = .Start;
-                    },
-                    else => {
-                        try failDecl(ctx, loc, name, "macro tokenizing failed: expected whitespace", .{});
-                        return error.TokenizingFailed;
-                    },
-                }
-            },
-        }
-        i.* += 1;
-    }
-    unreachable;
-}
-
-fn expectTokens(tl: *TokenList, src: [*:0]const u8, expected: []CToken) void {
-    // these can be undefined since they are only used for error reporting
-    tokenizeCMacro(undefined, undefined, undefined, tl, src) catch unreachable;
-    var it = tl.iterator(0);
-    for (expected) |t| {
-        var tok = it.next().?;
-        std.testing.expectEqual(t.id, tok.id);
-        if (t.bytes.len > 0) {
-            //std.debug.warn("  {} = {}\n", .{tok.bytes, t.bytes});
-            std.testing.expectEqualSlices(u8, tok.bytes, t.bytes);
-        }
-        if (t.num_lit_suffix != .None) {
-            std.testing.expectEqual(t.num_lit_suffix, tok.num_lit_suffix);
-        }
-    }
-    std.testing.expect(it.next() == null);
-    tl.shrink(0);
-}
-
-test "tokenize macro" {
-    var tl = TokenList.init(std.testing.allocator);
-    defer tl.deinit();
-
-    expectTokens(&tl, "TEST(0\n", &[_]CToken{
-        .{ .id = .Identifier, .bytes = "TEST" },
-        .{ .id = .Fn },
-        .{ .id = .LParen },
-        .{ .id = .NumLitInt, .bytes = "0" },
-        .{ .id = .Eof },
-    });
-
-    expectTokens(&tl, "__FLT_MIN_10_EXP__ -37\n", &[_]CToken{
-        .{ .id = .Identifier, .bytes = "__FLT_MIN_10_EXP__" },
-        .{ .id = .Minus },
-        .{ .id = .NumLitInt, .bytes = "37" },
-        .{ .id = .Eof },
-    });
-
-    expectTokens(&tl, "__llvm__ 1\n#define", &[_]CToken{
-        .{ .id = .Identifier, .bytes = "__llvm__" },
-        .{ .id = .NumLitInt, .bytes = "1" },
-        .{ .id = .Eof },
-    });
-
-    expectTokens(&tl, "TEST 2", &[_]CToken{
-        .{ .id = .Identifier, .bytes = "TEST" },
-        .{ .id = .NumLitInt, .bytes = "2" },
-        .{ .id = .Eof },
-    });
-
-    expectTokens(&tl, "FOO 0ull", &[_]CToken{
-        .{ .id = .Identifier, .bytes = "FOO" },
-        .{ .id = .NumLitInt, .bytes = "0", .num_lit_suffix = .LLU },
-        .{ .id = .Eof },
-    });
-}
-
-test "tokenize macro ops" {
-    var tl = TokenList.init(std.testing.allocator);
-    defer tl.deinit();
-
-    expectTokens(&tl, "ADD A + B", &[_]CToken{
-        .{ .id = .Identifier, .bytes = "ADD" },
-        .{ .id = .Identifier, .bytes = "A" },
-        .{ .id = .Plus },
-        .{ .id = .Identifier, .bytes = "B" },
-        .{ .id = .Eof },
-    });
-
-    expectTokens(&tl, "ADD (A) + B", &[_]CToken{
-        .{ .id = .Identifier, .bytes = "ADD" },
-        .{ .id = .LParen },
-        .{ .id = .Identifier, .bytes = "A" },
-        .{ .id = .RParen },
-        .{ .id = .Plus },
-        .{ .id = .Identifier, .bytes = "B" },
-        .{ .id = .Eof },
-    });
-
-    expectTokens(&tl, "ADD (A) + B", &[_]CToken{
-        .{ .id = .Identifier, .bytes = "ADD" },
-        .{ .id = .LParen },
-        .{ .id = .Identifier, .bytes = "A" },
-        .{ .id = .RParen },
-        .{ .id = .Plus },
-        .{ .id = .Identifier, .bytes = "B" },
-        .{ .id = .Eof },
-    });
-}
-
-test "escape sequences" {
-    var buf: [1024]u8 = undefined;
-    var alloc = std.heap.FixedBufferAllocator.init(buf[0..]);
-    const a = &alloc.allocator;
-    // these can be undefined since they are only used for error reporting
-    expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{
-        .id = .StrLit,
-        .bytes = "\\x0077",
-    })).bytes, "\\x77"));
-    expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{
-        .id = .StrLit,
-        .bytes = "\\24500",
-    })).bytes, "\\xa500"));
-    expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{
-        .id = .StrLit,
-        .bytes = "\\x0077 abc",
-    })).bytes, "\\x77 abc"));
-    expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{
-        .id = .StrLit,
-        .bytes = "\\045abc",
-    })).bytes, "\\x25abc"));
-
-    expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{
-        .id = .CharLit,
-        .bytes = "\\0",
-    })).bytes, "\\x00"));
-    expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{
-        .id = .CharLit,
-        .bytes = "\\00",
-    })).bytes, "\\x00"));
-    expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{
-        .id = .CharLit,
-        .bytes = "\\000\\001",
-    })).bytes, "\\x00\\x01"));
-    expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{
-        .id = .CharLit,
-        .bytes = "\\000abc",
-    })).bytes, "\\x00abc"));
-}
src-self-hosted/translate_c.zig
@@ -6,8 +6,9 @@ const assert = std.debug.assert;
 const ast = std.zig.ast;
 const Token = std.zig.Token;
 usingnamespace @import("clang.zig");
-const ctok = @import("c_tokenizer.zig");
-const CToken = ctok.CToken;
+const ctok = std.c.tokenizer;
+const CToken = std.c.Token;
+const CTokenList = std.c.tokenizer.Source.TokenList;
 const mem = std.mem;
 const math = std.math;
 
@@ -4818,7 +4819,7 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
     // TODO if we see #undef, delete it from the table
     var it = ZigClangASTUnit_getLocalPreprocessingEntities_begin(unit);
     const it_end = ZigClangASTUnit_getLocalPreprocessingEntities_end(unit);
-    var tok_list = ctok.TokenList.init(c.a());
+    var tok_list = CTokenList.init(c.a());
     const scope = c.global_scope;
 
     while (it.I != it_end.I) : (it.I += 1) {
@@ -4829,6 +4830,7 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
                 const macro = @ptrCast(*ZigClangMacroDefinitionRecord, entity);
                 const raw_name = ZigClangMacroDefinitionRecord_getName_getNameStart(macro);
                 const begin_loc = ZigClangMacroDefinitionRecord_getSourceRange_getBegin(macro);
+                // const end_loc = ZigClangMacroDefinitionRecord_getSourceRange_getEnd(macro);
 
                 const name = try c.str(raw_name);
                 // TODO https://github.com/ziglang/zig/issues/3756
@@ -4839,42 +4841,61 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
                 }
 
                 const begin_c = ZigClangSourceManager_getCharacterData(c.source_manager, begin_loc);
-                ctok.tokenizeCMacro(c, begin_loc, mangled_name, &tok_list, begin_c) catch |err| switch (err) {
-                    error.OutOfMemory => |e| return e,
-                    else => {
-                        continue;
+                // const end_c = ZigClangSourceManager_getCharacterData(c.source_manager, end_loc);
+                // const slice = begin_c[0 .. @ptrToInt(end_c) - @ptrToInt(begin_c)];
+                const slice = begin_c[0..mem.len(u8, begin_c)];
+
+                tok_list.shrink(0);
+                var tokenizer = std.c.Tokenizer{
+                    .source = &std.c.tokenizer.Source{
+                        .buffer = slice,
+                        .file_name = undefined,
+                        .tokens = undefined,
                     },
                 };
+                while (true) {
+                    const tok = tokenizer.next();
+                    switch (tok.id) {
+                        .Nl, .Eof => {
+                            try tok_list.push(tok);
+                            break;
+                        },
+                        .LineComment, .MultiLineComment => continue,
+                        else => {},
+                    }
+                    try tok_list.push(tok);
+                }
 
                 var tok_it = tok_list.iterator(0);
                 const first_tok = tok_it.next().?;
-                assert(first_tok.id == .Identifier and mem.eql(u8, first_tok.bytes, name));
+                assert(first_tok.id == .Identifier and mem.eql(u8, slice[first_tok.start..first_tok.end], name));
+
+                var macro_fn = false;
                 const next = tok_it.peek().?;
                 switch (next.id) {
                     .Identifier => {
                         // if it equals itself, ignore. for example, from stdio.h:
                         // #define stdin stdin
-                        if (mem.eql(u8, name, next.bytes)) {
+                        if (mem.eql(u8, name, slice[next.start..next.end])) {
                             continue;
                         }
                     },
-                    .Eof => {
+                    .Nl, .Eof => {
                         // this means it is a macro without a value
                         // we don't care about such things
                         continue;
                     },
+                    .LParen => {
+                        // if the name is immediately followed by a '(' then it is a function
+                        macro_fn = first_tok.end == next.start;
+                    },
                     else => {},
                 }
 
-                const macro_fn = if (tok_it.peek().?.id == .Fn) blk: {
-                    _ = tok_it.next();
-                    break :blk true;
-                } else false;
-
                 (if (macro_fn)
-                    transMacroFnDefine(c, &tok_it, mangled_name, begin_loc)
+                    transMacroFnDefine(c, &tok_it, slice, mangled_name, begin_loc)
                 else
-                    transMacroDefine(c, &tok_it, mangled_name, begin_loc)) catch |err| switch (err) {
+                    transMacroDefine(c, &tok_it, slice, mangled_name, begin_loc)) catch |err| switch (err) {
                     error.ParseError => continue,
                     error.OutOfMemory => |e| return e,
                 };
@@ -4884,15 +4905,15 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
     }
 }
 
-fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
+fn transMacroDefine(c: *Context, it: *CTokenList.Iterator, source: []const u8, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
     const scope = &c.global_scope.base;
 
     const node = try transCreateNodeVarDecl(c, true, true, name);
     node.eq_token = try appendToken(c, .Equal, "=");
 
-    node.init_node = try parseCExpr(c, it, source_loc, scope);
+    node.init_node = try parseCExpr(c, it, source, source_loc, scope);
     const last = it.next().?;
-    if (last.id != .Eof)
+    if (last.id != .Eof and last.id != .Nl)
         return failDecl(
             c,
             source_loc,
@@ -4905,7 +4926,7 @@ fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8,
     _ = try c.global_scope.macro_table.put(name, &node.base);
 }
 
-fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
+fn transMacroFnDefine(c: *Context, it: *CTokenList.Iterator, source: []const u8, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
     const block_scope = try Scope.Block.init(c, &c.global_scope.base, null);
     const scope = &block_scope.base;
 
@@ -4937,7 +4958,7 @@ fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u
             );
         }
 
-        const mangled_name = try block_scope.makeMangledName(c, param_tok.bytes);
+        const mangled_name = try block_scope.makeMangledName(c, source[param_tok.start..param_tok.end]);
         const param_name_tok = try appendIdentifier(c, mangled_name);
         _ = try appendToken(c, .Colon, ":");
 
@@ -5000,7 +5021,7 @@ fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u
     const block = try transCreateNodeBlock(c, null);
 
     const return_expr = try transCreateNodeReturnExpr(c);
-    const expr = try parseCExpr(c, it, source_loc, scope);
+    const expr = try parseCExpr(c, it, source, source_loc, scope);
     const last = it.next().?;
     if (last.id != .Eof)
         return failDecl(
@@ -5022,27 +5043,28 @@ fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u
 
 const ParseError = Error || error{ParseError};
 
-fn parseCExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node {
-    const node = try parseCPrefixOpExpr(c, it, source_loc, scope);
+fn parseCExpr(c: *Context, it: *CTokenList.Iterator, source: []const u8, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node {
+    const node = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
     switch (it.next().?.id) {
         .QuestionMark => {
             // must come immediately after expr
             _ = try appendToken(c, .RParen, ")");
             const if_node = try transCreateNodeIf(c);
             if_node.condition = node;
-            if_node.body = try parseCPrimaryExpr(c, it, source_loc, scope);
+            if_node.body = try parseCPrimaryExpr(c, it, source, source_loc, scope);
             if (it.next().?.id != .Colon) {
+                const first_tok = it.list.at(0);
                 try failDecl(
                     c,
                     source_loc,
-                    it.list.at(0).*.bytes,
+                    source[first_tok.start..first_tok.end],
                     "unable to translate C expr: expected ':'",
                     .{},
                 );
                 return error.ParseError;
             }
             if_node.@"else" = try transCreateNodeElse(c);
-            if_node.@"else".?.body = try parseCPrimaryExpr(c, it, source_loc, scope);
+            if_node.@"else".?.body = try parseCPrimaryExpr(c, it, source, source_loc, scope);
             return &if_node.base;
         },
         else => {
@@ -5052,30 +5074,30 @@ fn parseCExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigClangSou
     }
 }
 
-fn parseCNumLit(c: *Context, tok: *CToken, source_loc: ZigClangSourceLocation) ParseError!*ast.Node {
-    if (tok.id == .NumLitInt) {
-        var lit_bytes = tok.bytes;
+fn parseCNumLit(c: *Context, tok: *CToken, source: []const u8, source_loc: ZigClangSourceLocation) ParseError!*ast.Node {
+    var lit_bytes = source[tok.start..tok.end];
 
-        if (tok.bytes.len > 2 and tok.bytes[0] == '0') {
-            switch (tok.bytes[1]) {
+    if (tok.id == .IntegerLiteral) {
+        if (lit_bytes.len > 2 and lit_bytes[0] == '0') {
+            switch (lit_bytes[1]) {
                 '0'...'7' => {
                     // Octal
-                    lit_bytes = try std.fmt.allocPrint(c.a(), "0o{}", .{tok.bytes});
+                    lit_bytes = try std.fmt.allocPrint(c.a(), "0o{}", .{lit_bytes});
                 },
                 'X' => {
                     // Hexadecimal with capital X, valid in C but not in Zig
-                    lit_bytes = try std.fmt.allocPrint(c.a(), "0x{}", .{tok.bytes[2..]});
+                    lit_bytes = try std.fmt.allocPrint(c.a(), "0x{}", .{lit_bytes[2..]});
                 },
                 else => {},
             }
         }
 
-        if (tok.num_lit_suffix == .None) {
+        if (tok.id.IntegerLiteral == .None) {
             return transCreateNodeInt(c, lit_bytes);
         }
 
         const cast_node = try transCreateNodeBuiltinFnCall(c, "@as");
-        try cast_node.params.push(try transCreateNodeIdentifier(c, switch (tok.num_lit_suffix) {
+        try cast_node.params.push(try transCreateNodeIdentifier(c, switch (tok.id.IntegerLiteral) {
             .U => "c_uint",
             .L => "c_long",
             .LU => "c_ulong",
@@ -5083,55 +5105,216 @@ fn parseCNumLit(c: *Context, tok: *CToken, source_loc: ZigClangSourceLocation) P
             .LLU => "c_ulonglong",
             else => unreachable,
         }));
+        lit_bytes = lit_bytes[0 .. lit_bytes.len - switch (tok.id.IntegerLiteral) {
+            .U, .L => @as(u8, 1),
+            .LU, .LL => 2,
+            .LLU => 3,
+            else => unreachable,
+        }];
         _ = try appendToken(c, .Comma, ",");
         try cast_node.params.push(try transCreateNodeInt(c, lit_bytes));
         cast_node.rparen_token = try appendToken(c, .RParen, ")");
         return &cast_node.base;
-    } else if (tok.id == .NumLitFloat) {
-        if (tok.num_lit_suffix == .None) {
-            return transCreateNodeFloat(c, tok.bytes);
+    } else if (tok.id == .FloatLiteral) {
+        if (tok.id.FloatLiteral == .None) {
+            return transCreateNodeFloat(c, lit_bytes);
         }
         const cast_node = try transCreateNodeBuiltinFnCall(c, "@as");
-        try cast_node.params.push(try transCreateNodeIdentifier(c, switch (tok.num_lit_suffix) {
+        try cast_node.params.push(try transCreateNodeIdentifier(c, switch (tok.id.FloatLiteral) {
             .F => "f32",
             .L => "f64",
             else => unreachable,
         }));
         _ = try appendToken(c, .Comma, ",");
-        try cast_node.params.push(try transCreateNodeFloat(c, tok.bytes));
+        try cast_node.params.push(try transCreateNodeFloat(c, lit_bytes[0 .. lit_bytes.len - 1]));
         cast_node.rparen_token = try appendToken(c, .RParen, ")");
         return &cast_node.base;
     } else unreachable;
 }
 
-fn parseCPrimaryExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node {
+fn zigifyEscapeSequences(ctx: *Context, source: []const u8, name: []const u8, source_loc: ZigClangSourceLocation) ![]const u8 {
+    for (source) |c| {
+        if (c == '\\') {
+            break;
+        }
+    } else return source;
+    var bytes = try ctx.a().alloc(u8, source.len * 2);
+    var state: enum {
+        Start,
+        Escape,
+        Hex,
+        Octal,
+    } = .Start;
+    var i: usize = 0;
+    var count: u8 = 0;
+    var num: u8 = 0;
+    for (source) |c| {
+        switch (state) {
+            .Escape => {
+                switch (c) {
+                    'n', 'r', 't', '\\', '\'', '\"' => {
+                        bytes[i] = c;
+                    },
+                    '0'...'7' => {
+                        count += 1;
+                        num += c - '0';
+                        state = .Octal;
+                        bytes[i] = 'x';
+                    },
+                    'x' => {
+                        state = .Hex;
+                        bytes[i] = 'x';
+                    },
+                    'a' => {
+                        bytes[i] = 'x';
+                        i += 1;
+                        bytes[i] = '0';
+                        i += 1;
+                        bytes[i] = '7';
+                    },
+                    'b' => {
+                        bytes[i] = 'x';
+                        i += 1;
+                        bytes[i] = '0';
+                        i += 1;
+                        bytes[i] = '8';
+                    },
+                    'f' => {
+                        bytes[i] = 'x';
+                        i += 1;
+                        bytes[i] = '0';
+                        i += 1;
+                        bytes[i] = 'C';
+                    },
+                    'v' => {
+                        bytes[i] = 'x';
+                        i += 1;
+                        bytes[i] = '0';
+                        i += 1;
+                        bytes[i] = 'B';
+                    },
+                    '?' => {
+                        i -= 1;
+                        bytes[i] = '?';
+                    },
+                    'u', 'U' => {
+                        try failDecl(ctx, source_loc, name, "macro tokenizing failed: TODO unicode escape sequences", .{});
+                        return error.ParseError;
+                    },
+                    else => {
+                        try failDecl(ctx, source_loc, name, "macro tokenizing failed: unknown escape sequence", .{});
+                        return error.ParseError;
+                    },
+                }
+                i += 1;
+                if (state == .Escape)
+                    state = .Start;
+            },
+            .Start => {
+                if (c == '\\') {
+                    state = .Escape;
+                }
+                bytes[i] = c;
+                i += 1;
+            },
+            .Hex => {
+                switch (c) {
+                    '0'...'9' => {
+                        num = std.math.mul(u8, num, 16) catch {
+                            try failDecl(ctx, source_loc, name, "macro tokenizing failed: hex literal overflowed", .{});
+                            return error.ParseError;
+                        };
+                        num += c - '0';
+                    },
+                    'a'...'f' => {
+                        num = std.math.mul(u8, num, 16) catch {
+                            try failDecl(ctx, source_loc, name, "macro tokenizing failed: hex literal overflowed", .{});
+                            return error.ParseError;
+                        };
+                        num += c - 'a' + 10;
+                    },
+                    'A'...'F' => {
+                        num = std.math.mul(u8, num, 16) catch {
+                            try failDecl(ctx, source_loc, name, "macro tokenizing failed: hex literal overflowed", .{});
+                            return error.ParseError;
+                        };
+                        num += c - 'A' + 10;
+                    },
+                    else => {
+                        i += std.fmt.formatIntBuf(bytes[i..], num, 16, false, std.fmt.FormatOptions{ .fill = '0', .width = 2 });
+                        num = 0;
+                        if (c == '\\')
+                            state = .Escape
+                        else
+                            state = .Start;
+                        bytes[i] = c;
+                        i += 1;
+                    },
+                }
+            },
+            .Octal => {
+                const accept_digit = switch (c) {
+                    // The maximum length of an octal literal is 3 digits
+                    '0'...'7' => count < 3,
+                    else => false,
+                };
+
+                if (accept_digit) {
+                    count += 1;
+                    num = std.math.mul(u8, num, 8) catch {
+                        try failDecl(ctx, source_loc, name, "macro tokenizing failed: octal literal overflowed", .{});
+                        return error.ParseError;
+                    };
+                    num += c - '0';
+                } else {
+                    i += std.fmt.formatIntBuf(bytes[i..], num, 16, false, std.fmt.FormatOptions{ .fill = '0', .width = 2 });
+                    num = 0;
+                    count = 0;
+                    if (c == '\\')
+                        state = .Escape
+                    else
+                        state = .Start;
+                    bytes[i] = c;
+                    i += 1;
+                }
+            },
+        }
+    }
+    if (state == .Hex or state == .Octal)
+        i += std.fmt.formatIntBuf(bytes[i..], num, 16, false, std.fmt.FormatOptions{ .fill = '0', .width = 2 });
+    return bytes[0..i];
+}
+
+fn parseCPrimaryExpr(c: *Context, it: *CTokenList.Iterator, source: []const u8, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node {
     const tok = it.next().?;
     switch (tok.id) {
-        .CharLit => {
-            const token = try appendToken(c, .CharLiteral, tok.bytes);
+        .CharLiteral => {
+            const first_tok = it.list.at(0);
+            const token = try appendToken(c, .CharLiteral, try zigifyEscapeSequences(c, source[tok.start..tok.end], source[first_tok.start..first_tok.end], source_loc));
             const node = try c.a().create(ast.Node.CharLiteral);
             node.* = ast.Node.CharLiteral{
                 .token = token,
             };
             return &node.base;
         },
-        .StrLit => {
-            const token = try appendToken(c, .StringLiteral, tok.bytes);
+        .StringLiteral => {
+            const first_tok = it.list.at(0);
+            const token = try appendToken(c, .StringLiteral, try zigifyEscapeSequences(c, source[tok.start..tok.end], source[first_tok.start..first_tok.end], source_loc));
             const node = try c.a().create(ast.Node.StringLiteral);
             node.* = ast.Node.StringLiteral{
                 .token = token,
             };
             return &node.base;
         },
-        .NumLitInt, .NumLitFloat => {
-            return parseCNumLit(c, tok, source_loc);
+        .IntegerLiteral, .FloatLiteral => {
+            return parseCNumLit(c, tok, source, source_loc);
         },
         .Identifier => {
-            const mangled_name = scope.getAlias(tok.bytes);
+            const mangled_name = scope.getAlias(source[tok.start..tok.end]);
             return transCreateNodeIdentifier(c, mangled_name);
         },
         .LParen => {
-            const inner_node = try parseCExpr(c, it, source_loc, scope);
+            const inner_node = try parseCExpr(c, it, source, source_loc, scope);
 
             if (it.peek().?.id == .RParen) {
                 _ = it.next();
@@ -5144,13 +5327,14 @@ fn parseCPrimaryExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigC
             // hack to get zig fmt to render a comma in builtin calls
             _ = try appendToken(c, .Comma, ",");
 
-            const node_to_cast = try parseCExpr(c, it, source_loc, scope);
+            const node_to_cast = try parseCExpr(c, it, source, source_loc, scope);
 
             if (it.next().?.id != .RParen) {
+                const first_tok = it.list.at(0);
                 try failDecl(
                     c,
                     source_loc,
-                    it.list.at(0).*.bytes,
+                    source[first_tok.start..first_tok.end],
                     "unable to translate C expr: expected ')''",
                     .{},
                 );
@@ -5228,10 +5412,11 @@ fn parseCPrimaryExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigC
             return &if_1.base;
         },
         else => {
+            const first_tok = it.list.at(0);
             try failDecl(
                 c,
                 source_loc,
-                it.list.at(0).*.bytes,
+                source[first_tok.start..first_tok.end],
                 "unable to translate C expr: unexpected token {}",
                 .{tok.id},
             );
@@ -5240,33 +5425,35 @@ fn parseCPrimaryExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigC
     }
 }
 
-fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node {
-    var node = try parseCPrimaryExpr(c, it, source_loc, scope);
+fn parseCSuffixOpExpr(c: *Context, it: *CTokenList.Iterator, source: []const u8, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node {
+    var node = try parseCPrimaryExpr(c, it, source, source_loc, scope);
     while (true) {
         const tok = it.next().?;
         switch (tok.id) {
-            .Dot => {
+            .Period => {
                 const name_tok = it.next().?;
                 if (name_tok.id != .Identifier) {
+                    const first_tok = it.list.at(0);
                     try failDecl(
                         c,
                         source_loc,
-                        it.list.at(0).*.bytes,
+                        source[first_tok.start..first_tok.end],
                         "unable to translate C expr: expected identifier",
                         .{},
                     );
                     return error.ParseError;
                 }
 
-                node = try transCreateNodeFieldAccess(c, node, name_tok.bytes);
+                node = try transCreateNodeFieldAccess(c, node, source[name_tok.start..name_tok.end]);
             },
             .Arrow => {
                 const name_tok = it.next().?;
                 if (name_tok.id != .Identifier) {
+                    const first_tok = it.list.at(0);
                     try failDecl(
                         c,
                         source_loc,
-                        it.list.at(0).*.bytes,
+                        source[first_tok.start..first_tok.end],
                         "unable to translate C expr: expected identifier",
                         .{},
                     );
@@ -5274,7 +5461,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
                 }
 
                 const deref = try transCreateNodePtrDeref(c, node);
-                node = try transCreateNodeFieldAccess(c, deref, name_tok.bytes);
+                node = try transCreateNodeFieldAccess(c, deref, source[name_tok.start..name_tok.end]);
             },
             .Asterisk => {
                 if (it.peek().?.id == .RParen) {
@@ -5289,7 +5476,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
                 } else {
                     // expr * expr
                     const op_token = try appendToken(c, .Asterisk, "*");
-                    const rhs = try parseCPrimaryExpr(c, it, source_loc, scope);
+                    const rhs = try parseCPrimaryExpr(c, it, source, source_loc, scope);
                     const mul_node = try c.a().create(ast.Node.InfixOp);
                     mul_node.* = .{
                         .op_token = op_token,
@@ -5300,9 +5487,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
                     node = &mul_node.base;
                 }
             },
-            .Shl => {
+            .AngleBracketAngleBracketLeft => {
                 const op_token = try appendToken(c, .AngleBracketAngleBracketLeft, "<<");
-                const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                 const bitshift_node = try c.a().create(ast.Node.InfixOp);
                 bitshift_node.* = .{
                     .op_token = op_token,
@@ -5312,9 +5499,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
                 };
                 node = &bitshift_node.base;
             },
-            .Shr => {
+            .AngleBracketAngleBracketRight => {
                 const op_token = try appendToken(c, .AngleBracketAngleBracketRight, ">>");
-                const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                 const bitshift_node = try c.a().create(ast.Node.InfixOp);
                 bitshift_node.* = .{
                     .op_token = op_token,
@@ -5326,7 +5513,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
             },
             .Pipe => {
                 const op_token = try appendToken(c, .Pipe, "|");
-                const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                 const or_node = try c.a().create(ast.Node.InfixOp);
                 or_node.* = .{
                     .op_token = op_token,
@@ -5338,7 +5525,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
             },
             .Ampersand => {
                 const op_token = try appendToken(c, .Ampersand, "&");
-                const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                 const bitand_node = try c.a().create(ast.Node.InfixOp);
                 bitand_node.* = .{
                     .op_token = op_token,
@@ -5350,7 +5537,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
             },
             .Plus => {
                 const op_token = try appendToken(c, .Plus, "+");
-                const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                 const add_node = try c.a().create(ast.Node.InfixOp);
                 add_node.* = .{
                     .op_token = op_token,
@@ -5362,7 +5549,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
             },
             .Minus => {
                 const op_token = try appendToken(c, .Minus, "-");
-                const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                 const sub_node = try c.a().create(ast.Node.InfixOp);
                 sub_node.* = .{
                     .op_token = op_token,
@@ -5372,9 +5559,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
                 };
                 node = &sub_node.base;
             },
-            .And => {
+            .AmpersandAmpersand => {
                 const op_token = try appendToken(c, .Keyword_and, "and");
-                const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                 const and_node = try c.a().create(ast.Node.InfixOp);
                 and_node.* = .{
                     .op_token = op_token,
@@ -5384,9 +5571,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
                 };
                 node = &and_node.base;
             },
-            .Or => {
+            .PipePipe => {
                 const op_token = try appendToken(c, .Keyword_or, "or");
-                const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                 const or_node = try c.a().create(ast.Node.InfixOp);
                 or_node.* = .{
                     .op_token = op_token,
@@ -5396,9 +5583,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
                 };
                 node = &or_node.base;
             },
-            .Gt => {
+            .AngleBracketRight => {
                 const op_token = try appendToken(c, .AngleBracketRight, ">");
-                const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                 const and_node = try c.a().create(ast.Node.InfixOp);
                 and_node.* = .{
                     .op_token = op_token,
@@ -5408,9 +5595,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
                 };
                 node = &and_node.base;
             },
-            .Gte => {
+            .AngleBracketRightEqual => {
                 const op_token = try appendToken(c, .AngleBracketRightEqual, ">=");
-                const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                 const and_node = try c.a().create(ast.Node.InfixOp);
                 and_node.* = .{
                     .op_token = op_token,
@@ -5420,9 +5607,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
                 };
                 node = &and_node.base;
             },
-            .Lt => {
+            .AngleBracketLeft => {
                 const op_token = try appendToken(c, .AngleBracketLeft, "<");
-                const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                 const and_node = try c.a().create(ast.Node.InfixOp);
                 and_node.* = .{
                     .op_token = op_token,
@@ -5432,9 +5619,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
                 };
                 node = &and_node.base;
             },
-            .Lte => {
+            .AngleBracketLeftEqual => {
                 const op_token = try appendToken(c, .AngleBracketLeftEqual, "<=");
-                const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                 const and_node = try c.a().create(ast.Node.InfixOp);
                 and_node.* = .{
                     .op_token = op_token,
@@ -5446,14 +5633,15 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
             },
             .LBrace => {
                 const arr_node = try transCreateNodeArrayAccess(c, node);
-                arr_node.op.ArrayAccess = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                arr_node.op.ArrayAccess = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                 arr_node.rtoken = try appendToken(c, .RBrace, "]");
                 node = &arr_node.base;
                 if (it.next().?.id != .RBrace) {
+                    const first_tok = it.list.at(0);
                     try failDecl(
                         c,
                         source_loc,
-                        it.list.at(0).*.bytes,
+                        source[first_tok.start..first_tok.end],
                         "unable to translate C expr: expected ']'",
                         .{},
                     );
@@ -5463,7 +5651,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
             .LParen => {
                 const call_node = try transCreateNodeFnCall(c, node);
                 while (true) {
-                    const arg = try parseCPrefixOpExpr(c, it, source_loc, scope);
+                    const arg = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
                     try call_node.op.Call.params.push(arg);
                     const next = it.next().?;
                     if (next.id == .Comma)
@@ -5471,10 +5659,11 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
                     else if (next.id == .RParen)
                         break
                     else {
+                        const first_tok = it.list.at(0);
                         try failDecl(
                             c,
                             source_loc,
-                            it.list.at(0).*.bytes,
+                            source[first_tok.start..first_tok.end],
                             "unable to translate C expr: expected ',' or ')'",
                             .{},
                         );
@@ -5492,32 +5681,32 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig
     }
 }
 
-fn parseCPrefixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node {
+fn parseCPrefixOpExpr(c: *Context, it: *CTokenList.Iterator, source: []const u8, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node {
     const op_tok = it.next().?;
 
     switch (op_tok.id) {
         .Bang => {
             const node = try transCreateNodePrefixOp(c, .BoolNot, .Bang, "!");
-            node.rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+            node.rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
             return &node.base;
         },
         .Minus => {
             const node = try transCreateNodePrefixOp(c, .Negation, .Minus, "-");
-            node.rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+            node.rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
             return &node.base;
         },
         .Tilde => {
             const node = try transCreateNodePrefixOp(c, .BitNot, .Tilde, "~");
-            node.rhs = try parseCPrefixOpExpr(c, it, source_loc, scope);
+            node.rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
             return &node.base;
         },
         .Asterisk => {
-            const prefix_op_expr = try parseCPrefixOpExpr(c, it, source_loc, scope);
+            const prefix_op_expr = try parseCPrefixOpExpr(c, it, source, source_loc, scope);
             return try transCreateNodePtrDeref(c, prefix_op_expr);
         },
         else => {
             _ = it.prev();
-            return try parseCSuffixOpExpr(c, it, source_loc, scope);
+            return try parseCSuffixOpExpr(c, it, source, source_loc, scope);
         },
     }
 }