Commit ab60c8e28f

Vexu <git@vexu.eu>
2019-12-15 23:22:41
c tokenizer escape sequences
1 parent 9f0e83a
Changed files (4)
src-self-hosted/c_tokenizer.zig
@@ -40,12 +40,15 @@ pub const CToken = struct {
     };
 };
 
-pub fn tokenizeCMacro(tl: *TokenList, chars: [*]const u8) !void {
+pub fn tokenizeCMacro(tl: *TokenList, chars: [*:0]const u8) !void {
     var index: usize = 0;
     var first = true;
     while (true) {
         const tok = try next(chars, &index);
-        try tl.push(tok);
+        if (tok.id == .StrLit or tok.id == .CharLit)
+            try tl.push(try zigifyEscapeSequences(tl.allocator, tok))
+        else
+            try tl.push(tok);
         if (tok.id == .Eof)
             return;
         if (first) {
@@ -61,7 +64,83 @@ pub fn tokenizeCMacro(tl: *TokenList, chars: [*]const u8) !void {
     }
 }
 
-fn next(chars: [*]const u8, i: *usize) !CToken {
+/// Rewrites the C escape sequences in a string or character literal token
+/// into spellings that Zig accepts.
+///
+/// `tok.bytes` is scanned byte by byte. A token containing no backslash is
+/// returned unchanged and nothing is allocated. Otherwise the rewritten text
+/// is stored in a buffer obtained from `allocator`; the returned token keeps
+/// `tok.id` and its `bytes` is a prefix of that buffer, so the full
+/// allocation is larger than the returned slice.
+///
+/// Handling of the byte following a backslash:
+///   * n r t \ ' " x  are copied through unchanged
+///   * a b f v        become \x07 \x08 \x0C \x0B
+///   * ?              loses its backslash
+///   * u U 0-7        are not implemented and give error.TokenizingFailed
+///   * anything else  gives error.TokenizingFailed
+///
+/// Allocation failure is propagated from `allocator.alloc`.
+fn zigifyEscapeSequences(allocator: *std.mem.Allocator, tok: CToken) !CToken {
+    // Fast path: nothing to rewrite, hand the token back without allocating.
+    if (std.mem.indexOfScalar(u8, tok.bytes, '\\') == null) return tok;
+
+    // The largest growth is a two byte escape such as `\a` turning into the
+    // four bytes `\x07`, so twice the input length is always enough room.
+    const bytes = try allocator.alloc(u8, tok.bytes.len * 2);
+    // Every failure below would otherwise drop the buffer without freeing it.
+    errdefer allocator.free(bytes);
+
+    var escape = false;
+    var i: usize = 0;
+    for (tok.bytes) |c| {
+        if (!escape) {
+            // The backslash itself is copied too; the escape branch below
+            // appends after it (or overwrites it in the `\?` case).
+            escape = c == '\\';
+            bytes[i] = c;
+            i += 1;
+            continue;
+        }
+        escape = false;
+        switch (c) {
+            // Copied through; any hex digits after `\x` follow as ordinary bytes.
+            'n', 'r', 't', '\\', '\'', '\"', 'x' => {
+                bytes[i] = c;
+                i += 1;
+            },
+            'a', 'b', 'f', 'v' => {
+                // Zig has no named escape for these control characters.
+                const hex: []const u8 = switch (c) {
+                    'a' => "x07",
+                    'b' => "x08",
+                    'f' => "x0C",
+                    'v' => "x0B",
+                    else => unreachable,
+                };
+                std.mem.copy(u8, bytes[i..], hex);
+                i += hex.len;
+            },
+            '?' => {
+                // `\?` is not a Zig escape: replace the backslash that was
+                // already written with the plain question mark.
+                bytes[i - 1] = '?';
+            },
+            // TODO unicode escape sequences
+            'u', 'U' => return error.TokenizingFailed,
+            // TODO octal escape sequences
+            '0'...'7' => return error.TokenizingFailed,
+            // unknown escape sequence
+            else => return error.TokenizingFailed,
+        }
+    }
+    return CToken{
+        .id = tok.id,
+        .bytes = bytes[0..i],
+    };
+}
+
+fn next(chars: [*:0]const u8, i: *usize) !CToken {
     var state: enum {
         Start,
         GotLt,
@@ -462,7 +541,7 @@ fn next(chars: [*]const u8, i: *usize) !CToken {
             .String => { // TODO char escapes
                 switch (c) {
                     '\"' => {
-                        result.bytes = chars[begin_index + 1 .. i.* - 1];
+                        result.bytes = chars[begin_index..i.*];
                         return result;
                     },
                     else => {},
@@ -471,7 +550,7 @@ fn next(chars: [*]const u8, i: *usize) !CToken {
             .CharLit => {
                 switch (c) {
                     '\'' => {
-                        result.bytes = chars[begin_index + 1 .. i.* - 1];
+                        result.bytes = chars[begin_index..i.*];
                         return result;
                     },
                     else => {},
src-self-hosted/clang.zig
@@ -734,7 +734,7 @@ pub extern fn ZigClangSourceManager_getSpellingLoc(self: ?*const struct_ZigClang
 pub extern fn ZigClangSourceManager_getFilename(self: *const struct_ZigClangSourceManager, SpellingLoc: struct_ZigClangSourceLocation) ?[*:0]const u8;
 pub extern fn ZigClangSourceManager_getSpellingLineNumber(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) c_uint;
 pub extern fn ZigClangSourceManager_getSpellingColumnNumber(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) c_uint;
-pub extern fn ZigClangSourceManager_getCharacterData(self: ?*const struct_ZigClangSourceManager, SL: struct_ZigClangSourceLocation) [*c]const u8;
+pub extern fn ZigClangSourceManager_getCharacterData(self: ?*const struct_ZigClangSourceManager, SL: struct_ZigClangSourceLocation) [*:0]const u8;
 pub extern fn ZigClangASTContext_getPointerType(self: ?*const struct_ZigClangASTContext, T: struct_ZigClangQualType) struct_ZigClangQualType;
 pub extern fn ZigClangASTUnit_getASTContext(self: ?*struct_ZigClangASTUnit) ?*struct_ZigClangASTContext;
 pub extern fn ZigClangASTUnit_getSourceManager(self: *struct_ZigClangASTUnit) *struct_ZigClangSourceManager;
src-self-hosted/translate_c.zig
@@ -2629,9 +2629,9 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
                 } else false;
 
                 (if (macro_fn)
-                    transMacroFnDefine(c, &tok_it, name, begin_c, begin_loc)
+                    transMacroFnDefine(c, &tok_it, name, begin_loc)
                 else
-                    transMacroDefine(c, &tok_it, name, begin_c, begin_loc)) catch |err| switch (err) {
+                    transMacroDefine(c, &tok_it, name, begin_loc)) catch |err| switch (err) {
                     error.UnsupportedTranslation,
                     error.ParseError,
                     => try failDecl(c, begin_loc, name, "unable to translate macro", .{}),
@@ -2643,7 +2643,7 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
     }
 }
 
-fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, char_ptr: [*]const u8, source_loc: ZigClangSourceLocation) ParseError!void {
+fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
     const rp = makeRestorePoint(c);
 
     const visib_tok = try appendToken(c, .Keyword_pub, "pub");
@@ -2674,7 +2674,7 @@ fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8,
     _ = try c.macro_table.put(name, &node.base);
 }
 
-fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, char_ptr: [*]const u8, source_loc: ZigClangSourceLocation) ParseError!void {
+fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
     const rp = makeRestorePoint(c);
     const pub_tok = try appendToken(c, .Keyword_pub, "pub");
     const inline_tok = try appendToken(c, .Keyword_inline, "inline");
@@ -2829,11 +2829,7 @@ fn parseCPrimaryExpr(rp: RestorePoint, it: *ctok.TokenList.Iterator, source_loc:
     const tok = it.next().?;
     switch (tok.id) {
         .CharLit => {
-            const buf = try rp.c.a().alloc(u8, tok.bytes.len + "''".len);
-            buf[0] = '\'';
-            writeEscapedString(buf[1..], tok.bytes);
-            buf[buf.len - 1] = '\'';
-            const token = try appendToken(rp.c, .CharLiteral, buf);
+            const token = try appendToken(rp.c, .CharLiteral, tok.bytes);
             const node = try rp.c.a().create(ast.Node.CharLiteral);
             node.* = ast.Node.CharLiteral{
                 .token = token,
@@ -2841,11 +2837,7 @@ fn parseCPrimaryExpr(rp: RestorePoint, it: *ctok.TokenList.Iterator, source_loc:
             return &node.base;
         },
         .StrLit => {
-            const buf = try rp.c.a().alloc(u8, tok.bytes.len + "\"\"".len);
-            buf[0] = '"';
-            writeEscapedString(buf[1..], tok.bytes);
-            buf[buf.len - 1] = '"';
-            const token = try appendToken(rp.c, .StringLiteral, buf);
+            const token = try appendToken(rp.c, .StringLiteral, tok.bytes);
             const node = try rp.c.a().create(ast.Node.StringLiteral);
             node.* = ast.Node.StringLiteral{
                 .token = token,
test/translate_c.zig
@@ -411,6 +411,15 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\}
     });
 
+    cases.add_2("macro escape sequences",
+        \\#define FOO "aoeu\xab derp"
+        \\#define FOO2 "aoeu\a derp"
+    , &[_][]const u8{
+        \\pub const FOO = "aoeu\xab derp";
+    ,
+        \\pub const FOO2 = "aoeu\x07 derp";
+    });
+
     /////////////// Cases for only stage1 which are TODO items for stage2 ////////////////
 
     cases.add_both("typedef of function in struct field",