Commit a5251a1c10

Andrew Kelley <superjoe30@gmail.com>
2016-07-09 21:17:31
parseh: support octal in C macro string literal
1 parent 100e8e1
src/ast_render.cpp
@@ -251,7 +251,7 @@ static bool is_digit(uint8_t c) {
 }
 
 static bool is_printable(uint8_t c) {
-    return is_alpha_under(c) || is_digit(c);
+    return is_alpha_under(c) || is_digit(c) || c == ' ';
 }
 
 static void string_literal_escape(Buf *source, Buf *dest) {
@@ -463,10 +463,14 @@ static void render_node(AstRender *ar, AstNode *node) {
             }
             break;
         case NodeTypeStringLiteral:
-            if (node->data.string_literal.c) {
-                fprintf(ar->f, "c");
+            {
+                if (node->data.string_literal.c) {
+                    fprintf(ar->f, "c");
+                }
+                Buf tmp_buf = BUF_INIT;
+                string_literal_escape(&node->data.string_literal.buf, &tmp_buf);
+                fprintf(ar->f, "\"%s\"", buf_ptr(&tmp_buf));
             }
-            fprintf(ar->f, "\"%s\"", buf_ptr(&node->data.string_literal.buf));
             break;
         case NodeTypeCharLiteral:
             {
src/c_tokenizer.cpp
@@ -539,8 +539,17 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) {
                     case 'v':
                         add_char(ctok, '\v');
                         break;
-                    case DIGIT:
-                        zig_panic("TODO octal");
+                    case '0':
+                    case '1':
+                    case '2':
+                    case '3':
+                    case '4':
+                    case '5':
+                    case '6':
+                    case '7':
+                        ctok->state = CTokStateStrOctal;
+                        ctok->cur_char = *c - '0';
+                        ctok->octal_index = 1;
                         break;
                     case 'x':
                         zig_panic("TODO hex");
@@ -555,6 +564,53 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) {
                         return mark_error(ctok);
                 }
                 break;
+            case CTokStateStrOctal:
+                switch (*c) {
+                    case '0':
+                    case '1':
+                    case '2':
+                    case '3':
+                    case '4':
+                    case '5':
+                    case '6':
+                    case '7':
+                        // TODO @mul_with_overflow
+                        if (((long)ctok->cur_char) * 8 >= 256) {
+                            zig_panic("TODO");
+                        }
+                        ctok->cur_char *= 8;
+                        // TODO @add_with_overflow
+                        if (((long)ctok->cur_char) + (long)(*c - '0') >= 256) {
+                            zig_panic("TODO");
+                        }
+                        ctok->cur_char += *c - '0';
+                        ctok->octal_index += 1;
+                        if (ctok->octal_index == 3) {
+                            if (ctok->cur_tok->id == CTokIdStrLit) {
+                                add_char(ctok, ctok->cur_char);
+                                ctok->state = CTokStateString;
+                            } else if (ctok->cur_tok->id == CTokIdCharLit) {
+                                ctok->cur_tok->data.char_lit = ctok->cur_char;
+                                ctok->state = CTokStateExpectEndQuot;
+                            } else {
+                                zig_unreachable();
+                            }
+                        }
+                        break;
+                    default:
+                        c -= 1;
+                        if (ctok->cur_tok->id == CTokIdStrLit) {
+                            add_char(ctok, ctok->cur_char);
+                            ctok->state = CTokStateString;
+                        } else if (ctok->cur_tok->id == CTokIdCharLit) {
+                            ctok->cur_tok->data.char_lit = ctok->cur_char;
+                            ctok->state = CTokStateExpectEndQuot;
+                        } else {
+                            zig_unreachable();
+                        }
+                        continue;
+                }
+                break;
             case CTokStateExpectEndQuot:
                 switch (*c) {
                     case '\'':
@@ -644,6 +700,7 @@ found_end_of_macro:
         case CTokStateString:
         case CTokStateExpSign:
         case CTokStateFloatExpFirst:
+        case CTokStateStrOctal:
             return mark_error(ctok);
     }
 
src/c_tokenizer.hpp
@@ -53,6 +53,7 @@ enum CTokState {
     CTokStateExpSign,
     CTokStateFloatExp,
     CTokStateFloatExpFirst,
+    CTokStateStrOctal,
 };
 
 struct CTokenize {
@@ -63,6 +64,8 @@ struct CTokenize {
     Buf buf;
     bool unsigned_suffix;
     bool long_suffix;
+    uint8_t cur_char;
+    int octal_index;
 };
 
 void tokenize_c_macro(CTokenize *ctok, const uint8_t *c);
test/run_tests.cpp
@@ -1731,6 +1731,15 @@ struct type {
     )SOURCE", 2, R"(export struct struct_type {
     @"defer": c_int,
 })", R"(pub const @"type" = struct_type;)");
+
+    add_parseh_case("macro defines string literal with octal", R"SOURCE(
+#define FOO "aoeu\023 derp"
+#define FOO2 "aoeu\0234 derp"
+#define FOO_CHAR '\077'
+    )SOURCE", 3,
+            R"(pub const FOO = c"aoeu\x13 derp")",
+            R"(pub const FOO2 = c"aoeu\x134 derp")",
+            R"(pub const FOO_CHAR = '\x3f')");
 }
 
 static void run_self_hosted_test(bool is_release_mode) {