Commit 275410dc33

Andrew Kelley <superjoe30@gmail.com>
2016-08-05 08:10:12
implement multiline C string syntax
1 parent d0b11af
Changed files (2)
src/tokenizer.cpp
@@ -189,6 +189,7 @@ enum TokenizeState {
     TokenizeStateLineString,
     TokenizeStateLineStringEnd,
     TokenizeStateLineStringContinue,
+    TokenizeStateLineStringContinueC,
     TokenizeStateSawEq,
     TokenizeStateSawBang,
     TokenizeStateSawLessThan,
@@ -932,6 +933,30 @@ void tokenize(Buf *buf, Tokenization *out) {
                 switch (c) {
                     case WHITESPACE:
                         break;
+                    case 'c':
+                        if (!t.cur_tok->data.str_lit.is_c_str) {
+                            t.pos -= 1;
+                            end_token(&t);
+                            t.state = TokenizeStateStart;
+                            break;
+                        }
+                        t.state = TokenizeStateLineStringContinueC;
+                        break;
+                    case '\\':
+                        if (t.cur_tok->data.str_lit.is_c_str) {
+                            tokenize_error(&t, "invalid character: '%c'", c);
+                        }
+                        t.state = TokenizeStateLineStringContinue;
+                        break;
+                    default:
+                        t.pos -= 1;
+                        end_token(&t);
+                        t.state = TokenizeStateStart;
+                        continue;
+                }
+                break;
+            case TokenizeStateLineStringContinueC:
+                switch (c) {
                     case '\\':
                         t.state = TokenizeStateLineStringContinue;
                         break;
@@ -970,6 +995,11 @@ void tokenize(Buf *buf, Tokenization *out) {
                         t.cur_tok->data.str_lit.is_c_str = true;
                         t.state = TokenizeStateString;
                         break;
+                    case '\\':
+                        set_token_id(&t, t.cur_tok, TokenIdStringLiteral);
+                        t.cur_tok->data.str_lit.is_c_str = true;
+                        t.state = TokenizeStateSawBackslash;
+                        break;
                     case SYMBOL_CHAR:
                         t.state = TokenizeStateSymbol;
                         buf_append_char(&t.cur_tok->data.str_lit.str, c);
@@ -1386,6 +1416,7 @@ void tokenize(Buf *buf, Tokenization *out) {
         case TokenizeStateSawDotDot:
         case TokenizeStateSawBackslash:
         case TokenizeStateLineStringContinue:
+        case TokenizeStateLineStringContinueC:
             tokenize_error(&t, "unexpected EOF");
             break;
         case TokenizeStateLineComment:
test/self_hosted.zig
@@ -693,6 +693,17 @@ fn multiline_string() {
     assert(str.eql(s1, s2));
 }
 
+#attribute("test")
+fn multiline_c_string() { // compares a multiline C string against the equivalent single-line c"..." literal
+    const s1 =
+        c\\one
+        c\\two)
+        c\\three
+    ;
+    const s2 = c"one\ntwo)\nthree"; // expected value: the three source lines above separated by '\n'
+    assert(cstr.cmp(s1, s2) == 0); // the test passes only when cstr.cmp yields 0 for the two strings
+}
+
 
 
 #attribute("test")