Commit 0ea96c11ef

Vexu <git@vexu.eu>
2020-01-14 20:24:39
disallow multiline strings in test and library names
1 parent 0a41051
doc/langref.html.in
@@ -10212,13 +10212,13 @@ ContainerMembers
      / ContainerField
      /
 
-TestDecl &lt;- KEYWORD_test STRINGLITERAL Block
+TestDecl &lt;- KEYWORD_test STRINGLITERALSINGLE Block
 
 TopLevelComptime &lt;- KEYWORD_comptime BlockExpr
 
 TopLevelDecl
-    &lt;- (KEYWORD_export / KEYWORD_extern STRINGLITERAL? / KEYWORD_inline)? FnProto (SEMICOLON / Block)
-     / (KEYWORD_export / KEYWORD_extern STRINGLITERAL?)? KEYWORD_threadlocal? VarDecl
+    &lt;- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / (KEYWORD_inline / KEYWORD_noinline))? FnProto (SEMICOLON / Block)
+     / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? VarDecl
      / KEYWORD_usingnamespace Expr SEMICOLON
 
 FnProto &lt;- FnCC? KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? LinkSection? EXCLAMATIONMARK? (KEYWORD_var / TypeExpr)
@@ -10561,10 +10561,10 @@ INTEGER
      / "0o" [0-7]+ skip
      / "0x" hex+   skip
      /      [0-9]+ skip
+STRINGLITERALSINGLE &lt;- "\"" string_char* "\"" skip
 STRINGLITERAL
-    &lt;- "\"" string_char* "\"" skip
+    &lt;- STRINGLITERALSINGLE
      / line_string                 skip
-     / line_cstring                skip
 IDENTIFIER
     &lt;- !keyword [A-Za-z_] [A-Za-z0-9_]* skip
      / "@\"" string_char* "\""                            skip
lib/std/zig/parse.zig
@@ -183,10 +183,10 @@ fn parseContainerDocComments(arena: *Allocator, it: *TokenIterator, tree: *Tree)
     return &node.base;
 }
 
-/// TestDecl <- KEYWORD_test STRINGLITERAL Block
+/// TestDecl <- KEYWORD_test STRINGLITERALSINGLE Block
 fn parseTestDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
     const test_token = eatToken(it, .Keyword_test) orelse return null;
-    const name_node = try expectNode(arena, it, tree, parseStringLiteral, AstError{
+    const name_node = try expectNode(arena, it, tree, parseStringLiteralSingle, AstError{
         .ExpectedStringLiteral = AstError.ExpectedStringLiteral{ .token = it.index },
     });
     const block_node = try expectNode(arena, it, tree, parseBlock, AstError{
@@ -225,15 +225,15 @@ fn parseTopLevelComptime(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*
 }
 
 /// TopLevelDecl
-///     <- (KEYWORD_export / KEYWORD_extern STRINGLITERAL? / (KEYWORD_inline / KEYWORD_noinline))? FnProto (SEMICOLON / Block)
-///      / (KEYWORD_export / KEYWORD_extern STRINGLITERAL?)? KEYWORD_threadlocal? VarDecl
+///     <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / (KEYWORD_inline / KEYWORD_noinline))? FnProto (SEMICOLON / Block)
+///      / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? VarDecl
 ///      / KEYWORD_usingnamespace Expr SEMICOLON
 fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
     var lib_name: ?*Node = null;
     const extern_export_inline_token = blk: {
         if (eatToken(it, .Keyword_export)) |token| break :blk token;
         if (eatToken(it, .Keyword_extern)) |token| {
-            lib_name = try parseStringLiteral(arena, it, tree);
+            lib_name = try parseStringLiteralSingle(arena, it, tree);
             break :blk token;
         }
         if (eatToken(it, .Keyword_inline)) |token| break :blk token;
@@ -285,12 +285,7 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node
 
     if (extern_export_inline_token) |token| {
         if (lib_name) |string_literal_node|
-            if (string_literal_node.cast(Node.StringLiteral)) |single| {
-                putBackToken(it, single.token);
-            } else if (string_literal_node.cast(Node.MultilineStringLiteral)) |multi| {
-                while (multi.lines.pop()) |line|
-                    putBackToken(it, line);
-            } else unreachable;
+            putBackToken(it, string_literal_node.cast(Node.StringLiteral).?.token);
         putBackToken(it, token);
         return null;
     }
@@ -2776,8 +2771,7 @@ fn createLiteral(arena: *Allocator, comptime T: type, token: TokenIndex) !*Node
     return &result.base;
 }
 
-// string literal or multiline string literal
-fn parseStringLiteral(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
+fn parseStringLiteralSingle(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
     if (eatToken(it, .StringLiteral)) |token| {
         const node = try arena.create(Node.StringLiteral);
         node.* = Node.StringLiteral{
@@ -2785,6 +2779,12 @@ fn parseStringLiteral(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Nod
         };
         return &node.base;
     }
+    return null;
+}
+
+// Parses a single-line string literal, falling back to a multiline string literal.
+fn parseStringLiteral(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
+    if (try parseStringLiteralSingle(arena, it, tree)) |node| return node;
 
     if (eatToken(it, .MultilineStringLiteralLine)) |first_line| {
         const node = try arena.create(Node.MultilineStringLiteral);
lib/std/zig/parser_test.zig
@@ -2721,16 +2721,6 @@ test "zig fmt: extern without container keyword returns error" {
     );
 }
 
-test "zig fmt: extern multiline lib name" {
-    try testError(
-        \\extern \\super
-        \\    \\long
-        \\    \\library
-        \\    \\name
-        \\
-    );
-}
-
 const std = @import("std");
 const mem = std.mem;
 const warn = std.debug.warn;
src/parser.cpp
@@ -141,16 +141,9 @@ static void ast_error(ParseContext *pc, Token *token, const char *format, ...) {
     exit(EXIT_FAILURE);
 }
 
-static Buf ast_token_str(Buf *input, Token *token) {
-    Buf str = BUF_INIT;
-    buf_init_from_mem(&str, buf_ptr(input) + token->start_pos, token->end_pos - token->start_pos);
-    return str;
-}
-
 ATTRIBUTE_NORETURN
 static void ast_invalid_token_error(ParseContext *pc, Token *token) {
-    Buf token_value = ast_token_str(pc->buf, token);
-    ast_error(pc, token, "invalid token: '%s'", buf_ptr(&token_value));
+    ast_error(pc, token, "invalid token: '%s'", token_name(token->id));
 }
 
 static AstNode *ast_create_node_no_line_info(ParseContext *pc, NodeType type) {
@@ -213,7 +206,7 @@ static void put_back_token(ParseContext *pc) {
 static Buf *token_buf(Token *token) {
     if (token == nullptr)
         return nullptr;
-    assert(token->id == TokenIdStringLiteral || token->id == TokenIdSymbol);
+    assert(token->id == TokenIdStringLiteral || token->id == TokenIdMultilineStringLiteral || token->id == TokenIdSymbol);
     return &token->data.str_lit.str;
 }
 
@@ -596,7 +589,7 @@ static AstNodeContainerDecl ast_parse_container_members(ParseContext *pc) {
     return res;
 }
 
-// TestDecl <- KEYWORD_test STRINGLITERAL Block
+// TestDecl <- KEYWORD_test STRINGLITERALSINGLE Block
 static AstNode *ast_parse_test_decl(ParseContext *pc) {
     Token *test = eat_token_if(pc, TokenIdKeywordTest);
     if (test == nullptr)
@@ -630,8 +623,8 @@ static AstNode *ast_parse_top_level_comptime(ParseContext *pc) {
 }
 
 // TopLevelDecl
-//     <- (KEYWORD_export / KEYWORD_extern STRINGLITERAL? / (KEYWORD_inline / KEYWORD_noinline))? FnProto (SEMICOLON / Block)
-//      / (KEYWORD_export / KEYWORD_extern STRINGLITERAL?)? KEYWORD_threadlocal? VarDecl
+//     <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / (KEYWORD_inline / KEYWORD_noinline))? FnProto (SEMICOLON / Block)
+//      / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? VarDecl
 //      / KEYWORD_use Expr SEMICOLON
 static AstNode *ast_parse_top_level_decl(ParseContext *pc, VisibMod visib_mod, Buf *doc_comments) {
     Token *first = eat_token_if(pc, TokenIdKeywordExport);
@@ -1729,6 +1722,8 @@ static AstNode *ast_parse_primary_type_expr(ParseContext *pc) {
         return ast_create_node(pc, NodeTypeUnreachable, unreachable);
 
     Token *string_lit = eat_token_if(pc, TokenIdStringLiteral);
+    if (string_lit == nullptr)
+        string_lit = eat_token_if(pc, TokenIdMultilineStringLiteral);
     if (string_lit != nullptr) {
         AstNode *res = ast_create_node(pc, NodeTypeStringLiteral, string_lit);
         res->data.string_literal.buf = token_buf(string_lit);
@@ -1957,7 +1952,9 @@ static AsmOutput *ast_parse_asm_output_item(ParseContext *pc) {
     Token *sym_name = expect_token(pc, TokenIdSymbol);
     expect_token(pc, TokenIdRBracket);
 
-    Token *str = expect_token(pc, TokenIdStringLiteral);
+    Token *str = eat_token_if(pc, TokenIdMultilineStringLiteral);
+    if (str == nullptr)
+        str = expect_token(pc, TokenIdStringLiteral);
     expect_token(pc, TokenIdLParen);
 
     Token *var_name = eat_token_if(pc, TokenIdSymbol);
@@ -1999,7 +1996,9 @@ static AsmInput *ast_parse_asm_input_item(ParseContext *pc) {
     Token *sym_name = expect_token(pc, TokenIdSymbol);
     expect_token(pc, TokenIdRBracket);
 
-    Token *constraint = expect_token(pc, TokenIdStringLiteral);
+    Token *constraint = eat_token_if(pc, TokenIdMultilineStringLiteral);
+    if (constraint == nullptr)
+        constraint = expect_token(pc, TokenIdStringLiteral);
     expect_token(pc, TokenIdLParen);
     AstNode *expr = ast_expect(pc, ast_parse_expr);
     expect_token(pc, TokenIdRParen);
@@ -2018,6 +2017,8 @@ static AstNode *ast_parse_asm_clobbers(ParseContext *pc) {
 
     ZigList<Buf *> clobber_list = ast_parse_list<Buf>(pc, TokenIdComma, [](ParseContext *context) {
         Token *str = eat_token_if(context, TokenIdStringLiteral);
+        if (str == nullptr)
+            str = eat_token_if(context, TokenIdMultilineStringLiteral);
         if (str != nullptr)
             return token_buf(str);
         return (Buf*)nullptr;
src/tokenizer.cpp
@@ -209,7 +209,6 @@ enum TokenizeState {
     TokenizeStateLineString,
     TokenizeStateLineStringEnd,
     TokenizeStateLineStringContinue,
-    TokenizeStateLineStringContinueC,
     TokenizeStateSawEq,
     TokenizeStateSawBang,
     TokenizeStateSawLessThan,
@@ -266,7 +265,7 @@ static void set_token_id(Tokenize *t, Token *token, TokenId id) {
     } else if (id == TokenIdFloatLiteral) {
         bigfloat_init_32(&token->data.float_lit.bigfloat, 0.0f);
         token->data.float_lit.overflow = false;
-    } else if (id == TokenIdStringLiteral || id == TokenIdSymbol) {
+    } else if (id == TokenIdStringLiteral || id == TokenIdMultilineStringLiteral || id == TokenIdSymbol) {
         memset(&token->data.str_lit.str, 0, sizeof(Buf));
         buf_resize(&token->data.str_lit.str, 0);
     }
@@ -503,7 +502,7 @@ void tokenize(Buf *buf, Tokenization *out) {
                         t.state = TokenizeStateSawSlash;
                         break;
                     case '\\':
-                        begin_token(&t, TokenIdStringLiteral);
+                        begin_token(&t, TokenIdMultilineStringLiteral);
                         t.state = TokenizeStateSawBackslash;
                         break;
                     case '%':
@@ -945,18 +944,6 @@ void tokenize(Buf *buf, Tokenization *out) {
                         continue;
                 }
                 break;
-            case TokenizeStateLineStringContinueC:
-                switch (c) {
-                    case '\\':
-                        t.state = TokenizeStateLineStringContinue;
-                        break;
-                    default:
-                        t.pos -= 1;
-                        end_token(&t);
-                        t.state = TokenizeStateStart;
-                        continue;
-                }
-                break;
             case TokenizeStateLineStringContinue:
                 switch (c) {
                     case '\\':
@@ -1471,7 +1458,6 @@ void tokenize(Buf *buf, Tokenization *out) {
         case TokenizeStateSawDotDot:
         case TokenizeStateSawBackslash:
         case TokenizeStateLineStringContinue:
-        case TokenizeStateLineStringContinueC:
             tokenize_error(&t, "unexpected EOF");
             break;
         case TokenizeStateLineComment:
@@ -1607,6 +1593,7 @@ const char * token_name(TokenId id) {
         case TokenIdStar: return "*";
         case TokenIdStarStar: return "**";
         case TokenIdStringLiteral: return "StringLiteral";
+        case TokenIdMultilineStringLiteral: return "MultilineStringLiteral";
         case TokenIdSymbol: return "Symbol";
         case TokenIdTilde: return "~";
         case TokenIdTimesEq: return "*=";
src/tokenizer.hpp
@@ -124,6 +124,7 @@ enum TokenId {
     TokenIdStar,
     TokenIdStarStar,
     TokenIdStringLiteral,
+    TokenIdMultilineStringLiteral,
     TokenIdSymbol,
     TokenIdTilde,
     TokenIdTimesEq,
@@ -165,7 +166,7 @@ struct Token {
         // TokenIdFloatLiteral
         TokenFloatLit float_lit;
 
-        // TokenIdStringLiteral or TokenIdSymbol
+        // TokenIdStringLiteral, TokenIdMultilineStringLiteral or TokenIdSymbol
         TokenStrLit str_lit;
 
         // TokenIdCharLiteral