Commit 821907317e

Andrew Kelley <superjoe30@gmail.com>
2015-11-27 08:40:26
support C-style comments, plus nesting
1 parent 4060ae9
src/tokenizer.cpp
@@ -87,6 +87,19 @@
     case DIGIT: \
     case '_'
 
+enum TokenizeState {
+    TokenizeStateStart,
+    TokenizeStateSymbol,
+    TokenizeStateNumber,
+    TokenizeStateString,
+    TokenizeStateSawDash,
+    TokenizeStateSawSlash,
+    TokenizeStateLineComment,
+    TokenizeStateMultiLineComment,
+    TokenizeStateMultiLineCommentSlash,
+    TokenizeStateMultiLineCommentStar,
+};
+
 
 struct Tokenize {
     Buf *buf;
@@ -96,6 +109,7 @@ struct Tokenize {
     int line;
     int column;
     Token *cur_tok;
+    int multi_line_comment_count;
 };
 
 __attribute__ ((format (printf, 2, 3)))
@@ -222,8 +236,78 @@ ZigList<Token> *tokenize(Buf *buf) {
                         begin_token(&t, TokenIdNumberSign);
                         end_token(&t);
                         break;
+                    case '/':
+                        t.state = TokenizeStateSawSlash;
+                        break;
+                    default:
+                        tokenize_error(&t, "invalid character: '%c'", c);
+                }
+                break;
+            case TokenizeStateSawSlash:
+                switch (c) {
+                    case '/':
+                        t.state = TokenizeStateLineComment;
+                        break;
+                    case '*':
+                        t.state = TokenizeStateMultiLineComment;
+                        t.multi_line_comment_count = 1;
+                        break;
                     default:
                         tokenize_error(&t, "invalid character: '%c'", c);
+                        break;
+                }
+                break;
+            case TokenizeStateLineComment:
+                switch (c) {
+                    case '\n':
+                        t.state = TokenizeStateStart;
+                        break;
+                    default:
+                        // do nothing
+                        break;
+                }
+                break;
+            case TokenizeStateMultiLineComment:
+                switch (c) {
+                    case '*':
+                        t.state = TokenizeStateMultiLineCommentStar;
+                        break;
+                    case '/':
+                        t.state = TokenizeStateMultiLineCommentSlash;
+                        break;
+                    default:
+                        // do nothing
+                        break;
+                }
+                break;
+            case TokenizeStateMultiLineCommentSlash:
+                switch (c) {
+                    case '*':
+                        t.state = TokenizeStateMultiLineComment;
+                        t.multi_line_comment_count += 1;
+                        break;
+                    case '/':
+                        break;
+                    default:
+                        t.state = TokenizeStateMultiLineComment;
+                        break;
+                }
+                break;
+            case TokenizeStateMultiLineCommentStar:
+                switch (c) {
+                    case '/':
+                        t.multi_line_comment_count -= 1;
+                        if (t.multi_line_comment_count == 0) {
+                            t.state = TokenizeStateStart;
+                        } else {
+                            t.state = TokenizeStateMultiLineComment;
+                        }
+                        break;
+                    case '*':
+                        break;
+                    default:
+                        t.state = TokenizeStateMultiLineComment;
+                        break;
                 }
                 break;
             case TokenizeStateSymbol:
@@ -295,7 +379,18 @@ ZigList<Token> *tokenize(Buf *buf) {
         case TokenizeStateSawDash:
             end_token(&t);
             break;
+        case TokenizeStateSawSlash:
+            tokenize_error(&t, "unexpected EOF");
+            break;
+        case TokenizeStateLineComment:
+            break;
+        case TokenizeStateMultiLineComment:
+        case TokenizeStateMultiLineCommentSlash:
+        case TokenizeStateMultiLineCommentStar:
+            tokenize_error(&t, "unterminated multi-line comment");
+            break;
     }
+    t.pos = -1;
     begin_token(&t, TokenIdEof);
     end_token(&t);
     assert(!t.cur_tok);
@@ -333,9 +428,11 @@ static const char * token_name(Token *token) {
 void print_tokens(Buf *buf, ZigList<Token> *tokens) {
     for (int i = 0; i < tokens->length; i += 1) {
         Token *token = &tokens->at(i);
-        printf("%s ", token_name(token));
-        fwrite(buf_ptr(buf) + token->start_pos, 1, token->end_pos - token->start_pos, stdout);
-        printf("\n");
+        fprintf(stderr, "%s ", token_name(token));
+        if (token->start_pos >= 0) {
+            fwrite(buf_ptr(buf) + token->start_pos, 1, token->end_pos - token->start_pos, stderr);
+        }
+        fprintf(stderr, "\n");
     }
 }
 
src/tokenizer.hpp
@@ -43,14 +43,6 @@ struct Token {
     int start_column;
 };
 
-enum TokenizeState {
-    TokenizeStateStart,
-    TokenizeStateSymbol,
-    TokenizeStateNumber,
-    TokenizeStateString,
-    TokenizeStateSawDash,
-};
-
 ZigList<Token> *tokenize(Buf *buf);
 
 void print_tokens(Buf *buf, ZigList<Token> *tokens);
test/standalone.cpp
@@ -75,6 +75,26 @@ static void add_all_test_cases(void) {
             exit(0);
         }
     )SOURCE", "OK\n");
+
+    add_simple_case("comments", R"SOURCE(
+        #link("c")
+        extern {
+            fn puts(s: *mut u8) -> i32;
+            fn exit(code: i32) -> unreachable;
+        }
+
+        /**
+         * multi line doc comment
+         */
+        fn another_function() -> i32 { return 0; }
+
+        /// this is a documentation comment
+        /// doc comment line 2
+        fn _start() -> unreachable {
+            puts(/* mid-line comment /* nested */ */ "OK");
+            exit(0);
+        }
+    )SOURCE", "OK\n");
 }
 
 static void run_test(TestCase *test_case) {
@@ -83,11 +103,12 @@ static void run_test(TestCase *test_case) {
     Buf zig_stderr = BUF_INIT;
     Buf zig_stdout = BUF_INIT;
     int return_code;
-    os_exec_process("./zig", test_case->compiler_args, &return_code, &zig_stderr, &zig_stdout);
+    static const char *zig_exe = "./zig";
+    os_exec_process(zig_exe, test_case->compiler_args, &return_code, &zig_stderr, &zig_stdout);
 
     if (return_code != 0) {
         printf("\nCompile failed with return code %d:\n", return_code);
-        printf("zig");
+        printf("%s", zig_exe);
         for (int i = 0; i < test_case->compiler_args.length; i += 1) {
             printf(" %s", test_case->compiler_args.at(i));
         }
README.md
@@ -31,7 +31,7 @@ readable, safe, optimal, and concise code to solve any computing problem.
 
 ## Roadmap
 
- * C style comments.
+ * empty function and return with no expression
  * Simple .so library
  * Multiple files
  * figure out integers