Commit e09932928a

Andrew Kelley <superjoe30@gmail.com>
2015-08-06 02:44:05
tokenize
1 parent 899c9fe
src/list.hpp
@@ -5,24 +5,21 @@
  * See http://opensource.org/licenses/MIT
  */
 
-#ifndef GROOVE_LIST_HPP
-#define GROOVE_LIST_HPP
+#ifndef ZIG_LIST_HPP
+#define ZIG_LIST_HPP
 
 #include "util.hpp"
 
 #include <assert.h>
 
 template<typename T>
-struct GrooveList {
+struct ZigList {
     void deinit() {
         deallocate(items);
     }
     void append(T item) {
-        int err = ensure_capacity(length + 1);
-        if (err)
-            return err;
+        ensure_capacity(length + 1);
         items[length++] = item;
-        return 0;
     }
     // remember that the pointer to this item is invalid after you
     // modify the length of the list
@@ -57,11 +54,8 @@ struct GrooveList {
 
     void resize(int new_length) {
         assert(new_length >= 0);
-        int err = ensure_capacity(new_length);
-        if (err)
-            return err;
+        ensure_capacity(new_length);
         length = new_length;
-        return 0;
     }
 
     void clear() {
@@ -76,7 +70,6 @@ struct GrooveList {
             items = reallocate_nonzero(items, better_capacity);
             capacity = better_capacity;
         }
-        return 0;
     }
 
     T * items;
src/main.cpp
@@ -7,6 +7,7 @@
 
 #include "config.h"
 #include "util.hpp"
+#include "list.hpp"
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
@@ -30,6 +31,13 @@ static Buf *alloc_buf(int size) {
     return buf;
 }
 
+/*
+static void fprint_buf(FILE *f, Buf *buf) {
+    if (fwrite(buf->ptr, 1, buf->len, f))
+        zig_panic("error writing: %s", strerror(errno));
+}
+*/
+
 static int usage(char *arg0) {
     fprintf(stderr, "Usage: %s --output outfile code.zig\n"
         "Other options:\n"
@@ -56,6 +64,289 @@ static struct Buf *fetch_file(FILE *f) {
     return buf;
 }
 
+// Case-label list matching the whitespace bytes space, tab, newline,
+// vertical tab (0x0b), form feed and carriage return.
+// Use as `case WHITESPACE:`; the leading `case` and the trailing `:` are
+// supplied at the use site.
+#define WHITESPACE \
+    ' ': case '\t': case '\n': \
+    case '\v': case '\f': case '\r'
+
+// Case-label list matching the decimal digits '0' through '9'.
+// Use as `case DIGIT:`; the leading `case` and the trailing `:` are supplied
+// at the use site.
+#define DIGIT \
+    '0': case '1': case '2': case '3': case '4': \
+    case '5': case '6': case '7': case '8': case '9'
+
+// Case-label list matching the letters a-z and A-Z ('_' is not included).
+// Use as `case ALPHA:`; the leading `case` and the trailing `:` are supplied
+// at the use site.
+#define ALPHA \
+    'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': \
+    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': \
+    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': \
+    case 'v': case 'w': case 'x': case 'y': case 'z': \
+    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': \
+    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': \
+    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': \
+    case 'V': case 'W': case 'X': case 'Y': case 'Z'
+
+enum TokenId { // Kind tag stored in Token::id; token_name() maps each value to a printable name.
+    TokenIdDirective, // begun at '#'; the tokenizer ends it at the next '\n'
+    TokenIdSymbol, // begun at a letter; extended by letters, digits and '_'
+    TokenIdLParen, // '('
+    TokenIdRParen, // closing parenthesis; printed as "RParen"
+    TokenIdComma, // ','
+    TokenIdStar, // '*'
+    TokenIdLBrace, // '{'
+    TokenIdRBrace, // '}'
+    TokenIdStringLiteral, // begun at '"'; ended at the next '"' (no escape handling in tokenize())
+    TokenIdSemicolon, // ';'
+    TokenIdNumberLiteral, // begun at a decimal digit; extended by decimal digits
+    TokenIdPlus, // '+'
+};
+
+struct Token { // One lexed token, located by offsets into the tokenized buffer.
+    TokenId id; // kind of token
+    int start_pos; // offset of the token's first byte; set by begin_token()
+    int end_pos; // offset one past the token's last byte; print_tokens() writes end_pos - start_pos bytes
+};
+
+enum TokenizeState { // States of the state machine inside tokenize().
+    TokenizeStateStart, // between tokens; value 0, so it is the initial state after `Tokenize t = {0}`
+    TokenizeStateDirective, // after '#'; consumes bytes until '\n'
+    TokenizeStateSymbol, // inside a symbol; letters, digits and '_' extend it
+    TokenizeStateString, // after an opening '"'; consumes bytes until the next '"'
+    TokenizeStateNumber, // inside a number; decimal digits extend it
+};
+
+struct Tokenize { // Working state of a single tokenize() run.
+    int pos; // index into the input buffer of the byte being examined
+    TokenizeState state; // current state-machine state
+    ZigList<Token> *tokens; // output list; allocated and returned by tokenize()
+    int line; // zero-based line counter; tokenize_error() prints line + 1
+    int column; // zero-based column counter; tokenize_error() prints column + 1
+    Token *cur_tok; // token begun but not yet ended, or null when none is open
+};
+
+__attribute__ ((format (printf, 2, 3)))
+static void tokenize_error(Tokenize *t, const char *format, ...) {
+    va_list ap;
+    va_start(ap, format);
+    fprintf(stderr, "Error. Line %d, column %d: ", t->line + 1, t->column + 1);
+    vfprintf(stderr, format, ap);
+    va_end(ap);
+    exit(EXIT_FAILURE);
+}
+
+// Opens a new token of kind `id` at the current read position.
+//
+// Appends one element to t->tokens, stamps it with `id` and t->pos as its
+// start offset, and remembers it in t->cur_tok. Asserts that no other token
+// is currently open. The end offset is filled in later by end_token().
+static void begin_token(Tokenize *t, TokenId id) {
+    assert(!t->cur_tok);
+    ZigList<Token> *list = t->tokens;
+    list->add_one();
+    Token &fresh = list->last();
+    fresh.id = id;
+    fresh.start_pos = t->pos;
+    t->cur_tok = &fresh;
+}
+
+// Closes the currently open token so that it ends with the byte at t->pos
+// (end_pos is stored as t->pos + 1, i.e. one past that byte) and clears
+// t->cur_tok. Asserts that a token is open.
+static void end_token(Tokenize *t) {
+    assert(t->cur_tok);
+    Token *finished = t->cur_tok;
+    t->cur_tok = nullptr;
+    finished->end_pos = t->pos + 1;
+}
+
+// Moves the read position of `t` back by `count` bytes.
+static void put_back(Tokenize *t, int count) {
+    t->pos = t->pos - count;
+}
+
+// Splits the bytes of `buf` into tokens.
+//
+// Walks buf->ptr[0 .. buf->len) one byte at a time through a small state
+// machine (see TokenizeState) and returns a newly allocated ZigList<Token>.
+// Token positions are offsets into buf->ptr, with end_pos exclusive.
+// Calls tokenize_error(), which exits the process, when a byte cannot start
+// a token or when a string literal is still open at the end of the input.
+static ZigList<Token> *tokenize(Buf *buf) {
+    Tokenize t = {0};
+    t.tokens = allocate<ZigList<Token>>(1);
+    for (t.pos = 0; t.pos < buf->len; t.pos += 1) {
+        uint8_t c = buf->ptr[t.pos];
+        switch (t.state) {
+            case TokenizeStateStart:
+                switch (c) {
+                    case WHITESPACE:
+                        break;
+                    case ALPHA:
+                        t.state = TokenizeStateSymbol;
+                        begin_token(&t, TokenIdSymbol);
+                        break;
+                    case DIGIT:
+                        t.state = TokenizeStateNumber;
+                        begin_token(&t, TokenIdNumberLiteral);
+                        break;
+                    case '#':
+                        t.state = TokenizeStateDirective;
+                        begin_token(&t, TokenIdDirective);
+                        break;
+                    case '(':
+                        begin_token(&t, TokenIdLParen);
+                        end_token(&t);
+                        break;
+                    case ')':
+                        // A closing parenthesis is its own token kind.
+                        begin_token(&t, TokenIdRParen);
+                        end_token(&t);
+                        break;
+                    case ',':
+                        begin_token(&t, TokenIdComma);
+                        end_token(&t);
+                        break;
+                    case '*':
+                        begin_token(&t, TokenIdStar);
+                        end_token(&t);
+                        break;
+                    case '{':
+                        begin_token(&t, TokenIdLBrace);
+                        end_token(&t);
+                        break;
+                    case '}':
+                        begin_token(&t, TokenIdRBrace);
+                        end_token(&t);
+                        break;
+                    case '"':
+                        begin_token(&t, TokenIdStringLiteral);
+                        t.state = TokenizeStateString;
+                        break;
+                    case ';':
+                        begin_token(&t, TokenIdSemicolon);
+                        end_token(&t);
+                        break;
+                    case '+':
+                        begin_token(&t, TokenIdPlus);
+                        end_token(&t);
+                        break;
+                    default:
+                        tokenize_error(&t, "invalid character: '%c'", c);
+                }
+                break;
+            case TokenizeStateDirective:
+                if (c == '\n') {
+                    // The newline itself is not part of the directive.
+                    assert(t.cur_tok);
+                    t.cur_tok->end_pos = t.pos;
+                    t.cur_tok = nullptr;
+                    t.state = TokenizeStateStart;
+                }
+                break;
+            case TokenizeStateSymbol:
+                switch (c) {
+                    case ALPHA:
+                    case DIGIT:
+                    case '_':
+                        break;
+                    default:
+                        // `c` does not belong to the symbol. Rewind so the
+                        // loop increment lands on `c` again and it is
+                        // re-examined in the start state. `continue` skips
+                        // the line/column update below, so `c` is counted
+                        // only once, on its second pass.
+                        put_back(&t, 1);
+                        end_token(&t);
+                        t.state = TokenizeStateStart;
+                        continue;
+                }
+                break;
+            case TokenizeStateString:
+                switch (c) {
+                    case '"':
+                        // The closing quote is included in the token.
+                        end_token(&t);
+                        t.state = TokenizeStateStart;
+                        break;
+                    default:
+                        break;
+                }
+                break;
+            case TokenizeStateNumber:
+                switch (c) {
+                    case DIGIT:
+                        break;
+                    default:
+                        // Same rewind-and-recount handling as for symbols.
+                        put_back(&t, 1);
+                        end_token(&t);
+                        t.state = TokenizeStateStart;
+                        continue;
+                }
+                break;
+        }
+        if (c == '\n') {
+            t.line += 1;
+            t.column = 0;
+        } else {
+            t.column += 1;
+        }
+    }
+    // End of input: close or reject whatever token is still open, so no
+    // token is returned with an unset end_pos.
+    switch (t.state) {
+        case TokenizeStateStart:
+            break;
+        case TokenizeStateDirective:
+        case TokenizeStateSymbol:
+        case TokenizeStateNumber:
+            // The loop leaves t.pos at the end of the input, which is the
+            // exclusive end offset of the open token.
+            assert(t.cur_tok);
+            t.cur_tok->end_pos = t.pos;
+            t.cur_tok = nullptr;
+            break;
+        case TokenizeStateString:
+            tokenize_error(&t, "unterminated string literal");
+            break;
+    }
+    return t.tokens;
+}
+
+// Returns a static, printable name for the kind of `token`, or
+// "(invalid token)" when token->id is not one of the TokenId values.
+static const char * token_name(Token *token) {
+    const char *label = "(invalid token)";
+    switch (token->id) {
+        case TokenIdDirective:
+            label = "Directive";
+            break;
+        case TokenIdSymbol:
+            label = "Symbol";
+            break;
+        case TokenIdLParen:
+            label = "LParen";
+            break;
+        case TokenIdRParen:
+            label = "RParen";
+            break;
+        case TokenIdComma:
+            label = "Comma";
+            break;
+        case TokenIdStar:
+            label = "Star";
+            break;
+        case TokenIdLBrace:
+            label = "LBrace";
+            break;
+        case TokenIdRBrace:
+            label = "RBrace";
+            break;
+        case TokenIdStringLiteral:
+            label = "StringLiteral";
+            break;
+        case TokenIdSemicolon:
+            label = "Semicolon";
+            break;
+        case TokenIdNumberLiteral:
+            label = "NumberLiteral";
+            break;
+        case TokenIdPlus:
+            label = "Plus";
+            break;
+    }
+    return label;
+}
+
+// Debug dump of a token list: for each token, writes its kind name, a space,
+// the bytes of `buf` that the token covers, and a newline to stdout.
+static void print_tokens(Buf *buf, ZigList<Token> *tokens) {
+    int index = 0;
+    while (index < tokens->length) {
+        Token &tok = tokens->at(index);
+        int byte_count = tok.end_pos - tok.start_pos;
+        printf("%s ", token_name(&tok));
+        fwrite(buf->ptr + tok.start_pos, 1, byte_count, stdout);
+        printf("\n");
+        index += 1;
+    }
+}
+
 int main(int argc, char **argv) {
     char *arg0 = argv[0];
     char *in_file = NULL;
@@ -99,7 +390,9 @@ int main(int argc, char **argv) {
 
     fprintf(stderr, "%s\n", in_data->ptr);
 
-    //tokenize(in_data);
+    ZigList<Token> *tokens = tokenize(in_data);
+
+    print_tokens(in_data, tokens);
 
 
     return EXIT_SUCCESS;
src/util.hpp
@@ -48,4 +48,19 @@ template <typename T, long n>
 constexpr long array_length(const T (&)[n]) {
     return n;
 }
+
+// Returns the larger of `a` and `b`; `a` is returned when `a >= b` holds.
+template <typename T>
+static inline T max(T a, T b) {
+    if (a >= b) {
+        return a;
+    }
+    return b;
+}
+
+// Returns the smaller of `a` and `b`; `a` is returned when `a <= b` holds.
+template <typename T>
+static inline T min(T a, T b) {
+    if (a <= b) {
+        return a;
+    }
+    return b;
+}
+
+// Limits `value` to the range [min_value, max_value]: the value is first
+// capped at `max_value`, then raised to at least `min_value`.
+template<typename T>
+static inline T clamp(T min_value, T value, T max_value) {
+    T capped = min(value, max_value);
+    return max(capped, min_value);
+}
 #endif
README.md
@@ -1,1 +1,5 @@
 # zig lang
+
+C upgrade.
+
+Start with C.