Commit 798dbe487b

Andrew Kelley <superjoe30@gmail.com>
2017-12-05 05:09:03
simple tokenization
1 parent 31d9dc3
Changed files (2)
src
src-self-hosted
src/ir.cpp
@@ -8422,16 +8422,6 @@ static IrInstruction *ir_analyze_int_to_enum(IrAnalyze *ira, IrInstruction *sour
     if (type_is_invalid(wanted_type))
         return ira->codegen->invalid_instruction;
 
-    if (actual_type != wanted_type->data.enumeration.tag_int_type) {
-        ir_add_error(ira, source_instr,
-                buf_sprintf("integer to enum cast from '%s' instead of its tag type, '%s'",
-                    buf_ptr(&actual_type->name),
-                    buf_ptr(&wanted_type->data.enumeration.tag_int_type->name)));
-        return ira->codegen->invalid_instruction;
-    }
-
-    assert(actual_type->id == TypeTableEntryIdInt);
-
     if (instr_is_comptime(target)) {
         ConstExprValue *val = ir_resolve_const(ira, target, UndefBad);
         if (!val)
@@ -8453,6 +8443,17 @@ static IrInstruction *ir_analyze_int_to_enum(IrAnalyze *ira, IrInstruction *sour
         return result;
     }
 
+    if (actual_type != wanted_type->data.enumeration.tag_int_type) {
+        ir_add_error(ira, source_instr,
+                buf_sprintf("integer to enum cast from '%s' instead of its tag type, '%s'",
+                    buf_ptr(&actual_type->name),
+                    buf_ptr(&wanted_type->data.enumeration.tag_int_type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+
+    assert(actual_type->id == TypeTableEntryIdInt);
+
+
     IrInstruction *result = ir_build_int_to_enum(&ira->new_irb, source_instr->scope,
             source_instr->source_node, target);
     result->value.type = wanted_type;
@@ -8822,6 +8823,20 @@ static IrInstruction *ir_analyze_cast(IrAnalyze *ira, IrInstruction *source_inst
         }
     }
 
+    // explicit cast from integer to enum type with no payload
+    if ((actual_type->id == TypeTableEntryIdInt || actual_type->id == TypeTableEntryIdNumLitInt) &&
+            wanted_type->id == TypeTableEntryIdEnum)
+    {
+        return ir_analyze_int_to_enum(ira, source_instr, value, wanted_type);
+    }
+
+    // explicit cast from enum type with no payload to integer
+    if ((wanted_type->id == TypeTableEntryIdInt || wanted_type->id == TypeTableEntryIdNumLitInt) &&
+            actual_type->id == TypeTableEntryIdEnum)
+    {
+        return ir_analyze_enum_to_int(ira, source_instr, value, wanted_type);
+    }
+
     // explicit cast from number literal to another type
     // explicit cast from number literal to &const integer
     if (actual_type->id == TypeTableEntryIdNumLitFloat ||
@@ -8886,16 +8901,6 @@ static IrInstruction *ir_analyze_cast(IrAnalyze *ira, IrInstruction *source_inst
         return ir_analyze_int_to_err(ira, source_instr, value);
     }
 
-    // explicit cast from integer to enum type with no payload
-    if (actual_type->id == TypeTableEntryIdInt && wanted_type->id == TypeTableEntryIdEnum) {
-        return ir_analyze_int_to_enum(ira, source_instr, value, wanted_type);
-    }
-
-    // explicit cast from enum type with no payload to integer
-    if (wanted_type->id == TypeTableEntryIdInt && actual_type->id == TypeTableEntryIdEnum) {
-        return ir_analyze_enum_to_int(ira, source_instr, value, wanted_type);
-    }
-
     // explicit cast from union to the enum type of the union
     if (actual_type->id == TypeTableEntryIdUnion && wanted_type->id == TypeTableEntryIdEnum) {
         type_ensure_zero_bits_known(ira->codegen, actual_type);
src-self-hosted/main.zig
@@ -3,18 +3,134 @@ const io = @import("std").io;
 const os = @import("std").os;
 const heap = @import("std").heap;
 const warn = @import("std").debug.warn;
-
+const assert = @import("std").debug.assert;
+const mem = @import("std").mem;
 
 const Token = struct { // one lexed token: its kind plus the byte range it covers in the tokenized buffer
+    id: Id, // which kind of token this is
+    start: usize, // index of the token's first byte
+    end: usize, // index one past the token's last byte (used as an exclusive slice bound)
+
+    const Keyword = enum { // each member is named exactly as the keyword is spelled; getKeyword matches on these names
+        @"align",
+        @"and",
+        @"asm",
+        @"break",
+        @"coldcc",
+        @"comptime",
+        @"const",
+        @"continue",
+        @"defer",
+        @"else",
+        @"enum",
+        @"error",
+        @"export",
+        @"extern",
+        @"false",
+        @"fn",
+        @"for",
+        @"goto",
+        @"if",
+        @"inline",
+        @"nakedcc",
+        @"noalias",
+        @"null",
+        @"or",
+        @"packed",
+        @"pub",
+        @"return",
+        @"stdcallcc",
+        @"struct",
+        @"switch",
+        @"test",
+        @"this",
+        @"true",
+        @"undefined",
+        @"union",
+        @"unreachable",
+        @"use",
+        @"var",
+        @"volatile",
+        @"while",
+    };
 
+    fn getKeyword(bytes: []const u8) -> ?Keyword { // returns the Keyword whose member name equals bytes, or null if none matches
+        comptime var i = 0; // comptime counter so the inline loop below can be unrolled per member
+        inline while (i < @memberCount(Keyword)) : (i += 1) { // one comparison is emitted for each Keyword member
+            if (mem.eql(u8, @memberName(Keyword, i), bytes)) {
+                return Keyword(i); // converts the member index to the corresponding Keyword value
+            }
+        }
+        return null;
+    }
+
+
+    const Id = union(enum) { // token kind; only the Keyword variant carries a payload
+        Invalid,
+        Identifier,
+        Keyword: Keyword,
+        Eof,
+    };
 };
 
 const Tokenizer = struct { // hands out Tokens from buffer one at a time via next()
+    buffer: []const u8, // the text being tokenized
+    index: usize, // position of the next byte that next() will examine
 
-    pub fn next() -> Token {
+    pub fn dump(self: &Tokenizer, token: &const Token) { // prints the token's tag name and its text via warn
+        warn("{} \"{}\"\n", @tagName(token.id), self.buffer[token.start..token.end]);
+    }
 
+    pub fn init(buffer: []const u8) -> Tokenizer { // creates a tokenizer positioned at the start of buffer
+        return Tokenizer {
+            .buffer = buffer,
+            .index = 0,
+        };
     }
 
+    const State = enum { // scanner states used by next()
+        Start, // no token byte consumed yet; still skipping whitespace
+        Identifier, // inside an identifier or keyword
+    };
+
+    pub fn next(self: &Tokenizer) -> Token { // scans and returns the next token, leaving self.index just past it
+        var state = State.Start;
+        var result = Token {
+            .id = Token.Id { .Eof = {} }, // stays Eof when nothing but ' ' and '\n' remains
+            .start = self.index,
+            .end = undefined, // assigned after the loop
+        };
+        while (self.index < self.buffer.len) : (self.index += 1) {
+            const c = self.buffer[self.index];
+            switch (state) {
+                State.Start => switch (c) {
+                    ' ', '\n' => {
+                        result.start = self.index + 1; // leading whitespace is not part of the token
+                    },
+                    'a'...'z', 'A'...'Z', '_' => {
+                        state = State.Identifier;
+                        result.id = Token.Id { .Identifier = {} };
+                    },
+                    else => {
+                        result.id = Token.Id { .Invalid = {} };
+                        self.index += 1; // break skips the loop's continue expression, so consume the byte here
+                        break;
+                    },
+                },
+                State.Identifier => switch (c) {
+                    'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
+                    else => { break; }, // the terminating byte is left for the next call
+                },
+            }
+        }
+        if (state == State.Identifier) { // classify after the loop so a keyword ending exactly at end-of-buffer is recognized too
+            if (Token.getKeyword(self.buffer[result.start..self.index])) |keyword_id| {
+                result.id = Token.Id { .Keyword = keyword_id };
+            }
+        }
+        result.end = self.index;
+        return result;
+    }
 };
 
 
@@ -36,4 +152,13 @@ pub fn main2() -> %void {
     const target_file_buf = %return io.readFileAlloc(target_file, allocator);
 
     warn("{}", target_file_buf);
+
+    var tokenizer = Tokenizer.init(target_file_buf);
+    while (true) {
+        const token = tokenizer.next();
+        tokenizer.dump(token);
+        if (@TagType(Token.Id)(token.id) == Token.Id.Eof) {
+            break;
+        }
+    }
 }