Commit `cc1c2bd568`

Andrew Kelley <andrew@ziglang.org>

2020-04-21 01:19:35

simplify ZIR spec; separate parsing/rendering from analysis

master

1 parent af4ccf3

Changed files (6)

lib

std

src-self-hosted

test

stage2

ir.zig

@@ -1012,25 +1012,27 @@ pub const Dir = struct {
     /// On success, caller owns returned buffer.
     /// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
     pub fn readFileAlloc(self: Dir, allocator: *mem.Allocator, file_path: []const u8, max_bytes: usize) ![]u8 {
-        return self.readFileAllocAligned(allocator, file_path, max_bytes, @alignOf(u8));
+        return self.readFileAllocOptions(allocator, file_path, max_bytes, @alignOf(u8), null);
     }
 
     /// On success, caller owns returned buffer.
     /// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
-    pub fn readFileAllocAligned(
+    /// Allows specifying alignment and a sentinel value.
+    pub fn readFileAllocOptions(
         self: Dir,
         allocator: *mem.Allocator,
         file_path: []const u8,
         max_bytes: usize,
-        comptime A: u29,
-    ) ![]align(A) u8 {
+        comptime alignment: u29,
+        comptime optional_sentinel: ?u8,
+    ) !(if (optional_sentinel) |s| [:s]align(alignment) u8 else []align(alignment) u8) {
         var file = try self.openFile(file_path, .{});
         defer file.close();
 
         const size = math.cast(usize, try file.getEndPos()) catch math.maxInt(usize);
         if (size > max_bytes) return error.FileTooBig;
 
-        const buf = try allocator.alignedAlloc(u8, A, size);
+        const buf = try allocator.allocWithOptions(u8, size, alignment, optional_sentinel);
         errdefer allocator.free(buf);
 
         try file.inStream().readNoEof(buf);

@@ -105,6 +105,31 @@ pub const Allocator = struct {
         return self.alignedAlloc(T, null, n);
     }
 
+    pub fn allocWithOptions(
+        self: *Allocator,
+        comptime Elem: type,
+        n: usize,
+        /// null means naturally aligned
+        comptime optional_alignment: ?u29,
+        comptime optional_sentinel: ?Elem,
+    ) Error!AllocWithOptionsPayload(Elem, optional_alignment, optional_sentinel) {
+        if (optional_sentinel) |sentinel| {
+            const ptr = try self.alignedAlloc(Elem, optional_alignment, n + 1);
+            ptr[n] = sentinel;
+            return ptr[0..n :sentinel];
+        } else {
+            return alignedAlloc(Elem, optional_alignment, n);
+        }
+    }
+
+    fn AllocWithOptionsPayload(comptime Elem: type, comptime alignment: ?u29, comptime sentinel: ?Elem) type {
+        if (sentinel) |s| {
+            return [:s]align(alignment orelse @alignOf(T)) Elem;
+        } else {
+            return []align(alignment orelse @alignOf(T)) Elem;
+        }
+    }
+
     /// Allocates an array of `n + 1` items of type `T` and sets the first `n`
     /// items to `undefined` and the last item to `sentinel`. Depending on the
     /// Allocator implementation, it may be required to call `free` once the
@@ -113,10 +138,10 @@ pub const Allocator = struct {
     /// call `free` when done.
     ///
     /// For allocating a single item, see `create`.
+    ///
+    /// Deprecated; use `allocWithOptions`.
     pub fn allocSentinel(self: *Allocator, comptime Elem: type, n: usize, comptime sentinel: Elem) Error![:sentinel]Elem {
-        var ptr = try self.alloc(Elem, n + 1);
-        ptr[n] = sentinel;
-        return ptr[0..n :sentinel];
+        return self.allocWithOptions(Elem, n, null, sentinel);
     }
 
     pub fn alignedAlloc(

@@ -0,0 +1,712 @@
+//! This file has to do with parsing and rendering the ZIR text format.
+const std = @import("std");
+const mem = std.mem;
+const Allocator = std.mem.Allocator;
+const Value = @import("../value.zig").Value;
+const assert = std.debug.assert;
+const ir = @import("../ir.zig");
+const BigInt = std.math.big.Int;
+
+/// These are instructions that correspond to the ZIR text format. See `ir.Inst` for
+/// in-memory, analyzed instructions with types and values.
+pub const Inst = struct {
+    tag: Tag,
+
+    /// These names are used directly as the instruction names in the text format.
+    pub const Tag = enum {
+        str,
+        int,
+        ptrtoint,
+        fieldptr,
+        deref,
+        as,
+        @"asm",
+        @"unreachable",
+        @"fn",
+        @"export",
+        primitive,
+        fntype,
+    };
+
+    pub fn TagToType(tag: Tag) type {
+        return switch (tag) {
+            .str => Str,
+            .int => Int,
+            .ptrtoint => PtrToInt,
+            .fieldptr => FieldPtr,
+            .deref => Deref,
+            .as => As,
+            .@"asm" => Assembly,
+            .@"unreachable" => Unreachable,
+            .@"fn" => Fn,
+            .@"export" => Export,
+            .primitive => Primitive,
+            .fntype => FnType,
+        };
+    }
+
+    pub fn cast(base: *Inst, comptime T: type) ?*T {
+        const expected_tag = std.meta.fieldInfo(T, "base").default_value.?.tag;
+        if (base.tag != expected_tag)
+            return null;
+
+        return @fieldParentPtr(T, "base", base);
+    }
+
+    pub const Str = struct {
+        base: Inst = Inst{ .tag = .str },
+
+        positionals: struct {
+            bytes: []u8,
+        },
+        kw_args: struct {},
+    };
+
+    pub const Int = struct {
+        base: Inst = Inst{ .tag = .int },
+
+        positionals: struct {
+            int: BigInt,
+        },
+        kw_args: struct {},
+    };
+
+    pub const PtrToInt = struct {
+        base: Inst = Inst{ .tag = .ptrtoint },
+
+        positionals: struct {
+            ptr: *Inst,
+        },
+        kw_args: struct {},
+    };
+
+    pub const FieldPtr = struct {
+        base: Inst = Inst{ .tag = .fieldptr },
+
+        positionals: struct {
+            object_ptr: *Inst,
+            field_name: *Inst,
+        },
+        kw_args: struct {},
+    };
+
+    pub const Deref = struct {
+        base: Inst = Inst{ .tag = .deref },
+
+        positionals: struct {
+            ptr: *Inst,
+        },
+        kw_args: struct {},
+    };
+
+    pub const As = struct {
+        base: Inst = Inst{ .tag = .as },
+
+        positionals: struct {
+            dest_type: *Inst,
+            value: *Inst,
+        },
+        kw_args: struct {},
+    };
+
+    pub const Assembly = struct {
+        base: Inst = Inst{ .tag = .@"asm" },
+
+        positionals: struct {
+            asm_source: *Inst,
+            return_type: *Inst,
+        },
+        kw_args: struct {
+            @"volatile": bool = false,
+            output: ?*Inst = null,
+            inputs: []*Inst = &[0]*Inst{},
+            clobbers: []*Inst = &[0]*Inst{},
+            args: []*Inst = &[0]*Inst{},
+        },
+    };
+
+    pub const Unreachable = struct {
+        base: Inst = Inst{ .tag = .@"unreachable" },
+
+        positionals: struct {},
+        kw_args: struct {},
+    };
+
+    pub const Fn = struct {
+        base: Inst = Inst{ .tag = .@"fn" },
+
+        positionals: struct {
+            fn_type: *Inst,
+            body: Body,
+        },
+        kw_args: struct {},
+
+        pub const Body = struct {
+            instructions: []*Inst,
+        };
+    };
+
+    pub const Export = struct {
+        base: Inst = Inst{ .tag = .@"export" },
+
+        positionals: struct {
+            symbol_name: *Inst,
+            value: *Inst,
+        },
+        kw_args: struct {},
+    };
+
+    pub const Primitive = struct {
+        base: Inst = Inst{ .tag = .primitive },
+
+        positionals: struct {
+            tag: BuiltinType,
+        },
+        kw_args: struct {},
+
+        pub const BuiltinType = enum {
+            @"isize",
+            @"usize",
+            @"c_short",
+            @"c_ushort",
+            @"c_int",
+            @"c_uint",
+            @"c_long",
+            @"c_ulong",
+            @"c_longlong",
+            @"c_ulonglong",
+            @"c_longdouble",
+            @"c_void",
+            @"f16",
+            @"f32",
+            @"f64",
+            @"f128",
+            @"bool",
+            @"void",
+            @"noreturn",
+            @"type",
+            @"anyerror",
+            @"comptime_int",
+            @"comptime_float",
+        };
+    };
+
+    pub const FnType = struct {
+        base: Inst = Inst{ .tag = .fntype },
+
+        positionals: struct {
+            param_types: []*Inst,
+            return_type: *Inst,
+        },
+        kw_args: struct {
+            cc: std.builtin.CallingConvention = .Unspecified,
+        },
+    };
+};
+
+pub const ErrorMsg = struct {
+    byte_offset: usize,
+    msg: []const u8,
+};
+
+pub const Module = struct {
+    decls: []*Inst,
+    errors: []ErrorMsg,
+
+    pub fn deinit(self: *Module) void {
+        // TODO resource deallocation
+        self.* = undefined;
+    }
+
+    /// This is a debugging utility for rendering the tree to stderr.
+    pub fn dump(self: Module) void {
+        self.writeToStream(std.heap.page_allocator, std.io.getStdErr().outStream()) catch {};
+    }
+
+    const InstPtrTable = std.AutoHashMap(*Inst, struct { index: usize, fn_body: ?*Inst.Fn.Body });
+
+    pub fn writeToStream(self: Module, allocator: *Allocator, stream: var) !void {
+        // First, build a map of *Inst to @ or % indexes
+        var inst_table = InstPtrTable.init(allocator);
+        defer inst_table.deinit();
+
+        try inst_table.ensureCapacity(self.decls.len);
+
+        for (self.decls) |decl, decl_i| {
+            try inst_table.putNoClobber(decl, .{ .index = decl_i, .fn_body = null });
+
+            if (decl.cast(Inst.Fn)) |fn_inst| {
+                for (fn_inst.positionals.body.instructions) |inst, inst_i| {
+                    try inst_table.putNoClobber(inst, .{ .index = inst_i, .fn_body = &fn_inst.positionals.body });
+                }
+            }
+        }
+
+        for (self.decls) |decl, i| {
+            try stream.print("@{} ", .{i});
+            try self.writeInstToStream(stream, decl, &inst_table);
+            try stream.writeByte('\n');
+        }
+    }
+
+    fn writeInstToStream(
+        self: Module,
+        stream: var,
+        decl: *Inst,
+        inst_table: *const InstPtrTable,
+    ) @TypeOf(stream).Error!void {
+        // TODO I tried implementing this with an inline for loop and hit a compiler bug
+        switch (decl.tag) {
+            .str => return self.writeInstToStreamGeneric(stream, .str, decl, inst_table),
+            .int => return self.writeInstToStreamGeneric(stream, .int, decl, inst_table),
+            .ptrtoint => return self.writeInstToStreamGeneric(stream, .ptrtoint, decl, inst_table),
+            .fieldptr => return self.writeInstToStreamGeneric(stream, .fieldptr, decl, inst_table),
+            .deref => return self.writeInstToStreamGeneric(stream, .deref, decl, inst_table),
+            .as => return self.writeInstToStreamGeneric(stream, .as, decl, inst_table),
+            .@"asm" => return self.writeInstToStreamGeneric(stream, .@"asm", decl, inst_table),
+            .@"unreachable" => return self.writeInstToStreamGeneric(stream, .@"unreachable", decl, inst_table),
+            .@"fn" => return self.writeInstToStreamGeneric(stream, .@"fn", decl, inst_table),
+            .@"export" => return self.writeInstToStreamGeneric(stream, .@"export", decl, inst_table),
+            .primitive => return self.writeInstToStreamGeneric(stream, .primitive, decl, inst_table),
+            .fntype => return self.writeInstToStreamGeneric(stream, .fntype, decl, inst_table),
+        }
+    }
+
+    fn writeInstToStreamGeneric(
+        self: Module,
+        stream: var,
+        comptime inst_tag: Inst.Tag,
+        base: *Inst,
+        inst_table: *const InstPtrTable,
+    ) !void {
+        const SpecificInst = Inst.TagToType(inst_tag);
+        const inst = @fieldParentPtr(SpecificInst, "base", base);
+        const Positionals = @TypeOf(inst.positionals);
+        try stream.writeAll("= " ++ @tagName(inst_tag) ++ "(");
+        const pos_fields = @typeInfo(Positionals).Struct.fields;
+        inline for (pos_fields) |arg_field, i| {
+            if (i != 0) {
+                try stream.writeAll(", ");
+            }
+            try self.writeParamToStream(stream, @field(inst.positionals, arg_field.name), inst_table);
+        }
+
+        comptime var need_comma = pos_fields.len != 0;
+        const KW_Args = @TypeOf(inst.kw_args);
+        inline for (@typeInfo(KW_Args).Struct.fields) |arg_field, i| {
+            if (need_comma) {
+                try stream.writeAll(", ");
+            }
+            if (@typeInfo(arg_field.field_type) == .Optional) {
+                if (@field(inst.kw_args, arg_field.name)) |non_optional| {
+                    try stream.print("{}=", .{arg_field.name});
+                    try self.writeParamToStream(stream, non_optional, inst_table);
+                    need_comma = true;
+                }
+            } else {
+                try stream.print("{}=", .{arg_field.name});
+                try self.writeParamToStream(stream, @field(inst.kw_args, arg_field.name), inst_table);
+                need_comma = true;
+            }
+        }
+
+        try stream.writeByte(')');
+    }
+
+    fn writeParamToStream(self: Module, stream: var, param: var, inst_table: *const InstPtrTable) !void {
+        if (@typeInfo(@TypeOf(param)) == .Enum) {
+            return stream.writeAll(@tagName(param));
+        }
+        switch (@TypeOf(param)) {
+            Value => return stream.print("{}", .{param}),
+            *Inst => return self.writeInstParamToStream(stream, param, inst_table),
+            []*Inst => {
+                try stream.writeByte('[');
+                for (param) |inst, i| {
+                    if (i != 0) {
+                        try stream.writeAll(", ");
+                    }
+                    try self.writeInstParamToStream(stream, inst, inst_table);
+                }
+                try stream.writeByte(']');
+            },
+            Inst.Fn.Body => {
+                try stream.writeAll("{\n");
+                for (param.instructions) |inst, i| {
+                    try stream.print("  %{} ", .{i});
+                    try self.writeInstToStream(stream, inst, inst_table);
+                    try stream.writeByte('\n');
+                }
+                try stream.writeByte('}');
+            },
+            bool => return stream.writeByte("01"[@boolToInt(param)]),
+            []u8 => return std.zig.renderStringLiteral(param, stream),
+            BigInt => return stream.print("{}", .{param}),
+            else => |T| @compileError("unimplemented: rendering parameter of type " ++ @typeName(T)),
+        }
+    }
+
+    fn writeInstParamToStream(self: Module, stream: var, inst: *Inst, inst_table: *const InstPtrTable) !void {
+        const info = inst_table.getValue(inst).?;
+        const prefix = if (info.fn_body == null) "@" else "%";
+        try stream.print("{}{}", .{ prefix, info.index });
+    }
+};
+
+pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module {
+    var global_name_map = std.StringHashMap(usize).init(allocator);
+    defer global_name_map.deinit();
+
+    var parser: Parser = .{
+        .allocator = allocator,
+        .i = 0,
+        .source = source,
+        .decls = std.ArrayList(*Inst).init(allocator),
+        .errors = std.ArrayList(ErrorMsg).init(allocator),
+        .global_name_map = &global_name_map,
+    };
+    parser.parseRoot() catch |err| switch (err) {
+        error.ParseFailure => {
+            assert(parser.errors.items.len != 0);
+        },
+        else => |e| return e,
+    };
+    return Module{
+        .decls = parser.decls.toOwnedSlice(),
+        .errors = parser.errors.toOwnedSlice(),
+    };
+}
+
+const Parser = struct {
+    allocator: *Allocator,
+    i: usize,
+    source: [:0]const u8,
+    errors: std.ArrayList(ErrorMsg),
+    decls: std.ArrayList(*Inst),
+    global_name_map: *std.StringHashMap(usize),
+
+    const Body = struct {
+        instructions: std.ArrayList(*Inst),
+        name_map: std.StringHashMap(usize),
+    };
+
+    fn parseBody(self: *Parser) !Inst.Fn.Body {
+        var body_context = Body{
+            .instructions = std.ArrayList(*Inst).init(self.allocator),
+            .name_map = std.StringHashMap(usize).init(self.allocator),
+        };
+        defer body_context.instructions.deinit();
+        defer body_context.name_map.deinit();
+
+        try requireEatBytes(self, "{");
+        skipSpace(self);
+
+        while (true) : (self.i += 1) switch (self.source[self.i]) {
+            ';' => _ = try skipToAndOver(self, '\n'),
+            '%' => {
+                self.i += 1;
+                const ident = try skipToAndOver(self, ' ');
+                skipSpace(self);
+                try requireEatBytes(self, "=");
+                skipSpace(self);
+                const inst = try parseInstruction(self, &body_context);
+                const ident_index = body_context.instructions.items.len;
+                if (try body_context.name_map.put(ident, ident_index)) |_| {
+                    return self.fail("redefinition of identifier '{}'", .{ident});
+                }
+                try body_context.instructions.append(inst);
+                continue;
+            },
+            ' ', '\n' => continue,
+            '}' => {
+                self.i += 1;
+                break;
+            },
+            else => |byte| return self.failByte(byte),
+        };
+
+        return Inst.Fn.Body{
+            .instructions = body_context.instructions.toOwnedSlice(),
+        };
+    }
+
+    fn parseStringLiteral(self: *Parser) ![]u8 {
+        const start = self.i;
+        try self.requireEatBytes("\"");
+
+        while (true) : (self.i += 1) switch (self.source[self.i]) {
+            '"' => {
+                self.i += 1;
+                const span = self.source[start..self.i];
+                var bad_index: usize = undefined;
+                const parsed = std.zig.parseStringLiteral(self.allocator, span, &bad_index) catch |err| switch (err) {
+                    error.InvalidCharacter => {
+                        self.i = start + bad_index;
+                        const bad_byte = self.source[self.i];
+                        return self.fail("invalid string literal character: '{c}'\n", .{bad_byte});
+                    },
+                    else => |e| return e,
+                };
+                return parsed;
+            },
+            '\\' => {
+                self.i += 1;
+                continue;
+            },
+            0 => return self.failByte(0),
+            else => continue,
+        };
+    }
+
+    fn parseIntegerLiteral(self: *Parser) !BigInt {
+        const start = self.i;
+        if (self.source[self.i] == '-') self.i += 1;
+        while (true) : (self.i += 1) switch (self.source[self.i]) {
+            '0'...'9' => continue,
+            else => break,
+        };
+        const number_text = self.source[start..self.i];
+        var result = try BigInt.init(self.allocator);
+        result.setString(10, number_text) catch |err| {
+            self.i = start;
+            switch (err) {
+                error.InvalidBase => unreachable,
+                error.InvalidCharForDigit => return self.fail("invalid digit in integer literal", .{}),
+                error.DigitTooLargeForBase => return self.fail("digit too large in integer literal", .{}),
+                else => |e| return e,
+            }
+        };
+        return result;
+    }
+
+    fn parseRoot(self: *Parser) !void {
+        // The IR format is designed so that it can be tokenized and parsed at the same time.
+        while (true) : (self.i += 1) switch (self.source[self.i]) {
+            ';' => _ = try skipToAndOver(self, '\n'),
+            '@' => {
+                self.i += 1;
+                const ident = try skipToAndOver(self, ' ');
+                skipSpace(self);
+                try requireEatBytes(self, "=");
+                skipSpace(self);
+                const inst = try parseInstruction(self, null);
+                const ident_index = self.decls.items.len;
+                if (try self.global_name_map.put(ident, ident_index)) |_| {
+                    return self.fail("redefinition of identifier '{}'", .{ident});
+                }
+                try self.decls.append(inst);
+                continue;
+            },
+            ' ', '\n' => continue,
+            0 => break,
+            else => |byte| return self.fail("unexpected byte: '{c}'", .{byte}),
+        };
+    }
+
+    fn eatByte(self: *Parser, byte: u8) bool {
+        if (self.source[self.i] != byte) return false;
+        self.i += 1;
+        return true;
+    }
+
+    fn skipSpace(self: *Parser) void {
+        while (self.source[self.i] == ' ' or self.source[self.i] == '\n') {
+            self.i += 1;
+        }
+    }
+
+    fn requireEatBytes(self: *Parser, bytes: []const u8) !void {
+        const start = self.i;
+        for (bytes) |byte| {
+            if (self.source[self.i] != byte) {
+                self.i = start;
+                return self.fail("expected '{}'", .{bytes});
+            }
+            self.i += 1;
+        }
+    }
+
+    fn skipToAndOver(self: *Parser, byte: u8) ![]const u8 {
+        const start_i = self.i;
+        while (self.source[self.i] != 0) : (self.i += 1) {
+            if (self.source[self.i] == byte) {
+                const result = self.source[start_i..self.i];
+                self.i += 1;
+                return result;
+            }
+        }
+        return self.fail("unexpected EOF", .{});
+    }
+
+    /// ParseFailure is an internal error code; handled in `parse`.
+    const InnerError = error{ ParseFailure, OutOfMemory };
+
+    fn failByte(self: *Parser, byte: u8) InnerError {
+        if (byte == 0) {
+            return self.fail("unexpected EOF", .{});
+        } else {
+            return self.fail("unexpected byte: '{c}'", .{byte});
+        }
+    }
+
+    fn fail(self: *Parser, comptime format: []const u8, args: var) InnerError {
+        @setCold(true);
+        const msg = try std.fmt.allocPrint(self.allocator, format, args);
+        (try self.errors.addOne()).* = .{
+            .byte_offset = self.i,
+            .msg = msg,
+        };
+        return error.ParseFailure;
+    }
+
+    fn parseInstruction(self: *Parser, body_ctx: ?*Body) InnerError!*Inst {
+        const fn_name = try skipToAndOver(self, '(');
+        inline for (@typeInfo(Inst.Tag).Enum.fields) |field| {
+            if (mem.eql(u8, field.name, fn_name)) {
+                const tag = @field(Inst.Tag, field.name);
+                return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx);
+            }
+        }
+        return self.fail("unknown instruction '{}'", .{fn_name});
+    }
+
+    fn parseInstructionGeneric(
+        self: *Parser,
+        comptime fn_name: []const u8,
+        comptime InstType: type,
+        body_ctx: ?*Body,
+    ) !*Inst {
+        const inst_specific = try self.allocator.create(InstType);
+        inst_specific.base = std.meta.fieldInfo(InstType, "base").default_value.?;
+
+        if (@hasField(InstType, "ty")) {
+            inst_specific.ty = opt_type orelse {
+                return self.fail("instruction '" ++ fn_name ++ "' requires type", .{});
+            };
+        }
+
+        const Positionals = @TypeOf(inst_specific.positionals);
+        inline for (@typeInfo(Positionals).Struct.fields) |arg_field| {
+            if (self.source[self.i] == ',') {
+                self.i += 1;
+                skipSpace(self);
+            } else if (self.source[self.i] == ')') {
+                return self.fail("expected positional parameter '{}'", .{arg_field.name});
+            }
+            @field(inst_specific.positionals, arg_field.name) = try parseParameterGeneric(
+                self,
+                arg_field.field_type,
+                body_ctx,
+            );
+            skipSpace(self);
+        }
+
+        const KW_Args = @TypeOf(inst_specific.kw_args);
+        inst_specific.kw_args = .{}; // assign defaults
+        skipSpace(self);
+        while (eatByte(self, ',')) {
+            skipSpace(self);
+            const name = try skipToAndOver(self, '=');
+            inline for (@typeInfo(KW_Args).Struct.fields) |arg_field| {
+                const field_name = arg_field.name;
+                if (mem.eql(u8, name, field_name)) {
+                    const NonOptional = switch (@typeInfo(arg_field.field_type)) {
+                        .Optional => |info| info.child,
+                        else => arg_field.field_type,
+                    };
+                    @field(inst_specific.kw_args, field_name) = try parseParameterGeneric(self, NonOptional, body_ctx);
+                    break;
+                }
+            } else {
+                return self.fail("unrecognized keyword parameter: '{}'", .{name});
+            }
+            skipSpace(self);
+        }
+        try requireEatBytes(self, ")");
+
+        return &inst_specific.base;
+    }
+
+    fn parseParameterGeneric(self: *Parser, comptime T: type, body_ctx: ?*Body) !T {
+        if (@typeInfo(T) == .Enum) {
+            const start = self.i;
+            while (true) : (self.i += 1) switch (self.source[self.i]) {
+                ' ', '\n', ',', ')' => {
+                    const enum_name = self.source[start..self.i];
+                    return std.meta.stringToEnum(T, enum_name) orelse {
+                        return self.fail("tag '{}' not a member of enum '{}'", .{ enum_name, @typeName(T) });
+                    };
+                },
+                0 => return self.failByte(0),
+                else => continue,
+            };
+        }
+        switch (T) {
+            Inst.Fn.Body => return parseBody(self),
+            bool => {
+                const bool_value = switch (self.source[self.i]) {
+                    '0' => false,
+                    '1' => true,
+                    else => |byte| return self.fail("expected '0' or '1' for boolean value, found {c}", .{byte}),
+                };
+                self.i += 1;
+                return bool_value;
+            },
+            []*Inst => {
+                try requireEatBytes(self, "[");
+                skipSpace(self);
+                if (eatByte(self, ']')) return &[0]*Inst{};
+
+                var instructions = std.ArrayList(*Inst).init(self.allocator);
+                defer instructions.deinit();
+                while (true) {
+                    skipSpace(self);
+                    try instructions.append(try parseParameterInst(self, body_ctx));
+                    skipSpace(self);
+                    if (!eatByte(self, ',')) break;
+                }
+                try requireEatBytes(self, "]");
+                return instructions.toOwnedSlice();
+            },
+            *Inst => return parseParameterInst(self, body_ctx),
+            Value => return self.fail("TODO implement parseParameterGeneric for type Value", .{}),
+            []u8 => return self.parseStringLiteral(),
+            BigInt => return self.parseIntegerLiteral(),
+            else => @compileError("Unimplemented: ir parseParameterGeneric for type " ++ @typeName(T)),
+        }
+        return self.fail("TODO parse parameter {}", .{@typeName(T)});
+    }
+
+    fn parseParameterInst(self: *Parser, body_ctx: ?*Body) !*Inst {
+        const local_ref = switch (self.source[self.i]) {
+            '@' => false,
+            '%' => true,
+            else => |byte| return self.fail("unexpected byte: '{c}'", .{byte}),
+        };
+        const map = if (local_ref)
+            if (body_ctx) |bc|
+                &bc.name_map
+            else
+                return self.fail("referencing a % instruction in global scope", .{})
+        else
+            self.global_name_map;
+
+        self.i += 1;
+        const name_start = self.i;
+        while (true) : (self.i += 1) switch (self.source[self.i]) {
+            0, ' ', '\n', ',', ')', ']' => break,
+            else => continue,
+        };
+        const ident = self.source[name_start..self.i];
+        const kv = map.get(ident) orelse {
+            const bad_name = self.source[name_start - 1 .. self.i];
+            self.i = name_start - 1;
+            return self.fail("unrecognized identifier: {}", .{bad_name});
+        };
+        if (local_ref) {
+            return body_ctx.?.instructions.items[kv.value];
+        } else {
+            return self.decls.items[kv.value];
+        }
+    }
+};

@@ -4,41 +4,26 @@ const Allocator = std.mem.Allocator;
 const Value = @import("value.zig").Value;
 const Type = @import("type.zig").Type;
 const assert = std.debug.assert;
+const text = @import("ir/text.zig");
 
+/// These are in-memory, analyzed instructions. See `text.Inst` for the representation
+/// of instructions that correspond to the ZIR text format.
 pub const Inst = struct {
-    tag: Tag,
+    pub fn ty(base: *Inst) ?Type {
+        switch (base.tag) {
+            .constant => return base.cast(Constant).?.ty,
+            .@"asm" => return base.cast(Assembly).?.ty,
+            .@"fn" => return base.cast(Fn).?.ty,
 
-    /// These names are used for the IR text format.
-    pub const Tag = enum {
-        constant,
-        ptrtoint,
-        fieldptr,
-        deref,
-        @"asm",
-        @"unreachable",
-        @"fn",
-        @"export",
-    };
-
-    pub fn TagToType(tag: Tag) type {
-        return switch (tag) {
-            .constant => Constant,
-            .ptrtoint => PtrToInt,
-            .fieldptr => FieldPtr,
-            .deref => Deref,
-            .@"asm" => Assembly,
-            .@"unreachable" => Unreachable,
-            .@"fn" => Fn,
-            .@"export" => Export,
-        };
-    }
+            .ptrtoint => return Type.initTag(.@"usize"),
+            .@"unreachable" => return Type.initTag(.@"noreturn"),
+            .@"export" => return Type.initTag(.@"void"),
+            .fntype, .primitive => return Type.initTag(.@"type"),
 
-    pub fn cast(base: *Inst, comptime T: type) ?*T {
-        const expected_tag = std.meta.fieldInfo(T, "base").default_value.?.tag;
-        if (base.tag != expected_tag)
-            return null;
-
-        return @fieldParentPtr(T, "base", base);
+            .fieldptr,
+            .deref,
+            => return null,
+        }
     }
 
     /// This struct owns the `Value` memory. When the struct is deallocated,
@@ -53,644 +38,70 @@ pub const Inst = struct {
         },
         kw_args: struct {},
     };
-
-    pub const PtrToInt = struct {
-        base: Inst = Inst{ .tag = .ptrtoint },
-
-        positionals: struct {
-            ptr: *Inst,
-        },
-        kw_args: struct {},
-    };
-
-    pub const FieldPtr = struct {
-        base: Inst = Inst{ .tag = .fieldptr },
-
-        positionals: struct {
-            object_ptr: *Inst,
-            field_name: *Inst,
-        },
-        kw_args: struct {},
-    };
-
-    pub const Deref = struct {
-        base: Inst = Inst{ .tag = .deref },
-
-        positionals: struct {
-            ptr: *Inst,
-        },
-        kw_args: struct {},
-    };
-
-    pub const Assembly = struct {
-        base: Inst = Inst{ .tag = .@"asm" },
-
-        positionals: struct {
-            asm_source: *Inst,
-        },
-        kw_args: struct {
-            @"volatile": bool = false,
-            output: ?*Inst = null,
-            inputs: []*Inst = &[0]*Inst{},
-            clobbers: []*Inst = &[0]*Inst{},
-            args: []*Inst = &[0]*Inst{},
-        },
-    };
-
-    pub const Unreachable = struct {
-        base: Inst = Inst{ .tag = .@"unreachable" },
-
-        positionals: struct {},
-        kw_args: struct {},
-    };
-
-    pub const Fn = struct {
-        base: Inst = Inst{ .tag = .@"fn" },
-
-        positionals: struct {
-            body: Body,
-        },
-        kw_args: struct {
-            cc: std.builtin.CallingConvention = .Unspecified,
-        },
-
-        pub const Body = struct {
-            instructions: []*Inst,
-        };
-    };
-
-    pub const Export = struct {
-        base: Inst = Inst{ .tag = .@"export" },
-
-        positionals: struct {
-            symbol_name: *Inst,
-            value: *Inst,
-        },
-        kw_args: struct {},
-    };
-};
-
-pub const ErrorMsg = struct {
-    byte_offset: usize,
-    msg: []const u8,
-};
-
-pub const Tree = struct {
-    decls: []*Inst,
-    errors: []ErrorMsg,
-
-    pub fn deinit(self: *Tree) void {
-        // TODO resource deallocation
-        self.* = undefined;
-    }
-
-    /// This is a debugging utility for rendering the tree to stderr.
-    pub fn dump(self: Tree) void {
-        self.writeToStream(std.heap.page_allocator, std.io.getStdErr().outStream()) catch {};
-    }
-
-    const InstPtrTable = std.AutoHashMap(*Inst, struct { index: usize, fn_body: ?*Inst.Fn.Body });
-
-    pub fn writeToStream(self: Tree, allocator: *Allocator, stream: var) !void {
-        // First, build a map of *Inst to @ or % indexes
-        var inst_table = InstPtrTable.init(allocator);
-        defer inst_table.deinit();
-
-        try inst_table.ensureCapacity(self.decls.len);
-
-        for (self.decls) |decl, decl_i| {
-            try inst_table.putNoClobber(decl, .{ .index = decl_i, .fn_body = null });
-
-            if (decl.cast(Inst.Fn)) |fn_inst| {
-                for (fn_inst.positionals.body.instructions) |inst, inst_i| {
-                    try inst_table.putNoClobber(inst, .{ .index = inst_i, .fn_body = &fn_inst.positionals.body });
-                }
-            }
-        }
-
-        for (self.decls) |decl, i| {
-            try stream.print("@{} ", .{i});
-            try self.writeInstToStream(stream, decl, &inst_table);
-            try stream.writeByte('\n');
-        }
-    }
-
-    fn writeInstToStream(
-        self: Tree,
-        stream: var,
-        decl: *Inst,
-        inst_table: *const InstPtrTable,
-    ) @TypeOf(stream).Error!void {
-        // TODO I tried implementing this with an inline for loop and hit a compiler bug
-        switch (decl.tag) {
-            .constant => return self.writeInstToStreamGeneric(stream, .constant, decl, inst_table),
-            .ptrtoint => return self.writeInstToStreamGeneric(stream, .ptrtoint, decl, inst_table),
-            .fieldptr => return self.writeInstToStreamGeneric(stream, .fieldptr, decl, inst_table),
-            .deref => return self.writeInstToStreamGeneric(stream, .deref, decl, inst_table),
-            .@"asm" => return self.writeInstToStreamGeneric(stream, .@"asm", decl, inst_table),
-            .@"unreachable" => return self.writeInstToStreamGeneric(stream, .@"unreachable", decl, inst_table),
-            .@"fn" => return self.writeInstToStreamGeneric(stream, .@"fn", decl, inst_table),
-            .@"export" => return self.writeInstToStreamGeneric(stream, .@"export", decl, inst_table),
-        }
-    }
-
-    fn writeInstToStreamGeneric(
-        self: Tree,
-        stream: var,
-        comptime inst_tag: Inst.Tag,
-        base: *Inst,
-        inst_table: *const InstPtrTable,
-    ) !void {
-        const SpecificInst = Inst.TagToType(inst_tag);
-        const inst = @fieldParentPtr(SpecificInst, "base", base);
-        if (@hasField(SpecificInst, "ty")) {
-            try stream.print(": {} ", .{inst.ty});
-        }
-        if (inst_tag == .constant) {
-            if (inst.positionals.value.cast(Value.Payload.Bytes)) |bytes_value| {
-                try stream.writeAll("= ");
-                return std.zig.renderStringLiteral(bytes_value.data, stream);
-            } else if (inst.positionals.value.cast(Value.Payload.Int_u64)) |v| {
-                return stream.print("= {}", .{v.int});
-            } else if (inst.positionals.value.cast(Value.Payload.Int_i64)) |v| {
-                return stream.print("= {}", .{v.int});
-            }
-        }
-        const Positionals = @TypeOf(inst.positionals);
-        try stream.writeAll("= " ++ @tagName(inst_tag) ++ "(");
-        const pos_fields = @typeInfo(Positionals).Struct.fields;
-        inline for (pos_fields) |arg_field, i| {
-            if (i != 0) {
-                try stream.writeAll(", ");
-            }
-            try self.writeParamToStream(stream, @field(inst.positionals, arg_field.name), inst_table);
-        }
-
-        comptime var need_comma = pos_fields.len != 0;
-        const KW_Args = @TypeOf(inst.kw_args);
-        inline for (@typeInfo(KW_Args).Struct.fields) |arg_field, i| {
-            if (need_comma) {
-                try stream.writeAll(",\n    ");
-            }
-            if (@typeInfo(arg_field.field_type) == .Optional) {
-                if (@field(inst.kw_args, arg_field.name)) |non_optional| {
-                    try stream.print("{}=", .{arg_field.name});
-                    try self.writeParamToStream(stream, non_optional, inst_table);
-                    need_comma = true;
-                }
-            } else {
-                try stream.print("{}=", .{arg_field.name});
-                try self.writeParamToStream(stream, @field(inst.kw_args, arg_field.name), inst_table);
-                need_comma = true;
-            }
-        }
-
-        try stream.writeByte(')');
-    }
-
-    fn writeParamToStream(self: Tree, stream: var, param: var, inst_table: *const InstPtrTable) !void {
-        if (@typeInfo(@TypeOf(param)) == .Enum) {
-            return stream.writeAll(@tagName(param));
-        }
-        switch (@TypeOf(param)) {
-            Value => {
-                try stream.print("{}", .{param});
-            },
-            *Inst => return self.writeInstParamToStream(stream, param, inst_table),
-            []*Inst => {
-                try stream.writeByte('[');
-                for (param) |inst, i| {
-                    if (i != 0) {
-                        try stream.writeAll(", ");
-                    }
-                    try self.writeInstParamToStream(stream, inst, inst_table);
-                }
-                try stream.writeByte(']');
-            },
-            Inst.Fn.Body => {
-                try stream.writeAll("{\n");
-                for (param.instructions) |inst, i| {
-                    try stream.print("  %{} ", .{i});
-                    try self.writeInstToStream(stream, inst, inst_table);
-                    try stream.writeByte('\n');
-                }
-                try stream.writeByte('}');
-            },
-            bool => return stream.writeByte("01"[@boolToInt(param)]),
-            else => |T| @compileError("unimplemented: rendering parameter of type " ++ @typeName(T)),
-        }
-    }
-
-    fn writeInstParamToStream(self: Tree, stream: var, inst: *Inst, inst_table: *const InstPtrTable) !void {
-        const info = inst_table.getValue(inst).?;
-        const prefix = if (info.fn_body == null) "@" else "%";
-        try stream.print("{}{}", .{ prefix, info.index });
-    }
 };
 
-const ParseContext = struct {
+const Analyze = struct {
     allocator: *Allocator,
-    i: usize,
-    source: []const u8,
+    old_tree: *const Module,
     errors: std.ArrayList(ErrorMsg),
     decls: std.ArrayList(*Inst),
-    global_name_map: *std.StringHashMap(usize),
-};
 
-pub fn parse(allocator: *Allocator, source: []const u8) Allocator.Error!Tree {
-    var global_name_map = std.StringHashMap(usize).init(allocator);
-    defer global_name_map.deinit();
+    const NewInst = struct {
+        ptr: *Inst,
+    };
+};
 
-    var ctx: ParseContext = .{
+pub fn analyze(allocator: *Allocator, old_tree: Module) !Module {
+    var ctx = Analyze{
         .allocator = allocator,
-        .i = 0,
-        .source = source,
+        .old_tree = &old_tree,
         .decls = std.ArrayList(*Inst).init(allocator),
         .errors = std.ArrayList(ErrorMsg).init(allocator),
-        .global_name_map = &global_name_map,
+        .inst_table = std.HashMap(*Inst, Analyze.InstData).init(allocator),
     };
-    parseRoot(&ctx) catch |err| switch (err) {
-        error.ParseFailure => {
+    defer ctx.decls.deinit();
+    defer ctx.errors.deinit();
+    defer inst_table.deinit();
+
+    analyzeRoot(&ctx) catch |err| switch (err) {
+        error.AnalyzeFailure => {
             assert(ctx.errors.items.len != 0);
         },
         else => |e| return e,
     };
-    return Tree{
+    return Module{
         .decls = ctx.decls.toOwnedSlice(),
         .errors = ctx.errors.toOwnedSlice(),
     };
 }
 
-pub fn parseRoot(ctx: *ParseContext) !void {
-    // The IR format is designed so that it can be tokenized and parsed at the same time.
-    while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
-        ';' => _ = try skipToAndOver(ctx, '\n'),
-        '@' => {
-            ctx.i += 1;
-            const ident = try skipToAndOver(ctx, ' ');
-            const opt_type = try parseOptionalType(ctx);
-            const inst = try parseInstruction(ctx, opt_type, null);
-            const ident_index = ctx.decls.items.len;
-            if (try ctx.global_name_map.put(ident, ident_index)) |_| {
-                return parseError(ctx, "redefinition of identifier '{}'", .{ident});
-            }
-            try ctx.decls.append(inst);
-            continue;
-        },
-        ' ', '\n' => continue,
-        else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}),
-    };
-}
-
-fn eatByte(ctx: *ParseContext, byte: u8) bool {
-    if (ctx.i >= ctx.source.len) return false;
-    if (ctx.source[ctx.i] != byte) return false;
-    ctx.i += 1;
-    return true;
-}
-
-fn skipSpace(ctx: *ParseContext) void {
-    while (ctx.i < ctx.source.len and (ctx.source[ctx.i] == ' ' or ctx.source[ctx.i] == '\n')) {
-        ctx.i += 1;
-    }
-}
-
-fn requireEatBytes(ctx: *ParseContext, bytes: []const u8) !void {
-    if (ctx.i + bytes.len > ctx.source.len)
-        return parseError(ctx, "unexpected EOF", .{});
-    if (!mem.eql(u8, ctx.source[ctx.i..][0..bytes.len], bytes))
-        return parseError(ctx, "expected '{}'", .{bytes});
-    ctx.i += bytes.len;
-}
-
-fn skipToAndOver(ctx: *ParseContext, byte: u8) ![]const u8 {
-    const start_i = ctx.i;
-    while (ctx.i < ctx.source.len) : (ctx.i += 1) {
-        if (ctx.source[ctx.i] == byte) {
-            const result = ctx.source[start_i..ctx.i];
-            ctx.i += 1;
-            return result;
+fn analyzeRoot(ctx: *Analyze) !void {
+    for (old_tree.decls) |decl| {
+        if (decl.cast(Inst.Export)) |export_inst| {
+            try analyzeExport(ctx, export_inst);
         }
     }
-    return parseError(ctx, "unexpected EOF", .{});
 }
 
-fn parseError(ctx: *ParseContext, comptime format: []const u8, args: var) error{ ParseFailure, OutOfMemory } {
-    const msg = try std.fmt.allocPrint(ctx.allocator, format, args);
-    (try ctx.errors.addOne()).* = .{
-        .byte_offset = ctx.i,
-        .msg = msg,
+fn analyzeExport(ctx: *Analyze, export_inst: *Inst.Export) !void {
+    const old_decl = export_inst.positionals.value;
+    const new_info = ctx.inst_table.get(old_exp_target) orelse blk: {
+        const new_decl = try analyzeDecl(ctx, old_decl);
+        const new_info: Analyze.NewInst = .{ .ptr = new_decl };
+        try ctx.inst_table.put(old_decl, new_info);
+        break :blk new_info;
     };
-    return error.ParseFailure;
-}
-
-/// Regardless of whether a `Type` is returned, it skips past the '='.
-fn parseOptionalType(ctx: *ParseContext) !?Type {
-    skipSpace(ctx);
-    if (eatByte(ctx, ':')) {
-        const type_text_untrimmed = try skipToAndOver(ctx, '=');
-        skipSpace(ctx);
-        const type_text = mem.trim(u8, type_text_untrimmed, " \n");
-        if (mem.eql(u8, type_text, "usize")) {
-            return Type.initTag(.int_usize);
-        } else if (mem.eql(u8, type_text, "noreturn")) {
-            return Type.initTag(.no_return);
-        } else {
-            return parseError(ctx, "TODO parse type '{}'", .{type_text});
-        }
-    } else {
-        skipSpace(ctx);
-        try requireEatBytes(ctx, "=");
-        skipSpace(ctx);
-        return null;
-    }
-}
-
-fn parseInstruction(
-    ctx: *ParseContext,
-    opt_type: ?Type,
-    body_ctx: ?*BodyContext,
-) error{ OutOfMemory, ParseFailure }!*Inst {
-    switch (ctx.source[ctx.i]) {
-        '"' => return parseStringLiteralConst(ctx, opt_type),
-        '0'...'9' => return parseIntegerLiteralConst(ctx, opt_type),
-        else => {},
-    }
-    const fn_name = try skipToAndOver(ctx, '(');
-    inline for (@typeInfo(Inst.Tag).Enum.fields) |field| {
-        if (mem.eql(u8, field.name, fn_name)) {
-            const tag = @field(Inst.Tag, field.name);
-            return parseInstructionGeneric(ctx, field.name, Inst.TagToType(tag), opt_type, body_ctx);
-        }
-    }
-    return parseError(ctx, "unknown instruction '{}'", .{fn_name});
-}
-
-fn parseInstructionGeneric(
-    ctx: *ParseContext,
-    comptime fn_name: []const u8,
-    comptime InstType: type,
-    opt_type: ?Type,
-    body_ctx: ?*BodyContext,
-) !*Inst {
-    const inst_specific = try ctx.allocator.create(InstType);
-    inst_specific.base = std.meta.fieldInfo(InstType, "base").default_value.?;
 
-    if (@hasField(InstType, "ty")) {
-        inst_specific.ty = opt_type orelse {
-            return parseError(ctx, "instruction '" ++ fn_name ++ "' requires type", .{});
-        };
-    }
-
-    const Positionals = @TypeOf(inst_specific.positionals);
-    inline for (@typeInfo(Positionals).Struct.fields) |arg_field| {
-        if (ctx.source[ctx.i] == ',') {
-            ctx.i += 1;
-            skipSpace(ctx);
-        } else if (ctx.source[ctx.i] == ')') {
-            return parseError(ctx, "expected positional parameter '{}'", .{arg_field.name});
-        }
-        @field(inst_specific.positionals, arg_field.name) = try parseParameterGeneric(
-            ctx,
-            arg_field.field_type,
-            body_ctx,
-        );
-        skipSpace(ctx);
-    }
-
-    const KW_Args = @TypeOf(inst_specific.kw_args);
-    inst_specific.kw_args = .{}; // assign defaults
-    skipSpace(ctx);
-    while (eatByte(ctx, ',')) {
-        skipSpace(ctx);
-        const name = try skipToAndOver(ctx, '=');
-        inline for (@typeInfo(KW_Args).Struct.fields) |arg_field| {
-            const field_name = arg_field.name;
-            if (mem.eql(u8, name, field_name)) {
-                const NonOptional = switch (@typeInfo(arg_field.field_type)) {
-                    .Optional => |info| info.child,
-                    else => arg_field.field_type,
-                };
-                @field(inst_specific.kw_args, field_name) = try parseParameterGeneric(ctx, NonOptional, body_ctx);
-                break;
-            }
-        } else {
-            return parseError(ctx, "unrecognized keyword parameter: '{}'", .{name});
-        }
-        skipSpace(ctx);
-    }
-    try requireEatBytes(ctx, ")");
-
-    return &inst_specific.base;
-}
-
-fn parseParameterGeneric(ctx: *ParseContext, comptime T: type, body_ctx: ?*BodyContext) !T {
-    if (@typeInfo(T) == .Enum) {
-        const start = ctx.i;
-        while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
-            ' ', '\n', ',', ')' => {
-                const enum_name = ctx.source[start..ctx.i];
-                return std.meta.stringToEnum(T, enum_name) orelse {
-                    return parseError(ctx, "tag '{}' not a member of enum '{}'", .{ enum_name, @typeName(T) });
-                };
-            },
-            else => continue,
-        };
-        return parseError(ctx, "unexpected EOF in enum parameter", .{});
-    }
-    switch (T) {
-        Inst.Fn.Body => return parseBody(ctx),
-        bool => {
-            const bool_value = switch (ctx.source[ctx.i]) {
-                '0' => false,
-                '1' => true,
-                else => |byte| return parseError(ctx, "expected '0' or '1' for boolean value, found {c}", .{byte}),
-            };
-            ctx.i += 1;
-            return bool_value;
-        },
-        []*Inst => {
-            try requireEatBytes(ctx, "[");
-            skipSpace(ctx);
-            if (eatByte(ctx, ']')) return &[0]*Inst{};
-
-            var instructions = std.ArrayList(*Inst).init(ctx.allocator);
-            defer instructions.deinit();
-            while (true) {
-                skipSpace(ctx);
-                try instructions.append(try parseParameterInst(ctx, body_ctx));
-                skipSpace(ctx);
-                if (!eatByte(ctx, ',')) break;
-            }
-            try requireEatBytes(ctx, "]");
-            return instructions.toOwnedSlice();
-        },
-        *Inst => return parseParameterInst(ctx, body_ctx),
-        Value => return parseError(ctx, "TODO implement parseParameterGeneric for type Value", .{}),
-        else => @compileError("Unimplemented: ir parseParameterGeneric for type " ++ @typeName(T)),
-    }
-    return parseError(ctx, "TODO parse parameter {}", .{@typeName(T)});
-}
-
-fn parseParameterInst(ctx: *ParseContext, body_ctx: ?*BodyContext) !*Inst {
-    const local_ref = switch (ctx.source[ctx.i]) {
-        '@' => false,
-        '%' => true,
-        '"' => {
-            const str_lit_inst = try parseStringLiteralConst(ctx, null);
-            try ctx.decls.append(str_lit_inst);
-            return str_lit_inst;
-        },
-        else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}),
-    };
-    const map = if (local_ref)
-        if (body_ctx) |bc|
-            &bc.name_map
-        else
-            return parseError(ctx, "referencing a % instruction in global scope", .{})
-    else
-        ctx.global_name_map;
-
-    ctx.i += 1;
-    const name_start = ctx.i;
-    while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
-        ' ', '\n', ',', ')', ']' => break,
-        else => continue,
-    };
-    const ident = ctx.source[name_start..ctx.i];
-    const kv = map.get(ident) orelse {
-        const bad_name = ctx.source[name_start - 1 .. ctx.i];
-        ctx.i = name_start - 1;
-        return parseError(ctx, "unrecognized identifier: {}", .{bad_name});
-    };
-    if (local_ref) {
-        return body_ctx.?.instructions.items[kv.value];
-    } else {
-        return ctx.decls.items[kv.value];
-    }
-}
-
-const BodyContext = struct {
-    instructions: std.ArrayList(*Inst),
-    name_map: std.StringHashMap(usize),
-};
-
-fn parseBody(ctx: *ParseContext) !Inst.Fn.Body {
-    var body_context = BodyContext{
-        .instructions = std.ArrayList(*Inst).init(ctx.allocator),
-        .name_map = std.StringHashMap(usize).init(ctx.allocator),
-    };
-    defer body_context.instructions.deinit();
-    defer body_context.name_map.deinit();
-
-    try requireEatBytes(ctx, "{");
-    skipSpace(ctx);
-
-    while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
-        ';' => _ = try skipToAndOver(ctx, '\n'),
-        '%' => {
-            ctx.i += 1;
-            const ident = try skipToAndOver(ctx, ' ');
-            const opt_type = try parseOptionalType(ctx);
-            const inst = try parseInstruction(ctx, opt_type, &body_context);
-            const ident_index = body_context.instructions.items.len;
-            if (try body_context.name_map.put(ident, ident_index)) |_| {
-                return parseError(ctx, "redefinition of identifier '{}'", .{ident});
-            }
-            try body_context.instructions.append(inst);
-            continue;
-        },
-        ' ', '\n' => continue,
-        '}' => {
-            ctx.i += 1;
-            break;
-        },
-        else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}),
-    };
-
-    return Inst.Fn.Body{
-        .instructions = body_context.instructions.toOwnedSlice(),
-    };
-}
-
-fn parseStringLiteralConst(ctx: *ParseContext, opt_type: ?Type) !*Inst {
-    const start = ctx.i;
-    ctx.i += 1; // skip over '"'
-
-    while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
-        '"' => {
-            ctx.i += 1;
-            const span = ctx.source[start..ctx.i];
-            var bad_index: usize = undefined;
-            const parsed = std.zig.parseStringLiteral(ctx.allocator, span, &bad_index) catch |err| switch (err) {
-                error.InvalidCharacter => {
-                    ctx.i = start + bad_index;
-                    const bad_byte = ctx.source[ctx.i];
-                    return parseError(ctx, "invalid string literal character: '{c}'\n", .{bad_byte});
-                },
-                else => |e| return e,
-            };
-            const const_inst = try ctx.allocator.create(Inst.Constant);
-            errdefer ctx.allocator.destroy(const_inst);
-
-            const bytes_payload = try ctx.allocator.create(Value.Payload.Bytes);
-            errdefer ctx.allocator.destroy(bytes_payload);
-            bytes_payload.* = .{ .data = parsed };
-
-            const ty = opt_type orelse blk: {
-                const array_payload = try ctx.allocator.create(Type.Payload.Array_u8_Sentinel0);
-                errdefer ctx.allocator.destroy(array_payload);
-                array_payload.* = .{ .len = parsed.len };
-
-                const ty_payload = try ctx.allocator.create(Type.Payload.SingleConstPointer);
-                errdefer ctx.allocator.destroy(ty_payload);
-                ty_payload.* = .{ .pointee_type = Type.initPayload(&array_payload.base) };
-
-                break :blk Type.initPayload(&ty_payload.base);
-            };
-
-            const_inst.* = .{
-                .ty = ty,
-                .positionals = .{ .value = Value.initPayload(&bytes_payload.base) },
-                .kw_args = .{},
-            };
-            return &const_inst.base;
-        },
-        '\\' => {
-            ctx.i += 1;
-            if (ctx.i >= ctx.source.len) break;
-            continue;
-        },
-        else => continue,
-    };
-    return parseError(ctx, "unexpected EOF in string literal", .{});
-}
-
-fn parseIntegerLiteralConst(ctx: *ParseContext, opt_type: ?Type) !*Inst {
-    const start = ctx.i;
-    while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
-        '0'...'9' => continue,
-        else => break,
-    };
-    const number_text = ctx.source[start..ctx.i];
-    const number = std.fmt.parseInt(u64, number_text, 10) catch |err| switch (err) {
-        error.Overflow => return parseError(ctx, "TODO handle big integers", .{}),
-        error.InvalidCharacter => return parseError(ctx, "invalid integer literal", .{}),
-    };
-
-    const int_payload = try ctx.allocator.create(Value.Payload.Int_u64);
-    errdefer ctx.allocator.destroy(int_payload);
-    int_payload.* = .{ .int = number };
-
-    const const_inst = try ctx.allocator.create(Inst.Constant);
-    errdefer ctx.allocator.destroy(const_inst);
-
-    const_inst.* = .{
-        .ty = opt_type orelse Type.initTag(.int_comptime),
-        .positionals = .{ .value = Value.initPayload(&int_payload.base) },
-        .kw_args = .{},
-    };
-    return &const_inst.base;
+    //const exp_type = new_info.ptr.ty();
+    //switch (exp_type.zigTypeTag()) {
+    //    .Fn => {
+    //        if () |kv| {
+    //            kv.value
+    //        }
+    //        return analyzeExportFn(ctx, exp_target.cast(Inst.,
+    //    },
+    //    else => return ctx.fail("unable to export type '{}'", .{exp_type}),
+    //}
 }
 
 pub fn main() anyerror!void {
@@ -703,9 +114,9 @@ pub fn main() anyerror!void {
     const src_path = args[1];
     const debug_error_trace = true;
 
-    const source = try std.fs.cwd().readFileAlloc(allocator, src_path, std.math.maxInt(u32));
+    const source = try std.fs.cwd().readFileAllocOptions(allocator, src_path, std.math.maxInt(u32), 1, 0);
 
-    var tree = try parse(allocator, source);
+    var tree = try text.parse(allocator, source);
     defer tree.deinit();
 
     if (tree.errors.len != 0) {
@@ -719,8 +130,19 @@ pub fn main() anyerror!void {
 
     tree.dump();
 
-    //const new_tree = try semanticallyAnalyze(tree);
+    //const new_tree = try analyze(allocator, tree);
     //defer new_tree.deinit();
+
+    //if (new_tree.errors.len != 0) {
+    //    for (new_tree.errors) |err_msg| {
+    //        const loc = findLineColumn(source, err_msg.byte_offset);
+    //        std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });
+    //    }
+    //    if (debug_error_trace) return error.ParseFailure;
+    //    std.process.exit(1);
+    //}
+
+    //new_tree.dump();
 }
 
 fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } {
@@ -741,4 +163,4 @@ fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize,
 }
 
 // Performance optimization ideas:
-// * make the source code sentinel-terminated, so that all the checks against the length can be skipped
+// * when analyzing use a field in the Inst instead of HashMap to track corresponding instructions

@@ -18,7 +18,7 @@ pub const Type = extern union {
 
     pub fn zigTypeTag(self: Type) std.builtin.TypeId {
         switch (self.tag()) {
-            .int_u8, .int_usize => return .Int,
+            .@"u8", .@"usize" => return .Int,
             .array_u8, .array_u8_sentinel_0 => return .Array,
             .single_const_pointer => return .Pointer,
         }
@@ -52,10 +52,35 @@ pub const Type = extern union {
         var ty = self;
         while (true) {
             switch (ty.tag()) {
-                .no_return => return out_stream.writeAll("noreturn"),
-                .int_comptime => return out_stream.writeAll("comptime_int"),
-                .int_u8 => return out_stream.writeAll("u8"),
-                .int_usize => return out_stream.writeAll("usize"),
+                @"u8",
+                @"i8",
+                @"isize",
+                @"usize",
+                @"noreturn",
+                @"void",
+                @"c_short",
+                @"c_ushort",
+                @"c_int",
+                @"c_uint",
+                @"c_long",
+                @"c_ulong",
+                @"c_longlong",
+                @"c_ulonglong",
+                @"c_longdouble",
+                @"c_void",
+                @"f16",
+                @"f32",
+                @"f64",
+                @"f128",
+                @"bool",
+                @"void",
+                @"type",
+                @"anyerror",
+                @"comptime_int",
+                @"comptime_float",
+                @"noreturn",
+                => |t| return out_stream.writeAll(@tagName(t)),
+
                 .array_u8_sentinel_0 => {
                     const payload = @fieldParentPtr(Payload.Array_u8_Sentinel0, "base", ty.ptr_otherwise);
                     return out_stream.print("[{}:0]u8", .{payload.len});
@@ -85,17 +110,38 @@ pub const Type = extern union {
     /// See `zigTypeTag` for the function that corresponds to `std.builtin.TypeId`.
     pub const Tag = enum {
         // The first section of this enum are tags that require no payload.
-        no_return,
-        int_comptime,
-        int_u8,
-        int_usize, // See last_no_payload_tag below.
+        @"u8",
+        @"i8",
+        @"isize",
+        @"usize",
+        @"c_short",
+        @"c_ushort",
+        @"c_int",
+        @"c_uint",
+        @"c_long",
+        @"c_ulong",
+        @"c_longlong",
+        @"c_ulonglong",
+        @"c_longdouble",
+        @"c_void",
+        @"f16",
+        @"f32",
+        @"f64",
+        @"f128",
+        @"bool",
+        @"void",
+        @"type",
+        @"anyerror",
+        @"comptime_int",
+        @"comptime_float",
+        @"noreturn", // See last_no_payload_tag below.
         // After this, the tag requires a payload.
 
         array_u8_sentinel_0,
         array,
         single_const_pointer,
 
-        pub const last_no_payload_tag = Tag.int_usize;
+        pub const last_no_payload_tag = Tag.@"noreturn";
         pub const no_payload_count = @enumToInt(last_no_payload_tag) + 1;
     };

@@ -1,33 +1,50 @@
 test "hello world IR" {
     exeCmp(
-        \\@0 = "Hello, world!\n"
+        \\@0 = str("Hello, world!\n")
+        \\@1 = primitive(void)
+        \\@2 = primitive(usize)
+        \\@3 = fntype([], @1, cc=Naked)
+        \\@4 = int(0)
+        \\@5 = int(1)
+        \\@6 = int(231)
+        \\@7 = str("len")
         \\
-        \\@1 = fn({
-        \\  %0 : usize = 1 ;SYS_write
-        \\  %1 : usize = 1 ;STDOUT_FILENO
+        \\@8 = fn(@3, {
+        \\  %0 = as(@2, @5) ; SYS_write
+        \\  %1 = as(@2, @5) ; STDOUT_FILENO
         \\  %2 = ptrtoint(@0) ; msg ptr
-        \\  %3 = fieldptr(@0, "len") ; msg len ptr
+        \\  %3 = fieldptr(@0, @7) ; msg len ptr
         \\  %4 = deref(%3) ; msg len
-        \\  %5 = asm("syscall",
+        \\  %sysoutreg = str("={rax}")
+        \\  %rax = str("{rax}")
+        \\  %rdi = str("{rdi}")
+        \\  %rsi = str("{rsi}")
+        \\  %rdx = str("{rdx}")
+        \\  %rcx = str("rcx")
+        \\  %r11 = str("r11")
+        \\  %memory = str("memory")
+        \\  %syscall = str("syscall")
+        \\  %5 = asm(%syscall, @2,
         \\    volatile=1,
-        \\    output="={rax}",
-        \\    inputs=["{rax}", "{rdi}", "{rsi}", "{rdx}"],
-        \\    clobbers=["rcx", "r11", "memory"],
+        \\    output=%sysoutreg,
+        \\    inputs=[%rax, %rdi, %rsi, %rdx],
+        \\    clobbers=[%rcx, %r11, %memory],
         \\    args=[%0, %1, %2, %4])
         \\
-        \\  %6 : usize = 231 ;SYS_exit_group
-        \\  %7 : usize = 0   ;exit code
-        \\  %8 = asm("syscall",
+        \\  %6 = as(@2, @6) ;SYS_exit_group
+        \\  %7 = as(@2, @4) ;exit code
+        \\  %8 = asm(%syscall, @2,
         \\    volatile=1,
-        \\    output="={rax}",
-        \\    inputs=["{rax}", "{rdi}"],
-        \\    clobbers=["rcx", "r11", "memory"],
+        \\    output=%sysoutreg,
+        \\    inputs=[%rax, %rdi],
+        \\    clobbers=[%rcx, %r11, %memory],
         \\    args=[%6, %7])
         \\
         \\  %9 = unreachable()
-        \\}, cc=naked)
+        \\})
         \\
-        \\@2 = export("_start", @1)
+        \\@9 = str("_start")
+        \\@10 = export(@9, @8)
     ,
         \\Hello, world!
         \\

Commit cc1c2bd568

Commit `cc1c2bd568`