Commit cc1c2bd568
lib/std/fs.zig
@@ -1012,25 +1012,27 @@ pub const Dir = struct {
/// On success, caller owns returned buffer.
/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
pub fn readFileAlloc(self: Dir, allocator: *mem.Allocator, file_path: []const u8, max_bytes: usize) ![]u8 {
- return self.readFileAllocAligned(allocator, file_path, max_bytes, @alignOf(u8));
+ return self.readFileAllocOptions(allocator, file_path, max_bytes, @alignOf(u8), null);
}
/// On success, caller owns returned buffer.
/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
- pub fn readFileAllocAligned(
+ /// Allows specifying alignment and a sentinel value.
+ pub fn readFileAllocOptions(
self: Dir,
allocator: *mem.Allocator,
file_path: []const u8,
max_bytes: usize,
- comptime A: u29,
- ) ![]align(A) u8 {
+ comptime alignment: u29,
+ comptime optional_sentinel: ?u8,
+ ) !(if (optional_sentinel) |s| [:s]align(alignment) u8 else []align(alignment) u8) {
var file = try self.openFile(file_path, .{});
defer file.close();
const size = math.cast(usize, try file.getEndPos()) catch math.maxInt(usize);
if (size > max_bytes) return error.FileTooBig;
- const buf = try allocator.alignedAlloc(u8, A, size);
+ const buf = try allocator.allocWithOptions(u8, size, alignment, optional_sentinel);
errdefer allocator.free(buf);
try file.inStream().readNoEof(buf);
lib/std/mem.zig
@@ -105,6 +105,31 @@ pub const Allocator = struct {
return self.alignedAlloc(T, null, n);
}
+ /// Allocates `n` items of type `Elem`, optionally with a specific alignment
+ /// and optionally terminated by a sentinel.
+ ///
+ /// When `optional_sentinel` is non-null, `n + 1` items are allocated, the item
+ /// at index `n` is set to the sentinel, and the returned slice has length `n`.
+ /// When it is null this is equivalent to `alignedAlloc`.
+ pub fn allocWithOptions(
+     self: *Allocator,
+     comptime Elem: type,
+     n: usize,
+     /// null means naturally aligned
+     comptime optional_alignment: ?u29,
+     comptime optional_sentinel: ?Elem,
+ ) Error!AllocWithOptionsPayload(Elem, optional_alignment, optional_sentinel) {
+     if (optional_sentinel) |sentinel| {
+         // One extra slot holds the sentinel; it is not counted in the slice length.
+         const ptr = try self.alignedAlloc(Elem, optional_alignment, n + 1);
+         ptr[n] = sentinel;
+         return ptr[0..n :sentinel];
+     } else {
+         // Must go through `self`: `alignedAlloc` is a method of this allocator.
+         return self.alignedAlloc(Elem, optional_alignment, n);
+     }
+ }
+
+ /// Computes the slice type returned by `allocWithOptions`: a slice of `Elem`
+ /// aligned to `alignment` (or to the natural alignment of `Elem` when null),
+ /// sentinel-terminated when `sentinel` is non-null.
+ fn AllocWithOptionsPayload(comptime Elem: type, comptime alignment: ?u29, comptime sentinel: ?Elem) type {
+     if (sentinel) |s| {
+         return [:s]align(alignment orelse @alignOf(Elem)) Elem;
+     } else {
+         return []align(alignment orelse @alignOf(Elem)) Elem;
+     }
+ }
+
/// Allocates an array of `n + 1` items of type `T` and sets the first `n`
/// items to `undefined` and the last item to `sentinel`. Depending on the
/// Allocator implementation, it may be required to call `free` once the
@@ -113,10 +138,10 @@ pub const Allocator = struct {
/// call `free` when done.
///
/// For allocating a single item, see `create`.
+ ///
+ /// Deprecated; use `allocWithOptions`.
pub fn allocSentinel(self: *Allocator, comptime Elem: type, n: usize, comptime sentinel: Elem) Error![:sentinel]Elem {
- var ptr = try self.alloc(Elem, n + 1);
- ptr[n] = sentinel;
- return ptr[0..n :sentinel];
+ return self.allocWithOptions(Elem, n, null, sentinel);
}
pub fn alignedAlloc(
src-self-hosted/ir/text.zig
@@ -0,0 +1,712 @@
+//! This file has to do with parsing and rendering the ZIR text format.
+const std = @import("std");
+const mem = std.mem;
+const Allocator = std.mem.Allocator;
+const Value = @import("../value.zig").Value;
+const assert = std.debug.assert;
+const ir = @import("../ir.zig");
+const BigInt = std.math.big.Int;
+
+/// These are instructions that correspond to the ZIR text format. See `ir.Inst` for
+/// in-memory, analyzed instructions with types and values.
+pub const Inst = struct {
+ tag: Tag,
+
+ /// These names are used directly as the instruction names in the text format.
+ pub const Tag = enum {
+ str,
+ int,
+ ptrtoint,
+ fieldptr,
+ deref,
+ as,
+ @"asm",
+ @"unreachable",
+ @"fn",
+ @"export",
+ primitive,
+ fntype,
+ };
+
+ pub fn TagToType(tag: Tag) type {
+ return switch (tag) {
+ .str => Str,
+ .int => Int,
+ .ptrtoint => PtrToInt,
+ .fieldptr => FieldPtr,
+ .deref => Deref,
+ .as => As,
+ .@"asm" => Assembly,
+ .@"unreachable" => Unreachable,
+ .@"fn" => Fn,
+ .@"export" => Export,
+ .primitive => Primitive,
+ .fntype => FnType,
+ };
+ }
+
+    /// Downcasts `base` to the specific instruction type `T`.
+    /// Returns null when `base.tag` differs from the tag stored in the default
+    /// value of `T`'s `base` field.
+    pub fn cast(base: *Inst, comptime T: type) ?*T {
+        const default_base = std.meta.fieldInfo(T, "base").default_value.?;
+        return if (base.tag == default_base.tag)
+            @fieldParentPtr(T, "base", base)
+        else
+            null;
+    }
+
+ pub const Str = struct {
+ base: Inst = Inst{ .tag = .str },
+
+ positionals: struct {
+ bytes: []u8,
+ },
+ kw_args: struct {},
+ };
+
+ pub const Int = struct {
+ base: Inst = Inst{ .tag = .int },
+
+ positionals: struct {
+ int: BigInt,
+ },
+ kw_args: struct {},
+ };
+
+ pub const PtrToInt = struct {
+ base: Inst = Inst{ .tag = .ptrtoint },
+
+ positionals: struct {
+ ptr: *Inst,
+ },
+ kw_args: struct {},
+ };
+
+ pub const FieldPtr = struct {
+ base: Inst = Inst{ .tag = .fieldptr },
+
+ positionals: struct {
+ object_ptr: *Inst,
+ field_name: *Inst,
+ },
+ kw_args: struct {},
+ };
+
+ pub const Deref = struct {
+ base: Inst = Inst{ .tag = .deref },
+
+ positionals: struct {
+ ptr: *Inst,
+ },
+ kw_args: struct {},
+ };
+
+ pub const As = struct {
+ base: Inst = Inst{ .tag = .as },
+
+ positionals: struct {
+ dest_type: *Inst,
+ value: *Inst,
+ },
+ kw_args: struct {},
+ };
+
+ pub const Assembly = struct {
+ base: Inst = Inst{ .tag = .@"asm" },
+
+ positionals: struct {
+ asm_source: *Inst,
+ return_type: *Inst,
+ },
+ kw_args: struct {
+ @"volatile": bool = false,
+ output: ?*Inst = null,
+ inputs: []*Inst = &[0]*Inst{},
+ clobbers: []*Inst = &[0]*Inst{},
+ args: []*Inst = &[0]*Inst{},
+ },
+ };
+
+ pub const Unreachable = struct {
+ base: Inst = Inst{ .tag = .@"unreachable" },
+
+ positionals: struct {},
+ kw_args: struct {},
+ };
+
+ pub const Fn = struct {
+ base: Inst = Inst{ .tag = .@"fn" },
+
+ positionals: struct {
+ fn_type: *Inst,
+ body: Body,
+ },
+ kw_args: struct {},
+
+ pub const Body = struct {
+ instructions: []*Inst,
+ };
+ };
+
+ pub const Export = struct {
+ base: Inst = Inst{ .tag = .@"export" },
+
+ positionals: struct {
+ symbol_name: *Inst,
+ value: *Inst,
+ },
+ kw_args: struct {},
+ };
+
+ pub const Primitive = struct {
+ base: Inst = Inst{ .tag = .primitive },
+
+ positionals: struct {
+ tag: BuiltinType,
+ },
+ kw_args: struct {},
+
+ pub const BuiltinType = enum {
+ @"isize",
+ @"usize",
+ @"c_short",
+ @"c_ushort",
+ @"c_int",
+ @"c_uint",
+ @"c_long",
+ @"c_ulong",
+ @"c_longlong",
+ @"c_ulonglong",
+ @"c_longdouble",
+ @"c_void",
+ @"f16",
+ @"f32",
+ @"f64",
+ @"f128",
+ @"bool",
+ @"void",
+ @"noreturn",
+ @"type",
+ @"anyerror",
+ @"comptime_int",
+ @"comptime_float",
+ };
+ };
+
+ pub const FnType = struct {
+ base: Inst = Inst{ .tag = .fntype },
+
+ positionals: struct {
+ param_types: []*Inst,
+ return_type: *Inst,
+ },
+ kw_args: struct {
+ cc: std.builtin.CallingConvention = .Unspecified,
+ },
+ };
+};
+
+pub const ErrorMsg = struct {
+ byte_offset: usize,
+ msg: []const u8,
+};
+
+pub const Module = struct {
+ decls: []*Inst,
+ errors: []ErrorMsg,
+
+ pub fn deinit(self: *Module) void {
+ // TODO resource deallocation
+ self.* = undefined;
+ }
+
+ /// This is a debugging utility for rendering the tree to stderr.
+ pub fn dump(self: Module) void {
+ self.writeToStream(std.heap.page_allocator, std.io.getStdErr().outStream()) catch {};
+ }
+
+ const InstPtrTable = std.AutoHashMap(*Inst, struct { index: usize, fn_body: ?*Inst.Fn.Body });
+
+ pub fn writeToStream(self: Module, allocator: *Allocator, stream: var) !void {
+ // First, build a map of *Inst to @ or % indexes
+ var inst_table = InstPtrTable.init(allocator);
+ defer inst_table.deinit();
+
+ try inst_table.ensureCapacity(self.decls.len);
+
+ for (self.decls) |decl, decl_i| {
+ try inst_table.putNoClobber(decl, .{ .index = decl_i, .fn_body = null });
+
+ if (decl.cast(Inst.Fn)) |fn_inst| {
+ for (fn_inst.positionals.body.instructions) |inst, inst_i| {
+ try inst_table.putNoClobber(inst, .{ .index = inst_i, .fn_body = &fn_inst.positionals.body });
+ }
+ }
+ }
+
+ for (self.decls) |decl, i| {
+ try stream.print("@{} ", .{i});
+ try self.writeInstToStream(stream, decl, &inst_table);
+ try stream.writeByte('\n');
+ }
+ }
+
+ fn writeInstToStream(
+ self: Module,
+ stream: var,
+ decl: *Inst,
+ inst_table: *const InstPtrTable,
+ ) @TypeOf(stream).Error!void {
+ // TODO I tried implementing this with an inline for loop and hit a compiler bug
+ switch (decl.tag) {
+ .str => return self.writeInstToStreamGeneric(stream, .str, decl, inst_table),
+ .int => return self.writeInstToStreamGeneric(stream, .int, decl, inst_table),
+ .ptrtoint => return self.writeInstToStreamGeneric(stream, .ptrtoint, decl, inst_table),
+ .fieldptr => return self.writeInstToStreamGeneric(stream, .fieldptr, decl, inst_table),
+ .deref => return self.writeInstToStreamGeneric(stream, .deref, decl, inst_table),
+ .as => return self.writeInstToStreamGeneric(stream, .as, decl, inst_table),
+ .@"asm" => return self.writeInstToStreamGeneric(stream, .@"asm", decl, inst_table),
+ .@"unreachable" => return self.writeInstToStreamGeneric(stream, .@"unreachable", decl, inst_table),
+ .@"fn" => return self.writeInstToStreamGeneric(stream, .@"fn", decl, inst_table),
+ .@"export" => return self.writeInstToStreamGeneric(stream, .@"export", decl, inst_table),
+ .primitive => return self.writeInstToStreamGeneric(stream, .primitive, decl, inst_table),
+ .fntype => return self.writeInstToStreamGeneric(stream, .fntype, decl, inst_table),
+ }
+ }
+
+    /// Renders one instruction as `= tag(positional, ..., name=value, ...)`.
+    /// `base` must be the `base` field of an instruction whose concrete type is
+    /// `Inst.TagToType(inst_tag)`.
+    /// Positional arguments are written in field order. Keyword arguments are
+    /// written as `name=value`; optional keyword arguments that are null are
+    /// omitted entirely, including their separator.
+    fn writeInstToStreamGeneric(
+        self: Module,
+        stream: var,
+        comptime inst_tag: Inst.Tag,
+        base: *Inst,
+        inst_table: *const InstPtrTable,
+    ) !void {
+        const SpecificInst = Inst.TagToType(inst_tag);
+        const inst = @fieldParentPtr(SpecificInst, "base", base);
+        const Positionals = @TypeOf(inst.positionals);
+        try stream.writeAll("= " ++ @tagName(inst_tag) ++ "(");
+        const pos_fields = @typeInfo(Positionals).Struct.fields;
+        inline for (pos_fields) |arg_field, i| {
+            if (i != 0) {
+                try stream.writeAll(", ");
+            }
+            try self.writeParamToStream(stream, @field(inst.positionals, arg_field.name), inst_table);
+        }
+
+        // Whether a separator is needed depends on which optional keyword
+        // arguments are present, which is only known at runtime. The separator
+        // is therefore emitted only right before an argument that is written.
+        var need_comma: bool = pos_fields.len != 0;
+        const KW_Args = @TypeOf(inst.kw_args);
+        inline for (@typeInfo(KW_Args).Struct.fields) |arg_field| {
+            if (@typeInfo(arg_field.field_type) == .Optional) {
+                if (@field(inst.kw_args, arg_field.name)) |non_optional| {
+                    if (need_comma) {
+                        try stream.writeAll(", ");
+                    }
+                    try stream.print("{}=", .{arg_field.name});
+                    try self.writeParamToStream(stream, non_optional, inst_table);
+                    need_comma = true;
+                }
+            } else {
+                if (need_comma) {
+                    try stream.writeAll(", ");
+                }
+                try stream.print("{}=", .{arg_field.name});
+                try self.writeParamToStream(stream, @field(inst.kw_args, arg_field.name), inst_table);
+                need_comma = true;
+            }
+        }
+
+        try stream.writeByte(')');
+    }
+
+ fn writeParamToStream(self: Module, stream: var, param: var, inst_table: *const InstPtrTable) !void {
+ if (@typeInfo(@TypeOf(param)) == .Enum) {
+ return stream.writeAll(@tagName(param));
+ }
+ switch (@TypeOf(param)) {
+ Value => return stream.print("{}", .{param}),
+ *Inst => return self.writeInstParamToStream(stream, param, inst_table),
+ []*Inst => {
+ try stream.writeByte('[');
+ for (param) |inst, i| {
+ if (i != 0) {
+ try stream.writeAll(", ");
+ }
+ try self.writeInstParamToStream(stream, inst, inst_table);
+ }
+ try stream.writeByte(']');
+ },
+ Inst.Fn.Body => {
+ try stream.writeAll("{\n");
+ for (param.instructions) |inst, i| {
+ try stream.print(" %{} ", .{i});
+ try self.writeInstToStream(stream, inst, inst_table);
+ try stream.writeByte('\n');
+ }
+ try stream.writeByte('}');
+ },
+ bool => return stream.writeByte("01"[@boolToInt(param)]),
+ []u8 => return std.zig.renderStringLiteral(param, stream),
+ BigInt => return stream.print("{}", .{param}),
+ else => |T| @compileError("unimplemented: rendering parameter of type " ++ @typeName(T)),
+ }
+ }
+
+ fn writeInstParamToStream(self: Module, stream: var, inst: *Inst, inst_table: *const InstPtrTable) !void {
+ const info = inst_table.getValue(inst).?;
+ const prefix = if (info.fn_body == null) "@" else "%";
+ try stream.print("{}{}", .{ prefix, info.index });
+ }
+};
+
+pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module {
+ var global_name_map = std.StringHashMap(usize).init(allocator);
+ defer global_name_map.deinit();
+
+ var parser: Parser = .{
+ .allocator = allocator,
+ .i = 0,
+ .source = source,
+ .decls = std.ArrayList(*Inst).init(allocator),
+ .errors = std.ArrayList(ErrorMsg).init(allocator),
+ .global_name_map = &global_name_map,
+ };
+ parser.parseRoot() catch |err| switch (err) {
+ error.ParseFailure => {
+ assert(parser.errors.items.len != 0);
+ },
+ else => |e| return e,
+ };
+ return Module{
+ .decls = parser.decls.toOwnedSlice(),
+ .errors = parser.errors.toOwnedSlice(),
+ };
+}
+
+const Parser = struct {
+ allocator: *Allocator,
+ i: usize,
+ source: [:0]const u8,
+ errors: std.ArrayList(ErrorMsg),
+ decls: std.ArrayList(*Inst),
+ global_name_map: *std.StringHashMap(usize),
+
+ const Body = struct {
+ instructions: std.ArrayList(*Inst),
+ name_map: std.StringHashMap(usize),
+ };
+
+    /// Parses a function body: `{`, then any number of `; comment` lines and
+    /// `%name = instruction(...)` definitions, then `}`.
+    /// Each definition is appended to the body's instruction list and its name
+    /// is recorded so later instructions can refer to it with `%name`.
+    /// Fails on a redefined name or on any byte that cannot start an item.
+    /// The returned instruction slice is owned by the caller.
+    fn parseBody(self: *Parser) !Inst.Fn.Body {
+        var body_context = Body{
+            .instructions = std.ArrayList(*Inst).init(self.allocator),
+            .name_map = std.StringHashMap(usize).init(self.allocator),
+        };
+        defer body_context.instructions.deinit();
+        defer body_context.name_map.deinit();
+
+        try requireEatBytes(self, "{");
+        skipSpace(self);
+
+        // Each prong consumes exactly the bytes it handles. There is no
+        // loop-wide increment, because the comment and instruction prongs
+        // already leave `self.i` on the next unread byte.
+        while (true) switch (self.source[self.i]) {
+            ';' => _ = try skipToAndOver(self, '\n'),
+            '%' => {
+                self.i += 1;
+                const ident = try skipToAndOver(self, ' ');
+                skipSpace(self);
+                try requireEatBytes(self, "=");
+                skipSpace(self);
+                const inst = try parseInstruction(self, &body_context);
+                const ident_index = body_context.instructions.items.len;
+                if (try body_context.name_map.put(ident, ident_index)) |_| {
+                    return self.fail("redefinition of identifier '{}'", .{ident});
+                }
+                try body_context.instructions.append(inst);
+            },
+            ' ', '\n' => self.i += 1,
+            '}' => {
+                self.i += 1;
+                break;
+            },
+            else => |byte| return self.failByte(byte),
+        };
+
+        return Inst.Fn.Body{
+            .instructions = body_context.instructions.toOwnedSlice(),
+        };
+    }
+
+    /// Parses a double-quoted string literal starting at the current position
+    /// and returns its decoded bytes, allocated with the parser's allocator.
+    /// Scans to the closing quote (a backslash skips the byte after it), then
+    /// hands the whole quoted span to `std.zig.parseStringLiteral`.
+    /// Fails on an invalid character or when the source ends before the
+    /// closing quote.
+    fn parseStringLiteral(self: *Parser) ![]u8 {
+        const start = self.i;
+        try self.requireEatBytes("\"");
+
+        while (true) : (self.i += 1) switch (self.source[self.i]) {
+            '"' => {
+                self.i += 1;
+                const span = self.source[start..self.i];
+                var bad_index: usize = undefined;
+                const parsed = std.zig.parseStringLiteral(self.allocator, span, &bad_index) catch |err| switch (err) {
+                    error.InvalidCharacter => {
+                        self.i = start + bad_index;
+                        const bad_byte = self.source[self.i];
+                        return self.fail("invalid string literal character: '{c}'\n", .{bad_byte});
+                    },
+                    else => |e| return e,
+                };
+                return parsed;
+            },
+            '\\' => {
+                // Step onto the escaped byte; the loop increment then moves past it.
+                self.i += 1;
+                // A backslash right before the terminating 0 must not be
+                // skipped over, or the scan would run past the end of `source`.
+                if (self.source[self.i] == 0) return self.failByte(0);
+                continue;
+            },
+            0 => return self.failByte(0),
+            else => continue,
+        };
+    }
+
+    /// Parses a base-10 integer literal (an optional leading `-` followed by
+    /// the digits `0`-`9`) at the current position into a big integer
+    /// allocated with the parser's allocator.
+    /// On failure the position is reset to the start of the literal, so the
+    /// recorded error points at it.
+    fn parseIntegerLiteral(self: *Parser) !BigInt {
+        const start = self.i;
+        if (self.source[self.i] == '-') self.i += 1;
+        while (true) : (self.i += 1) switch (self.source[self.i]) {
+            '0'...'9' => continue,
+            else => break,
+        };
+        const number_text = self.source[start..self.i];
+        var result = try BigInt.init(self.allocator);
+        // Release the big integer if parsing its text fails below.
+        errdefer result.deinit();
+        result.setString(10, number_text) catch |err| {
+            self.i = start;
+            switch (err) {
+                error.InvalidBase => unreachable,
+                error.InvalidCharForDigit => return self.fail("invalid digit in integer literal", .{}),
+                error.DigitTooLargeForBase => return self.fail("digit too large in integer literal", .{}),
+                else => |e| return e,
+            }
+        };
+        return result;
+    }
+
+    /// Parses the top level of a ZIR text file: any number of `; comment`
+    /// lines and `@name = instruction(...)` declarations, up to the
+    /// terminating 0 byte.
+    /// Each declaration is appended to `decls` and its name is recorded in the
+    /// global name map. Fails on a redefined name or an unexpected byte.
+    fn parseRoot(self: *Parser) !void {
+        // The IR format is designed so that it can be tokenized and parsed at the same time.
+        // Each prong consumes exactly the bytes it handles. There is no
+        // loop-wide increment, because the comment and declaration prongs
+        // already leave `self.i` on the next unread byte.
+        while (true) switch (self.source[self.i]) {
+            ';' => _ = try skipToAndOver(self, '\n'),
+            '@' => {
+                self.i += 1;
+                const ident = try skipToAndOver(self, ' ');
+                skipSpace(self);
+                try requireEatBytes(self, "=");
+                skipSpace(self);
+                const inst = try parseInstruction(self, null);
+                const ident_index = self.decls.items.len;
+                if (try self.global_name_map.put(ident, ident_index)) |_| {
+                    return self.fail("redefinition of identifier '{}'", .{ident});
+                }
+                try self.decls.append(inst);
+            },
+            ' ', '\n' => self.i += 1,
+            0 => break,
+            else => |byte| return self.fail("unexpected byte: '{c}'", .{byte}),
+        };
+    }
+
+    /// Consumes the current byte if it equals `byte`.
+    /// Returns whether a byte was consumed.
+    fn eatByte(self: *Parser, byte: u8) bool {
+        const matched = self.source[self.i] == byte;
+        if (matched) self.i += 1;
+        return matched;
+    }
+
+    /// Advances past any run of spaces and newlines at the current position.
+    fn skipSpace(self: *Parser) void {
+        while (true) : (self.i += 1) switch (self.source[self.i]) {
+            ' ', '\n' => continue,
+            else => return,
+        };
+    }
+
+    /// Consumes exactly `bytes` from the current position.
+    /// On a mismatch the position is restored to where matching began and an
+    /// "expected" error is recorded.
+    fn requireEatBytes(self: *Parser, bytes: []const u8) !void {
+        const rewind_to = self.i;
+        for (bytes) |expected| {
+            if (eatByte(self, expected)) continue;
+            self.i = rewind_to;
+            return self.fail("expected '{}'", .{bytes});
+        }
+    }
+
+    /// Scans forward to the next occurrence of `byte`, returns the text before
+    /// it, and leaves the position just after it.
+    /// Fails with "unexpected EOF" if the terminating 0 is reached first.
+    fn skipToAndOver(self: *Parser, byte: u8) ![]const u8 {
+        const begin = self.i;
+        while (true) : (self.i += 1) {
+            const current = self.source[self.i];
+            if (current == 0) break;
+            if (current != byte) continue;
+
+            const skipped = self.source[begin..self.i];
+            self.i += 1;
+            return skipped;
+        }
+        return self.fail("unexpected EOF", .{});
+    }
+
+ /// ParseFailure is an internal error code; handled in `parse`.
+ const InnerError = error{ ParseFailure, OutOfMemory };
+
+    /// Records an error for an unexpected `byte` at the current position;
+    /// a 0 byte is reported as an unexpected end of file.
+    fn failByte(self: *Parser, byte: u8) InnerError {
+        return switch (byte) {
+            0 => self.fail("unexpected EOF", .{}),
+            else => self.fail("unexpected byte: '{c}'", .{byte}),
+        };
+    }
+
+ fn fail(self: *Parser, comptime format: []const u8, args: var) InnerError {
+ @setCold(true);
+ const msg = try std.fmt.allocPrint(self.allocator, format, args);
+ (try self.errors.addOne()).* = .{
+ .byte_offset = self.i,
+ .msg = msg,
+ };
+ return error.ParseFailure;
+ }
+
+ fn parseInstruction(self: *Parser, body_ctx: ?*Body) InnerError!*Inst {
+ const fn_name = try skipToAndOver(self, '(');
+ inline for (@typeInfo(Inst.Tag).Enum.fields) |field| {
+ if (mem.eql(u8, field.name, fn_name)) {
+ const tag = @field(Inst.Tag, field.name);
+ return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx);
+ }
+ }
+ return self.fail("unknown instruction '{}'", .{fn_name});
+ }
+
+ fn parseInstructionGeneric(
+ self: *Parser,
+ comptime fn_name: []const u8,
+ comptime InstType: type,
+ body_ctx: ?*Body,
+ ) !*Inst {
+ const inst_specific = try self.allocator.create(InstType);
+ inst_specific.base = std.meta.fieldInfo(InstType, "base").default_value.?;
+
+ if (@hasField(InstType, "ty")) {
+ inst_specific.ty = opt_type orelse {
+ return self.fail("instruction '" ++ fn_name ++ "' requires type", .{});
+ };
+ }
+
+ const Positionals = @TypeOf(inst_specific.positionals);
+ inline for (@typeInfo(Positionals).Struct.fields) |arg_field| {
+ if (self.source[self.i] == ',') {
+ self.i += 1;
+ skipSpace(self);
+ } else if (self.source[self.i] == ')') {
+ return self.fail("expected positional parameter '{}'", .{arg_field.name});
+ }
+ @field(inst_specific.positionals, arg_field.name) = try parseParameterGeneric(
+ self,
+ arg_field.field_type,
+ body_ctx,
+ );
+ skipSpace(self);
+ }
+
+ const KW_Args = @TypeOf(inst_specific.kw_args);
+ inst_specific.kw_args = .{}; // assign defaults
+ skipSpace(self);
+ while (eatByte(self, ',')) {
+ skipSpace(self);
+ const name = try skipToAndOver(self, '=');
+ inline for (@typeInfo(KW_Args).Struct.fields) |arg_field| {
+ const field_name = arg_field.name;
+ if (mem.eql(u8, name, field_name)) {
+ const NonOptional = switch (@typeInfo(arg_field.field_type)) {
+ .Optional => |info| info.child,
+ else => arg_field.field_type,
+ };
+ @field(inst_specific.kw_args, field_name) = try parseParameterGeneric(self, NonOptional, body_ctx);
+ break;
+ }
+ } else {
+ return self.fail("unrecognized keyword parameter: '{}'", .{name});
+ }
+ skipSpace(self);
+ }
+ try requireEatBytes(self, ")");
+
+ return &inst_specific.base;
+ }
+
+ fn parseParameterGeneric(self: *Parser, comptime T: type, body_ctx: ?*Body) !T {
+ if (@typeInfo(T) == .Enum) {
+ const start = self.i;
+ while (true) : (self.i += 1) switch (self.source[self.i]) {
+ ' ', '\n', ',', ')' => {
+ const enum_name = self.source[start..self.i];
+ return std.meta.stringToEnum(T, enum_name) orelse {
+ return self.fail("tag '{}' not a member of enum '{}'", .{ enum_name, @typeName(T) });
+ };
+ },
+ 0 => return self.failByte(0),
+ else => continue,
+ };
+ }
+ switch (T) {
+ Inst.Fn.Body => return parseBody(self),
+ bool => {
+ const bool_value = switch (self.source[self.i]) {
+ '0' => false,
+ '1' => true,
+ else => |byte| return self.fail("expected '0' or '1' for boolean value, found {c}", .{byte}),
+ };
+ self.i += 1;
+ return bool_value;
+ },
+ []*Inst => {
+ try requireEatBytes(self, "[");
+ skipSpace(self);
+ if (eatByte(self, ']')) return &[0]*Inst{};
+
+ var instructions = std.ArrayList(*Inst).init(self.allocator);
+ defer instructions.deinit();
+ while (true) {
+ skipSpace(self);
+ try instructions.append(try parseParameterInst(self, body_ctx));
+ skipSpace(self);
+ if (!eatByte(self, ',')) break;
+ }
+ try requireEatBytes(self, "]");
+ return instructions.toOwnedSlice();
+ },
+ *Inst => return parseParameterInst(self, body_ctx),
+ Value => return self.fail("TODO implement parseParameterGeneric for type Value", .{}),
+ []u8 => return self.parseStringLiteral(),
+ BigInt => return self.parseIntegerLiteral(),
+ else => @compileError("Unimplemented: ir parseParameterGeneric for type " ++ @typeName(T)),
+ }
+ return self.fail("TODO parse parameter {}", .{@typeName(T)});
+ }
+
+ fn parseParameterInst(self: *Parser, body_ctx: ?*Body) !*Inst {
+ const local_ref = switch (self.source[self.i]) {
+ '@' => false,
+ '%' => true,
+ else => |byte| return self.fail("unexpected byte: '{c}'", .{byte}),
+ };
+ const map = if (local_ref)
+ if (body_ctx) |bc|
+ &bc.name_map
+ else
+ return self.fail("referencing a % instruction in global scope", .{})
+ else
+ self.global_name_map;
+
+ self.i += 1;
+ const name_start = self.i;
+ while (true) : (self.i += 1) switch (self.source[self.i]) {
+ 0, ' ', '\n', ',', ')', ']' => break,
+ else => continue,
+ };
+ const ident = self.source[name_start..self.i];
+ const kv = map.get(ident) orelse {
+ const bad_name = self.source[name_start - 1 .. self.i];
+ self.i = name_start - 1;
+ return self.fail("unrecognized identifier: {}", .{bad_name});
+ };
+ if (local_ref) {
+ return body_ctx.?.instructions.items[kv.value];
+ } else {
+ return self.decls.items[kv.value];
+ }
+ }
+};
src-self-hosted/ir.zig
@@ -4,41 +4,26 @@ const Allocator = std.mem.Allocator;
const Value = @import("value.zig").Value;
const Type = @import("type.zig").Type;
const assert = std.debug.assert;
+const text = @import("ir/text.zig");
+/// These are in-memory, analyzed instructions. See `text.Inst` for the representation
+/// of instructions that correspond to the ZIR text format.
pub const Inst = struct {
- tag: Tag,
+ pub fn ty(base: *Inst) ?Type {
+ switch (base.tag) {
+ .constant => return base.cast(Constant).?.ty,
+ .@"asm" => return base.cast(Assembly).?.ty,
+ .@"fn" => return base.cast(Fn).?.ty,
- /// These names are used for the IR text format.
- pub const Tag = enum {
- constant,
- ptrtoint,
- fieldptr,
- deref,
- @"asm",
- @"unreachable",
- @"fn",
- @"export",
- };
-
- pub fn TagToType(tag: Tag) type {
- return switch (tag) {
- .constant => Constant,
- .ptrtoint => PtrToInt,
- .fieldptr => FieldPtr,
- .deref => Deref,
- .@"asm" => Assembly,
- .@"unreachable" => Unreachable,
- .@"fn" => Fn,
- .@"export" => Export,
- };
- }
+ .ptrtoint => return Type.initTag(.@"usize"),
+ .@"unreachable" => return Type.initTag(.@"noreturn"),
+ .@"export" => return Type.initTag(.@"void"),
+ .fntype, .primitive => return Type.initTag(.@"type"),
- pub fn cast(base: *Inst, comptime T: type) ?*T {
- const expected_tag = std.meta.fieldInfo(T, "base").default_value.?.tag;
- if (base.tag != expected_tag)
- return null;
-
- return @fieldParentPtr(T, "base", base);
+ .fieldptr,
+ .deref,
+ => return null,
+ }
}
/// This struct owns the `Value` memory. When the struct is deallocated,
@@ -53,644 +38,70 @@ pub const Inst = struct {
},
kw_args: struct {},
};
-
- pub const PtrToInt = struct {
- base: Inst = Inst{ .tag = .ptrtoint },
-
- positionals: struct {
- ptr: *Inst,
- },
- kw_args: struct {},
- };
-
- pub const FieldPtr = struct {
- base: Inst = Inst{ .tag = .fieldptr },
-
- positionals: struct {
- object_ptr: *Inst,
- field_name: *Inst,
- },
- kw_args: struct {},
- };
-
- pub const Deref = struct {
- base: Inst = Inst{ .tag = .deref },
-
- positionals: struct {
- ptr: *Inst,
- },
- kw_args: struct {},
- };
-
- pub const Assembly = struct {
- base: Inst = Inst{ .tag = .@"asm" },
-
- positionals: struct {
- asm_source: *Inst,
- },
- kw_args: struct {
- @"volatile": bool = false,
- output: ?*Inst = null,
- inputs: []*Inst = &[0]*Inst{},
- clobbers: []*Inst = &[0]*Inst{},
- args: []*Inst = &[0]*Inst{},
- },
- };
-
- pub const Unreachable = struct {
- base: Inst = Inst{ .tag = .@"unreachable" },
-
- positionals: struct {},
- kw_args: struct {},
- };
-
- pub const Fn = struct {
- base: Inst = Inst{ .tag = .@"fn" },
-
- positionals: struct {
- body: Body,
- },
- kw_args: struct {
- cc: std.builtin.CallingConvention = .Unspecified,
- },
-
- pub const Body = struct {
- instructions: []*Inst,
- };
- };
-
- pub const Export = struct {
- base: Inst = Inst{ .tag = .@"export" },
-
- positionals: struct {
- symbol_name: *Inst,
- value: *Inst,
- },
- kw_args: struct {},
- };
-};
-
-pub const ErrorMsg = struct {
- byte_offset: usize,
- msg: []const u8,
-};
-
-pub const Tree = struct {
- decls: []*Inst,
- errors: []ErrorMsg,
-
- pub fn deinit(self: *Tree) void {
- // TODO resource deallocation
- self.* = undefined;
- }
-
- /// This is a debugging utility for rendering the tree to stderr.
- pub fn dump(self: Tree) void {
- self.writeToStream(std.heap.page_allocator, std.io.getStdErr().outStream()) catch {};
- }
-
- const InstPtrTable = std.AutoHashMap(*Inst, struct { index: usize, fn_body: ?*Inst.Fn.Body });
-
- pub fn writeToStream(self: Tree, allocator: *Allocator, stream: var) !void {
- // First, build a map of *Inst to @ or % indexes
- var inst_table = InstPtrTable.init(allocator);
- defer inst_table.deinit();
-
- try inst_table.ensureCapacity(self.decls.len);
-
- for (self.decls) |decl, decl_i| {
- try inst_table.putNoClobber(decl, .{ .index = decl_i, .fn_body = null });
-
- if (decl.cast(Inst.Fn)) |fn_inst| {
- for (fn_inst.positionals.body.instructions) |inst, inst_i| {
- try inst_table.putNoClobber(inst, .{ .index = inst_i, .fn_body = &fn_inst.positionals.body });
- }
- }
- }
-
- for (self.decls) |decl, i| {
- try stream.print("@{} ", .{i});
- try self.writeInstToStream(stream, decl, &inst_table);
- try stream.writeByte('\n');
- }
- }
-
- fn writeInstToStream(
- self: Tree,
- stream: var,
- decl: *Inst,
- inst_table: *const InstPtrTable,
- ) @TypeOf(stream).Error!void {
- // TODO I tried implementing this with an inline for loop and hit a compiler bug
- switch (decl.tag) {
- .constant => return self.writeInstToStreamGeneric(stream, .constant, decl, inst_table),
- .ptrtoint => return self.writeInstToStreamGeneric(stream, .ptrtoint, decl, inst_table),
- .fieldptr => return self.writeInstToStreamGeneric(stream, .fieldptr, decl, inst_table),
- .deref => return self.writeInstToStreamGeneric(stream, .deref, decl, inst_table),
- .@"asm" => return self.writeInstToStreamGeneric(stream, .@"asm", decl, inst_table),
- .@"unreachable" => return self.writeInstToStreamGeneric(stream, .@"unreachable", decl, inst_table),
- .@"fn" => return self.writeInstToStreamGeneric(stream, .@"fn", decl, inst_table),
- .@"export" => return self.writeInstToStreamGeneric(stream, .@"export", decl, inst_table),
- }
- }
-
- fn writeInstToStreamGeneric(
- self: Tree,
- stream: var,
- comptime inst_tag: Inst.Tag,
- base: *Inst,
- inst_table: *const InstPtrTable,
- ) !void {
- const SpecificInst = Inst.TagToType(inst_tag);
- const inst = @fieldParentPtr(SpecificInst, "base", base);
- if (@hasField(SpecificInst, "ty")) {
- try stream.print(": {} ", .{inst.ty});
- }
- if (inst_tag == .constant) {
- if (inst.positionals.value.cast(Value.Payload.Bytes)) |bytes_value| {
- try stream.writeAll("= ");
- return std.zig.renderStringLiteral(bytes_value.data, stream);
- } else if (inst.positionals.value.cast(Value.Payload.Int_u64)) |v| {
- return stream.print("= {}", .{v.int});
- } else if (inst.positionals.value.cast(Value.Payload.Int_i64)) |v| {
- return stream.print("= {}", .{v.int});
- }
- }
- const Positionals = @TypeOf(inst.positionals);
- try stream.writeAll("= " ++ @tagName(inst_tag) ++ "(");
- const pos_fields = @typeInfo(Positionals).Struct.fields;
- inline for (pos_fields) |arg_field, i| {
- if (i != 0) {
- try stream.writeAll(", ");
- }
- try self.writeParamToStream(stream, @field(inst.positionals, arg_field.name), inst_table);
- }
-
- comptime var need_comma = pos_fields.len != 0;
- const KW_Args = @TypeOf(inst.kw_args);
- inline for (@typeInfo(KW_Args).Struct.fields) |arg_field, i| {
- if (need_comma) {
- try stream.writeAll(",\n ");
- }
- if (@typeInfo(arg_field.field_type) == .Optional) {
- if (@field(inst.kw_args, arg_field.name)) |non_optional| {
- try stream.print("{}=", .{arg_field.name});
- try self.writeParamToStream(stream, non_optional, inst_table);
- need_comma = true;
- }
- } else {
- try stream.print("{}=", .{arg_field.name});
- try self.writeParamToStream(stream, @field(inst.kw_args, arg_field.name), inst_table);
- need_comma = true;
- }
- }
-
- try stream.writeByte(')');
- }
-
- fn writeParamToStream(self: Tree, stream: var, param: var, inst_table: *const InstPtrTable) !void {
- if (@typeInfo(@TypeOf(param)) == .Enum) {
- return stream.writeAll(@tagName(param));
- }
- switch (@TypeOf(param)) {
- Value => {
- try stream.print("{}", .{param});
- },
- *Inst => return self.writeInstParamToStream(stream, param, inst_table),
- []*Inst => {
- try stream.writeByte('[');
- for (param) |inst, i| {
- if (i != 0) {
- try stream.writeAll(", ");
- }
- try self.writeInstParamToStream(stream, inst, inst_table);
- }
- try stream.writeByte(']');
- },
- Inst.Fn.Body => {
- try stream.writeAll("{\n");
- for (param.instructions) |inst, i| {
- try stream.print(" %{} ", .{i});
- try self.writeInstToStream(stream, inst, inst_table);
- try stream.writeByte('\n');
- }
- try stream.writeByte('}');
- },
- bool => return stream.writeByte("01"[@boolToInt(param)]),
- else => |T| @compileError("unimplemented: rendering parameter of type " ++ @typeName(T)),
- }
- }
-
- fn writeInstParamToStream(self: Tree, stream: var, inst: *Inst, inst_table: *const InstPtrTable) !void {
- const info = inst_table.getValue(inst).?;
- const prefix = if (info.fn_body == null) "@" else "%";
- try stream.print("{}{}", .{ prefix, info.index });
- }
};
-const ParseContext = struct {
+const Analyze = struct {
allocator: *Allocator,
- i: usize,
- source: []const u8,
+ old_tree: *const Module,
errors: std.ArrayList(ErrorMsg),
decls: std.ArrayList(*Inst),
- global_name_map: *std.StringHashMap(usize),
-};
-pub fn parse(allocator: *Allocator, source: []const u8) Allocator.Error!Tree {
- var global_name_map = std.StringHashMap(usize).init(allocator);
- defer global_name_map.deinit();
+ const NewInst = struct {
+ ptr: *Inst,
+ };
+};
- var ctx: ParseContext = .{
+pub fn analyze(allocator: *Allocator, old_tree: Module) !Module {
+ var ctx = Analyze{
.allocator = allocator,
- .i = 0,
- .source = source,
+ .old_tree = &old_tree,
.decls = std.ArrayList(*Inst).init(allocator),
.errors = std.ArrayList(ErrorMsg).init(allocator),
- .global_name_map = &global_name_map,
+ .inst_table = std.HashMap(*Inst, Analyze.InstData).init(allocator),
};
- parseRoot(&ctx) catch |err| switch (err) {
- error.ParseFailure => {
+ defer ctx.decls.deinit();
+ defer ctx.errors.deinit();
+ defer inst_table.deinit();
+
+ analyzeRoot(&ctx) catch |err| switch (err) {
+ error.AnalyzeFailure => {
assert(ctx.errors.items.len != 0);
},
else => |e| return e,
};
- return Tree{
+ return Module{
.decls = ctx.decls.toOwnedSlice(),
.errors = ctx.errors.toOwnedSlice(),
};
}
-pub fn parseRoot(ctx: *ParseContext) !void {
- // The IR format is designed so that it can be tokenized and parsed at the same time.
- while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
- ';' => _ = try skipToAndOver(ctx, '\n'),
- '@' => {
- ctx.i += 1;
- const ident = try skipToAndOver(ctx, ' ');
- const opt_type = try parseOptionalType(ctx);
- const inst = try parseInstruction(ctx, opt_type, null);
- const ident_index = ctx.decls.items.len;
- if (try ctx.global_name_map.put(ident, ident_index)) |_| {
- return parseError(ctx, "redefinition of identifier '{}'", .{ident});
- }
- try ctx.decls.append(inst);
- continue;
- },
- ' ', '\n' => continue,
- else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}),
- };
-}
-
-fn eatByte(ctx: *ParseContext, byte: u8) bool {
- if (ctx.i >= ctx.source.len) return false;
- if (ctx.source[ctx.i] != byte) return false;
- ctx.i += 1;
- return true;
-}
-
-fn skipSpace(ctx: *ParseContext) void {
- while (ctx.i < ctx.source.len and (ctx.source[ctx.i] == ' ' or ctx.source[ctx.i] == '\n')) {
- ctx.i += 1;
- }
-}
-
-fn requireEatBytes(ctx: *ParseContext, bytes: []const u8) !void {
- if (ctx.i + bytes.len > ctx.source.len)
- return parseError(ctx, "unexpected EOF", .{});
- if (!mem.eql(u8, ctx.source[ctx.i..][0..bytes.len], bytes))
- return parseError(ctx, "expected '{}'", .{bytes});
- ctx.i += bytes.len;
-}
-
-fn skipToAndOver(ctx: *ParseContext, byte: u8) ![]const u8 {
- const start_i = ctx.i;
- while (ctx.i < ctx.source.len) : (ctx.i += 1) {
- if (ctx.source[ctx.i] == byte) {
- const result = ctx.source[start_i..ctx.i];
- ctx.i += 1;
- return result;
+/// Walks the top-level decls of the old tree and analyzes every `Inst.Export` among them.
+/// Decls that are not exports are skipped here.
+fn analyzeRoot(ctx: *Analyze) !void {
+ for (ctx.old_tree.decls) |decl| {
+ if (decl.cast(Inst.Export)) |export_inst| {
+ try analyzeExport(ctx, export_inst);
}
}
- return parseError(ctx, "unexpected EOF", .{});
}
-fn parseError(ctx: *ParseContext, comptime format: []const u8, args: var) error{ ParseFailure, OutOfMemory } {
- const msg = try std.fmt.allocPrint(ctx.allocator, format, args);
- (try ctx.errors.addOne()).* = .{
- .byte_offset = ctx.i,
- .msg = msg,
+/// Analyzes the decl referenced by `export_inst`, reusing the `inst_table` entry
+/// when that decl has already been analyzed.
+fn analyzeExport(ctx: *Analyze, export_inst: *Inst.Export) !void {
+ const old_decl = export_inst.positionals.value;
+ // `getValue` yields the stored `NewInst` directly; `get` would yield the map entry instead.
+ const new_info = ctx.inst_table.getValue(old_decl) orelse blk: {
+ const new_decl = try analyzeDecl(ctx, old_decl);
+ const fresh_info: Analyze.NewInst = .{ .ptr = new_decl };
+ // `put` returns the previous entry, if any; there is none on this path.
+ _ = try ctx.inst_table.put(old_decl, fresh_info);
+ break :blk fresh_info;
};
- return error.ParseFailure;
-}
-
-/// Regardless of whether a `Type` is returned, it skips past the '='.
-fn parseOptionalType(ctx: *ParseContext) !?Type {
- skipSpace(ctx);
- if (eatByte(ctx, ':')) {
- const type_text_untrimmed = try skipToAndOver(ctx, '=');
- skipSpace(ctx);
- const type_text = mem.trim(u8, type_text_untrimmed, " \n");
- if (mem.eql(u8, type_text, "usize")) {
- return Type.initTag(.int_usize);
- } else if (mem.eql(u8, type_text, "noreturn")) {
- return Type.initTag(.no_return);
- } else {
- return parseError(ctx, "TODO parse type '{}'", .{type_text});
- }
- } else {
- skipSpace(ctx);
- try requireEatBytes(ctx, "=");
- skipSpace(ctx);
- return null;
- }
-}
-
-fn parseInstruction(
- ctx: *ParseContext,
- opt_type: ?Type,
- body_ctx: ?*BodyContext,
-) error{ OutOfMemory, ParseFailure }!*Inst {
- switch (ctx.source[ctx.i]) {
- '"' => return parseStringLiteralConst(ctx, opt_type),
- '0'...'9' => return parseIntegerLiteralConst(ctx, opt_type),
- else => {},
- }
- const fn_name = try skipToAndOver(ctx, '(');
- inline for (@typeInfo(Inst.Tag).Enum.fields) |field| {
- if (mem.eql(u8, field.name, fn_name)) {
- const tag = @field(Inst.Tag, field.name);
- return parseInstructionGeneric(ctx, field.name, Inst.TagToType(tag), opt_type, body_ctx);
- }
- }
- return parseError(ctx, "unknown instruction '{}'", .{fn_name});
-}
-
-fn parseInstructionGeneric(
- ctx: *ParseContext,
- comptime fn_name: []const u8,
- comptime InstType: type,
- opt_type: ?Type,
- body_ctx: ?*BodyContext,
-) !*Inst {
- const inst_specific = try ctx.allocator.create(InstType);
- inst_specific.base = std.meta.fieldInfo(InstType, "base").default_value.?;
- if (@hasField(InstType, "ty")) {
- inst_specific.ty = opt_type orelse {
- return parseError(ctx, "instruction '" ++ fn_name ++ "' requires type", .{});
- };
- }
-
- const Positionals = @TypeOf(inst_specific.positionals);
- inline for (@typeInfo(Positionals).Struct.fields) |arg_field| {
- if (ctx.source[ctx.i] == ',') {
- ctx.i += 1;
- skipSpace(ctx);
- } else if (ctx.source[ctx.i] == ')') {
- return parseError(ctx, "expected positional parameter '{}'", .{arg_field.name});
- }
- @field(inst_specific.positionals, arg_field.name) = try parseParameterGeneric(
- ctx,
- arg_field.field_type,
- body_ctx,
- );
- skipSpace(ctx);
- }
-
- const KW_Args = @TypeOf(inst_specific.kw_args);
- inst_specific.kw_args = .{}; // assign defaults
- skipSpace(ctx);
- while (eatByte(ctx, ',')) {
- skipSpace(ctx);
- const name = try skipToAndOver(ctx, '=');
- inline for (@typeInfo(KW_Args).Struct.fields) |arg_field| {
- const field_name = arg_field.name;
- if (mem.eql(u8, name, field_name)) {
- const NonOptional = switch (@typeInfo(arg_field.field_type)) {
- .Optional => |info| info.child,
- else => arg_field.field_type,
- };
- @field(inst_specific.kw_args, field_name) = try parseParameterGeneric(ctx, NonOptional, body_ctx);
- break;
- }
- } else {
- return parseError(ctx, "unrecognized keyword parameter: '{}'", .{name});
- }
- skipSpace(ctx);
- }
- try requireEatBytes(ctx, ")");
-
- return &inst_specific.base;
-}
-
-fn parseParameterGeneric(ctx: *ParseContext, comptime T: type, body_ctx: ?*BodyContext) !T {
- if (@typeInfo(T) == .Enum) {
- const start = ctx.i;
- while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
- ' ', '\n', ',', ')' => {
- const enum_name = ctx.source[start..ctx.i];
- return std.meta.stringToEnum(T, enum_name) orelse {
- return parseError(ctx, "tag '{}' not a member of enum '{}'", .{ enum_name, @typeName(T) });
- };
- },
- else => continue,
- };
- return parseError(ctx, "unexpected EOF in enum parameter", .{});
- }
- switch (T) {
- Inst.Fn.Body => return parseBody(ctx),
- bool => {
- const bool_value = switch (ctx.source[ctx.i]) {
- '0' => false,
- '1' => true,
- else => |byte| return parseError(ctx, "expected '0' or '1' for boolean value, found {c}", .{byte}),
- };
- ctx.i += 1;
- return bool_value;
- },
- []*Inst => {
- try requireEatBytes(ctx, "[");
- skipSpace(ctx);
- if (eatByte(ctx, ']')) return &[0]*Inst{};
-
- var instructions = std.ArrayList(*Inst).init(ctx.allocator);
- defer instructions.deinit();
- while (true) {
- skipSpace(ctx);
- try instructions.append(try parseParameterInst(ctx, body_ctx));
- skipSpace(ctx);
- if (!eatByte(ctx, ',')) break;
- }
- try requireEatBytes(ctx, "]");
- return instructions.toOwnedSlice();
- },
- *Inst => return parseParameterInst(ctx, body_ctx),
- Value => return parseError(ctx, "TODO implement parseParameterGeneric for type Value", .{}),
- else => @compileError("Unimplemented: ir parseParameterGeneric for type " ++ @typeName(T)),
- }
- return parseError(ctx, "TODO parse parameter {}", .{@typeName(T)});
-}
-
-fn parseParameterInst(ctx: *ParseContext, body_ctx: ?*BodyContext) !*Inst {
- const local_ref = switch (ctx.source[ctx.i]) {
- '@' => false,
- '%' => true,
- '"' => {
- const str_lit_inst = try parseStringLiteralConst(ctx, null);
- try ctx.decls.append(str_lit_inst);
- return str_lit_inst;
- },
- else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}),
- };
- const map = if (local_ref)
- if (body_ctx) |bc|
- &bc.name_map
- else
- return parseError(ctx, "referencing a % instruction in global scope", .{})
- else
- ctx.global_name_map;
-
- ctx.i += 1;
- const name_start = ctx.i;
- while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
- ' ', '\n', ',', ')', ']' => break,
- else => continue,
- };
- const ident = ctx.source[name_start..ctx.i];
- const kv = map.get(ident) orelse {
- const bad_name = ctx.source[name_start - 1 .. ctx.i];
- ctx.i = name_start - 1;
- return parseError(ctx, "unrecognized identifier: {}", .{bad_name});
- };
- if (local_ref) {
- return body_ctx.?.instructions.items[kv.value];
- } else {
- return ctx.decls.items[kv.value];
- }
-}
-
-const BodyContext = struct {
- instructions: std.ArrayList(*Inst),
- name_map: std.StringHashMap(usize),
-};
-
-fn parseBody(ctx: *ParseContext) !Inst.Fn.Body {
- var body_context = BodyContext{
- .instructions = std.ArrayList(*Inst).init(ctx.allocator),
- .name_map = std.StringHashMap(usize).init(ctx.allocator),
- };
- defer body_context.instructions.deinit();
- defer body_context.name_map.deinit();
-
- try requireEatBytes(ctx, "{");
- skipSpace(ctx);
-
- while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
- ';' => _ = try skipToAndOver(ctx, '\n'),
- '%' => {
- ctx.i += 1;
- const ident = try skipToAndOver(ctx, ' ');
- const opt_type = try parseOptionalType(ctx);
- const inst = try parseInstruction(ctx, opt_type, &body_context);
- const ident_index = body_context.instructions.items.len;
- if (try body_context.name_map.put(ident, ident_index)) |_| {
- return parseError(ctx, "redefinition of identifier '{}'", .{ident});
- }
- try body_context.instructions.append(inst);
- continue;
- },
- ' ', '\n' => continue,
- '}' => {
- ctx.i += 1;
- break;
- },
- else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}),
- };
-
- return Inst.Fn.Body{
- .instructions = body_context.instructions.toOwnedSlice(),
- };
-}
-
-fn parseStringLiteralConst(ctx: *ParseContext, opt_type: ?Type) !*Inst {
- const start = ctx.i;
- ctx.i += 1; // skip over '"'
-
- while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
- '"' => {
- ctx.i += 1;
- const span = ctx.source[start..ctx.i];
- var bad_index: usize = undefined;
- const parsed = std.zig.parseStringLiteral(ctx.allocator, span, &bad_index) catch |err| switch (err) {
- error.InvalidCharacter => {
- ctx.i = start + bad_index;
- const bad_byte = ctx.source[ctx.i];
- return parseError(ctx, "invalid string literal character: '{c}'\n", .{bad_byte});
- },
- else => |e| return e,
- };
- const const_inst = try ctx.allocator.create(Inst.Constant);
- errdefer ctx.allocator.destroy(const_inst);
-
- const bytes_payload = try ctx.allocator.create(Value.Payload.Bytes);
- errdefer ctx.allocator.destroy(bytes_payload);
- bytes_payload.* = .{ .data = parsed };
-
- const ty = opt_type orelse blk: {
- const array_payload = try ctx.allocator.create(Type.Payload.Array_u8_Sentinel0);
- errdefer ctx.allocator.destroy(array_payload);
- array_payload.* = .{ .len = parsed.len };
-
- const ty_payload = try ctx.allocator.create(Type.Payload.SingleConstPointer);
- errdefer ctx.allocator.destroy(ty_payload);
- ty_payload.* = .{ .pointee_type = Type.initPayload(&array_payload.base) };
-
- break :blk Type.initPayload(&ty_payload.base);
- };
-
- const_inst.* = .{
- .ty = ty,
- .positionals = .{ .value = Value.initPayload(&bytes_payload.base) },
- .kw_args = .{},
- };
- return &const_inst.base;
- },
- '\\' => {
- ctx.i += 1;
- if (ctx.i >= ctx.source.len) break;
- continue;
- },
- else => continue,
- };
- return parseError(ctx, "unexpected EOF in string literal", .{});
-}
-
-fn parseIntegerLiteralConst(ctx: *ParseContext, opt_type: ?Type) !*Inst {
- const start = ctx.i;
- while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
- '0'...'9' => continue,
- else => break,
- };
- const number_text = ctx.source[start..ctx.i];
- const number = std.fmt.parseInt(u64, number_text, 10) catch |err| switch (err) {
- error.Overflow => return parseError(ctx, "TODO handle big integers", .{}),
- error.InvalidCharacter => return parseError(ctx, "invalid integer literal", .{}),
- };
-
- const int_payload = try ctx.allocator.create(Value.Payload.Int_u64);
- errdefer ctx.allocator.destroy(int_payload);
- int_payload.* = .{ .int = number };
-
- const const_inst = try ctx.allocator.create(Inst.Constant);
- errdefer ctx.allocator.destroy(const_inst);
-
- const_inst.* = .{
- .ty = opt_type orelse Type.initTag(.int_comptime),
- .positionals = .{ .value = Value.initPayload(&int_payload.base) },
- .kw_args = .{},
- };
- return &const_inst.base;
+ //const exp_type = new_info.ptr.ty();
+ //switch (exp_type.zigTypeTag()) {
+ // .Fn => {
+ // if () |kv| {
+ // kv.value
+ // }
+ // return analyzeExportFn(ctx, exp_target.cast(Inst.,
+ // },
+ // else => return ctx.fail("unable to export type '{}'", .{exp_type}),
+ //}
}
pub fn main() anyerror!void {
@@ -703,9 +114,9 @@ pub fn main() anyerror!void {
const src_path = args[1];
const debug_error_trace = true;
- const source = try std.fs.cwd().readFileAlloc(allocator, src_path, std.math.maxInt(u32));
+ const source = try std.fs.cwd().readFileAllocOptions(allocator, src_path, std.math.maxInt(u32), 1, 0);
- var tree = try parse(allocator, source);
+ var tree = try text.parse(allocator, source);
defer tree.deinit();
if (tree.errors.len != 0) {
@@ -719,8 +130,19 @@ pub fn main() anyerror!void {
tree.dump();
- //const new_tree = try semanticallyAnalyze(tree);
+ //const new_tree = try analyze(allocator, tree);
//defer new_tree.deinit();
+
+ //if (new_tree.errors.len != 0) {
+ // for (new_tree.errors) |err_msg| {
+ // const loc = findLineColumn(source, err_msg.byte_offset);
+ // std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });
+ // }
+ // if (debug_error_trace) return error.ParseFailure;
+ // std.process.exit(1);
+ //}
+
+ //new_tree.dump();
}
fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } {
@@ -741,4 +163,4 @@ fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize,
}
// Performance optimization ideas:
-// * make the source code sentinel-terminated, so that all the checks against the length can be skipped
+// * when analyzing use a field in the Inst instead of HashMap to track corresponding instructions
src-self-hosted/type.zig
@@ -18,7 +18,7 @@ pub const Type = extern union {
pub fn zigTypeTag(self: Type) std.builtin.TypeId {
switch (self.tag()) {
- .int_u8, .int_usize => return .Int,
+ .@"u8", .@"usize" => return .Int,
.array_u8, .array_u8_sentinel_0 => return .Array,
.single_const_pointer => return .Pointer,
}
@@ -52,10 +52,35 @@ pub const Type = extern union {
var ty = self;
while (true) {
switch (ty.tag()) {
- .no_return => return out_stream.writeAll("noreturn"),
- .int_comptime => return out_stream.writeAll("comptime_int"),
- .int_u8 => return out_stream.writeAll("u8"),
- .int_usize => return out_stream.writeAll("usize"),
+ // Every payload-free tag is spelled like the type it denotes, so the tag name is the rendering.
+ .@"u8",
+ .@"i8",
+ .@"isize",
+ .@"usize",
+ .@"noreturn",
+ .@"void",
+ .@"c_short",
+ .@"c_ushort",
+ .@"c_int",
+ .@"c_uint",
+ .@"c_long",
+ .@"c_ulong",
+ .@"c_longlong",
+ .@"c_ulonglong",
+ .@"c_longdouble",
+ .@"c_void",
+ .@"f16",
+ .@"f32",
+ .@"f64",
+ .@"f128",
+ .@"bool",
+ .@"type",
+ .@"anyerror",
+ .@"comptime_int",
+ .@"comptime_float",
+ => |t| return out_stream.writeAll(@tagName(t)),
+
.array_u8_sentinel_0 => {
const payload = @fieldParentPtr(Payload.Array_u8_Sentinel0, "base", ty.ptr_otherwise);
return out_stream.print("[{}:0]u8", .{payload.len});
@@ -85,17 +110,38 @@ pub const Type = extern union {
/// See `zigTypeTag` for the function that corresponds to `std.builtin.TypeId`.
pub const Tag = enum {
// The first section of this enum are tags that require no payload.
- no_return,
- int_comptime,
- int_u8,
- int_usize, // See last_no_payload_tag below.
+ @"u8",
+ @"i8",
+ @"isize",
+ @"usize",
+ @"c_short",
+ @"c_ushort",
+ @"c_int",
+ @"c_uint",
+ @"c_long",
+ @"c_ulong",
+ @"c_longlong",
+ @"c_ulonglong",
+ @"c_longdouble",
+ @"c_void",
+ @"f16",
+ @"f32",
+ @"f64",
+ @"f128",
+ @"bool",
+ @"void",
+ @"type",
+ @"anyerror",
+ @"comptime_int",
+ @"comptime_float",
+ @"noreturn", // See last_no_payload_tag below.
// After this, the tag requires a payload.
array_u8_sentinel_0,
array,
single_const_pointer,
- pub const last_no_payload_tag = Tag.int_usize;
+ pub const last_no_payload_tag = Tag.@"noreturn";
pub const no_payload_count = @enumToInt(last_no_payload_tag) + 1;
};
test/stage2/ir.zig
@@ -1,33 +1,50 @@
test "hello world IR" {
exeCmp(
- \\@0 = "Hello, world!\n"
+ \\@0 = str("Hello, world!\n")
+ \\@1 = primitive(void)
+ \\@2 = primitive(usize)
+ \\@3 = fntype([], @1, cc=Naked)
+ \\@4 = int(0)
+ \\@5 = int(1)
+ \\@6 = int(231)
+ \\@7 = str("len")
\\
- \\@1 = fn({
- \\ %0 : usize = 1 ;SYS_write
- \\ %1 : usize = 1 ;STDOUT_FILENO
+ \\@8 = fn(@3, {
+ \\ %0 = as(@2, @5) ; SYS_write
+ \\ %1 = as(@2, @5) ; STDOUT_FILENO
\\ %2 = ptrtoint(@0) ; msg ptr
- \\ %3 = fieldptr(@0, "len") ; msg len ptr
+ \\ %3 = fieldptr(@0, @7) ; msg len ptr
\\ %4 = deref(%3) ; msg len
- \\ %5 = asm("syscall",
+ \\ %sysoutreg = str("={rax}")
+ \\ %rax = str("{rax}")
+ \\ %rdi = str("{rdi}")
+ \\ %rsi = str("{rsi}")
+ \\ %rdx = str("{rdx}")
+ \\ %rcx = str("rcx")
+ \\ %r11 = str("r11")
+ \\ %memory = str("memory")
+ \\ %syscall = str("syscall")
+ \\ %5 = asm(%syscall, @2,
\\ volatile=1,
- \\ output="={rax}",
- \\ inputs=["{rax}", "{rdi}", "{rsi}", "{rdx}"],
- \\ clobbers=["rcx", "r11", "memory"],
+ \\ output=%sysoutreg,
+ \\ inputs=[%rax, %rdi, %rsi, %rdx],
+ \\ clobbers=[%rcx, %r11, %memory],
\\ args=[%0, %1, %2, %4])
\\
- \\ %6 : usize = 231 ;SYS_exit_group
- \\ %7 : usize = 0 ;exit code
- \\ %8 = asm("syscall",
+ \\ %6 = as(@2, @6) ;SYS_exit_group
+ \\ %7 = as(@2, @4) ;exit code
+ \\ %8 = asm(%syscall, @2,
\\ volatile=1,
- \\ output="={rax}",
- \\ inputs=["{rax}", "{rdi}"],
- \\ clobbers=["rcx", "r11", "memory"],
+ \\ output=%sysoutreg,
+ \\ inputs=[%rax, %rdi],
+ \\ clobbers=[%rcx, %r11, %memory],
\\ args=[%6, %7])
\\
\\ %9 = unreachable()
- \\}, cc=naked)
+ \\})
\\
- \\@2 = export("_start", @1)
+ \\@9 = str("_start")
+ \\@10 = export(@9, @8)
,
\\Hello, world!
\\