Commit b2deaf8027

Andrew Kelley <andrew@ziglang.org>
2021-03-26 07:00:38
stage2: improve source locations of Decl access
* zir.Code: introduce a decls array. This is so that `decl_val` and
  `decl_ref` instructions can refer to a Decl with a u32 and therefore
  they can also store a source location. This is needed for proper
  compile error reporting.
* astgen uses a hash map to avoid redundantly adding a Decl to the
  decls array.
* fixed reporting "instruction illegal outside function body" instead
  of the desired message "unable to resolve comptime value".
* astgen skips emitting dbg_stmt instructions in comptime scopes.
* astgen has some logic to avoid adding unnecessary type coercion
  instructions for common values.
1 parent 4bfcd10
src/astgen.zig
@@ -952,7 +952,9 @@ fn blockExprStmts(
 
     var scope = parent_scope;
     for (statements) |statement| {
-        _ = try gz.addNode(.dbg_stmt_node, statement);
+        if (!gz.force_comptime) {
+            _ = try gz.addNode(.dbg_stmt_node, statement);
+        }
         switch (node_tags[statement]) {
             .global_var_decl => scope = try varDecl(mod, scope, statement, &block_arena.allocator, tree.globalVarDecl(statement)),
             .local_var_decl => scope = try varDecl(mod, scope, statement, &block_arena.allocator, tree.localVarDecl(statement)),
@@ -2846,14 +2848,17 @@ fn identifier(
         };
     }
 
-    if (mod.lookupDeclName(scope, ident_name)) |decl| {
-        return if (rl == .ref)
-            gz.addDecl(.decl_ref, decl)
-        else
-            rvalue(mod, scope, rl, try gz.addDecl(.decl_val, decl), ident);
+    const gop = try gz.zir_code.decl_map.getOrPut(mod.gpa, ident_name);
+    if (!gop.found_existing) {
+        const decl = mod.lookupDeclName(scope, ident_name) orelse
+            return mod.failNode(scope, ident, "use of undeclared identifier '{s}'", .{ident_name});
+        try gz.zir_code.decls.append(mod.gpa, decl);
+    }
+    const decl_index = @intCast(u32, gop.index);
+    switch (rl) {
+        .ref => return gz.addDecl(.decl_ref, decl_index, ident),
+        else => return rvalue(mod, scope, rl, try gz.addDecl(.decl_val, decl_index, ident), ident),
     }
-
-    return mod.failNode(scope, ident, "use of undeclared identifier '{s}'", .{ident_name});
 }
 
 fn stringLiteral(
@@ -3743,10 +3748,70 @@ fn rvalue(
             const src_token = tree.firstToken(src_node);
             return gz.addUnTok(.ref, result, src_token);
         },
-        .ty => |ty_inst| return gz.addPlNode(.as_node, src_node, zir.Inst.As{
-            .dest_type = ty_inst,
-            .operand = result,
-        }),
+        .ty => |ty_inst| {
+            // Quickly eliminate some common, unnecessary type coercion.
+            const as_ty = @as(u64, @enumToInt(zir.Inst.Ref.type_type)) << 32;
+            const as_comptime_int = @as(u64, @enumToInt(zir.Inst.Ref.comptime_int_type)) << 32;
+            const as_bool = @as(u64, @enumToInt(zir.Inst.Ref.bool_type)) << 32;
+            const as_usize = @as(u64, @enumToInt(zir.Inst.Ref.usize_type)) << 32;
+            const as_void = @as(u64, @enumToInt(zir.Inst.Ref.void_type)) << 32;
+            switch ((@as(u64, @enumToInt(ty_inst)) << 32) | @as(u64, @enumToInt(result))) {
+                as_ty | @enumToInt(zir.Inst.Ref.u8_type),
+                as_ty | @enumToInt(zir.Inst.Ref.i8_type),
+                as_ty | @enumToInt(zir.Inst.Ref.u16_type),
+                as_ty | @enumToInt(zir.Inst.Ref.i16_type),
+                as_ty | @enumToInt(zir.Inst.Ref.u32_type),
+                as_ty | @enumToInt(zir.Inst.Ref.i32_type),
+                as_ty | @enumToInt(zir.Inst.Ref.u64_type),
+                as_ty | @enumToInt(zir.Inst.Ref.i64_type),
+                as_ty | @enumToInt(zir.Inst.Ref.usize_type),
+                as_ty | @enumToInt(zir.Inst.Ref.isize_type),
+                as_ty | @enumToInt(zir.Inst.Ref.c_short_type),
+                as_ty | @enumToInt(zir.Inst.Ref.c_ushort_type),
+                as_ty | @enumToInt(zir.Inst.Ref.c_int_type),
+                as_ty | @enumToInt(zir.Inst.Ref.c_uint_type),
+                as_ty | @enumToInt(zir.Inst.Ref.c_long_type),
+                as_ty | @enumToInt(zir.Inst.Ref.c_ulong_type),
+                as_ty | @enumToInt(zir.Inst.Ref.c_longlong_type),
+                as_ty | @enumToInt(zir.Inst.Ref.c_ulonglong_type),
+                as_ty | @enumToInt(zir.Inst.Ref.c_longdouble_type),
+                as_ty | @enumToInt(zir.Inst.Ref.f16_type),
+                as_ty | @enumToInt(zir.Inst.Ref.f32_type),
+                as_ty | @enumToInt(zir.Inst.Ref.f64_type),
+                as_ty | @enumToInt(zir.Inst.Ref.f128_type),
+                as_ty | @enumToInt(zir.Inst.Ref.c_void_type),
+                as_ty | @enumToInt(zir.Inst.Ref.bool_type),
+                as_ty | @enumToInt(zir.Inst.Ref.void_type),
+                as_ty | @enumToInt(zir.Inst.Ref.type_type),
+                as_ty | @enumToInt(zir.Inst.Ref.anyerror_type),
+                as_ty | @enumToInt(zir.Inst.Ref.comptime_int_type),
+                as_ty | @enumToInt(zir.Inst.Ref.comptime_float_type),
+                as_ty | @enumToInt(zir.Inst.Ref.noreturn_type),
+                as_ty | @enumToInt(zir.Inst.Ref.null_type),
+                as_ty | @enumToInt(zir.Inst.Ref.undefined_type),
+                as_ty | @enumToInt(zir.Inst.Ref.fn_noreturn_no_args_type),
+                as_ty | @enumToInt(zir.Inst.Ref.fn_void_no_args_type),
+                as_ty | @enumToInt(zir.Inst.Ref.fn_naked_noreturn_no_args_type),
+                as_ty | @enumToInt(zir.Inst.Ref.fn_ccc_void_no_args_type),
+                as_ty | @enumToInt(zir.Inst.Ref.single_const_pointer_to_comptime_int_type),
+                as_ty | @enumToInt(zir.Inst.Ref.const_slice_u8_type),
+                as_ty | @enumToInt(zir.Inst.Ref.enum_literal_type),
+                as_comptime_int | @enumToInt(zir.Inst.Ref.zero),
+                as_comptime_int | @enumToInt(zir.Inst.Ref.one),
+                as_bool | @enumToInt(zir.Inst.Ref.bool_true),
+                as_bool | @enumToInt(zir.Inst.Ref.bool_false),
+                as_usize | @enumToInt(zir.Inst.Ref.zero_usize),
+                as_usize | @enumToInt(zir.Inst.Ref.one_usize),
+                as_void | @enumToInt(zir.Inst.Ref.void_value),
+                => return result, // type of result is already correct
+
+                // Need an explicit type coercion instruction.
+                else => return gz.addPlNode(.as_node, src_node, zir.Inst.As{
+                    .dest_type = ty_inst,
+                    .operand = result,
+                }),
+            }
+        },
         .ptr => |ptr_inst| {
             _ = try gz.addPlNode(.store_node, src_node, zir.Inst.Bin{
                 .lhs = ptr_inst,
src/Module.zig
@@ -103,7 +103,7 @@ stage1_flags: packed struct {
 
 emit_h: ?Compilation.EmitLoc,
 
-compile_log_text: std.ArrayListUnmanaged(u8) = .{},
+compile_log_text: ArrayListUnmanaged(u8) = .{},
 
 pub const Export = struct {
     options: std.builtin.ExportOptions,
@@ -335,7 +335,7 @@ pub const Decl = struct {
 
 /// This state is attached to every Decl when Module emit_h is non-null.
 pub const EmitH = struct {
-    fwd_decl: std.ArrayListUnmanaged(u8) = .{},
+    fwd_decl: ArrayListUnmanaged(u8) = .{},
 };
 
 /// Some Fn struct memory is owned by the Decl's TypedValue.Managed arena allocator.
@@ -916,7 +916,7 @@ pub const Scope = struct {
         zir_code: *WipZirCode,
         /// Keeps track of the list of instructions in this scope only. Indexes
         /// to instructions in `zir_code`.
-        instructions: std.ArrayListUnmanaged(zir.Inst.Index) = .{},
+        instructions: ArrayListUnmanaged(zir.Inst.Index) = .{},
         label: ?Label = null,
         break_block: zir.Inst.Index = 0,
         continue_block: zir.Inst.Index = 0,
@@ -935,11 +935,11 @@ pub const Scope = struct {
         break_count: usize = 0,
         /// Tracks `break :foo bar` instructions so they can possibly be elided later if
         /// the labeled block ends up not needing a result location pointer.
-        labeled_breaks: std.ArrayListUnmanaged(zir.Inst.Index) = .{},
+        labeled_breaks: ArrayListUnmanaged(zir.Inst.Index) = .{},
         /// Tracks `store_to_block_ptr` instructions that correspond to break instructions
         /// so they can possibly be elided later if the labeled block ends up not needing
         /// a result location pointer.
-        labeled_store_to_block_ptr_list: std.ArrayListUnmanaged(zir.Inst.Index) = .{},
+        labeled_store_to_block_ptr_list: ArrayListUnmanaged(zir.Inst.Index) = .{},
 
         pub const Label = struct {
             token: ast.TokenIndex,
@@ -957,6 +957,7 @@ pub const Scope = struct {
                 .instructions = gz.zir_code.instructions.toOwnedSlice(),
                 .string_bytes = gz.zir_code.string_bytes.toOwnedSlice(gpa),
                 .extra = gz.zir_code.extra.toOwnedSlice(gpa),
+                .decls = gz.zir_code.decls.toOwnedSlice(gpa),
             };
         }
 
@@ -1253,11 +1254,15 @@ pub const Scope = struct {
         pub fn addDecl(
             gz: *GenZir,
             tag: zir.Inst.Tag,
-            decl: *Decl,
+            decl_index: u32,
+            src_node: ast.Node.Index,
         ) !zir.Inst.Ref {
             return gz.add(.{
                 .tag = tag,
-                .data = .{ .decl = decl },
+                .data = .{ .pl_node = .{
+                    .src_node = gz.zir_code.decl.nodeIndexToRelative(src_node),
+                    .payload_index = decl_index,
+                } },
             });
         }
 
@@ -1379,8 +1384,10 @@ pub const Scope = struct {
 /// The `GenZir.finish` function converts this to a `zir.Code`.
 pub const WipZirCode = struct {
     instructions: std.MultiArrayList(zir.Inst) = .{},
-    string_bytes: std.ArrayListUnmanaged(u8) = .{},
-    extra: std.ArrayListUnmanaged(u32) = .{},
+    string_bytes: ArrayListUnmanaged(u8) = .{},
+    extra: ArrayListUnmanaged(u32) = .{},
+    decl_map: std.StringArrayHashMapUnmanaged(void) = .{},
+    decls: ArrayListUnmanaged(*Decl) = .{},
     /// The end of special indexes. `zir.Inst.Ref` subtracts against this number to convert
     /// to `zir.Inst.Index`. The default here is correct if there are 0 parameters.
     ref_start_index: u32 = zir.Inst.Ref.typed_value_map.len,
@@ -1442,6 +1449,8 @@ pub const WipZirCode = struct {
         wzc.instructions.deinit(wzc.gpa);
         wzc.extra.deinit(wzc.gpa);
         wzc.string_bytes.deinit(wzc.gpa);
+        wzc.decl_map.deinit(wzc.gpa);
+        wzc.decls.deinit(wzc.gpa);
     }
 };
 
@@ -4062,7 +4071,7 @@ pub fn identifierTokenString(mod: *Module, scope: *Scope, token: ast.TokenIndex)
     if (!mem.startsWith(u8, ident_name, "@")) {
         return ident_name;
     }
-    var buf: std.ArrayListUnmanaged(u8) = .{};
+    var buf: ArrayListUnmanaged(u8) = .{};
     defer buf.deinit(mod.gpa);
     try parseStrLit(mod, scope, token, &buf, ident_name, 1);
     return buf.toOwnedSlice(mod.gpa);
@@ -4075,7 +4084,7 @@ pub fn appendIdentStr(
     mod: *Module,
     scope: *Scope,
     token: ast.TokenIndex,
-    buf: *std.ArrayListUnmanaged(u8),
+    buf: *ArrayListUnmanaged(u8),
 ) InnerError!void {
     const tree = scope.tree();
     const token_tags = tree.tokens.items(.tag);
@@ -4093,7 +4102,7 @@ pub fn parseStrLit(
     mod: *Module,
     scope: *Scope,
     token: ast.TokenIndex,
-    buf: *std.ArrayListUnmanaged(u8),
+    buf: *ArrayListUnmanaged(u8),
     bytes: []const u8,
     offset: u32,
 ) InnerError!void {
src/Sema.zig
@@ -1102,10 +1102,15 @@ fn zirDbgStmtNode(sema: *Sema, block: *Scope.Block, inst: zir.Inst.Index) InnerE
     const tracy = trace(@src());
     defer tracy.end();
 
+    // We do not set sema.src here because dbg_stmt instructions are only emitted for
+    // ZIR code that possibly will need to generate runtime code. So error messages
+    // and other source locations must not rely on sema.src being set from dbg_stmt
+    // instructions.
     if (block.is_comptime) return;
 
     const src_node = sema.code.instructions.items(.data)[inst].node;
     const src: LazySrcLoc = .{ .node_offset = src_node };
+
     const src_loc = src.toSrcLoc(&block.base);
     const abs_byte_off = try src_loc.byteOffset();
     _ = try block.addDbgStmt(src, abs_byte_off);
@@ -1115,16 +1120,20 @@ fn zirDeclRef(sema: *Sema, block: *Scope.Block, inst: zir.Inst.Index) InnerError
     const tracy = trace(@src());
     defer tracy.end();
 
-    const decl = sema.code.instructions.items(.data)[inst].decl;
-    return sema.analyzeDeclRef(block, .unneeded, decl);
+    const inst_data = sema.code.instructions.items(.data)[inst].pl_node;
+    const src = inst_data.src();
+    const decl = sema.code.decls[inst_data.payload_index];
+    return sema.analyzeDeclRef(block, src, decl);
 }
 
 fn zirDeclVal(sema: *Sema, block: *Scope.Block, inst: zir.Inst.Index) InnerError!*Inst {
     const tracy = trace(@src());
     defer tracy.end();
 
-    const decl = sema.code.instructions.items(.data)[inst].decl;
-    return sema.analyzeDeclVal(block, .unneeded, decl);
+    const inst_data = sema.code.instructions.items(.data)[inst].pl_node;
+    const src = inst_data.src();
+    const decl = sema.code.decls[inst_data.payload_index];
+    return sema.analyzeDeclVal(block, src, decl);
 }
 
 fn zirCallNone(
@@ -3211,10 +3220,10 @@ fn requireFunctionBlock(sema: *Sema, block: *Scope.Block, src: LazySrcLoc) !void
 }
 
 fn requireRuntimeBlock(sema: *Sema, block: *Scope.Block, src: LazySrcLoc) !void {
-    try sema.requireFunctionBlock(block, src);
     if (block.is_comptime) {
         return sema.mod.fail(&block.base, src, "unable to resolve comptime value", .{});
     }
+    try sema.requireFunctionBlock(block, src);
 }
 
 fn validateVarType(sema: *Sema, block: *Scope.Block, src: LazySrcLoc, ty: Type) !void {
src/zir.zig
@@ -37,6 +37,8 @@ pub const Code = struct {
     string_bytes: []u8,
     /// The meaning of this data is determined by `Inst.Tag` value.
     extra: []u32,
+    /// Used for decl_val and decl_ref instructions.
+    decls: []*Module.Decl,
 
     /// Returns the requested data, as well as the new index which is at the start of the
     /// trailers for the object.
@@ -76,6 +78,7 @@ pub const Code = struct {
         code.instructions.deinit(gpa);
         gpa.free(code.string_bytes);
         gpa.free(code.extra);
+        gpa.free(code.decls);
         code.* = undefined;
     }
 
@@ -103,7 +106,7 @@ pub const Code = struct {
         const stderr = std.io.getStdErr().writer();
         try stderr.print("ZIR {s} {s} %0 ", .{ kind, decl_name });
         try writer.writeInstToStream(stderr, 0);
-        try stderr.print("}} // ZIR {s} {s}\n\n", .{ kind, decl_name });
+        try stderr.print(" // end ZIR {s} {s}\n\n", .{ kind, decl_name });
     }
 };
 
@@ -115,7 +118,7 @@ pub const Inst = struct {
     data: Data,
 
     /// These names are used directly as the instruction names in the text format.
-    pub const Tag = enum {
+    pub const Tag = enum(u8) {
         /// Arithmetic addition, asserts no integer overflow.
         /// Uses the `pl_node` union field. Payload is `Bin`.
         add,
@@ -274,10 +277,10 @@ pub const Inst = struct {
         /// Uses the `node` union field.
         dbg_stmt_node,
         /// Represents a pointer to a global decl.
-        /// Uses the `decl` union field.
+        /// Uses the `pl_node` union field. `payload_index` is into `decls`.
         decl_ref,
         /// Equivalent to a decl_ref followed by load.
-        /// Uses the `decl` union field.
+        /// Uses the `pl_node` union field. `payload_index` is into `decls`.
         decl_val,
         /// Load the value from a pointer. Assumes `x.*` syntax.
         /// Uses `un_node` field. AST node is the `x.*` syntax.
@@ -612,10 +615,6 @@ pub const Inst = struct {
         // /// validated by the switch_br instruction.
         // switch_range,
 
-        comptime {
-            assert(@sizeOf(Tag) == 1);
-        }
-
         /// Returns whether the instruction is one of the control flow "noreturn" types.
         /// Function calls do not count.
         pub fn isNoReturn(tag: Tag) bool {
@@ -1099,7 +1098,6 @@ pub const Inst = struct {
             }
         },
         bin: Bin,
-        decl: *Module.Decl,
         @"const": *TypedValue,
         /// For strings which may contain null bytes.
         str: struct {
@@ -1503,6 +1501,10 @@ const Writer = struct {
             .typeof_peer,
             => try self.writePlNodeMultiOp(stream, inst),
 
+            .decl_ref,
+            .decl_val,
+            => try self.writePlNodeDecl(stream, inst),
+
             .as_node => try self.writeAs(stream, inst),
 
             .breakpoint,
@@ -1513,10 +1515,6 @@ const Writer = struct {
             .repeat_inline,
             => try self.writeNode(stream, inst),
 
-            .decl_ref,
-            .decl_val,
-            => try self.writeDecl(stream, inst),
-
             .error_value,
             .enum_literal,
             => try self.writeStrTok(stream, inst),
@@ -1715,6 +1713,13 @@ const Writer = struct {
         try self.writeSrc(stream, inst_data.src());
     }
 
+    fn writePlNodeDecl(self: *Writer, stream: anytype, inst: Inst.Index) !void {
+        const inst_data = self.code.instructions.items(.data)[inst].pl_node;
+        const decl = self.code.decls[inst_data.payload_index];
+        try stream.print("{s}) ", .{decl.name});
+        try self.writeSrc(stream, inst_data.src());
+    }
+
     fn writeAs(self: *Writer, stream: anytype, inst: Inst.Index) !void {
         const inst_data = self.code.instructions.items(.data)[inst].pl_node;
         const extra = self.code.extraData(Inst.As, inst_data.payload_index).data;
@@ -1736,15 +1741,6 @@ const Writer = struct {
         try self.writeSrc(stream, src);
     }
 
-    fn writeDecl(
-        self: *Writer,
-        stream: anytype,
-        inst: Inst.Index,
-    ) (@TypeOf(stream).Error || error{OutOfMemory})!void {
-        const decl = self.code.instructions.items(.data)[inst].decl;
-        try stream.print("{s})", .{decl.name});
-    }
-
     fn writeStrTok(
         self: *Writer,
         stream: anytype,
test/stage2/test.zig
@@ -1112,21 +1112,21 @@ pub fn addCases(ctx: *TestContext) !void {
         });
     }
 
-    //{
-    //    var case = ctx.obj("extern variable has no type", linux_x64);
-    //    case.addError(
-    //        \\comptime {
-    //        \\    _ = foo;
-    //        \\}
-    //        \\extern var foo: i32;
-    //    , &[_][]const u8{":2:9: error: unable to resolve comptime value"});
-    //    case.addError(
-    //        \\export fn entry() void {
-    //        \\    _ = foo;
-    //        \\}
-    //        \\extern var foo;
-    //    , &[_][]const u8{":4:8: error: unable to infer variable type"});
-    //}
+    {
+        var case = ctx.obj("extern variable has no type", linux_x64);
+        case.addError(
+            \\comptime {
+            \\    _ = foo;
+            \\}
+            \\extern var foo: i32;
+        , &[_][]const u8{":2:9: error: unable to resolve comptime value"});
+        case.addError(
+            \\export fn entry() void {
+            \\    _ = foo;
+            \\}
+            \\extern var foo;
+        , &[_][]const u8{":4:8: error: unable to infer variable type"});
+    }
 
     //{
     //    var case = ctx.exe("break/continue", linux_x64);
BRANCH_TODO
@@ -38,3 +38,6 @@ Performance optimizations to look into:
  * astgen for loops using pointer arithmetic because it's faster and if the programmer
    wants an index capture, that will just be a convenience variable that zig sets up
    independently.
+ * in astgen, if a decl_val would refer to a const variable or to a function, there
+   could be a special zir.Inst.Ref form that refers to the decl directly as the
+   operand. This would elide all the decl_val instructions in the ZIR.