Commit 6917a8c258

mlugg <mlugg@mlugg.co.uk>
2023-07-29 07:03:51
AstGen: handle `ty` result location for struct and array init correctly
Well, this was a journey! The original issue I was trying to fix is covered by the new behavior test in array.zig: in essence, `ty` and `coerced_ty` result locations were not correctly propagated. While fixing this, I noticed a similar bug in struct inits: the type was propagated to *fields* fine, but the actual struct init was unnecessarily anonymous, which could lead to unnecessary copies. Note that the behavior test added in struct.zig was already passing - the bug here didn't change any easy-to-test behavior - but I figured I'd add it anyway. This is a little harder than it seems, because the result type may not itself be an array/struct type: it could be an optional / error union wrapper. A new ZIR instruction is introduced to unwrap these. This is also made a little tricky by the fact that it's possible for result types to be unknown at the time of semantic analysis (due to `anytype` parameters), leading to generic poison. In these cases, we must essentially downgrade to an anonymous initialization. Fixing these issues exposed *another* bug, related to type resolution in Sema. That issue is now tracked by #16603. As a temporary workaround for this bug, a few result locations for builtin function operands have been disabled in AstGen. This is technically a breaking change, but it's very minor: I doubt it'll cause any breakage in the wild.
1 parent 0461a64
src/AstGen.zig
@@ -1509,9 +1509,11 @@ fn arrayInitExpr(
             const tag: Zir.Inst.Tag = if (types.array != .none) .array_init else .array_init_anon;
             return arrayInitExprInner(gz, scope, node, array_init.ast.elements, types.array, types.elem, tag);
         },
-        .ty, .coerced_ty => {
-            const tag: Zir.Inst.Tag = if (types.array != .none) .array_init else .array_init_anon;
-            const result = try arrayInitExprInner(gz, scope, node, array_init.ast.elements, types.array, types.elem, tag);
+        .ty, .coerced_ty => |ty_inst| {
+            const arr_ty = if (types.array != .none) types.array else blk: {
+                break :blk try gz.addUnNode(.opt_eu_base_ty, ty_inst, node);
+            };
+            const result = try arrayInitExprInner(gz, scope, node, array_init.ast.elements, arr_ty, types.elem, .array_init);
             return rvalue(gz, ri, result, node);
         },
         .ptr => |ptr_res| {
@@ -1748,7 +1750,9 @@ fn structInitExpr(
         },
         .ty, .coerced_ty => |ty_inst| {
             if (struct_init.ast.type_expr == 0) {
-                const result = try structInitExprRlNone(gz, scope, node, struct_init, ty_inst, .struct_init_anon);
+                const struct_ty_inst = try gz.addUnNode(.opt_eu_base_ty, ty_inst, node);
+                _ = try gz.addUnNode(.validate_struct_init_ty, struct_ty_inst, node);
+                const result = try structInitExprRlTy(gz, scope, node, struct_init, struct_ty_inst, .struct_init);
                 return rvalue(gz, ri, result, node);
             }
             const inner_ty_inst = try typeExpr(gz, scope, struct_init.ast.type_expr);
@@ -2743,6 +2747,7 @@ fn addEnsureResult(gz: *GenZir, maybe_unused_result: Zir.Inst.Ref, statement: As
             .for_len,
             .@"try",
             .try_ptr,
+            .opt_eu_base_ty,
             => break :b false,
 
             .extended => switch (gz.astgen.instructions.items(.data)[inst].extended.opcode) {
@@ -8314,7 +8319,10 @@ fn builtinCall(
                                 local_val.used = ident_token;
                                 _ = try gz.addPlNode(.export_value, node, Zir.Inst.ExportValue{
                                     .operand = local_val.inst,
-                                    .options = try comptimeExpr(gz, scope, .{ .rl = .{ .coerced_ty = .export_options_type } }, params[1]),
+                                    // TODO: the result location here should be `.{ .coerced_ty = .export_options_type }`, but
+                                    // that currently hits assertions in Sema due to type resolution issues.
+                                    // See #16603
+                                    .options = try comptimeExpr(gz, scope, .{ .rl = .none }, params[1]),
                                 });
                                 return rvalue(gz, ri, .void_value, node);
                             }
@@ -8329,7 +8337,10 @@ fn builtinCall(
                                 const loaded = try gz.addUnNode(.load, local_ptr.ptr, node);
                                 _ = try gz.addPlNode(.export_value, node, Zir.Inst.ExportValue{
                                     .operand = loaded,
-                                    .options = try comptimeExpr(gz, scope, .{ .rl = .{ .coerced_ty = .export_options_type } }, params[1]),
+                                    // TODO: the result location here should be `.{ .coerced_ty = .export_options_type }`, but
+                                    // that currently hits assertions in Sema due to type resolution issues.
+                                    // See #16603
+                                    .options = try comptimeExpr(gz, scope, .{ .rl = .none }, params[1]),
                                 });
                                 return rvalue(gz, ri, .void_value, node);
                             }
@@ -8363,7 +8374,10 @@ fn builtinCall(
                 },
                 else => return astgen.failNode(params[0], "symbol to export must identify a declaration", .{}),
             }
-            const options = try comptimeExpr(gz, scope, .{ .rl = .{ .ty = .export_options_type } }, params[1]);
+            // TODO: the result location here should be `.{ .coerced_ty = .export_options_type }`, but
+            // that currently hits assertions in Sema due to type resolution issues.
+            // See #16603
+            const options = try comptimeExpr(gz, scope, .{ .rl = .none }, params[1]);
             _ = try gz.addPlNode(.@"export", node, Zir.Inst.Export{
                 .namespace = namespace,
                 .decl_name = decl_name,
@@ -8373,7 +8387,10 @@ fn builtinCall(
         },
         .@"extern" => {
             const type_inst = try typeExpr(gz, scope, params[0]);
-            const options = try comptimeExpr(gz, scope, .{ .rl = .{ .ty = .extern_options_type } }, params[1]);
+            // TODO: the result location here should be `.{ .coerced_ty = .extern_options_type }`, but
+            // that currently hits assertions in Sema due to type resolution issues.
+            // See #16603
+            const options = try comptimeExpr(gz, scope, .{ .rl = .none }, params[1]);
             const result = try gz.addExtendedPayload(.builtin_extern, Zir.Inst.BinNode{
                 .node = gz.nodeIndexToRelative(node),
                 .lhs = type_inst,
@@ -8477,7 +8494,10 @@ fn builtinCall(
         // zig fmt: on
 
         .Type => {
-            const operand = try expr(gz, scope, .{ .rl = .{ .coerced_ty = .type_info_type } }, params[0]);
+            // TODO: the result location here should be `.{ .coerced_ty = .type_info_type }`, but
+            // that currently hits assertions in Sema due to type resolution issues.
+            // See #16603
+            const operand = try expr(gz, scope, .{ .rl = .none }, params[0]);
 
             const gpa = gz.astgen.gpa;
 
@@ -8755,7 +8775,10 @@ fn builtinCall(
         },
         .prefetch => {
             const ptr = try expr(gz, scope, .{ .rl = .none }, params[0]);
-            const options = try comptimeExpr(gz, scope, .{ .rl = .{ .ty = .prefetch_options_type } }, params[1]);
+            // TODO: the result location here should be `.{ .coerced_ty = .preftech_options_type }`, but
+            // that currently hits assertions in Sema due to type resolution issues.
+            // See #16603
+            const options = try comptimeExpr(gz, scope, .{ .rl = .none }, params[1]);
             _ = try gz.addExtendedPayload(.prefetch, Zir.Inst.BinNode{
                 .node = gz.nodeIndexToRelative(node),
                 .lhs = ptr,
src/print_zir.zig
@@ -229,6 +229,7 @@ const Writer = struct {
             .make_ptr_const,
             .validate_deref,
             .check_comptime_control_flow,
+            .opt_eu_base_ty,
             => try self.writeUnNode(stream, inst),
 
             .ref,
src/Sema.zig
@@ -1125,6 +1125,7 @@ fn analyzeBodyInner(
             .array_base_ptr               => try sema.zirArrayBasePtr(block, inst),
             .field_base_ptr               => try sema.zirFieldBasePtr(block, inst),
             .for_len                      => try sema.zirForLen(block, inst),
+            .opt_eu_base_ty               => try sema.zirOptEuBaseTy(block, inst),
 
             .clz       => try sema.zirBitCount(block, inst, .clz,      Value.clz),
             .ctz       => try sema.zirBitCount(block, inst, .ctz,      Value.ctz),
@@ -1359,12 +1360,12 @@ fn analyzeBodyInner(
                 continue;
             },
             .validate_array_init_ty => {
-                try sema.validateArrayInitTy(block, inst);
+                try sema.zirValidateArrayInitTy(block, inst);
                 i += 1;
                 continue;
             },
             .validate_struct_init_ty => {
-                try sema.validateStructInitTy(block, inst);
+                try sema.zirValidateStructInitTy(block, inst);
                 i += 1;
                 continue;
             },
@@ -4312,7 +4313,31 @@ fn zirForLen(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
     return len;
 }
 
-fn validateArrayInitTy(
+fn zirOptEuBaseTy(
+    sema: *Sema,
+    block: *Block,
+    inst: Zir.Inst.Index,
+) CompileError!Air.Inst.Ref {
+    const mod = sema.mod;
+    const inst_data = sema.code.instructions.items(.data)[inst].un_node;
+    var ty = sema.resolveType(block, .unneeded, inst_data.operand) catch |err| switch (err) {
+        // Since this is a ZIR instruction that returns a type, encountering
+        // generic poison should not result in a failed compilation, but the
+        // generic poison type. This prevents unnecessary failures when
+        // constructing types at compile-time.
+        error.GenericPoison => return .generic_poison_type,
+        else => |e| return e,
+    };
+    while (true) {
+        switch (ty.zigTypeTag(mod)) {
+            .Optional => ty = ty.optionalChild(mod),
+            .ErrorUnion => ty = ty.errorUnionPayload(mod),
+            else => return sema.addType(ty),
+        }
+    }
+}
+
+fn zirValidateArrayInitTy(
     sema: *Sema,
     block: *Block,
     inst: Zir.Inst.Index,
@@ -4322,7 +4347,11 @@ fn validateArrayInitTy(
     const src = inst_data.src();
     const ty_src: LazySrcLoc = .{ .node_offset_init_ty = inst_data.src_node };
     const extra = sema.code.extraData(Zir.Inst.ArrayInit, inst_data.payload_index).data;
-    const ty = try sema.resolveType(block, ty_src, extra.ty);
+    const ty = sema.resolveType(block, ty_src, extra.ty) catch |err| switch (err) {
+        // It's okay for the type to be unknown: this will result in an anonymous array init.
+        error.GenericPoison => return,
+        else => |e| return e,
+    };
 
     switch (ty.zigTypeTag(mod)) {
         .Array => {
@@ -4358,7 +4387,7 @@ fn validateArrayInitTy(
     return sema.failWithArrayInitNotSupported(block, ty_src, ty);
 }
 
-fn validateStructInitTy(
+fn zirValidateStructInitTy(
     sema: *Sema,
     block: *Block,
     inst: Zir.Inst.Index,
@@ -4366,7 +4395,11 @@ fn validateStructInitTy(
     const mod = sema.mod;
     const inst_data = sema.code.instructions.items(.data)[inst].un_node;
     const src = inst_data.src();
-    const ty = try sema.resolveType(block, src, inst_data.operand);
+    const ty = sema.resolveType(block, src, inst_data.operand) catch |err| switch (err) {
+        // It's okay for the type to be unknown: this will result in an anonymous struct init.
+        error.GenericPoison => return,
+        else => |e| return e,
+    };
 
     switch (ty.zigTypeTag(mod)) {
         .Struct, .Union => return,
@@ -7744,7 +7777,15 @@ fn zirOptionalType(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErro
 fn zirElemTypeIndex(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
     const mod = sema.mod;
     const bin = sema.code.instructions.items(.data)[inst].bin;
-    const indexable_ty = try sema.resolveType(block, .unneeded, bin.lhs);
+    const operand = sema.resolveType(block, .unneeded, bin.lhs) catch |err| switch (err) {
+        // Since this is a ZIR instruction that returns a type, encountering
+        // generic poison should not result in a failed compilation, but the
+        // generic poison type. This prevents unnecessary failures when
+        // constructing types at compile-time.
+        error.GenericPoison => return .generic_poison_type,
+        else => |e| return e,
+    };
+    const indexable_ty = try sema.resolveTypeFields(operand);
     assert(indexable_ty.isIndexable(mod)); // validated by a previous instruction
     if (indexable_ty.zigTypeTag(mod) == .Struct) {
         const elem_type = indexable_ty.structFieldType(@intFromEnum(bin.rhs), mod);
@@ -18794,7 +18835,13 @@ fn zirStructInit(
     const first_item = sema.code.extraData(Zir.Inst.StructInit.Item, extra.end).data;
     const first_field_type_data = zir_datas[first_item.field_type].pl_node;
     const first_field_type_extra = sema.code.extraData(Zir.Inst.FieldType, first_field_type_data.payload_index).data;
-    const resolved_ty = try sema.resolveType(block, src, first_field_type_extra.container_type);
+    const resolved_ty = sema.resolveType(block, src, first_field_type_extra.container_type) catch |err| switch (err) {
+        error.GenericPoison => {
+            // The type wasn't actually known, so treat this as an anon struct init.
+            return sema.structInitAnon(block, src, .typed_init, extra.data, extra.end, is_ref);
+        },
+        else => |e| return e,
+    };
     try sema.resolveTypeLayout(resolved_ty);
 
     if (resolved_ty.zigTypeTag(mod) == .Struct) {
@@ -19037,26 +19084,57 @@ fn zirStructInitAnon(
     inst: Zir.Inst.Index,
     is_ref: bool,
 ) CompileError!Air.Inst.Ref {
-    const mod = sema.mod;
-    const gpa = sema.gpa;
     const inst_data = sema.code.instructions.items(.data)[inst].pl_node;
     const src = inst_data.src();
     const extra = sema.code.extraData(Zir.Inst.StructInitAnon, inst_data.payload_index);
-    const types = try sema.arena.alloc(InternPool.Index, extra.data.fields_len);
+    return sema.structInitAnon(block, src, .anon_init, extra.data, extra.end, is_ref);
+}
+
+fn structInitAnon(
+    sema: *Sema,
+    block: *Block,
+    src: LazySrcLoc,
+    /// It is possible for a typed struct_init to be downgraded to an anonymous init due to a
+    /// generic poison type. In this case, we need to know to interpret the extra data differently.
+    comptime kind: enum { anon_init, typed_init },
+    extra_data: switch (kind) {
+        .anon_init => Zir.Inst.StructInitAnon,
+        .typed_init => Zir.Inst.StructInit,
+    },
+    extra_end: usize,
+    is_ref: bool,
+) CompileError!Air.Inst.Ref {
+    const mod = sema.mod;
+    const gpa = sema.gpa;
+    const zir_datas = sema.code.instructions.items(.data);
+
+    const types = try sema.arena.alloc(InternPool.Index, extra_data.fields_len);
     const values = try sema.arena.alloc(InternPool.Index, types.len);
+
     var fields = std.AutoArrayHashMap(InternPool.NullTerminatedString, u32).init(sema.arena);
     try fields.ensureUnusedCapacity(types.len);
 
     // Find which field forces the expression to be runtime, if any.
     const opt_runtime_index = rs: {
         var runtime_index: ?usize = null;
-        var extra_index = extra.end;
+        var extra_index = extra_end;
         for (types, 0..) |*field_ty, i_usize| {
-            const i = @as(u32, @intCast(i_usize));
-            const item = sema.code.extraData(Zir.Inst.StructInitAnon.Item, extra_index);
+            const i: u32 = @intCast(i_usize);
+            const item = switch (kind) {
+                .anon_init => sema.code.extraData(Zir.Inst.StructInitAnon.Item, extra_index),
+                .typed_init => sema.code.extraData(Zir.Inst.StructInit.Item, extra_index),
+            };
             extra_index = item.end;
 
-            const name = sema.code.nullTerminatedString(item.data.field_name);
+            const name = switch (kind) {
+                .anon_init => sema.code.nullTerminatedString(item.data.field_name),
+                .typed_init => name: {
+                    // `item.data.field_type` references a `field_type` instruction
+                    const field_type_data = zir_datas[item.data.field_type].pl_node;
+                    const field_type_extra = sema.code.extraData(Zir.Inst.FieldType, field_type_data.payload_index);
+                    break :name sema.code.nullTerminatedString(field_type_extra.data.name_start);
+                },
+            };
             const name_ip = try mod.intern_pool.getOrPutString(gpa, name);
             const gop = fields.getOrPutAssumeCapacity(name_ip);
             if (gop.found_existing) {
@@ -19129,10 +19207,13 @@ fn zirStructInitAnon(
             .flags = .{ .address_space = target_util.defaultAddressSpace(target, .local) },
         });
         const alloc = try block.addTy(.alloc, alloc_ty);
-        var extra_index = extra.end;
+        var extra_index = extra_end;
         for (types, 0..) |field_ty, i_usize| {
             const i = @as(u32, @intCast(i_usize));
-            const item = sema.code.extraData(Zir.Inst.StructInitAnon.Item, extra_index);
+            const item = switch (kind) {
+                .anon_init => sema.code.extraData(Zir.Inst.StructInitAnon.Item, extra_index),
+                .typed_init => sema.code.extraData(Zir.Inst.StructInit.Item, extra_index),
+            };
             extra_index = item.end;
 
             const field_ptr_ty = try mod.ptrType(.{
@@ -19150,9 +19231,12 @@ fn zirStructInitAnon(
     }
 
     const element_refs = try sema.arena.alloc(Air.Inst.Ref, types.len);
-    var extra_index = extra.end;
+    var extra_index = extra_end;
     for (types, 0..) |_, i| {
-        const item = sema.code.extraData(Zir.Inst.StructInitAnon.Item, extra_index);
+        const item = switch (kind) {
+            .anon_init => sema.code.extraData(Zir.Inst.StructInitAnon.Item, extra_index),
+            .typed_init => sema.code.extraData(Zir.Inst.StructInit.Item, extra_index),
+        };
         extra_index = item.end;
         element_refs[i] = try sema.resolveInst(item.data.init);
     }
@@ -19175,7 +19259,13 @@ fn zirArrayInit(
     const args = sema.code.refSlice(extra.end, extra.data.operands_len);
     assert(args.len >= 2); // array_ty + at least one element
 
-    const array_ty = try sema.resolveType(block, src, args[0]);
+    const array_ty = sema.resolveType(block, src, args[0]) catch |err| switch (err) {
+        error.GenericPoison => {
+            // The type wasn't actually known, so treat this as an anon array init.
+            return sema.arrayInitAnon(block, src, args[1..], is_ref);
+        },
+        else => |e| return e,
+    };
     const sentinel_val = array_ty.sentinel(mod);
 
     const resolved_args = try gpa.alloc(Air.Inst.Ref, args.len - 1 + @intFromBool(sentinel_val != null));
@@ -19283,6 +19373,16 @@ fn zirArrayInitAnon(
     const src = inst_data.src();
     const extra = sema.code.extraData(Zir.Inst.MultiOp, inst_data.payload_index);
     const operands = sema.code.refSlice(extra.end, extra.data.operands_len);
+    return sema.arrayInitAnon(block, src, operands, is_ref);
+}
+
+fn arrayInitAnon(
+    sema: *Sema,
+    block: *Block,
+    src: LazySrcLoc,
+    operands: []const Zir.Inst.Ref,
+    is_ref: bool,
+) CompileError!Air.Inst.Ref {
     const mod = sema.mod;
 
     const types = try sema.arena.alloc(InternPool.Index, operands.len);
src/type.zig
@@ -3039,6 +3039,7 @@ pub const Type = struct {
         return switch (mod.intern_pool.indexToKey(ty.toIntern())) {
             .struct_type => |struct_type| {
                 const struct_obj = mod.structPtrUnwrap(struct_type.index).?;
+                assert(struct_obj.haveFieldTypes());
                 return struct_obj.fields.values()[index].ty;
             },
             .union_type => |union_type| {
src/Zir.zig
@@ -700,10 +700,16 @@ pub const Inst = struct {
         ///   *?S returns *S
         /// Uses the `un_node` field.
         field_base_ptr,
+        /// Given a type, strips all optional and error union types wrapping it.
+        /// e.g. `E!?u32` becomes `u32`, `[]u8` becomes `[]u8`.
+        /// Uses the `un_node` field.
+        opt_eu_base_ty,
         /// Checks that the type supports array init syntax.
+        /// Returns the underlying indexable type (since the given type may be e.g. an optional).
         /// Uses the `un_node` field.
         validate_array_init_ty,
         /// Checks that the type supports struct init syntax.
+        /// Returns the underlying struct type (since the given type may be e.g. an optional).
         /// Uses the `un_node` field.
         validate_struct_init_ty,
         /// Given a set of `field_ptr` instructions, assumes they are all part of a struct
@@ -1234,6 +1240,7 @@ pub const Inst = struct {
                 .save_err_ret_index,
                 .restore_err_ret_index,
                 .for_len,
+                .opt_eu_base_ty,
                 => false,
 
                 .@"break",
@@ -1522,6 +1529,7 @@ pub const Inst = struct {
                 .for_len,
                 .@"try",
                 .try_ptr,
+                .opt_eu_base_ty,
                 => false,
 
                 .extended => switch (data.extended.opcode) {
@@ -1676,6 +1684,7 @@ pub const Inst = struct {
                 .switch_block_ref = .pl_node,
                 .array_base_ptr = .un_node,
                 .field_base_ptr = .un_node,
+                .opt_eu_base_ty = .un_node,
                 .validate_array_init_ty = .pl_node,
                 .validate_struct_init_ty = .un_node,
                 .validate_struct_init = .pl_node,
test/behavior/array.zig
@@ -761,3 +761,17 @@ test "slicing array of zero-sized values" {
     for (arr[0..]) |zero|
         try expect(zero == 0);
 }
+
+test "array init with no result pointer sets field result types" {
+    const S = struct {
+        // A function parameter has a result type, but no result pointer.
+        fn f(arr: [1]u32) u32 {
+            return arr[0];
+        }
+    };
+
+    const x: u64 = 123;
+    const y = S.f(.{@intCast(x)});
+
+    try expect(y == x);
+}
test/behavior/struct.zig
@@ -1724,3 +1724,17 @@ test "packed struct field in anonymous struct" {
 fn countFields(v: anytype) usize {
     return @typeInfo(@TypeOf(v)).Struct.fields.len;
 }
+
+test "struct init with no result pointer sets field result types" {
+    const S = struct {
+        // A function parameter has a result type, but no result pointer.
+        fn f(s: struct { x: u32 }) u32 {
+            return s.x;
+        }
+    };
+
+    const x: u64 = 123;
+    const y = S.f(.{ .x = @intCast(x) });
+
+    try expect(y == x);
+}