Commit 56cfa8f22f

Andrew Kelley <andrew@ziglang.org>
2022-08-31 02:35:25
Sema: prevent access of undefined fields
When instantiating a generic function, there is a period of time where the function is inserted into monomorphed_funcs map, but is not yet initialized. Despite semantic analysis being single-threaded, generic function instantiation can happen recursively, meaning that the hash and equality functions for monomorphed_funcs entries are potentially invoked for an uninitialized function. This problem was mitigated by pre-setting the hash field on the newly allocated function, however it did not solve the problem for hash collisions in which case the equality function would be invoked. That it was solved for hash() but not eql() explains why the problem was difficult to observe. I tested this patch by temporarily sabotaging the hash and making it always return 0. This fix is centered on adding a new field to Module.Fn which is the one checked by eql() and is populated pre-initialization. closes #12643
1 parent 35e0ff7
Changed files (2)
src/Module.zig
@@ -1496,6 +1496,22 @@ pub const Fn = struct {
     /// active Sema context. Importantly, this value is also updated when an existing
     /// generic function instantiation is found and called.
     branch_quota: u32,
+
+    /// If this is not none, this function is a generic function instantiation, and
+    /// this is the generic function decl from which the instance was derived.
+    /// This information is redundant with a combination of checking if comptime_args is
+    /// not null and looking at the first decl dependency of owner_decl. This redundant
+    /// information is useful for three reasons:
+    /// 1. Improved perf of monomorphed_funcs when checking the eql() function because it
+    ///    can do two fewer pointer chases by grabbing the info from this field directly
+    ///    instead of accessing the decl and then the dependencies set.
+    /// 2. While a generic function instantiation is being initialized, we need hash()
+    ///    and eql() to work before the initialization is complete. Completing the
+    ///    insertion into the decl dependency set has more fallible operations than simply
+    ///    setting this field.
+    /// 3. I forgot what the third thing was while typing up the other two.
+    generic_owner_decl: Decl.OptionalIndex,
+
     state: Analysis,
     is_cold: bool = false,
     is_noinline: bool,
src/Sema.zig
@@ -5590,11 +5590,10 @@ const GenericCallAdapter = struct {
 
     pub fn eql(ctx: @This(), adapted_key: void, other_key: *Module.Fn) bool {
         _ = adapted_key;
-        // The generic function Decl is guaranteed to be the first dependency
-        // of each of its instantiations.
-        const other_owner_decl = ctx.module.declPtr(other_key.owner_decl);
-        const generic_owner_decl = other_owner_decl.dependencies.keys()[0];
-        if (ctx.generic_fn.owner_decl != generic_owner_decl) return false;
+        // Checking for equality may happen on an item that has been inserted
+        // into the map but is not yet fully initialized. In such case, the
+        // two initialized fields are `hash` and `generic_owner_decl`.
+        if (ctx.generic_fn.owner_decl != other_key.generic_owner_decl.unwrap().?) return false;
 
         const other_comptime_args = other_key.comptime_args.?;
         for (other_comptime_args[0..ctx.func_ty_info.param_types.len]) |other_arg, i| {
@@ -6447,11 +6446,14 @@ fn instantiateGenericCall(
     const gop = try mod.monomorphed_funcs.getOrPutAdapted(gpa, {}, adapter);
     const callee = if (!gop.found_existing) callee: {
         const new_module_func = try gpa.create(Module.Fn);
+        errdefer gpa.destroy(new_module_func);
+
         // This ensures that we can operate on the hash map before the Module.Fn
         // struct is fully initialized.
         new_module_func.hash = precomputed_hash;
+        new_module_func.generic_owner_decl = module_fn.owner_decl.toOptional();
+        new_module_func.comptime_args = null;
         gop.key_ptr.* = new_module_func;
-        errdefer gpa.destroy(new_module_func);
         errdefer assert(mod.monomorphed_funcs.remove(new_module_func));
 
         try namespace.anon_decls.ensureUnusedCapacity(gpa, 1);
@@ -8032,11 +8034,13 @@ fn funcCommon(
     } else null;
 
     const hash = new_func.hash;
+    const generic_owner_decl = if (comptime_args == null) .none else new_func.generic_owner_decl;
     const fn_payload = try sema.arena.create(Value.Payload.Function);
     new_func.* = .{
         .state = anal_state,
         .zir_body_inst = func_inst,
         .owner_decl = sema.owner_decl_index,
+        .generic_owner_decl = generic_owner_decl,
         .comptime_args = comptime_args,
         .hash = hash,
         .lbrace_line = src_locs.lbrace_line,