Commit 67f5a28257

Andrew Kelley <andrew@ziglang.org>
2021-05-18 02:39:52
Sema: use a hash map for ZIR->AIR mapping
Previously, ZIR was per-function so we could simply allocate a slice for all ZIR instructions. However now ZIR is whole-file, so we need a sparse mapping of ZIR to AIR instructions in order to not waste memory.
1 parent 5cacc44
Changed files (3)
src/Module.zig
@@ -2904,14 +2904,13 @@ pub fn semaFile(mod: *Module, file: *Scope.File) InnerError!void {
             .gpa = gpa,
             .arena = &sema_arena.allocator,
             .code = file.zir,
-            // TODO use a map because this array is too big
-            .inst_map = try sema_arena.allocator.alloc(*ir.Inst, file.zir.instructions.len),
             .owner_decl = new_decl,
             .namespace = &struct_obj.namespace,
             .func = null,
             .owner_func = null,
             .param_inst_list = &.{},
         };
+        defer sema.deinit();
         var block_scope: Scope.Block = .{
             .parent = null,
             .sema = &sema,
@@ -2960,13 +2959,13 @@ fn semaDecl(mod: *Module, decl: *Decl) !bool {
         .gpa = gpa,
         .arena = &analysis_arena.allocator,
         .code = zir,
-        .inst_map = try analysis_arena.allocator.alloc(*ir.Inst, zir.instructions.len),
         .owner_decl = decl,
         .namespace = decl.namespace,
         .func = null,
         .owner_func = null,
         .param_inst_list = &.{},
     };
+    defer sema.deinit();
 
     if (decl.isRoot()) {
         log.debug("semaDecl root {*} ({s})", .{ decl, decl.name });
@@ -3565,14 +3564,13 @@ pub fn analyzeFnBody(mod: *Module, decl: *Decl, func: *Fn) !void {
         .gpa = mod.gpa,
         .arena = &arena.allocator,
         .code = zir,
-        .inst_map = try mod.gpa.alloc(*ir.Inst, zir.instructions.len),
         .owner_decl = decl,
         .namespace = decl.namespace,
         .func = func,
         .owner_func = func,
         .param_inst_list = param_inst_list,
     };
-    defer mod.gpa.free(sema.inst_map);
+    defer sema.deinit();
 
     var inner_block: Scope.Block = .{
         .parent = null,
@@ -4555,14 +4553,13 @@ pub fn analyzeStructFields(mod: *Module, struct_obj: *Struct) InnerError!void {
         .gpa = gpa,
         .arena = &decl_arena.allocator,
         .code = zir,
-        .inst_map = try gpa.alloc(*ir.Inst, zir.instructions.len),
         .owner_decl = struct_obj.owner_decl,
         .namespace = &struct_obj.namespace,
         .owner_func = null,
         .func = null,
         .param_inst_list = &.{},
     };
-    defer gpa.free(sema.inst_map);
+    defer sema.deinit();
 
     var block: Scope.Block = .{
         .parent = null,
@@ -4712,14 +4709,13 @@ pub fn analyzeUnionFields(mod: *Module, union_obj: *Union) InnerError!void {
         .gpa = gpa,
         .arena = &decl_arena.allocator,
         .code = zir,
-        .inst_map = try gpa.alloc(*ir.Inst, zir.instructions.len),
         .owner_decl = union_obj.owner_decl,
         .namespace = &union_obj.namespace,
         .owner_func = null,
         .func = null,
         .param_inst_list = &.{},
     };
-    defer gpa.free(sema.inst_map);
+    defer sema.deinit();
 
     var block: Scope.Block = .{
         .parent = null,
src/Sema.zig
@@ -12,7 +12,7 @@ gpa: *Allocator,
 arena: *Allocator,
 code: Zir,
 /// Maps ZIR to AIR.
-inst_map: []*Inst,
+inst_map: InstMap = .{},
 /// When analyzing an inline function call, owner_decl is the Decl of the caller
 /// and `src_decl` of `Scope.Block` is the `Decl` of the callee.
 /// This `Decl` owns the arena memory of this `Sema`.
@@ -65,6 +65,13 @@ const LazySrcLoc = Module.LazySrcLoc;
 const RangeSet = @import("RangeSet.zig");
 const target_util = @import("target.zig");
 
+pub const InstMap = std.AutoHashMapUnmanaged(Zir.Inst.Index, *ir.Inst);
+
+pub fn deinit(sema: *Sema) void {
+    sema.inst_map.deinit(sema.gpa);
+    sema.* = undefined;
+}
+
 pub fn analyzeFnBody(
     sema: *Sema,
     block: *Scope.Block,
@@ -129,7 +136,7 @@ pub fn analyzeBody(
 ) InnerError!Zir.Inst.Index {
     // No tracy calls here, to avoid interfering with the tail call mechanism.
 
-    const map = block.sema.inst_map;
+    const map = &block.sema.inst_map;
     const tags = block.sema.code.instructions.items(.tag);
     const datas = block.sema.code.instructions.items(.data);
 
@@ -142,7 +149,7 @@ pub fn analyzeBody(
     var i: usize = 0;
     while (true) : (i += 1) {
         const inst = body[i];
-        map[inst] = switch (tags[inst]) {
+        const air_inst = switch (tags[inst]) {
             // zig fmt: off
             .arg                          => try sema.zirArg(block, inst),
             .alloc                        => try sema.zirAlloc(block, inst),
@@ -500,8 +507,9 @@ pub fn analyzeBody(
                 }
             },
         };
-        if (map[inst].ty.isNoReturn())
+        if (air_inst.ty.isNoReturn())
             return always_noreturn;
+        try map.putNoClobber(sema.gpa, inst, air_inst);
     }
 }
 
@@ -556,7 +564,7 @@ pub fn resolveInst(sema: *Sema, zir_ref: Zir.Inst.Ref) error{OutOfMemory}!*ir.In
     i -= Zir.Inst.Ref.typed_value_map.len;
 
     // Finally, the last section of indexes refers to the map of ZIR=>AIR.
-    return sema.inst_map[i];
+    return sema.inst_map.get(@intCast(u32, i)).?;
 }
 
 fn resolveConstString(
@@ -2244,9 +2252,9 @@ fn analyzeCall(
         defer sema.code = parent_zir;
 
         const parent_inst_map = sema.inst_map;
-        sema.inst_map = try sema.gpa.alloc(*ir.Inst, sema.code.instructions.len);
+        sema.inst_map = .{};
         defer {
-            sema.gpa.free(sema.inst_map);
+            sema.inst_map.deinit(sema.gpa);
             sema.inst_map = parent_inst_map;
         }
 
BRANCH_TODO
@@ -1,8 +1,3 @@
- * use a hash map for instructions because the array is too big
-   - no, actually modify the Zir.Inst.Ref strategy so that each decl gets
-     their indexes starting at 0 so that we can use an array to store Sema
-     results rather than a map.
-
  * in SwitchProng resolve, make sure AST tree gets loaded.
    It will be unloaded if using cached ZIR.