Commit 8fb392dbb4

Andrew Kelley <andrew@ziglang.org>
2020-06-30 03:58:34
stage2: implement liveness analysis
1 parent abcd4ea
Changed files (4)
src-self-hosted/codegen.zig
@@ -104,11 +104,18 @@ pub fn generateSymbol(
                 .bin_file = bin_file,
                 .mod_fn = module_fn,
                 .code = code,
-                .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(bin_file.allocator),
                 .err_msg = null,
                 .args = mc_args.items,
+                .branch_stack = .{},
             };
-            defer function.inst_table.deinit();
+            defer {
+                assert(function.branch_stack.items.len == 1);
+                function.branch_stack.items[0].inst_table.deinit();
+                function.branch_stack.deinit(bin_file.allocator);
+            }
+            try function.branch_stack.append(bin_file.allocator, .{
+                .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(bin_file.allocator),
+            });
 
             function.gen() catch |err| switch (err) {
                 error.CodegenFail => return Result{ .fail = function.err_msg.? },
@@ -215,13 +222,29 @@ const Function = struct {
     target: *const std.Target,
     mod_fn: *const Module.Fn,
     code: *std.ArrayList(u8),
-    inst_table: std.AutoHashMap(*ir.Inst, MCValue),
     err_msg: ?*ErrorMsg,
     args: []MCValue,
 
+    /// Whenever there is a runtime branch, we push a Branch onto this stack,
+    /// and pop it off when the runtime branch joins. This provides an "overlay"
+    /// of the table of mappings from instructions to `MCValue` from within the branch.
+    /// This way we can modify the `MCValue` for an instruction in different ways
+    /// within different branches. Special consideration is needed when a branch
+    /// joins with its parent, to make sure all instructions have the same MCValue
+    /// across each runtime branch upon joining.
+    branch_stack: std.ArrayListUnmanaged(Branch),
+
+    const Branch = struct {
+        inst_table: std.AutoHashMap(*ir.Inst, MCValue),
+    };
+
     const MCValue = union(enum) {
+        /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc.
         none,
+        /// Control flow will not allow this value to be observed.
         unreach,
+        /// No more references to this value remain.
+        dead,
         /// A pointer-sized integer that fits in a register.
         immediate: u64,
         /// The constant was emitted into the code, at this offset.
@@ -292,9 +315,10 @@ const Function = struct {
     }
 
     fn genArch(self: *Function, comptime arch: std.Target.Cpu.Arch) !void {
+        const inst_table = &self.branch_stack.items[0].inst_table;
         for (self.mod_fn.analysis.success.instructions) |inst| {
             const new_inst = try self.genFuncInst(inst, arch);
-            try self.inst_table.putNoClobber(inst, new_inst);
+            try inst_table.putNoClobber(inst, new_inst);
         }
     }
 
@@ -525,7 +549,9 @@ const Function = struct {
     fn genSetReg(self: *Function, src: usize, comptime arch: Target.Cpu.Arch, reg: Reg(arch), mcv: MCValue) error{ CodegenFail, OutOfMemory }!void {
         switch (arch) {
             .x86_64 => switch (mcv) {
-                .none, .unreach => unreachable,
+                .dead => unreachable,
+                .none => unreachable,
+                .unreach => unreachable,
                 .immediate => |x| {
                     if (reg.size() != 64) {
                         return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
@@ -708,12 +734,26 @@ const Function = struct {
         if (self.inst_table.get(inst)) |mcv| {
             return mcv;
         }
+        // Constants have static lifetimes, so they are always memoized in the outer most table.
         if (inst.cast(ir.Inst.Constant)) |const_inst| {
-            const mcvalue = try self.genTypedValue(inst.src, .{ .ty = inst.ty, .val = const_inst.val });
-            try self.inst_table.putNoClobber(inst, mcvalue);
-            return mcvalue;
-        } else {
-            return self.inst_table.get(inst).?;
+            const branch = &self.branch_stack.items[0];
+            const gop = try branch.inst_table.getOrPut(inst);
+            if (!gop.found_existing) {
+                // getOrPut already reserved the slot; fill it in place instead of
+                // re-inserting (putNoClobber on an existing key would assert and
+                // could invalidate `gop` by rehashing).
+                gop.kv.value = try self.genTypedValue(inst.src, .{ .ty = inst.ty, .val = const_inst.val });
+            }
+            return gop.kv.value;
+        }
+
+        // Treat each stack item as a "layer" on top of the previous one.
+        var i: usize = self.branch_stack.items.len;
+        while (true) {
+            i -= 1;
+            if (self.branch_stack.items[i].inst_table.getValue(inst)) |mcv| {
+                return mcv;
+            }
         }
     }
 
src-self-hosted/ir.zig
@@ -10,6 +10,16 @@ const Module = @import("Module.zig");
 /// a memory location for the value to survive after a const instruction.
 pub const Inst = struct {
     tag: Tag,
+    /// Each bit represents the index of an `Inst` parameter in the `args` field.
+    /// If a bit is set, it marks the end of the lifetime of the corresponding 
+    /// instruction parameter. For example, 0b00000101 means that the first and
+    /// third `Inst` parameters' lifetimes end after this instruction, and will
+    /// not have any more following references.
+    /// The most significant bit being set means that the instruction itself is
+    /// never referenced, in other words its lifetime ends as soon as it finishes.
+    /// If the byte is `0xff`, it means this is a special case and this data is
+    /// encoded elsewhere.
+    deaths: u8 = 0xff,
     ty: Type,
     /// Byte offset into the source.
     src: usize,
@@ -165,6 +175,12 @@ pub const Inst = struct {
             true_body: Body,
             false_body: Body,
         },
+        /// Set of instructions whose lifetimes end at the start of one of the branches.
+        /// The `true` branch is first: `deaths[0..true_death_count]`.
+        /// The `false` branch is next: `(deaths + true_death_count)[..false_death_count]`.
+        deaths: [*]*Inst = undefined,
+        true_death_count: u32 = 0,
+        false_death_count: u32 = 0,
     };
 
     pub const Constant = struct {
@@ -224,4 +240,4 @@ pub const Inst = struct {
 
 pub const Body = struct {
     instructions: []*Inst,
-};
+};
\ No newline at end of file
src-self-hosted/liveness.zig
@@ -0,0 +1,134 @@
+const std = @import("std");
+const ir = @import("ir.zig");
+const trace = @import("tracy.zig").trace;
+
+/// Perform Liveness Analysis over the `Body`. Each `Inst` will have its `deaths` field populated.
+pub fn analyze(
+    /// Used for temporary storage during the analysis.
+    gpa: *std.mem.Allocator,
+    /// Used to tack on extra allocations in the same lifetime as the existing instructions.
+    arena: *std.mem.Allocator,
+    body: ir.Body,
+) error{OutOfMemory}!void {
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    var table = std.AutoHashMap(*ir.Inst, void).init(gpa);
+    defer table.deinit();
+    try table.ensureCapacity(body.instructions.len);
+    try analyzeWithTable(arena, &table, body);
+}
+
+fn analyzeWithTable(arena: *std.mem.Allocator, table: *std.AutoHashMap(*ir.Inst, void), body: ir.Body) error{OutOfMemory}!void {
+    var i: usize = body.instructions.len;
+
+    while (i != 0) {
+        i -= 1;
+        // Dispatch through a helper so that returning from the tag dispatch
+        // does not abort the loop over the remaining instructions.
+        try analyzeInstBase(arena, table, body.instructions[i]);
+    }
+}
+
+fn analyzeInstBase(arena: *std.mem.Allocator, table: *std.AutoHashMap(*ir.Inst, void), base: *ir.Inst) error{OutOfMemory}!void {
+    // Obtain the corresponding instruction type based on the tag type.
+    inline for (std.meta.declarations(ir.Inst)) |decl| {
+        switch (decl.data) {
+            .Type => |T| {
+                if (@hasDecl(T, "base_tag")) {
+                    if (T.base_tag == base.tag) {
+                        return analyzeInst(arena, table, T, @fieldParentPtr(T, "base", base));
+                    }
+                }
+            },
+            else => continue,
+        }
+    }
+    unreachable;
+}
+
+fn analyzeInst(arena: *std.mem.Allocator, table: *std.AutoHashMap(*ir.Inst, void), comptime T: type, inst: *T) error{OutOfMemory}!void {
+    inst.base.deaths = 0;
+
+    switch (T) {
+        ir.Inst.Constant => return,
+        ir.Inst.Block => {
+            try analyzeWithTable(arena, table, inst.args.body);
+            // We let this continue so that it can possibly mark the block as
+            // unreferenced below.
+        },
+        ir.Inst.CondBr => {
+            var true_table = std.AutoHashMap(*ir.Inst, void).init(table.allocator);
+            defer true_table.deinit();
+            try true_table.ensureCapacity(inst.args.true_body.instructions.len);
+            try analyzeWithTable(arena, &true_table, inst.args.true_body);
+
+            var false_table = std.AutoHashMap(*ir.Inst, void).init(table.allocator);
+            defer false_table.deinit();
+            try false_table.ensureCapacity(inst.args.false_body.instructions.len);
+            try analyzeWithTable(arena, &false_table, inst.args.false_body);
+
+            // Each death that occurs inside one branch, but not the other, needs
+            // to be added as a death immediately upon entering the other branch.
+            // During the iteration of the table, we additionally propagate the
+            // deaths to the parent table.
+            var true_entry_deaths = std.ArrayList(*ir.Inst).init(table.allocator);
+            defer true_entry_deaths.deinit();
+            var false_entry_deaths = std.ArrayList(*ir.Inst).init(table.allocator);
+            defer false_entry_deaths.deinit();
+            {
+                var it = false_table.iterator();
+                while (it.next()) |entry| {
+                    const false_death = entry.key;
+                    if (!true_table.contains(false_death)) {
+                        try true_entry_deaths.append(false_death);
+                        // Here we are only adding to the parent table if the following iteration
+                        // would miss it.
+                        try table.putNoClobber(false_death, {});
+                    }
+                }
+            }
+            {
+                var it = true_table.iterator();
+                while (it.next()) |entry| {
+                    const true_death = entry.key;
+                    try table.putNoClobber(true_death, {});
+                    if (!false_table.contains(true_death)) {
+                        try false_entry_deaths.append(true_death);
+                    }
+                }
+            }
+            inst.true_death_count = std.math.cast(@TypeOf(inst.true_death_count), true_entry_deaths.items.len) catch return error.OutOfMemory;
+            inst.false_death_count = std.math.cast(@TypeOf(inst.false_death_count), false_entry_deaths.items.len) catch return error.OutOfMemory;
+            const allocated_slice = try arena.alloc(*ir.Inst, true_entry_deaths.items.len + false_entry_deaths.items.len);
+            inst.deaths = allocated_slice.ptr;
+            // Copy the collected entry deaths into the arena-owned storage;
+            // without this, `deaths` points at uninitialized memory.
+            std.mem.copy(*ir.Inst, allocated_slice[0..true_entry_deaths.items.len], true_entry_deaths.items);
+            std.mem.copy(*ir.Inst, allocated_slice[true_entry_deaths.items.len..], false_entry_deaths.items);
+
+            // Continue on with the instruction analysis. The following code will find the condition
+            // instruction, and the deaths flag for the CondBr instruction will indicate whether the
+            // condition's lifetime ends immediately before entering any branch.
+        },
+        else => {},
+    }
+
+    if (!table.contains(&inst.base)) {
+        // No tombstone for this instruction means it is never referenced,
+        // and its birth marks its own death. Very metal 🤘
+        inst.base.deaths |= 1 << 7;
+    }
+
+    const Args = ir.Inst.Args(T);
+    if (Args == void) {
+        return;
+    }
+
+    comptime var arg_index: usize = 0;
+    inline for (std.meta.fields(Args)) |field| {
+        if (field.field_type == *ir.Inst) {
+            if (arg_index >= 6) {
+                @compileError("out of bits to mark deaths of operands");
+            }
+            const prev = try table.put(@field(inst.args, field.name), {});
+            if (prev == null) {
+                // Death.
+                inst.base.deaths |= 1 << arg_index;
+            }
+            arg_index += 1;
+        }
+    }
+}
\ No newline at end of file
src-self-hosted/Module.zig
@@ -18,6 +18,7 @@ const Inst = ir.Inst;
 const Body = ir.Body;
 const ast = std.zig.ast;
 const trace = @import("tracy.zig").trace;
+const liveness = @import("liveness.zig");
 
 /// General-purpose allocator.
 allocator: *Allocator,
@@ -986,6 +987,11 @@ pub fn performAllTheWork(self: *Module) error{OutOfMemory}!void {
                         .sema_failure, .dependency_failure => continue,
                         .success => {},
                     }
+                    // Here we tack on additional allocations to the Decl's arena. The allocations are
+                    // lifetime annotations in the ZIR.
+                    var decl_arena = decl.typed_value.most_recent.arena.?.promote(self.allocator);
+                    defer decl.typed_value.most_recent.arena.?.* = decl_arena.state;
+                    try liveness.analyze(self.allocator, &decl_arena.allocator, payload.func.analysis.success);
                 }
 
                 assert(decl.typed_value.most_recent.typed_value.ty.hasCodeGenBits());