Commit 5d7f2697de

Andrew Kelley <andrew@ziglang.org>
2021-05-05 22:16:14
stage2: add `zig changelist` debug command
and implement the first pass at mechanism to map old ZIR to new ZIR.
1 parent 6b5d0b3
Changed files (5)
src/main.zig
@@ -72,6 +72,7 @@ const debug_usage = normal_usage ++
     \\Debug Commands:
     \\
     \\  astgen           Print ZIR code for a .zig source file
+    \\  changelist       Compute mappings from old ZIR to new ZIR
     \\
 ;
 
@@ -231,6 +232,8 @@ pub fn mainArgs(gpa: *Allocator, arena: *Allocator, args: []const []const u8) !v
         return io.getStdOut().writeAll(usage);
     } else if (debug_extensions_enabled and mem.eql(u8, cmd, "astgen")) {
         return cmdAstgen(gpa, arena, cmd_args);
+    } else if (debug_extensions_enabled and mem.eql(u8, cmd, "changelist")) {
+        return cmdChangelist(gpa, arena, cmd_args);
     } else {
         std.log.info("{s}", .{usage});
         fatal("unknown command: {s}", .{args[1]});
@@ -3618,3 +3621,142 @@ pub fn cmdAstgen(
 
     return Zir.renderAsTextToFile(gpa, &file, io.getStdOut());
 }
+
+/// This is only enabled for debug builds.
+pub fn cmdChangelist(
+    gpa: *Allocator,
+    arena: *Allocator,
+    args: []const []const u8,
+) !void {
+    const Module = @import("Module.zig");
+    const AstGen = @import("AstGen.zig");
+    const Zir = @import("Zir.zig");
+
+    const old_source_file = args[0];
+    const new_source_file = args[1];
+
+    var f = try fs.cwd().openFile(old_source_file, .{});
+    defer f.close();
+
+    const stat = try f.stat();
+
+    if (stat.size > max_src_size)
+        return error.FileTooBig;
+
+    var file: Module.Scope.File = .{
+        .status = .never_loaded,
+        .source_loaded = false,
+        .tree_loaded = false,
+        .zir_loaded = false,
+        .sub_file_path = old_source_file,
+        .source = undefined,
+        .stat_size = stat.size,
+        .stat_inode = stat.inode,
+        .stat_mtime = stat.mtime,
+        .tree = undefined,
+        .zir = undefined,
+        .pkg = undefined,
+        .namespace = undefined,
+    };
+
+    const source = try arena.allocSentinel(u8, stat.size, 0);
+    const amt = try f.readAll(source);
+    if (amt != stat.size)
+        return error.UnexpectedEndOfFile;
+    file.source = source;
+    file.source_loaded = true;
+
+    file.tree = try std.zig.parse(gpa, file.source);
+    file.tree_loaded = true;
+    defer file.tree.deinit(gpa);
+
+    for (file.tree.errors) |parse_error| {
+        try printErrMsgToFile(gpa, parse_error, file.tree, old_source_file, io.getStdErr(), .auto);
+    }
+    if (file.tree.errors.len != 0) {
+        process.exit(1);
+    }
+
+    file.zir = try AstGen.generate(gpa, file.tree);
+    file.zir_loaded = true;
+    defer file.zir.deinit(gpa);
+
+    if (file.zir.hasCompileErrors()) {
+        var errors = std.ArrayList(Compilation.AllErrors.Message).init(arena);
+        try Compilation.AllErrors.addZir(arena, &errors, &file);
+        const ttyconf = std.debug.detectTTYConfig();
+        for (errors.items) |full_err_msg| {
+            full_err_msg.renderToStdErr(ttyconf);
+        }
+        process.exit(1);
+    }
+
+    var new_f = try fs.cwd().openFile(new_source_file, .{});
+    defer new_f.close();
+
+    const new_stat = try new_f.stat();
+
+    if (new_stat.size > max_src_size)
+        return error.FileTooBig;
+
+    const new_source = try arena.allocSentinel(u8, new_stat.size, 0);
+    const new_amt = try new_f.readAll(new_source);
+    if (new_amt != new_stat.size)
+        return error.UnexpectedEndOfFile;
+
+    var new_tree = try std.zig.parse(gpa, new_source);
+    defer new_tree.deinit(gpa);
+
+    for (new_tree.errors) |parse_error| {
+        try printErrMsgToFile(gpa, parse_error, new_tree, new_source_file, io.getStdErr(), .auto);
+    }
+    if (new_tree.errors.len != 0) {
+        process.exit(1);
+    }
+
+    var old_zir = file.zir;
+    defer old_zir.deinit(gpa);
+    file.zir_loaded = false;
+    file.zir = try AstGen.generate(gpa, new_tree);
+    file.zir_loaded = true;
+
+    if (file.zir.hasCompileErrors()) {
+        var errors = std.ArrayList(Compilation.AllErrors.Message).init(arena);
+        try Compilation.AllErrors.addZir(arena, &errors, &file);
+        const ttyconf = std.debug.detectTTYConfig();
+        for (errors.items) |full_err_msg| {
+            full_err_msg.renderToStdErr(ttyconf);
+        }
+        process.exit(1);
+    }
+
+    var inst_map: std.AutoHashMapUnmanaged(Zir.Inst.Index, Zir.Inst.Index) = .{};
+    defer inst_map.deinit(gpa);
+
+    var extra_map: std.AutoHashMapUnmanaged(u32, u32) = .{};
+    defer extra_map.deinit(gpa);
+
+    try Module.mapOldZirToNew(gpa, old_zir, file.zir, &inst_map, &extra_map);
+
+    var bw = io.bufferedWriter(io.getStdOut().writer());
+    const stdout = bw.writer();
+    {
+        try stdout.print("Instruction mappings:\n", .{});
+        var it = inst_map.iterator();
+        while (it.next()) |entry| {
+            try stdout.print(" %{d} => %{d}\n", .{
+                entry.key, entry.value,
+            });
+        }
+    }
+    {
+        try stdout.print("Extra mappings:\n", .{});
+        var it = extra_map.iterator();
+        while (it.next()) |entry| {
+            try stdout.print(" {d} => {d}\n", .{
+                entry.key, entry.value,
+            });
+        }
+    }
+    try bw.flush();
+}
src/Module.zig
@@ -190,6 +190,7 @@ pub const Decl = struct {
     /// Index to ZIR `extra` array to the entry in the parent's decl structure
     /// (the part that says "for every decls_len"). The first item at this index is
     /// the contents hash, followed by line, name, etc.
+    /// For anonymous decls and also the root Decl for a File, this is 0.
     zir_decl_index: Zir.Inst.Index,
 
     /// Represents the "shallow" analysis status. For example, for decls that are functions,
@@ -329,6 +330,7 @@ pub const Decl = struct {
     }
 
     pub fn getNameZir(decl: Decl, zir: Zir) ?[:0]const u8 {
+        assert(decl.zir_decl_index != 0);
         const name_index = zir.extra[decl.zir_decl_index + 5];
         if (name_index <= 1) return null;
         return zir.nullTerminatedString(name_index);
@@ -340,24 +342,28 @@ pub const Decl = struct {
     }
 
     pub fn contentsHashZir(decl: Decl, zir: Zir) std.zig.SrcHash {
+        assert(decl.zir_decl_index != 0);
         const hash_u32s = zir.extra[decl.zir_decl_index..][0..4];
         const contents_hash = @bitCast(std.zig.SrcHash, hash_u32s.*);
         return contents_hash;
     }
 
     pub fn zirBlockIndex(decl: Decl) Zir.Inst.Index {
+        assert(decl.zir_decl_index != 0);
         const zir = decl.namespace.file_scope.zir;
         return zir.extra[decl.zir_decl_index + 6];
     }
 
     pub fn zirAlignRef(decl: Decl) Zir.Inst.Ref {
         if (!decl.has_align) return .none;
+        assert(decl.zir_decl_index != 0);
         const zir = decl.namespace.file_scope.zir;
         return @intToEnum(Zir.Inst.Ref, zir.extra[decl.zir_decl_index + 6]);
     }
 
     pub fn zirLinksectionRef(decl: Decl) Zir.Inst.Ref {
         if (!decl.has_linksection) return .none;
+        assert(decl.zir_decl_index != 0);
         const zir = decl.namespace.file_scope.zir;
         const extra_index = decl.zir_decl_index + 6 + @boolToInt(decl.has_align);
         return @intToEnum(Zir.Inst.Ref, zir.extra[extra_index]);
@@ -430,6 +436,15 @@ pub const Decl = struct {
         return tv.ty.zigTypeTag() == .Fn;
     }
 
+    /// If the Decl has a value and it is a struct, return it,
+    /// otherwise null.
+    pub fn getStruct(decl: Decl) ?*Struct {
+        if (!decl.has_tv) return null;
+        const ty = (decl.val.castTag(.ty) orelse return null).data;
+        const struct_obj = (ty.castTag(.@"struct") orelse return null).data;
+        return struct_obj;
+    }
+
     pub fn dump(decl: *Decl) void {
         const loc = std.zig.findLineColumn(decl.scope.source.bytes, decl.src);
         std.debug.print("{s}:{d}:{d} name={s} status={s}", .{
@@ -2335,7 +2350,8 @@ pub fn astGenFile(mod: *Module, file: *Scope.File, prog_node: *std.Progress.Node
         // We do not need to hold any locks at this time because all the Decl and Namespace
         // objects being touched are specific to this File, and the only other concurrent
         // tasks are touching other File objects.
-        @panic("TODO implement update references from old ZIR to new ZIR");
+        const change_list = try updateZirRefs(gpa, file, prev_zir);
+        @panic("TODO do something with change_list");
     }
 
     // TODO don't report compile errors until Sema @importFile
@@ -2350,6 +2366,177 @@ pub fn astGenFile(mod: *Module, file: *Scope.File, prog_node: *std.Progress.Node
     }
 }
 
+const UpdateChangeList = struct {
+    deleted: []*const Decl,
+    outdated: []*const Decl,
+
+    fn deinit(self: *UpdateChangeList, gpa: *Allocator) void {
+        gpa.free(self.deleted);
+        gpa.free(self.outdated);
+    }
+};
+
+/// Patch ups:
+/// * Struct.zir_index
+/// * Decl.zir_decl_index
+/// * Decl.name
+/// * Namespace.decl keys
+fn updateZirRefs(gpa: *Allocator, file: *Scope.File, old_zir: Zir) !UpdateChangeList {
+    const new_zir = file.zir;
+
+    // Maps from old ZIR to new ZIR, struct_decl, enum_decl, etc. Any instruction which
+    // creates a namespace, gets mapped from old to new here.
+    var inst_map: std.AutoHashMapUnmanaged(Zir.Inst.Index, Zir.Inst.Index) = .{};
+    defer inst_map.deinit(gpa);
+    // Maps from old ZIR to new ZIR, the extra data index for the sub-decl item.
+    // e.g. the thing that Decl.zir_decl_index points to.
+    var extra_map: std.AutoHashMapUnmanaged(u32, u32) = .{};
+    defer extra_map.deinit(gpa);
+
+    try mapOldZirToNew(gpa, old_zir, new_zir, &inst_map, &extra_map);
+
+    // Build string table for new ZIR.
+    var string_table: std.StringHashMapUnmanaged(u32) = .{};
+    defer string_table.deinit(gpa);
+    {
+        var i: usize = 2;
+        while (i < new_zir.string_bytes.len) {
+            const string = new_zir.nullTerminatedString(i);
+            try string_table.put(gpa, string, @intCast(u32, i));
+            i += string.len + 1;
+        }
+    }
+
+    // Walk the Decl graph.
+
+    var decl_stack: std.ArrayListUnmanaged(*Decl) = .{};
+    defer decl_stack.deinit(gpa);
+
+    const root_decl = file.namespace.getDecl();
+    try decl_stack.append(gpa, root_decl);
+
+    var deleted_decls: std.ArrayListUnmanaged(*Decl) = .{};
+    defer deleted_decls.deinit(gpa);
+    var outdated_decls: std.ArrayListUnmanaged(*Decl) = .{};
+    defer outdated_decls.deinit(gpa);
+
+    while (decl_stack.popOrNull()) |decl| {
+        // Anonymous decls and the root decl have this set to 0. We still need
+        // to walk them but we do not need to modify this value.
+        if (decl.zir_decl_index != 0) {
+            decl.zir_decl_index = extra_map.get(decl.zir_decl_index) orelse {
+                try deleted_decls.append(gpa, decl);
+                continue;
+            };
+            const new_name_index = string_table.get(mem.spanZ(decl.name)) orelse {
+                try deleted_decls.append(gpa, decl);
+                continue;
+            };
+            decl.name = new_zir.nullTerminatedString(new_name_index).ptr;
+
+            const old_hash = decl.contentsHashZir(old_zir);
+            const new_hash = decl.contentsHashZir(new_zir);
+            if (!std.zig.srcHashEql(old_hash, new_hash)) {
+                try outdated_decls.append(gpa, decl);
+            }
+        } else {
+            // TODO all decls should probably store source hash. Without this,
+            // we currently unnecessarily mark all anon decls outdated here.
+            try outdated_decls.append(gpa, decl);
+        }
+
+        if (!decl.has_tv) continue;
+
+        if (decl.getStruct()) |struct_obj| {
+            struct_obj.zir_index = inst_map.get(struct_obj.zir_index) orelse {
+                try deleted_decls.append(gpa, decl);
+                continue;
+            };
+        }
+
+        if (decl.val.getTypeNamespace()) |namespace| {
+            for (namespace.decls.items()) |*entry| {
+                const sub_decl = entry.value;
+                if (sub_decl.zir_decl_index != 0) {
+                    const new_key_index = string_table.get(entry.key) orelse {
+                        try deleted_decls.append(gpa, sub_decl);
+                        continue;
+                    };
+                    entry.key = new_zir.nullTerminatedString(new_key_index);
+                }
+                try decl_stack.append(gpa, sub_decl);
+            }
+        }
+    }
+
+    const outdated_slice = outdated_decls.toOwnedSlice(gpa);
+    const deleted_slice = deleted_decls.toOwnedSlice(gpa);
+
+    return UpdateChangeList{
+        .outdated = outdated_slice,
+        .deleted = deleted_slice,
+    };
+}
+
+pub fn mapOldZirToNew(
+    gpa: *Allocator,
+    old_zir: Zir,
+    new_zir: Zir,
+    inst_map: *std.AutoHashMapUnmanaged(Zir.Inst.Index, Zir.Inst.Index),
+    extra_map: *std.AutoHashMapUnmanaged(u32, u32),
+) Allocator.Error!void {
+    // Contain ZIR indexes of declaration instructions.
+    const MatchedZirDecl = struct {
+        old_inst: Zir.Inst.Index,
+        new_inst: Zir.Inst.Index,
+    };
+    var match_stack: std.ArrayListUnmanaged(MatchedZirDecl) = .{};
+    defer match_stack.deinit(gpa);
+
+    const old_main_struct_inst = old_zir.extra[@enumToInt(Zir.ExtraIndex.main_struct)] -
+        @intCast(u32, Zir.Inst.Ref.typed_value_map.len);
+    const new_main_struct_inst = new_zir.extra[@enumToInt(Zir.ExtraIndex.main_struct)] -
+        @intCast(u32, Zir.Inst.Ref.typed_value_map.len);
+
+    try match_stack.append(gpa, .{
+        .old_inst = old_main_struct_inst,
+        .new_inst = new_main_struct_inst,
+    });
+
+    while (match_stack.popOrNull()) |match_item| {
+        try inst_map.put(gpa, match_item.old_inst, match_item.new_inst);
+
+        // Maps name to extra index of decl sub item.
+        var decl_map: std.StringHashMapUnmanaged(u32) = .{};
+        defer decl_map.deinit(gpa);
+
+        {
+            var old_decl_it = old_zir.declIterator(match_item.old_inst);
+            while (old_decl_it.next()) |old_decl| {
+                try decl_map.put(gpa, old_decl.name, old_decl.sub_index);
+            }
+        }
+
+        var new_decl_it = new_zir.declIterator(match_item.new_inst);
+        while (new_decl_it.next()) |new_decl| {
+            const old_extra_index = decl_map.get(new_decl.name) orelse continue;
+            const new_extra_index = new_decl.sub_index;
+            try extra_map.put(gpa, old_extra_index, new_extra_index);
+
+            //var old_it = declInstIterator(old_zir, old_extra_index);
+            //var new_it = declInstIterator(new_zir, new_extra_index);
+            //while (true) {
+            //    const old_decl_inst = old_it.next() orelse break;
+            //    const new_decl_inst = new_it.next() orelse break;
+            //    try match_stack.append(gpa, .{
+            //        .old_inst = old_decl_inst,
+            //        .new_inst = new_decl_inst,
+            //    });
+            //}
+        }
+    }
+}
+
 pub fn ensureDeclAnalyzed(mod: *Module, decl: *Decl) InnerError!void {
     const tracy = trace(@src());
     defer tracy.end();
@@ -2487,7 +2674,6 @@ pub fn semaFile(mod: *Module, file: *Scope.File) InnerError!void {
     new_decl.is_exported = false;
     new_decl.has_align = false;
     new_decl.has_linksection = false;
-    new_decl.zir_decl_index = undefined;
     new_decl.ty = struct_ty;
     new_decl.val = struct_val;
     new_decl.has_tv = true;
@@ -3256,7 +3442,7 @@ fn allocateNewDecl(mod: *Module, namespace: *Scope.Namespace, src_node: ast.Node
         .linksection_val = undefined,
         .analysis = .unreferenced,
         .deletion_flag = false,
-        .zir_decl_index = undefined,
+        .zir_decl_index = 0,
         .link = switch (mod.comp.bin_file.tag) {
             .coff => .{ .coff = link.File.Coff.TextBlock.empty },
             .elf => .{ .elf = link.File.Elf.TextBlock.empty },
src/Zir.zig
@@ -3702,6 +3702,8 @@ const Writer = struct {
             const has_section = @truncate(u1, cur_bit_bag) != 0;
             cur_bit_bag >>= 1;
 
+            const sub_index = extra_index;
+
             const hash_u32s = self.code.extra[extra_index..][0..4];
             extra_index += 4;
             const line = self.code.extra[extra_index];
@@ -3738,8 +3740,8 @@ const Writer = struct {
                     raw_decl_name;
                 const test_str = if (raw_decl_name.len == 0) "test " else "";
                 const export_str = if (is_exported) "export " else "";
-                try stream.print("{s}{s}{s}{}", .{
-                    pub_str, test_str, export_str, std.zig.fmtId(decl_name),
+                try stream.print("[{d}] {s}{s}{s}{}", .{
+                    sub_index, pub_str, test_str, export_str, std.zig.fmtId(decl_name),
                 });
                 if (align_inst != .none) {
                     try stream.writeAll(" align(");
@@ -4334,3 +4336,108 @@ const Writer = struct {
         }
     }
 };
+
+pub const DeclIterator = struct {
+    extra_index: usize,
+    bit_bag_index: usize,
+    cur_bit_bag: u32,
+    decl_i: u32,
+    decls_len: u32,
+    zir: Zir,
+
+    pub const Item = struct {
+        name: [:0]const u8,
+        sub_index: u32,
+    };
+
+    pub fn next(it: *DeclIterator) ?Item {
+        if (it.decl_i >= it.decls_len) return null;
+
+        if (it.decl_i % 8 == 0) {
+            it.cur_bit_bag = it.zir.extra[it.bit_bag_index];
+            it.bit_bag_index += 1;
+        }
+        it.decl_i += 1;
+
+        const flags = @truncate(u4, it.cur_bit_bag);
+        it.cur_bit_bag >>= 4;
+
+        const sub_index = @intCast(u32, it.extra_index);
+        it.extra_index += 5; // src_hash(4) + line(1)
+        const name = it.zir.nullTerminatedString(it.zir.extra[it.extra_index]);
+        it.extra_index += 2; // name(1) + value(1)
+        it.extra_index += @truncate(u1, flags >> 2);
+        it.extra_index += @truncate(u1, flags >> 3);
+
+        return Item{
+            .sub_index = sub_index,
+            .name = name,
+        };
+    }
+};
+
+pub fn declIterator(zir: Zir, decl_inst: u32) DeclIterator {
+    const tags = zir.instructions.items(.tag);
+    const datas = zir.instructions.items(.data);
+    const decl_info: struct {
+        extra_index: usize,
+        decls_len: u32,
+    } = switch (tags[decl_inst]) {
+        .struct_decl,
+        .struct_decl_packed,
+        .struct_decl_extern,
+        => blk: {
+            const inst_data = datas[decl_inst].pl_node;
+            const extra = zir.extraData(Inst.StructDecl, inst_data.payload_index);
+            break :blk .{
+                .extra_index = extra.end,
+                .decls_len = extra.data.decls_len,
+            };
+        },
+
+        .union_decl,
+        .union_decl_packed,
+        .union_decl_extern,
+        => blk: {
+            const inst_data = datas[decl_inst].pl_node;
+            const extra = zir.extraData(Inst.UnionDecl, inst_data.payload_index);
+            break :blk .{
+                .extra_index = extra.end,
+                .decls_len = extra.data.decls_len,
+            };
+        },
+
+        .enum_decl,
+        .enum_decl_nonexhaustive,
+        => blk: {
+            const inst_data = datas[decl_inst].pl_node;
+            const extra = zir.extraData(Inst.EnumDecl, inst_data.payload_index);
+            break :blk .{
+                .extra_index = extra.end,
+                .decls_len = extra.data.decls_len,
+            };
+        },
+
+        .opaque_decl => blk: {
+            const inst_data = datas[decl_inst].pl_node;
+            const extra = zir.extraData(Inst.OpaqueDecl, inst_data.payload_index);
+            break :blk .{
+                .extra_index = extra.end,
+                .decls_len = extra.data.decls_len,
+            };
+        },
+
+        else => unreachable,
+    };
+
+    const bit_bags_count = std.math.divCeil(usize, decl_info.decls_len, 8) catch unreachable;
+
+    return .{
+        .zir = zir,
+        .extra_index = decl_info.extra_index + bit_bags_count,
+        .bit_bag_index = decl_info.extra_index,
+        .cur_bit_bag = undefined,
+        .decl_i = 0,
+        .decls_len = decl_info.decls_len,
+    };
+}
test/stage2/test.zig
@@ -28,13 +28,13 @@ pub fn addCases(ctx: *TestContext) !void {
 
         // Incorrect return type
         case.addError(
-            \\export fn _start() noreturn {
+            \\pub export fn _start() noreturn {
             \\}
         , &[_][]const u8{":2:1: error: expected noreturn, found void"});
 
         // Regular old hello world
         case.addCompareOutput(
-            \\export fn _start() noreturn {
+            \\pub export fn _start() noreturn {
             \\    print();
             \\
             \\    exit();
BRANCH_TODO
@@ -1,5 +1,11 @@
- * AstGen threadlocal
- * extern "foo" for vars and for functions
+ * implement the iterators that updateZirRefs needs
+   - for iterating over ZIR decls
+   - for iterating over ZIR instructions within a decl to find decl instructions
+ * implement `zig zirdiff a.zig b.zig` for showing debug output for how a particular
+   source transformation will be seen by the change detection algorithm.
+ * communicate the changelist back to the driver code and process it in semantic analysis,
+   handling deletions and outdatings.
+
  * namespace decls table can't reference ZIR memory because it can get modified on updates
    - change it for astgen worker to compare old and new ZIR, updating existing
      namespaces & decls, and creating a changelist.
@@ -55,4 +61,8 @@
    natural alignment for fields and do not have any comptime fields. this
    will save 16 bytes per struct field in the compilation.
 
+ * AstGen threadlocal
+ * extern "foo" for vars
 
+ * TODO all decls should probably store source hash. Without this,
+   we currently unnecessarily mark all anon decls outdated here.