Commit 8f2f12f940

Andrew Kelley <andrew@ziglang.org>
2023-09-28 03:20:00
Compilation: introduce saveState()
This commit introduces `--debug-incremental` so that we can start playing around with incremental compilation while it is still being developed, and before it is enabled by default.

Currently it saves InternPool data, and has TODO comments for the remaining things. Deserialization is not implemented yet, which will require some post-processing such as building a string map out of null-terminated string table bytes.

The saved compiler state is stored in a file called <root-name>.zcs alongside <root-name>.o, <root-name>.pdb, <root-name>.exe, etc. In case of using the zig build system, these files are all in a zig-cache directory.

For the self-hosted compiler, here is one data point on the performance penalty of saving this data:

```
Benchmark 1 (3 runs): zig build-exe ...
  measurement          mean ± σ            min … max           outliers         delta
  wall_time          51.1s  ± 354ms    50.7s  … 51.4s           0 ( 0%)        0%
  peak_rss           3.91GB ± 354KB    3.91GB … 3.91GB          0 ( 0%)        0%
  cpu_cycles          212G  ± 3.17G     210G  …  216G           0 ( 0%)        0%
  instructions        274G  ± 57.5M     274G  …  275G           0 ( 0%)        0%
  cache_references   13.1G  ± 97.6M    13.0G  … 13.2G           0 ( 0%)        0%
  cache_misses       1.12G  ± 24.6M    1.10G  … 1.15G           0 ( 0%)        0%
  branch_misses      1.53G  ± 1.46M    1.53G  … 1.53G           0 ( 0%)        0%
Benchmark 2 (3 runs): zig build-exe ... --debug-incremental
  measurement          mean ± σ            min … max           outliers         delta
  wall_time          51.8s  ± 271ms    51.5s  … 52.1s           0 ( 0%)          +  1.3% ±  1.4%
  peak_rss           3.91GB ± 317KB    3.91GB … 3.91GB          0 ( 0%)          -  0.0% ±  0.0%
  cpu_cycles          213G  ± 398M      212G  …  213G           0 ( 0%)          +  0.3% ±  2.4%
  instructions        275G  ± 79.1M     275G  …  275G           0 ( 0%)          +  0.1% ±  0.1%
  cache_references   13.1G  ± 26.9M    13.0G  … 13.1G           0 ( 0%)          -  0.1% ±  1.2%
  cache_misses       1.12G  ± 5.66M    1.11G  … 1.12G           0 ( 0%)          -  0.6% ±  3.6%
  branch_misses      1.53G  ± 1.75M    1.53G  … 1.54G           0 ( 0%)          +  0.2% ±  0.2%
```

At the end of each compilation with `--debug-incremental`, we end up with a 43 MiB `zig.zcs` file that contains all of the InternPool data serialized.

Of course, it will necessarily be more expensive to save the state than to not save the state. However, this data point shows just how cheap the save state operation is, with all of the groundwork laid for using a serialization-friendly in-memory data layout.
1 parent 077994a
Changed files (2)
src/Compilation.zig
@@ -2702,6 +2702,72 @@ pub fn makeBinFileWritable(self: *Compilation) !void {
     return self.bin_file.makeWritable();
 }
 
+/// Fixed-layout record that `saveState` writes first, at offset 0 of the
+/// saved compiler state file. Each field is an element count (not a byte
+/// count) taken from the corresponding InternPool container.
+const Header = extern struct {
+    intern_pool: extern struct {
+        /// Number of InternPool items (`ip.items.len`).
+        items_len: u32,
+        /// Number of elements in `ip.extra.items`.
+        extra_len: u32,
+        /// Number of elements in `ip.limbs.items`.
+        limbs_len: u32,
+        /// Number of bytes in `ip.string_bytes.items`.
+        string_bytes_len: u32,
+    },
+};
+
+/// Serializes the incremental compilation state to `<root_name>.zcs` in the
+/// emit directory. Returns without doing anything when there is no emit
+/// location.
+///
+/// When a module is present, the file contains, in order starting at offset
+/// 0: the `Header`, then the InternPool limbs, extra, item data, item tags,
+/// and string bytes. When there is no module, no buffers are queued.
+///
+/// Note that all state that is included in the cache hash namespace is *not*
+/// saved, such as the target and most CLI flags. A cache hit will only occur
+/// when subsequent compiler invocations use the same set of flags.
+pub fn saveState(comp: *Compilation) !void {
+    var bufs_list: [6]std.os.iovec_const = undefined;
+    var bufs_len: usize = 0;
+    // Declared at function scope: `bufs_list` holds a pointer to these bytes
+    // and they are not read until the write at the end of this function, so
+    // the header must not live in the narrower `if` block below.
+    var header: Header = undefined;
+
+    const emit = comp.bin_file.options.emit orelse return;
+
+    if (comp.bin_file.options.module) |mod| {
+        const ip = &mod.intern_pool;
+        header = .{
+            .intern_pool = .{
+                .items_len = @intCast(ip.items.len),
+                .extra_len = @intCast(ip.extra.items.len),
+                .limbs_len = @intCast(ip.limbs.items.len),
+                .string_bytes_len = @intCast(ip.string_bytes.items.len),
+            },
+        };
+        // The order of these calls is the order of the sections in the file.
+        addBuf(&bufs_list, &bufs_len, mem.asBytes(&header));
+        addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(ip.limbs.items));
+        addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(ip.extra.items));
+        addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(ip.items.items(.data)));
+        addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(ip.items.items(.tag)));
+        addBuf(&bufs_list, &bufs_len, ip.string_bytes.items);
+
+        // TODO: compilation errors
+        // TODO: files
+        // TODO: namespaces
+        // TODO: decls
+        // TODO: linker state
+    }
+    var basename_buf: [255]u8 = undefined;
+    const basename = std.fmt.bufPrint(&basename_buf, "{s}.zcs", .{
+        comp.bin_file.options.root_name,
+    }) catch o: {
+        // The formatted name did not fit: force the last four bytes of the
+        // buffer to be the extension and use the whole buffer as the name.
+        basename_buf[basename_buf.len - 4 ..].* = ".zcs".*;
+        break :o &basename_buf;
+    };
+
+    // Using an atomic file prevents a crash or power failure from corrupting
+    // the previous incremental compilation state.
+    var af = try emit.directory.handle.atomicFile(basename, .{});
+    defer af.deinit();
+    try af.file.pwritevAll(bufs_list[0..bufs_len], 0);
+    try af.finish();
+}
+
+/// Records `buf` in the next unused entry of `bufs_list` and increments the
+/// count stored in `bufs_len`. Only the pointer and length of `buf` are
+/// stored; the bytes themselves are not copied.
+fn addBuf(bufs_list: []std.os.iovec_const, bufs_len: *usize, buf: []const u8) void {
+    bufs_list[bufs_len.*] = .{
+        .iov_len = buf.len,
+        .iov_base = buf.ptr,
+    };
+    bufs_len.* += 1;
+}
+
 /// This function is temporally single-threaded.
 pub fn totalErrorCount(self: *Compilation) u32 {
     var total: usize = self.failed_c_objects.count() +
src/main.zig
@@ -594,6 +594,7 @@ const usage_build_generic =
     \\  --debug-log [scope]          Enable printing debug/info log messages for scope
     \\  --debug-compile-errors       Crash with helpful diagnostics at the first compile error
     \\  --debug-link-snapshot        Enable dumping of the linker's state in JSON format
+    \\  --debug-incremental          Enable experimental feature: incremental compilation
     \\
 ;
 
@@ -904,6 +905,7 @@ fn buildOutputType(
     var minor_subsystem_version: ?u32 = null;
     var wasi_exec_model: ?std.builtin.WasiExecModel = null;
     var enable_link_snapshots: bool = false;
+    var debug_incremental: bool = false;
     var install_name: ?[]const u8 = null;
     var hash_style: link.HashStyle = .both;
     var entitlements: ?[]const u8 = null;
@@ -1272,6 +1274,8 @@ fn buildOutputType(
                         } else {
                             enable_link_snapshots = true;
                         }
+                    } else if (mem.eql(u8, arg, "--debug-incremental")) {
+                        debug_incremental = true;
                     } else if (mem.eql(u8, arg, "--entitlements")) {
                         entitlements = args_iter.nextOrFatal();
                     } else if (mem.eql(u8, arg, "-fcompiler-rt")) {
@@ -3591,11 +3595,16 @@ fn buildOutputType(
     }
 
     updateModule(comp) catch |err| switch (err) {
-        error.SemanticAnalyzeFail => if (listen == .none) process.exit(1),
+        error.SemanticAnalyzeFail => {
+            assert(listen == .none);
+            saveState(comp, debug_incremental);
+            process.exit(1);
+        },
         else => |e| return e,
     };
     if (build_options.only_c) return cleanExit();
     try comp.makeBinFileExecutable();
+    saveState(comp, debug_incremental);
 
     if (test_exec_args.items.len == 0 and object_format == .c) default_exec_args: {
         // Default to using `zig run` to execute the produced .c code from `zig test`.
@@ -3658,6 +3667,14 @@ fn buildOutputType(
     return cleanExit();
 }
 
+/// Saves the compilation state via `comp.saveState()` when
+/// `debug_incremental` is set. A failure to save is reported as a warning
+/// and is not propagated to the caller.
+fn saveState(comp: *Compilation, debug_incremental: bool) void {
+    if (!debug_incremental) return;
+    comp.saveState() catch |err| {
+        warn("unable to save incremental compilation state: {s}", .{@errorName(err)});
+    };
+}
+
 fn serve(
     comp: *Compilation,
     in: fs.File,