Commit f51413d2cf

Andrew Kelley <andrew@ziglang.org>
2023-03-06 08:20:11
zig build: add an OOM-prevention system
The problem is that one may execute too many subprocesses concurrently that, together, exceed an RSS value that causes the OOM killer to kill something problematic such as the window manager. Or worse, nothing, and the system freezes. This is a real-world problem. For example, when building LLVM, a simple `ninja install` will bring your system to its knees if you don't know that you should add `-DLLVM_PARALLEL_LINK_JOBS=1`. In particular: compiling the zig std lib tests takes about 2G each, which at 16 at once (8 cores + hyperthreading) uses all 32GB of my RAM, causing the OOM killer to kill my window manager.

The idea here is that you can annotate steps that might use a high amount of system resources with an upper bound. So, for example, I could mark the std lib tests as having an upper bound peak RSS of 3 GiB. Then the build system will do two things:

1. ulimit the child process, so that it will fail if it would exceed that memory limit.
2. Notice how much system RAM is available and avoid running so many concurrent jobs at once that they would total more than that.

This implements (1) not with an operating-system-enforced limit, but by checking maxrss after a child process exits. However, it does implement (2) correctly.

The available memory used by the build system defaults to the total system memory, regardless of how much is in use by other processes at the time the build runner is spawned. This value can be overridden with the new --maxrss flag to `zig build`. The mechanism ensures that the sum of the upper-bound RSS values of concurrently running tasks never exceeds this limit.

This system makes it so that project maintainers can annotate problematic subprocesses, avoiding bug reports from users, who can blissfully execute `zig build` without worrying about the project's internals. Nobody's computer crashes, and the build system uses as much parallelism as possible without risking OOM. Users do not need to unnecessarily resort to -j1 when the build system can figure this out for them.
1 parent 3b29d00
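For reference, a minimal sketch of how a project's build.zig might adopt the new annotation. Only the `max_rss` option and the `--maxrss` flag come from this commit; the file path, the 3 GiB figure, and the surrounding step wiring are hypothetical placeholders for whatever the project already does.

```zig
const std = @import("std");

pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Declare an upper bound on this step's peak RSS. The build runner will
    // (1) fail the step if it actually exceeds the bound and (2) never run a
    // set of concurrent steps whose declared bounds sum to more than the
    // available memory (or the value given via `zig build --maxrss <bytes>`).
    const unit_tests = b.addTest(.{
        .root_source_file = .{ .path = "src/main.zig" }, // hypothetical path
        .target = target,
        .optimize = optimize,
        .max_rss = 3 * 1024 * 1024 * 1024, // 3 GiB, as in the std lib tests example
    });

    // Expose the tests through a named step, as the project presumably already
    // does; how the tests ultimately get run is unchanged by this commit.
    const test_step = b.step("test", "Run unit tests");
    test_step.dependOn(&unit_tests.step);
}
```

Users who want a tighter cap than total system memory can still pass a plain byte count, e.g. `zig build --maxrss 17179869184` for 16 GiB; per the TODO in the parser, shorthand such as "2GiB" is not accepted yet.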
lib/std/Build/CompileStep.zig
@@ -274,6 +274,7 @@ pub const Options = struct {
     kind: Kind,
     linkage: ?Linkage = null,
     version: ?std.builtin.Version = null,
+    max_rss: usize = 0,
 };
 
 pub const Kind = enum {
@@ -333,6 +334,7 @@ pub fn create(owner: *std.Build, options: Options) *CompileStep {
             .name = step_name,
             .owner = owner,
             .makeFn = make,
+            .max_rss = options.max_rss,
         }),
         .version = options.version,
         .out_filename = undefined,
lib/std/Build/Step.zig
@@ -2,14 +2,32 @@ id: Id,
 name: []const u8,
 owner: *Build,
 makeFn: MakeFn,
+
 dependencies: std.ArrayList(*Step),
 /// This field is empty during execution of the user's build script, and
 /// then populated during dependency loop checking in the build runner.
 dependants: std.ArrayListUnmanaged(*Step),
 state: State,
-/// The return addresss associated with creation of this step that can be useful
-/// to print along with debugging messages.
-debug_stack_trace: [n_debug_stack_frames]usize,
+/// Set this field to declare an upper bound on the number of bytes of memory the
+/// step will take to run. Zero means no limit.
+///
+/// The idea is to annotate steps that might use a high amount of RAM with an
+/// upper bound. For example, perhaps a particular set of unit tests requires 4
+/// GiB of RAM, and those tests will be run under 4 different build
+/// configurations at once. This would potentially require 16 GiB of memory on
+/// the system if all 4 steps executed simultaneously, which could easily be
+/// greater than what is actually available, potentially causing the system to
+/// crash when using `zig build` at the default concurrency level.
+///
+/// This field causes the build runner to do two things:
+/// 1. ulimit child processes, so that they will fail if they would exceed this
+/// memory limit. This serves to enforce that this upper bound value is
+/// correct.
+/// 2. Ensure that the set of concurrent steps at any given time has a total
+/// max_rss value that does not exceed the `max_total_rss` value of the build
+/// runner. This value is configurable on the command line, and defaults to the
+/// total system memory available.
+max_rss: usize,
 
 result_error_msgs: std.ArrayListUnmanaged([]const u8),
 result_error_bundle: std.zig.ErrorBundle,
@@ -18,6 +36,10 @@ result_duration_ns: ?u64,
 /// 0 means unavailable or not reported.
 result_peak_rss: usize,
 
+/// The return address associated with creation of this step that can be useful
+/// to print along with debugging messages.
+debug_stack_trace: [n_debug_stack_frames]usize,
+
 pub const MakeFn = *const fn (self: *Step, prog_node: *std.Progress.Node) anyerror!void;
 
 const n_debug_stack_frames = 4;
@@ -83,6 +105,7 @@ pub const Options = struct {
     owner: *Build,
     makeFn: MakeFn = makeNoOp,
     first_ret_addr: ?usize = null,
+    max_rss: usize = 0,
 };
 
 pub fn init(options: Options) Step {
@@ -104,6 +127,7 @@ pub fn init(options: Options) Step {
         .dependencies = std.ArrayList(*Step).init(arena),
         .dependants = .{},
         .state = .precheck_unstarted,
+        .max_rss = options.max_rss,
         .debug_stack_trace = addresses,
         .result_error_msgs = .{},
         .result_error_bundle = std.zig.ErrorBundle.empty,
@@ -117,15 +141,24 @@ pub fn init(options: Options) Step {
 /// have already reported the error. Otherwise, we add a simple error report
 /// here.
 pub fn make(s: *Step, prog_node: *std.Progress.Node) error{ MakeFailed, MakeSkipped }!void {
-    return s.makeFn(s, prog_node) catch |err| switch (err) {
+    const arena = s.owner.allocator;
+
+    s.makeFn(s, prog_node) catch |err| switch (err) {
         error.MakeFailed => return error.MakeFailed,
         error.MakeSkipped => return error.MakeSkipped,
         else => {
-            const gpa = s.dependencies.allocator;
-            s.result_error_msgs.append(gpa, @errorName(err)) catch @panic("OOM");
+            s.result_error_msgs.append(arena, @errorName(err)) catch @panic("OOM");
             return error.MakeFailed;
         },
     };
+
+    if (s.max_rss != 0 and s.result_peak_rss > s.max_rss) {
+        const msg = std.fmt.allocPrint(arena, "memory usage peaked at {d} bytes, exceeding the declared upper bound of {d}", .{
+            s.result_peak_rss, s.max_rss,
+        }) catch @panic("OOM");
+        s.result_error_msgs.append(arena, msg) catch @panic("OOM");
+        return error.MakeFailed;
+    }
 }
 
 pub fn dependOn(self: *Step, other: *Step) void {
lib/std/Build.zig
@@ -453,6 +453,7 @@ pub const ExecutableOptions = struct {
     target: CrossTarget = .{},
     optimize: std.builtin.Mode = .Debug,
     linkage: ?CompileStep.Linkage = null,
+    max_rss: usize = 0,
 };
 
 pub fn addExecutable(b: *Build, options: ExecutableOptions) *CompileStep {
@@ -464,6 +465,7 @@ pub fn addExecutable(b: *Build, options: ExecutableOptions) *CompileStep {
         .optimize = options.optimize,
         .kind = .exe,
         .linkage = options.linkage,
+        .max_rss = options.max_rss,
     });
 }
 
@@ -472,6 +474,7 @@ pub const ObjectOptions = struct {
     root_source_file: ?FileSource = null,
     target: CrossTarget,
     optimize: std.builtin.Mode,
+    max_rss: usize = 0,
 };
 
 pub fn addObject(b: *Build, options: ObjectOptions) *CompileStep {
@@ -481,6 +484,7 @@ pub fn addObject(b: *Build, options: ObjectOptions) *CompileStep {
         .target = options.target,
         .optimize = options.optimize,
         .kind = .obj,
+        .max_rss = options.max_rss,
     });
 }
 
@@ -490,6 +494,7 @@ pub const SharedLibraryOptions = struct {
     version: ?std.builtin.Version = null,
     target: CrossTarget,
     optimize: std.builtin.Mode,
+    max_rss: usize = 0,
 };
 
 pub fn addSharedLibrary(b: *Build, options: SharedLibraryOptions) *CompileStep {
@@ -501,6 +506,7 @@ pub fn addSharedLibrary(b: *Build, options: SharedLibraryOptions) *CompileStep {
         .version = options.version,
         .target = options.target,
         .optimize = options.optimize,
+        .max_rss = options.max_rss,
     });
 }
 
@@ -510,6 +516,7 @@ pub const StaticLibraryOptions = struct {
     target: CrossTarget,
     optimize: std.builtin.Mode,
     version: ?std.builtin.Version = null,
+    max_rss: usize = 0,
 };
 
 pub fn addStaticLibrary(b: *Build, options: StaticLibraryOptions) *CompileStep {
@@ -521,6 +528,7 @@ pub fn addStaticLibrary(b: *Build, options: StaticLibraryOptions) *CompileStep {
         .version = options.version,
         .target = options.target,
         .optimize = options.optimize,
+        .max_rss = options.max_rss,
     });
 }
 
@@ -531,6 +539,7 @@ pub const TestOptions = struct {
     target: CrossTarget = .{},
     optimize: std.builtin.Mode = .Debug,
     version: ?std.builtin.Version = null,
+    max_rss: usize = 0,
 };
 
 pub fn addTest(b: *Build, options: TestOptions) *CompileStep {
@@ -540,6 +549,7 @@ pub fn addTest(b: *Build, options: TestOptions) *CompileStep {
         .root_source_file = options.root_source_file,
         .target = options.target,
         .optimize = options.optimize,
+        .max_rss = options.max_rss,
     });
 }
 
@@ -548,6 +558,7 @@ pub const AssemblyOptions = struct {
     source_file: FileSource,
     target: CrossTarget,
     optimize: std.builtin.Mode,
+    max_rss: usize = 0,
 };
 
 pub fn addAssembly(b: *Build, options: AssemblyOptions) *CompileStep {
@@ -557,6 +568,7 @@ pub fn addAssembly(b: *Build, options: AssemblyOptions) *CompileStep {
         .root_source_file = null,
         .target = options.target,
         .optimize = options.optimize,
+        .max_rss = options.max_rss,
     });
     obj_step.addAssemblyFileSource(options.source_file.dupe(b));
     return obj_step;
lib/build_runner.zig
@@ -84,20 +84,21 @@ pub fn main() !void {
     );
     defer builder.destroy();
 
+    const Color = enum { auto, off, on };
+
     var targets = ArrayList([]const u8).init(arena);
     var debug_log_scopes = ArrayList([]const u8).init(arena);
     var thread_pool_options: std.Thread.Pool.Options = .{ .allocator = arena };
 
-    const stderr_stream = io.getStdErr().writer();
-    const stdout_stream = io.getStdOut().writer();
-
     var install_prefix: ?[]const u8 = null;
     var dir_list = std.Build.DirList{};
     var enable_summary: ?bool = null;
-
-    const Color = enum { auto, off, on };
+    var max_rss: usize = 0;
     var color: Color = .auto;
 
+    const stderr_stream = io.getStdErr().writer();
+    const stdout_stream = io.getStdOut().writer();
+
     while (nextArg(args, &arg_idx)) |arg| {
         if (mem.startsWith(u8, arg, "-D")) {
             const option_contents = arg[2..];
@@ -147,6 +148,18 @@ pub fn main() !void {
                     usageAndErr(builder, false, stderr_stream);
                 };
                 builder.sysroot = sysroot;
+            } else if (mem.eql(u8, arg, "--maxrss")) {
+                const max_rss_text = nextArg(args, &arg_idx) orelse {
+                    std.debug.print("Expected argument after --maxrss\n\n", .{});
+                    usageAndErr(builder, false, stderr_stream);
+                };
+                // TODO: support shorthand such as "2GiB", "2GB", or "2G"
+                max_rss = std.fmt.parseInt(usize, max_rss_text, 10) catch |err| {
+                    std.debug.print("invalid byte size: '{s}': {s}\n", .{
+                        max_rss_text, @errorName(err),
+                    });
+                    process.exit(1);
+                };
             } else if (mem.eql(u8, arg, "--search-prefix")) {
                 const search_prefix = nextArg(args, &arg_idx) orelse {
                     std.debug.print("Expected argument after --search-prefix\n\n", .{});
@@ -280,30 +293,55 @@ pub fn main() !void {
     if (builder.validateUserInputDidItFail())
         usageAndErr(builder, true, stderr_stream);
 
+    var run: Run = .{
+        .max_rss = max_rss,
+        .max_rss_is_default = false,
+        .max_rss_mutex = .{},
+        .memory_blocked_steps = std.ArrayList(*Step).init(arena),
+
+        .claimed_rss = 0,
+        .enable_summary = enable_summary,
+        .ttyconf = ttyconf,
+        .stderr = stderr,
+    };
+
+    if (run.max_rss == 0) {
+        run.max_rss = process.totalSystemMemory() catch std.math.maxInt(usize);
+        run.max_rss_is_default = true;
+    }
+
     runStepNames(
         arena,
         builder,
         targets.items,
         main_progress_node,
         thread_pool_options,
-        ttyconf,
-        stderr,
-        enable_summary,
+        &run,
     ) catch |err| switch (err) {
         error.UncleanExit => process.exit(1),
         else => return err,
     };
 }
 
+const Run = struct {
+    max_rss: usize,
+    max_rss_is_default: bool,
+    max_rss_mutex: std.Thread.Mutex,
+    memory_blocked_steps: std.ArrayList(*Step),
+
+    claimed_rss: usize,
+    enable_summary: ?bool,
+    ttyconf: std.debug.TTY.Config,
+    stderr: std.fs.File,
+};
+
 fn runStepNames(
     arena: std.mem.Allocator,
     b: *std.Build,
     step_names: []const []const u8,
     parent_prog_node: *std.Progress.Node,
     thread_pool_options: std.Thread.Pool.Options,
-    ttyconf: std.debug.TTY.Config,
-    stderr: std.fs.File,
-    enable_summary: ?bool,
+    run: *Run,
 ) !void {
     const gpa = b.allocator;
     var step_stack: std.AutoArrayHashMapUnmanaged(*Step, void) = .{};
@@ -331,6 +369,26 @@ fn runStepNames(
         };
     }
 
+    {
+        // Check that we have enough memory to complete the build.
+        var any_problems = false;
+        for (step_stack.keys()) |s| {
+            if (s.max_rss == 0) continue;
+            if (s.max_rss > run.max_rss) {
+                std.debug.print("{s}{s}: this step declares an upper bound of {d} bytes of memory, exceeding the available {d} bytes of memory\n", .{
+                    s.owner.dep_prefix, s.name, s.max_rss, run.max_rss,
+                });
+                any_problems = true;
+            }
+        }
+        if (any_problems) {
+            if (run.max_rss_is_default) {
+                std.debug.print("note: use --maxrss to override the default\n", .{});
+            }
+            return error.UncleanExit;
+        }
+    }
+
     var thread_pool: std.Thread.Pool = undefined;
     try thread_pool.init(thread_pool_options);
     defer thread_pool.deinit();
@@ -353,10 +411,11 @@ fn runStepNames(
 
             wait_group.start();
             thread_pool.spawn(workerMakeOneStep, .{
-                &wait_group, &thread_pool, b, step, &step_prog, ttyconf,
+                &wait_group, &thread_pool, b, step, &step_prog, run,
             }) catch @panic("OOM");
         }
     }
+    assert(run.memory_blocked_steps.items.len == 0);
 
     var success_count: usize = 0;
     var skipped_count: usize = 0;
@@ -396,9 +455,12 @@ fn runStepNames(
 
     // A proper command line application defaults to silently succeeding.
     // The user may request verbose mode if they have a different preference.
-    if (failure_count == 0 and enable_summary != true) return cleanExit();
+    if (failure_count == 0 and run.enable_summary != true) return cleanExit();
+
+    const ttyconf = run.ttyconf;
+    const stderr = run.stderr;
 
-    if (enable_summary != false) {
+    if (run.enable_summary != false) {
         const total_count = success_count + failure_count + pending_count + skipped_count;
         ttyconf.setColor(stderr, .Cyan) catch {};
         stderr.writeAll("Build Summary:") catch {};
@@ -407,7 +469,7 @@ fn runStepNames(
         if (skipped_count > 0) stderr.writer().print("; {d} skipped", .{skipped_count}) catch {};
         if (failure_count > 0) stderr.writer().print("; {d} failed", .{failure_count}) catch {};
 
-        if (enable_summary == null) {
+        if (run.enable_summary == null) {
             ttyconf.setColor(stderr, .Dim) catch {};
             stderr.writeAll(" (disable with -fno-summary)") catch {};
             ttyconf.setColor(stderr, .Reset) catch {};
@@ -623,7 +685,7 @@ fn workerMakeOneStep(
     b: *std.Build,
     s: *Step,
     prog_node: *std.Progress.Node,
-    ttyconf: std.debug.TTY.Config,
+    run: *Run,
 ) void {
     defer wg.finish();
 
@@ -646,10 +708,32 @@ fn workerMakeOneStep(
         }
     }
 
-    // Avoid running steps twice.
-    if (@cmpxchgStrong(Step.State, &s.state, .precheck_done, .running, .SeqCst, .SeqCst) != null) {
-        // Another worker got the job.
-        return;
+    if (s.max_rss != 0) {
+        run.max_rss_mutex.lock();
+        defer run.max_rss_mutex.unlock();
+
+        // Avoid running steps twice.
+        if (s.state != .precheck_done) {
+            // Another worker got the job.
+            return;
+        }
+
+        const new_claimed_rss = run.claimed_rss + s.max_rss;
+        if (new_claimed_rss > run.max_rss) {
+            // Running this step right now could possibly exceed the allotted RSS.
+            // Add this step to the queue of memory-blocked steps.
+            run.memory_blocked_steps.append(s) catch @panic("OOM");
+            return;
+        }
+
+        run.claimed_rss = new_claimed_rss;
+        s.state = .running;
+    } else {
+        // Avoid running steps twice.
+        if (@cmpxchgStrong(Step.State, &s.state, .precheck_done, .running, .SeqCst, .SeqCst) != null) {
+            // Another worker got the job.
+            return;
+        }
     }
 
     var sub_prog_node = prog_node.start(s.name, 0);
@@ -667,7 +751,8 @@ fn workerMakeOneStep(
         sub_prog_node.context.lock_stderr();
         defer sub_prog_node.context.unlock_stderr();
 
-        const stderr = std.io.getStdErr();
+        const stderr = run.stderr;
+        const ttyconf = run.ttyconf;
 
         for (s.result_error_msgs.items) |msg| {
             // Sometimes it feels like you just can't catch a break. Finally,
@@ -684,22 +769,55 @@ fn workerMakeOneStep(
         }
     }
 
-    if (make_result) |_| {
-        @atomicStore(Step.State, &s.state, .success, .SeqCst);
-    } else |err| switch (err) {
-        error.MakeFailed => {
-            @atomicStore(Step.State, &s.state, .failure, .SeqCst);
-            return;
-        },
-        error.MakeSkipped => @atomicStore(Step.State, &s.state, .skipped, .SeqCst),
+    handle_result: {
+        if (make_result) |_| {
+            @atomicStore(Step.State, &s.state, .success, .SeqCst);
+        } else |err| switch (err) {
+            error.MakeFailed => {
+                @atomicStore(Step.State, &s.state, .failure, .SeqCst);
+                break :handle_result;
+            },
+            error.MakeSkipped => @atomicStore(Step.State, &s.state, .skipped, .SeqCst),
+        }
+
+        // Successful completion of a step, so we queue up its dependants as well.
+        for (s.dependants.items) |dep| {
+            wg.start();
+            thread_pool.spawn(workerMakeOneStep, .{
+                wg, thread_pool, b, dep, prog_node, run,
+            }) catch @panic("OOM");
+        }
     }
 
-    // Successful completion of a step, so we queue up its dependants as well.
-    for (s.dependants.items) |dep| {
-        wg.start();
-        thread_pool.spawn(workerMakeOneStep, .{
-            wg, thread_pool, b, dep, prog_node, ttyconf,
-        }) catch @panic("OOM");
+    // If this is a step that claims resources, we must now queue up other
+    // steps that are waiting for resources.
+    if (s.max_rss != 0) {
+        run.max_rss_mutex.lock();
+        defer run.max_rss_mutex.unlock();
+
+        // Give the memory back to the scheduler.
+        run.claimed_rss -= s.max_rss;
+        // Avoid kicking off too many tasks that we already know will not have
+        // enough resources.
+        var remaining = run.max_rss - run.claimed_rss;
+        var i: usize = 0;
+        var j: usize = 0;
+        while (j < run.memory_blocked_steps.items.len) : (j += 1) {
+            const dep = run.memory_blocked_steps.items[j];
+            assert(dep.max_rss != 0);
+            if (dep.max_rss <= remaining) {
+                remaining -= dep.max_rss;
+
+                wg.start();
+                thread_pool.spawn(workerMakeOneStep, .{
+                    wg, thread_pool, b, dep, prog_node, run,
+                }) catch @panic("OOM");
+            } else {
+                run.memory_blocked_steps.items[i] = dep;
+                i += 1;
+            }
+        }
+        run.memory_blocked_steps.shrinkRetainingCapacity(i);
     }
 }
 
@@ -770,6 +888,7 @@ fn usage(builder: *std.Build, already_ran_build: bool, out_stream: anytype) !voi
         \\  --color [auto|off|on]        Enable or disable colored error messages
         \\  --prominent-compile-errors   Output compile errors formatted for a human to read
         \\  -j<N>                        Limit concurrent jobs (default is to use all CPU cores)
+        \\  --maxrss <bytes>             Limit memory usage (default is to use available memory)
         \\
         \\Project-Specific Options:
         \\