Commit 7e7d7875b9

mlugg <mlugg@mlugg.co.uk>
2025-08-21 14:44:43
std.Build: implement unit test timeouts
For now, there is a flag to `zig build` called `--test-timeout-ms` which accepts a value in milliseconds. If the execution time of any individual unit test exceeds that number of milliseconds, the test is terminated and marked as timed out.

In the future, we may want to increase the granularity of this feature by allowing timeouts to be specified per-step or even per-test. However, a global option is actually very useful. In particular, it can be used in CI scripts to ensure that no individual unit test exceeds some reasonable limit (e.g. 60 seconds) without having to assign limits to every individual test step in the build script.

Also, individual unit test durations are now shown in the time report web interface -- this was fairly trivial to add since we're timing tests (to check for timeouts) anyway.

This commit makes progress on #19821, but does not close it, because that proposal includes a more sophisticated mechanism for setting timeouts.

Co-Authored-By: David Rubin <david@vortan.dev>
1 parent 3377621
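
For illustration, here is a minimal sketch (not part of the commit) of the conversion this flag goes through: the build runner turns the millisecond value into the nanosecond timeout threaded through `Step.MakeOptions.unit_test_timeout_ns`. The helper name `timeoutNs` is hypothetical; the logic mirrors the `build_runner.zig` change below, where an overflowing value simply degrades to "no timeout".

    const std = @import("std");

    /// Hypothetical helper mirroring the build runner's handling of
    /// `--test-timeout-ms`: convert milliseconds to nanoseconds, treating
    /// overflow as `null`, i.e. no timeout.
    fn timeoutNs(test_timeout_ms: ?u64) ?u64 {
        const ms = test_timeout_ms orelse return null;
        return std.math.mul(u64, ms, std.time.ns_per_ms) catch null;
    }

    test timeoutNs {
        // e.g. a CI script passing `--test-timeout-ms 60000` gets a 60 second limit
        try std.testing.expectEqual(@as(?u64, 60 * std.time.ns_per_s), timeoutNs(60_000));
        try std.testing.expectEqual(@as(?u64, null), timeoutNs(null));
    }

A CI configuration would then pass `--test-timeout-ms 60000` to its `zig build` invocation to cap every individual unit test at 60 seconds without assigning limits in the build script.
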
lib/build-web/index.html
@@ -139,6 +139,19 @@
         <div><slot name="llvm-pass-timings"></slot></div>
       </details>
     </div>
+    <div id="runTestReport">
+      <table class="time-stats">
+        <thead>
+          <tr>
+            <th scope="col">Test Name</th>
+            <th scope="col">Duration</th>
+          </tr>
+        </thead>
+        <!-- HTML does not allow placing a 'slot' inside of a 'tbody' for backwards-compatibility
+          reasons, so we unfortunately must template on the `id` here. -->
+        <tbody id="runTestTableBody"></tbody>
+      </table>
+    </div>
   </details>
 </template>
 
lib/build-web/main.js
@@ -46,8 +46,9 @@ WebAssembly.instantiateStreaming(wasm_promise, {
     updateCoverage: fuzzUpdateCoverage,
   },
   time_report: {
-    updateCompile: timeReportUpdateCompile,
     updateGeneric: timeReportUpdateGeneric,
+    updateCompile: timeReportUpdateCompile,
+    updateRunTest: timeReportUpdateRunTest,
   },
 }).then(function(obj) {
   setConnectionStatus("Connecting to WebSocket...", true);
@@ -248,6 +249,7 @@ function timeReportUpdateCompile(
 
   shadow.getElementById("genericReport").classList.add("hidden");
   shadow.getElementById("compileReport").classList.remove("hidden");
+  shadow.getElementById("runTestReport").classList.add("hidden");
 
   if (!use_llvm) shadow.querySelector(":host > details").classList.add("no-llvm");
   host.innerHTML = inner_html;
@@ -265,8 +267,26 @@ function timeReportUpdateGeneric(
   shadow.querySelector(":host > details").classList.remove("pending", "no-llvm");
   shadow.getElementById("genericReport").classList.remove("hidden");
   shadow.getElementById("compileReport").classList.add("hidden");
+  shadow.getElementById("runTestReport").classList.add("hidden");
   host.innerHTML = inner_html;
 }
+function timeReportUpdateRunTest(
+  step_idx,
+  table_html_ptr,
+  table_html_len,
+) {
+  const table_html = decodeString(table_html_ptr, table_html_len);
+  const host = domTimeReportList.children.item(step_idx);
+  const shadow = host.shadowRoot;
+
+  shadow.querySelector(":host > details").classList.remove("pending", "no-llvm");
+
+  shadow.getElementById("genericReport").classList.add("hidden");
+  shadow.getElementById("compileReport").classList.add("hidden");
+  shadow.getElementById("runTestReport").classList.remove("hidden");
+
+  shadow.getElementById("runTestTableBody").innerHTML = table_html;
+}
 
 const fuzz_entry_template = document.getElementById("fuzzEntryTemplate").content;
 const domFuzz = document.getElementById("fuzz");
lib/build-web/main.zig
@@ -94,6 +94,7 @@ export fn message_end() void {
 
         .time_report_generic_result => return time_report.genericResultMessage(msg_bytes) catch @panic("OOM"),
         .time_report_compile_result => return time_report.compileResultMessage(msg_bytes) catch @panic("OOM"),
+        .time_report_run_test_result => return time_report.runTestResultMessage(msg_bytes) catch @panic("OOM"),
     }
 }
 
lib/build-web/time_report.zig
@@ -27,6 +27,13 @@ const js = struct {
         /// Whether the LLVM backend was used. If not, LLVM-specific statistics are hidden.
         use_llvm: bool,
     ) void;
+    extern "time_report" fn updateRunTest(
+        /// The index of the step.
+        step_idx: u32,
+        /// The HTML which will populate the <tbody> of the test table.
+        table_html_ptr: [*]const u8,
+        table_html_len: usize,
+    ) void;
 };
 
 pub fn genericResultMessage(msg_bytes: []u8) error{OutOfMemory}!void {
@@ -237,3 +244,37 @@ pub fn compileResultMessage(msg_bytes: []u8) error{ OutOfMemory, WriteFailed }!v
         hdr.flags.use_llvm,
     );
 }
+
+pub fn runTestResultMessage(msg_bytes: []u8) error{OutOfMemory}!void {
+    if (msg_bytes.len < @sizeOf(abi.RunTestResult)) @panic("malformed RunTestResult message");
+    const hdr: *const abi.RunTestResult = @ptrCast(msg_bytes[0..@sizeOf(abi.RunTestResult)]);
+    if (hdr.step_idx >= step_list.*.len) @panic("malformed RunTestResult message");
+    const trailing = msg_bytes[@sizeOf(abi.RunTestResult)..];
+
+    const durations: []align(1) const u64 = @ptrCast(trailing[0 .. hdr.tests_len * 8]);
+    var offset: usize = hdr.tests_len * 8;
+
+    var table_html: std.ArrayListUnmanaged(u8) = .empty;
+    defer table_html.deinit(gpa);
+
+    for (durations) |test_ns| {
+        const test_name_len = std.mem.indexOfScalar(u8, trailing[offset..], 0) orelse @panic("malformed RunTestResult message");
+        const test_name = trailing[offset..][0..test_name_len];
+        offset += test_name_len + 1;
+        try table_html.print(gpa, "<tr><th scope=\"row\"><code>{f}</code></th>", .{fmtEscapeHtml(test_name)});
+        if (test_ns == std.math.maxInt(u64)) {
+            try table_html.appendSlice(gpa, "<td class=\"empty-cell\"></td>"); // didn't run
+        } else {
+            try table_html.print(gpa, "<td>{D}</td>", .{test_ns});
+        }
+        try table_html.appendSlice(gpa, "</tr>\n");
+    }
+
+    if (offset != trailing.len) @panic("malformed RunTestResult message");
+
+    js.updateRunTest(
+        hdr.step_idx,
+        table_html.items.ptr,
+        table_html.items.len,
+    );
+}
lib/compiler/build_runner.zig
@@ -106,6 +106,7 @@ pub fn main() !void {
     var summary: ?Summary = null;
     var max_rss: u64 = 0;
     var skip_oom_steps = false;
+    var test_timeout_ms: ?u64 = null;
     var color: Color = .auto;
     var prominent_compile_errors = false;
     var help_menu = false;
@@ -175,6 +176,14 @@ pub fn main() !void {
                 };
             } else if (mem.eql(u8, arg, "--skip-oom-steps")) {
                 skip_oom_steps = true;
+            } else if (mem.eql(u8, arg, "--test-timeout-ms")) {
+                const millis_str = nextArgOrFatal(args, &arg_idx);
+                test_timeout_ms = std.fmt.parseInt(u64, millis_str, 10) catch |err| {
+                    std.debug.print("invalid millisecond count: '{s}': {s}\n", .{
+                        millis_str, @errorName(err),
+                    });
+                    process.exit(1);
+                };
             } else if (mem.eql(u8, arg, "--search-prefix")) {
                 const search_prefix = nextArgOrFatal(args, &arg_idx);
                 builder.addSearchPrefix(search_prefix);
@@ -448,6 +457,11 @@ pub fn main() !void {
         .max_rss_is_default = false,
         .max_rss_mutex = .{},
         .skip_oom_steps = skip_oom_steps,
+        .unit_test_timeout_ns = ns: {
+            const ms = test_timeout_ms orelse break :ns null;
+            break :ns std.math.mul(u64, ms, std.time.ns_per_ms) catch null;
+        },
+
         .watch = watch,
         .web_server = undefined, // set after `prepare`
         .memory_blocked_steps = .empty,
@@ -605,6 +619,7 @@ const Run = struct {
     max_rss_is_default: bool,
     max_rss_mutex: std.Thread.Mutex,
     skip_oom_steps: bool,
+    unit_test_timeout_ns: ?u64,
     watch: bool,
     web_server: if (!builtin.single_threaded) ?WebServer else ?noreturn,
     /// Allocated into `gpa`.
@@ -724,6 +739,7 @@ fn runStepNames(
     var test_fail_count: usize = 0;
     var test_pass_count: usize = 0;
     var test_leak_count: usize = 0;
+    var test_timeout_count: usize = 0;
     var test_count: usize = 0;
 
     var success_count: usize = 0;
@@ -736,6 +752,7 @@ fn runStepNames(
         test_fail_count += s.test_results.fail_count;
         test_skip_count += s.test_results.skip_count;
         test_leak_count += s.test_results.leak_count;
+        test_timeout_count += s.test_results.timeout_count;
         test_pass_count += s.test_results.passCount();
         test_count += s.test_results.test_count;
 
@@ -834,6 +851,7 @@ fn runStepNames(
         if (test_skip_count > 0) w.print("; {d} skipped", .{test_skip_count}) catch {};
         if (test_fail_count > 0) w.print("; {d} failed", .{test_fail_count}) catch {};
         if (test_leak_count > 0) w.print("; {d} leaked", .{test_leak_count}) catch {};
+        if (test_timeout_count > 0) w.print("; {d} timed out", .{test_timeout_count}) catch {};
 
         w.writeAll("\n") catch {};
 
@@ -995,7 +1013,10 @@ fn printStepStatus(
             try stderr.writeAll("\n");
             try ttyconf.setColor(stderr, .reset);
         },
-        .failure => try printStepFailure(s, stderr, ttyconf),
+        .failure => {
+            try printStepFailure(s, stderr, ttyconf);
+            try ttyconf.setColor(stderr, .reset);
+        },
     }
 }
 
@@ -1009,7 +1030,6 @@ fn printStepFailure(
         try stderr.print(" {d} errors\n", .{
             s.result_error_bundle.errorMessageCount(),
         });
-        try ttyconf.setColor(stderr, .reset);
     } else if (!s.test_results.isSuccess()) {
         try stderr.print(" {d}/{d} passed", .{
             s.test_results.passCount(), s.test_results.test_count,
@@ -1020,7 +1040,7 @@ fn printStepFailure(
             try stderr.print("{d} failed", .{
                 s.test_results.fail_count,
             });
-            try ttyconf.setColor(stderr, .reset);
+            try ttyconf.setColor(stderr, .white);
         }
         if (s.test_results.skip_count > 0) {
             try stderr.writeAll(", ");
@@ -1028,7 +1048,7 @@ fn printStepFailure(
             try stderr.print("{d} skipped", .{
                 s.test_results.skip_count,
             });
-            try ttyconf.setColor(stderr, .reset);
+            try ttyconf.setColor(stderr, .white);
         }
         if (s.test_results.leak_count > 0) {
             try stderr.writeAll(", ");
@@ -1036,18 +1056,24 @@ fn printStepFailure(
             try stderr.print("{d} leaked", .{
                 s.test_results.leak_count,
             });
-            try ttyconf.setColor(stderr, .reset);
+            try ttyconf.setColor(stderr, .white);
+        }
+        if (s.test_results.timeout_count > 0) {
+            try stderr.writeAll(", ");
+            try ttyconf.setColor(stderr, .red);
+            try stderr.print("{d} timed out", .{
+                s.test_results.timeout_count,
+            });
+            try ttyconf.setColor(stderr, .white);
         }
         try stderr.writeAll("\n");
     } else if (s.result_error_msgs.items.len > 0) {
         try ttyconf.setColor(stderr, .red);
         try stderr.writeAll(" failure\n");
-        try ttyconf.setColor(stderr, .reset);
     } else {
         assert(s.result_stderr.len > 0);
         try ttyconf.setColor(stderr, .red);
         try stderr.writeAll(" stderr\n");
-        try ttyconf.setColor(stderr, .reset);
     }
 }
 
@@ -1250,6 +1276,7 @@ fn workerMakeOneStep(
         .thread_pool = thread_pool,
         .watch = run.watch,
         .web_server = if (run.web_server) |*ws| ws else null,
+        .unit_test_timeout_ns = run.unit_test_timeout_ns,
         .gpa = run.gpa,
     });
 
@@ -1439,6 +1466,7 @@ fn printUsage(b: *std.Build, w: *Writer) !void {
         \\  -j<N>                        Limit concurrent jobs (default is to use all CPU cores)
         \\  --maxrss <bytes>             Limit memory usage (default is to use available memory)
         \\  --skip-oom-steps             Instead of failing, skip steps that would exceed --maxrss
+        \\  --test-timeout-ms <ms>       Limit execution time of unit tests, terminating if exceeded
         \\  --fetch[=mode]               Fetch dependency tree (optionally choose laziness) and exit
         \\    needed                     (Default) Lazy dependencies are fetched as needed
         \\    all                        Lazy dependencies are always fetched
lib/compiler/test_runner.zig
@@ -135,6 +135,10 @@ fn mainServer() !void {
                 var fail = false;
                 var skip = false;
                 is_fuzz_test = false;
+
+                // let the build runner know we're starting the test now
+                try server.serveStringMessage(.test_started, &.{});
+
                 test_fn.func() catch |err| switch (err) {
                     error.SkipZigTest => skip = true,
                     else => {
lib/std/Build/Step/Run.zig
@@ -756,7 +756,6 @@ const IndexedOutput = struct {
     output: *Output,
 };
 fn make(step: *Step, options: Step.MakeOptions) !void {
-    const prog_node = options.progress_node;
     const b = step.owner;
     const arena = b.allocator;
     const run: *Run = @fieldParentPtr("step", step);
@@ -964,7 +963,7 @@ fn make(step: *Step, options: Step.MakeOptions) !void {
                 b.fmt("{s}{s}", .{ placeholder.output.prefix, arg_output_path });
         }
 
-        try runCommand(run, argv_list.items, has_side_effects, output_dir_path, prog_node, null);
+        try runCommand(run, argv_list.items, has_side_effects, output_dir_path, options, null);
         if (!has_side_effects) try step.writeManifestAndWatch(&man);
         return;
     };
@@ -997,7 +996,7 @@ fn make(step: *Step, options: Step.MakeOptions) !void {
         });
     }
 
-    try runCommand(run, argv_list.items, has_side_effects, tmp_dir_path, prog_node, null);
+    try runCommand(run, argv_list.items, has_side_effects, tmp_dir_path, options, null);
 
     const dep_file_dir = std.fs.cwd();
     const dep_file_basename = dep_output_file.generated_file.getPath2(b, step);
@@ -1115,7 +1114,14 @@ pub fn rerunInFuzzMode(
     const has_side_effects = false;
     const rand_int = std.crypto.random.int(u64);
     const tmp_dir_path = "tmp" ++ fs.path.sep_str ++ std.fmt.hex(rand_int);
-    try runCommand(run, argv_list.items, has_side_effects, tmp_dir_path, prog_node, .{
+    try runCommand(run, argv_list.items, has_side_effects, tmp_dir_path, .{
+        .progress_node = prog_node,
+        .thread_pool = undefined, // not used by `runCommand`
+        .watch = undefined, // not used by `runCommand`
+        .web_server = null, // only needed for time reports
+        .unit_test_timeout_ns = null, // don't time out fuzz tests for now
+        .gpa = undefined, // not used by `runCommand`
+    }, .{
         .unit_test_index = unit_test_index,
         .fuzz = fuzz,
     });
@@ -1196,7 +1202,7 @@ fn runCommand(
     argv: []const []const u8,
     has_side_effects: bool,
     output_dir_path: []const u8,
-    prog_node: std.Progress.Node,
+    options: Step.MakeOptions,
     fuzz_context: ?FuzzContext,
 ) !void {
     const step = &run.step;
@@ -1218,7 +1224,7 @@ fn runCommand(
 
     var env_map = run.env_map orelse &b.graph.env_map;
 
-    const result = spawnChildAndCollect(run, argv, env_map, has_side_effects, prog_node, fuzz_context) catch |err| term: {
+    const result = spawnChildAndCollect(run, argv, env_map, has_side_effects, options, fuzz_context) catch |err| term: {
         // InvalidExe: cpu arch mismatch
         // FileNotFound: can happen with a wrong dynamic linker path
         if (err == error.InvalidExe or err == error.FileNotFound) interpret: {
@@ -1357,7 +1363,7 @@ fn runCommand(
 
             try Step.handleVerbose2(step.owner, cwd, run.env_map, interp_argv.items);
 
-            break :term spawnChildAndCollect(run, interp_argv.items, env_map, has_side_effects, prog_node, fuzz_context) catch |e| {
+            break :term spawnChildAndCollect(run, interp_argv.items, env_map, has_side_effects, options, fuzz_context) catch |e| {
                 if (!run.failing_to_execute_foreign_is_an_error) return error.MakeSkipped;
 
                 return step.fail("unable to spawn interpreter {s}: {s}", .{
@@ -1372,8 +1378,14 @@ fn runCommand(
     step.result_duration_ns = result.elapsed_ns;
     step.result_peak_rss = result.peak_rss;
     step.test_results = result.stdio.test_results;
-    if (result.stdio.test_metadata) |tm|
+    if (result.stdio.test_metadata) |tm| {
         run.cached_test_metadata = tm.toCachedTestMetadata();
+        if (options.web_server) |ws| ws.updateTimeReportRunTest(
+            run,
+            &run.cached_test_metadata.?,
+            tm.ns_per_test,
+        );
+    }
 
     const final_argv = if (interp_argv.items.len == 0) argv else interp_argv.items;
 
@@ -1558,7 +1570,7 @@ fn spawnChildAndCollect(
     argv: []const []const u8,
     env_map: *EnvMap,
     has_side_effects: bool,
-    prog_node: std.Progress.Node,
+    options: Step.MakeOptions,
     fuzz_context: ?FuzzContext,
 ) !ChildProcResult {
     const b = run.step.owner;
@@ -1604,7 +1616,7 @@ fn spawnChildAndCollect(
     const inherit = child.stdout_behavior == .Inherit or child.stderr_behavior == .Inherit;
 
     if (run.stdio != .zig_test and !run.disable_zig_progress and !inherit) {
-        child.progress_node = prog_node;
+        child.progress_node = options.progress_node;
     }
 
     const term, const result, const elapsed_ns = t: {
@@ -1622,7 +1634,7 @@ fn spawnChildAndCollect(
         var timer = try std.time.Timer.start();
 
         const result = if (run.stdio == .zig_test)
-            try evalZigTest(run, &child, prog_node, fuzz_context)
+            try evalZigTest(run, &child, options, fuzz_context)
         else
             try evalGeneric(run, &child);
 
@@ -1647,13 +1659,15 @@ const StdIoResult = struct {
 fn evalZigTest(
     run: *Run,
     child: *std.process.Child,
-    prog_node: std.Progress.Node,
+    options: Step.MakeOptions,
     fuzz_context: ?FuzzContext,
 ) !StdIoResult {
     const gpa = run.step.owner.allocator;
     const arena = run.step.owner.allocator;
 
-    var poller = std.Io.poll(gpa, enum { stdout, stderr }, .{
+    const PollEnum = enum { stdout, stderr };
+
+    var poller = std.Io.poll(gpa, PollEnum, .{
         .stdout = child.stdout.?,
         .stderr = child.stderr.?,
     });
@@ -1692,21 +1706,126 @@ fn evalZigTest(
     var fail_count: u32 = 0;
     var skip_count: u32 = 0;
     var leak_count: u32 = 0;
+    var timeout_count: u32 = 0;
     var test_count: u32 = 0;
     var log_err_count: u32 = 0;
 
     var metadata: ?TestMetadata = null;
     var coverage_id: ?u64 = null;
 
+    var test_is_running = false;
+
+    // String allocated into `gpa`. Owned by this function while it runs, then moved to the `Step`.
+    var result_stderr: []u8 = &.{};
+    defer run.step.result_stderr = result_stderr;
+
+    // `null` means this host does not support `std.time.Timer`. This timer is `reset()` whenever we
+    // toggle `test_is_running`, i.e. whenever a test starts or finishes.
+    var timer: ?std.time.Timer = std.time.Timer.start() catch t: {
+        std.log.warn("std.time.Timer not supported on host; test timeouts will be ignored", .{});
+        break :t null;
+    };
+
     var sub_prog_node: ?std.Progress.Node = null;
     defer if (sub_prog_node) |n| n.end();
 
-    const stdout = poller.reader(.stdout);
-    const stderr = poller.reader(.stderr);
+    // This timeout is used when we're waiting on the test runner itself rather than a user-specified
+    // test. For instance, if the test runner leaves this much time between us requesting a test to
+    // start and it acknowledging the test starting, we terminate the child and raise an error. This
+    // *should* never happen, but could in theory be caused by some very unlucky IB in a test.
+    const response_timeout_ns = 30 * std.time.ns_per_s;
+
     const any_write_failed = first_write_failed or poll: while (true) {
+        // These are scoped inside the loop because we sometimes respawn the child and recreate
+        // `poller`, which invalidates these readers.
+        const stdout = poller.reader(.stdout);
+        const stderr = poller.reader(.stderr);
+
         const Header = std.zig.Server.Message.Header;
-        while (stdout.buffered().len < @sizeOf(Header)) if (!try poller.poll()) break :poll false;
+
+        // This block is exited when `stdout` contains enough bytes for a `Header`.
+        header_ready: {
+            if (stdout.buffered().len >= @sizeOf(Header)) {
+                // We already have one, no need to poll!
+                break :header_ready;
+            }
+
+            // Always `null` if `timer` is `null`.
+            const opt_timeout_ns: ?u64 = ns: {
+                if (timer == null) break :ns null;
+                if (!test_is_running) break :ns response_timeout_ns;
+                break :ns options.unit_test_timeout_ns;
+            };
+
+            if (opt_timeout_ns) |timeout_ns| {
+                const remaining_ns = timeout_ns -| timer.?.read();
+                if (!try poller.pollTimeout(remaining_ns)) break :poll false;
+            } else {
+                if (!try poller.poll()) break :poll false;
+            }
+
+            if (stdout.buffered().len >= @sizeOf(Header)) {
+                // There wasn't a header before, but there is one after the `poll`.
+                break :header_ready;
+            }
+
+            const timeout_ns = opt_timeout_ns orelse continue;
+            const cur_ns = timer.?.read();
+            if (cur_ns < timeout_ns) continue;
+
+            // There was a timeout.
+
+            if (!test_is_running) {
+                // The child stopped responding while *not* running a test. To avoid getting into
+                // a loop if something's broken, don't retry; just report an error and stop.
+                try run.step.addError("test runner failed to respond for {D}", .{cur_ns});
+                break :poll false;
+            }
+
+            // A test has probably just gotten stuck. We'll report an error, then just kill the
+            // child and continue with the next test in the list.
+
+            const md = &metadata.?;
+            const test_index = md.next_index - 1;
+
+            timeout_count += 1;
+            try run.step.addError(
+                "'{s}' timed out after {D}",
+                .{ md.testName(test_index), cur_ns },
+            );
+            if (stderr.buffered().len > 0) {
+                const new_bytes = stderr.buffered();
+                const old_len = result_stderr.len;
+                result_stderr = try gpa.realloc(result_stderr, old_len + new_bytes.len);
+                @memcpy(result_stderr[old_len..], new_bytes);
+            }
+
+            _ = try child.kill();
+            // Respawn the test runner. There's a double-cleanup if this fails, but that's
+            // fine because our caller's `kill` will just return `error.AlreadyTerminated`.
+            try child.spawn();
+            try child.waitForSpawn();
+
+            // After respawning the child, we must update the poller's streams.
+            poller.deinit();
+            poller = std.Io.poll(gpa, PollEnum, .{
+                .stdout = child.stdout.?,
+                .stderr = child.stderr.?,
+            });
+
+            test_is_running = false;
+            md.ns_per_test[test_index] = timer.?.lap();
+
+            requestNextTest(child.stdin.?, md, &sub_prog_node) catch |err| {
+                try run.step.addError("unable to write stdin: {s}", .{@errorName(err)});
+                break :poll true;
+            };
+
+            continue :poll; // continue work with the new (respawned) child
+        }
+        // There is definitely a header available now -- read it.
         const header = stdout.takeStruct(Header, .little) catch unreachable;
+
         while (stdout.buffered().len < header.bytes_len) if (!try poller.poll()) break :poll false;
         const body = stdout.take(header.bytes_len) catch unreachable;
         switch (header.tag) {
@@ -1720,6 +1839,12 @@ fn evalZigTest(
             },
             .test_metadata => {
                 assert(fuzz_context == null);
+
+                // `metadata` would only be populated if we'd already seen a `test_metadata`, but we
+                // only request it once (and importantly, we don't re-request it if we kill and
+                // restart the test runner).
+                assert(metadata == null);
+
                 const TmHdr = std.zig.Server.Message.TestMetadata;
                 const tm_hdr = @as(*align(1) const TmHdr, @ptrCast(body));
                 test_count = tm_hdr.tests_len;
@@ -1730,32 +1855,42 @@ fn evalZigTest(
 
                 const names = std.mem.bytesAsSlice(u32, names_bytes);
                 const expected_panic_msgs = std.mem.bytesAsSlice(u32, expected_panic_msgs_bytes);
+
                 const names_aligned = try arena.alloc(u32, names.len);
                 for (names_aligned, names) |*dest, src| dest.* = src;
 
                 const expected_panic_msgs_aligned = try arena.alloc(u32, expected_panic_msgs.len);
                 for (expected_panic_msgs_aligned, expected_panic_msgs) |*dest, src| dest.* = src;
 
-                prog_node.setEstimatedTotalItems(names.len);
+                options.progress_node.setEstimatedTotalItems(names.len);
                 metadata = .{
                     .string_bytes = try arena.dupe(u8, string_bytes),
+                    .ns_per_test = try arena.alloc(u64, test_count),
                     .names = names_aligned,
                     .expected_panic_msgs = expected_panic_msgs_aligned,
                     .next_index = 0,
-                    .prog_node = prog_node,
+                    .prog_node = options.progress_node,
                 };
+                @memset(metadata.?.ns_per_test, std.math.maxInt(u64));
+
+                test_is_running = false;
+                if (timer) |*t| t.reset();
 
                 requestNextTest(child.stdin.?, &metadata.?, &sub_prog_node) catch |err| {
                     try run.step.addError("unable to write stdin: {s}", .{@errorName(err)});
                     break :poll true;
                 };
             },
+            .test_started => {
+                test_is_running = true;
+                if (timer) |*t| t.reset();
+            },
             .test_results => {
                 assert(fuzz_context == null);
-                const md = metadata.?;
+                const md = &metadata.?;
 
                 const TrHdr = std.zig.Server.Message.TestResults;
-                const tr_hdr = @as(*align(1) const TrHdr, @ptrCast(body));
+                const tr_hdr: *align(1) const TrHdr = @ptrCast(body);
                 fail_count +|= @intFromBool(tr_hdr.flags.fail);
                 skip_count +|= @intFromBool(tr_hdr.flags.skip);
                 leak_count +|= @intFromBool(tr_hdr.flags.leak);
@@ -1764,7 +1899,7 @@ fn evalZigTest(
                 if (tr_hdr.flags.fuzz) try run.fuzz_tests.append(gpa, tr_hdr.index);
 
                 if (tr_hdr.flags.fail or tr_hdr.flags.leak or tr_hdr.flags.log_err_count > 0) {
-                    const name = std.mem.sliceTo(md.string_bytes[md.names[tr_hdr.index]..], 0);
+                    const name = std.mem.sliceTo(md.testName(tr_hdr.index), 0);
                     const stderr_contents = stderr.buffered();
                     stderr.toss(stderr_contents.len);
                     const msg = std.mem.trim(u8, stderr_contents, "\n");
@@ -1783,7 +1918,10 @@ fn evalZigTest(
                     }
                 }
 
-                requestNextTest(child.stdin.?, &metadata.?, &sub_prog_node) catch |err| {
+                test_is_running = false;
+                if (timer) |*t| md.ns_per_test[tr_hdr.index] = t.lap();
+
+                requestNextTest(child.stdin.?, md, &sub_prog_node) catch |err| {
                     try run.step.addError("unable to write stdin: {s}", .{@errorName(err)});
                     break :poll true;
                 };
@@ -1831,9 +1969,12 @@ fn evalZigTest(
         while (try poller.poll()) {}
     }
 
-    const stderr_contents = std.mem.trim(u8, stderr.buffered(), "\n");
-    if (stderr_contents.len > 0) {
-        run.step.result_stderr = try arena.dupe(u8, stderr_contents);
+    const stderr = poller.reader(.stderr);
+    if (stderr.buffered().len > 0) {
+        const new_bytes = stderr.buffered();
+        const old_len = result_stderr.len;
+        result_stderr = try gpa.realloc(result_stderr, old_len + new_bytes.len);
+        @memcpy(result_stderr[old_len..], new_bytes);
     }
 
     // Send EOF to stdin.
@@ -1848,6 +1989,7 @@ fn evalZigTest(
             .fail_count = fail_count,
             .skip_count = skip_count,
             .leak_count = leak_count,
+            .timeout_count = timeout_count,
             .log_err_count = log_err_count,
         },
         .test_metadata = metadata,
@@ -1856,6 +1998,7 @@ fn evalZigTest(
 
 const TestMetadata = struct {
     names: []const u32,
+    ns_per_test: []u64,
     expected_panic_msgs: []const u32,
     string_bytes: []const u8,
     next_index: u32,
@@ -1896,6 +2039,7 @@ fn requestNextTest(in: fs.File, metadata: *TestMetadata, sub_prog_node: *?std.Pr
         try sendRunTestMessage(in, .run_test, i);
         return;
     } else {
+        metadata.next_index = std.math.maxInt(u32); // indicate that all tests are done
         try sendMessage(in, .exit);
     }
 }
lib/std/Build/abi.zig
@@ -56,6 +56,7 @@ pub const ToClientTag = enum(u8) {
     // `--time-report`
     time_report_generic_result,
     time_report_compile_result,
+    time_report_run_test_result,
 
     _,
 };
@@ -342,4 +343,19 @@ pub const time_report = struct {
             };
         };
     };
+
+    /// WebSocket server->client.
+    ///
+    /// Sent after a `Step.Run` for a Zig test executable finishes, providing the test's time report.
+    ///
+    /// Trailing:
+    /// * for each `tests_len`:
+    ///   * `test_ns: u64` (nanoseconds spent running this test)
+    /// * for each `tests_len`:
+    ///   * `name` (null-terminated UTF-8 string)
+    pub const RunTestResult = extern struct {
+        tag: ToClientTag = .time_report_run_test_result,
+        step_idx: u32 align(1),
+        tests_len: u32 align(1),
+    };
 };
lib/std/Build/Step.zig
@@ -66,15 +66,16 @@ pub const TestResults = struct {
     fail_count: u32 = 0,
     skip_count: u32 = 0,
     leak_count: u32 = 0,
+    timeout_count: u32 = 0,
     log_err_count: u32 = 0,
     test_count: u32 = 0,
 
     pub fn isSuccess(tr: TestResults) bool {
-        return tr.fail_count == 0 and tr.leak_count == 0 and tr.log_err_count == 0;
+        return tr.fail_count == 0 and tr.leak_count == 0 and tr.log_err_count == 0 and tr.timeout_count == 0;
     }
 
     pub fn passCount(tr: TestResults) u32 {
-        return tr.test_count - tr.fail_count - tr.skip_count;
+        return tr.test_count - tr.fail_count - tr.skip_count - tr.timeout_count;
     }
 };
 
@@ -88,6 +89,8 @@ pub const MakeOptions = struct {
         // it currently breaks because `std.net.Address` doesn't work there. Work around for now.
         .wasm32 => void,
     },
+    /// If set, this is a timeout to enforce on all individual unit tests, in nanoseconds.
+    unit_test_timeout_ns: ?u64,
     /// Not to be confused with `Build.allocator`, which is an alias of `Build.graph.arena`.
     gpa: Allocator,
 };
@@ -243,6 +246,7 @@ pub fn make(s: *Step, options: MakeOptions) error{ MakeFailed, MakeSkipped }!voi
     var timer: ?std.time.Timer = t: {
         if (!s.owner.graph.time_report) break :t null;
         if (s.id == .compile) break :t null;
+        if (s.id == .run and s.cast(Run).?.stdio == .zig_test) break :t null;
         break :t std.time.Timer.start() catch @panic("--time-report not supported on this host");
     };
     const make_result = s.makeFn(s, options);
@@ -513,7 +517,6 @@ fn zigProcessUpdate(s: *Step, zp: *ZigProcess, watch: bool, web_server: ?*Build.
         const header = stdout.takeStruct(Header, .little) catch unreachable;
         while (stdout.buffered().len < header.bytes_len) if (!try zp.poller.poll()) break :poll;
         const body = stdout.take(header.bytes_len) catch unreachable;
-
         switch (header.tag) {
             .zig_version => {
                 if (!std.mem.eql(u8, builtin.zig_version_string, body)) {
lib/std/Build/WebServer.zig
@@ -751,6 +751,64 @@ pub fn updateTimeReportGeneric(ws: *WebServer, step: *Build.Step, ns_total: u64)
     ws.notifyUpdate();
 }
 
+pub fn updateTimeReportRunTest(
+    ws: *WebServer,
+    run: *Build.Step.Run,
+    tests: *const Build.Step.Run.CachedTestMetadata,
+    ns_per_test: []const u64,
+) void {
+    const gpa = ws.gpa;
+
+    const step_idx: u32 = for (ws.all_steps, 0..) |s, i| {
+        if (s == &run.step) break @intCast(i);
+    } else unreachable;
+
+    assert(tests.names.len == ns_per_test.len);
+    const tests_len: u32 = @intCast(tests.names.len);
+
+    const new_len: u64 = len: {
+        var names_len: u64 = 0;
+        for (0..tests_len) |i| {
+            names_len += tests.testName(@intCast(i)).len + 1;
+        }
+        break :len @sizeOf(abi.time_report.RunTestResult) + names_len + 8 * tests_len;
+    };
+    const old_buf = old: {
+        ws.time_report_mutex.lock();
+        defer ws.time_report_mutex.unlock();
+        const old = ws.time_report_msgs[step_idx];
+        ws.time_report_msgs[step_idx] = &.{};
+        break :old old;
+    };
+    const buf = gpa.realloc(old_buf, new_len) catch @panic("out of memory");
+
+    const out_header: *align(1) abi.time_report.RunTestResult = @ptrCast(buf[0..@sizeOf(abi.time_report.RunTestResult)]);
+    out_header.* = .{
+        .step_idx = step_idx,
+        .tests_len = tests_len,
+    };
+    var offset: usize = @sizeOf(abi.time_report.RunTestResult);
+    const ns_per_test_out: []align(1) u64 = @ptrCast(buf[offset..][0 .. tests_len * 8]);
+    @memcpy(ns_per_test_out, ns_per_test);
+    offset += tests_len * 8;
+    for (0..tests_len) |i| {
+        const name = tests.testName(@intCast(i));
+        @memcpy(buf[offset..][0..name.len], name);
+        buf[offset..][name.len] = 0;
+        offset += name.len + 1;
+    }
+    assert(offset == buf.len);
+
+    {
+        ws.time_report_mutex.lock();
+        defer ws.time_report_mutex.unlock();
+        assert(ws.time_report_msgs[step_idx].len == 0);
+        ws.time_report_msgs[step_idx] = buf;
+        ws.time_report_update_times[step_idx] = ws.now();
+    }
+    ws.notifyUpdate();
+}
+
 const RunnerRequest = union(enum) {
     rebuild,
 };
lib/std/zig/Server.zig
@@ -34,6 +34,12 @@ pub const Message = struct {
         test_metadata,
         /// Body is a TestResults
         test_results,
+        /// Does not have a body.
+        /// Notifies the build runner that the next test (requested by `Client.Message.Tag.run_test`)
+        /// is starting execution. This message helps to ensure that the timestamp used by the build
+        /// runner to enforce unit test time limits is relatively accurate under extreme system load
+        /// (where there may be a non-trivial delay before the test process is scheduled).
+        test_started,
         /// Body is a series of strings, delimited by null bytes.
         /// Each string is a prefixed file path.
         /// The first byte indicates the file prefix path (see prefixes fields