Commit 9751a0ae04

Andrew Kelley <superjoe30@gmail.com>
2018-07-12 01:38:01
std.atomic: use spinlocks
The lock-free data structures all had ABA problems, and std.atomic.Stack could load from an unmapped memory address.
1 parent 9bdcd2a
std/atomic/index.zig
@@ -1,11 +1,9 @@
 pub const Stack = @import("stack.zig").Stack;
-pub const QueueMpsc = @import("queue_mpsc.zig").QueueMpsc;
-pub const QueueMpmc = @import("queue_mpmc.zig").QueueMpmc;
+pub const Queue = @import("queue.zig").Queue;
 pub const Int = @import("int.zig").Int;
 
 test "std.atomic" {
     _ = @import("stack.zig");
-    _ = @import("queue_mpsc.zig");
-    _ = @import("queue_mpmc.zig");
+    _ = @import("queue.zig");
     _ = @import("int.zig");
 }
std/atomic/queue_mpmc.zig → std/atomic/queue.zig
@@ -2,15 +2,13 @@ const builtin = @import("builtin");
 const AtomicOrder = builtin.AtomicOrder;
 const AtomicRmwOp = builtin.AtomicRmwOp;
 
-/// Many producer, many consumer, non-allocating, thread-safe, lock-free
-/// This implementation has a crippling limitation - it hangs onto node
-/// memory for 1 extra get() and 1 extra put() operation - when get() returns a node, that
-/// node must not be freed until both the next get() and the next put() completes.
-pub fn QueueMpmc(comptime T: type) type {
+/// Many producer, many consumer, non-allocating, thread-safe.
+/// Uses a spinlock to protect get() and put().
+pub fn Queue(comptime T: type) type {
     return struct {
-        head: *Node,
-        tail: *Node,
-        root: Node,
+        head: ?*Node,
+        tail: ?*Node,
+        lock: u8,
 
         pub const Self = this;
 
@@ -19,31 +17,48 @@ pub fn QueueMpmc(comptime T: type) type {
             data: T,
         };
 
-        /// TODO: well defined copy elision: https://github.com/ziglang/zig/issues/287
-        pub fn init(self: *Self) void {
-            self.root.next = null;
-            self.head = &self.root;
-            self.tail = &self.root;
+        pub fn init() Self {
+            return Self{
+                .head = null, // empty queue: there is no first node yet
+                .tail = null, // empty queue: there is no last node yet
+                .lock = 0, // 0 = unlocked; put()/get() spin until they swap 0 -> 1
+            };
+        }
 
         pub fn put(self: *Self, node: *Node) void {
             node.next = null;
 
-            const tail = @atomicRmw(*Node, &self.tail, AtomicRmwOp.Xchg, node, AtomicOrder.SeqCst);
-            _ = @atomicRmw(?*Node, &tail.next, AtomicRmwOp.Xchg, node, AtomicOrder.SeqCst);
+            while (@atomicRmw(u8, &self.lock, builtin.AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst) != 0) {}
+            defer assert(@atomicRmw(u8, &self.lock, builtin.AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst) == 1);
+
+            const opt_tail = self.tail;
+            self.tail = node;
+            if (opt_tail) |tail| {
+                tail.next = node;
+            } else {
+                assert(self.head == null);
+                self.head = node;
+            }
         }
 
-        /// node must not be freed until both the next get() and the next put() complete
         pub fn get(self: *Self) ?*Node {
-            var head = @atomicLoad(*Node, &self.head, AtomicOrder.SeqCst);
-            while (true) {
-                const node = head.next orelse return null;
-                head = @cmpxchgWeak(*Node, &self.head, head, node, AtomicOrder.SeqCst, AtomicOrder.SeqCst) orelse return node;
-            }
+            while (@atomicRmw(u8, &self.lock, builtin.AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst) != 0) {}
+            defer assert(@atomicRmw(u8, &self.lock, builtin.AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst) == 1);
+
+            const head = self.head orelse return null;
+            self.head = head.next;
+            if (head.next == null) self.tail = null;
+            return head;
+        }
+
+        pub fn isEmpty(self: *Self) bool {
+            return @atomicLoad(?*Node, &self.head, builtin.AtomicOrder.SeqCst) == null; // empty iff there is no head node
+        }
 
-        ///// This is a debug function that is not thread-safe.
         pub fn dump(self: *Self) void {
+            while (@atomicRmw(u8, &self.lock, builtin.AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst) != 0) {}
+            defer assert(@atomicRmw(u8, &self.lock, builtin.AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst) == 1);
+
             std.debug.warn("head: ");
             dumpRecursive(self.head, 0);
             std.debug.warn("tail: ");
@@ -64,12 +79,12 @@ pub fn QueueMpmc(comptime T: type) type {
     };
 }
 
-const std = @import("std");
+const std = @import("../index.zig");
 const assert = std.debug.assert;
 
 const Context = struct {
     allocator: *std.mem.Allocator,
-    queue: *QueueMpmc(i32),
+    queue: *Queue(i32),
     put_sum: isize,
     get_sum: isize,
     get_count: usize,
@@ -84,7 +99,7 @@ const Context = struct {
 const puts_per_thread = 500;
 const put_thread_count = 3;
 
-test "std.atomic.queue_mpmc" {
+test "std.atomic.Queue" {
     var direct_allocator = std.heap.DirectAllocator.init();
     defer direct_allocator.deinit();
 
@@ -94,8 +109,7 @@ test "std.atomic.queue_mpmc" {
     var fixed_buffer_allocator = std.heap.ThreadSafeFixedBufferAllocator.init(plenty_of_memory);
     var a = &fixed_buffer_allocator.allocator;
 
-    var queue: QueueMpmc(i32) = undefined;
-    queue.init();
+    var queue = Queue(i32).init();
     var context = Context{
         .allocator = a,
         .queue = &queue,
@@ -140,7 +154,7 @@ fn startPuts(ctx: *Context) u8 {
     while (put_count != 0) : (put_count -= 1) {
         std.os.time.sleep(0, 1); // let the os scheduler be our fuzz
         const x = @bitCast(i32, r.random.scalar(u32));
-        const node = ctx.allocator.create(QueueMpmc(i32).Node{
+        const node = ctx.allocator.create(Queue(i32).Node{
             .next = undefined,
             .data = x,
         }) catch unreachable;
@@ -164,17 +178,16 @@ fn startGets(ctx: *Context) u8 {
     }
 }
 
-test "std.atomic.queue_mpmc single-threaded" {
-    var queue: QueueMpmc(i32) = undefined;
-    queue.init();
+test "std.atomic.Queue single-threaded" {
+    var queue = Queue(i32).init();
 
-    var node_0 = QueueMpmc(i32).Node{
+    var node_0 = Queue(i32).Node{
         .data = 0,
         .next = undefined,
     };
     queue.put(&node_0);
 
-    var node_1 = QueueMpmc(i32).Node{
+    var node_1 = Queue(i32).Node{
         .data = 1,
         .next = undefined,
     };
@@ -182,13 +195,13 @@ test "std.atomic.queue_mpmc single-threaded" {
 
     assert(queue.get().?.data == 0);
 
-    var node_2 = QueueMpmc(i32).Node{
+    var node_2 = Queue(i32).Node{
         .data = 2,
         .next = undefined,
     };
     queue.put(&node_2);
 
-    var node_3 = QueueMpmc(i32).Node{
+    var node_3 = Queue(i32).Node{
         .data = 3,
         .next = undefined,
     };
@@ -198,15 +211,14 @@ test "std.atomic.queue_mpmc single-threaded" {
 
     assert(queue.get().?.data == 2);
 
-    var node_4 = QueueMpmc(i32).Node{
+    var node_4 = Queue(i32).Node{
         .data = 4,
         .next = undefined,
     };
     queue.put(&node_4);
 
     assert(queue.get().?.data == 3);
-    // if we were to set node_3.next to null here, it would cause this test
-    // to fail. this demonstrates the limitation of hanging on to extra memory.
+    node_3.next = null;
 
     assert(queue.get().?.data == 4);
 
std/atomic/queue_mpsc.zig
@@ -1,185 +0,0 @@
-const std = @import("../index.zig");
-const assert = std.debug.assert;
-const builtin = @import("builtin");
-const AtomicOrder = builtin.AtomicOrder;
-const AtomicRmwOp = builtin.AtomicRmwOp;
-
-/// Many producer, single consumer, non-allocating, thread-safe, lock-free
-pub fn QueueMpsc(comptime T: type) type {
-    return struct {
-        inboxes: [2]std.atomic.Stack(T),
-        outbox: std.atomic.Stack(T),
-        inbox_index: usize,
-
-        pub const Self = this;
-
-        pub const Node = std.atomic.Stack(T).Node;
-
-        /// Not thread-safe. The call to init() must complete before any other functions are called.
-        /// No deinitialization required.
-        pub fn init() Self {
-            return Self{
-                .inboxes = []std.atomic.Stack(T){
-                    std.atomic.Stack(T).init(),
-                    std.atomic.Stack(T).init(),
-                },
-                .outbox = std.atomic.Stack(T).init(),
-                .inbox_index = 0,
-            };
-        }
-
-        /// Fully thread-safe. put() may be called from any thread at any time.
-        pub fn put(self: *Self, node: *Node) void {
-            const inbox_index = @atomicLoad(usize, &self.inbox_index, AtomicOrder.SeqCst);
-            const inbox = &self.inboxes[inbox_index];
-            inbox.push(node);
-        }
-
-        /// Must be called by only 1 consumer at a time. Every call to get() and isEmpty() must complete before
-        /// the next call to get().
-        pub fn get(self: *Self) ?*Node {
-            if (self.outbox.pop()) |node| {
-                return node;
-            }
-            const prev_inbox_index = @atomicRmw(usize, &self.inbox_index, AtomicRmwOp.Xor, 0x1, AtomicOrder.SeqCst);
-            const prev_inbox = &self.inboxes[prev_inbox_index];
-            while (prev_inbox.pop()) |node| {
-                self.outbox.push(node);
-            }
-            return self.outbox.pop();
-        }
-
-        /// Must be called by only 1 consumer at a time. Every call to get() and isEmpty() must complete before
-        /// the next call to isEmpty().
-        pub fn isEmpty(self: *Self) bool {
-            if (!self.outbox.isEmpty()) return false;
-            const prev_inbox_index = @atomicRmw(usize, &self.inbox_index, AtomicRmwOp.Xor, 0x1, AtomicOrder.SeqCst);
-            const prev_inbox = &self.inboxes[prev_inbox_index];
-            while (prev_inbox.pop()) |node| {
-                self.outbox.push(node);
-            }
-            return self.outbox.isEmpty();
-        }
-
-        /// For debugging only. No API guarantees about what this does.
-        pub fn dump(self: *Self) void {
-            {
-                var it = self.outbox.root;
-                while (it) |node| {
-                    std.debug.warn("0x{x} -> ", @ptrToInt(node));
-                    it = node.next;
-                }
-            }
-            const inbox_index = self.inbox_index;
-            const inboxes = []*std.atomic.Stack(T){
-                &self.inboxes[self.inbox_index],
-                &self.inboxes[1 - self.inbox_index],
-            };
-            for (inboxes) |inbox| {
-                var it = inbox.root;
-                while (it) |node| {
-                    std.debug.warn("0x{x} -> ", @ptrToInt(node));
-                    it = node.next;
-                }
-            }
-
-            std.debug.warn("null\n");
-        }
-    };
-}
-
-const Context = struct {
-    allocator: *std.mem.Allocator,
-    queue: *QueueMpsc(i32),
-    put_sum: isize,
-    get_sum: isize,
-    get_count: usize,
-    puts_done: u8, // TODO make this a bool
-};
-
-// TODO add lazy evaluated build options and then put puts_per_thread behind
-// some option such as: "AggressiveMultithreadedFuzzTest". In the AppVeyor
-// CI we would use a less aggressive setting since at 1 core, while we still
-// want this test to pass, we need a smaller value since there is so much thrashing
-// we would also use a less aggressive setting when running in valgrind
-const puts_per_thread = 500;
-const put_thread_count = 3;
-
-test "std.atomic.queue_mpsc" {
-    var direct_allocator = std.heap.DirectAllocator.init();
-    defer direct_allocator.deinit();
-
-    var plenty_of_memory = try direct_allocator.allocator.alloc(u8, 300 * 1024);
-    defer direct_allocator.allocator.free(plenty_of_memory);
-
-    var fixed_buffer_allocator = std.heap.ThreadSafeFixedBufferAllocator.init(plenty_of_memory);
-    var a = &fixed_buffer_allocator.allocator;
-
-    var queue = QueueMpsc(i32).init();
-    var context = Context{
-        .allocator = a,
-        .queue = &queue,
-        .put_sum = 0,
-        .get_sum = 0,
-        .puts_done = 0,
-        .get_count = 0,
-    };
-
-    var putters: [put_thread_count]*std.os.Thread = undefined;
-    for (putters) |*t| {
-        t.* = try std.os.spawnThread(&context, startPuts);
-    }
-    var getters: [1]*std.os.Thread = undefined;
-    for (getters) |*t| {
-        t.* = try std.os.spawnThread(&context, startGets);
-    }
-
-    for (putters) |t|
-        t.wait();
-    _ = @atomicRmw(u8, &context.puts_done, builtin.AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
-    for (getters) |t|
-        t.wait();
-
-    if (context.put_sum != context.get_sum) {
-        std.debug.panic("failure\nput_sum:{} != get_sum:{}", context.put_sum, context.get_sum);
-    }
-
-    if (context.get_count != puts_per_thread * put_thread_count) {
-        std.debug.panic(
-            "failure\nget_count:{} != puts_per_thread:{} * put_thread_count:{}",
-            context.get_count,
-            u32(puts_per_thread),
-            u32(put_thread_count),
-        );
-    }
-}
-
-fn startPuts(ctx: *Context) u8 {
-    var put_count: usize = puts_per_thread;
-    var r = std.rand.DefaultPrng.init(0xdeadbeef);
-    while (put_count != 0) : (put_count -= 1) {
-        std.os.time.sleep(0, 1); // let the os scheduler be our fuzz
-        const x = @bitCast(i32, r.random.scalar(u32));
-        const node = ctx.allocator.create(QueueMpsc(i32).Node{
-            .next = undefined,
-            .data = x,
-        }) catch unreachable;
-        ctx.queue.put(node);
-        _ = @atomicRmw(isize, &ctx.put_sum, builtin.AtomicRmwOp.Add, x, AtomicOrder.SeqCst);
-    }
-    return 0;
-}
-
-fn startGets(ctx: *Context) u8 {
-    while (true) {
-        const last = @atomicLoad(u8, &ctx.puts_done, builtin.AtomicOrder.SeqCst) == 1;
-
-        while (ctx.queue.get()) |node| {
-            std.os.time.sleep(0, 1); // let the os scheduler be our fuzz
-            _ = @atomicRmw(isize, &ctx.get_sum, builtin.AtomicRmwOp.Add, node.data, builtin.AtomicOrder.SeqCst);
-            _ = @atomicRmw(usize, &ctx.get_count, builtin.AtomicRmwOp.Add, 1, builtin.AtomicOrder.SeqCst);
-        }
-
-        if (last) return 0;
-    }
-}
std/atomic/stack.zig
@@ -1,10 +1,13 @@
+const assert = std.debug.assert;
 const builtin = @import("builtin");
 const AtomicOrder = builtin.AtomicOrder;
 
-/// Many reader, many writer, non-allocating, thread-safe, lock-free
+/// Many reader, many writer, non-allocating, thread-safe
+/// Uses a spinlock to protect push() and pop()
 pub fn Stack(comptime T: type) type {
     return struct {
         root: ?*Node,
+        lock: u8,
 
         pub const Self = this;
 
@@ -14,7 +17,10 @@ pub fn Stack(comptime T: type) type {
         };
 
         pub fn init() Self {
-            return Self{ .root = null };
+            return Self{
+                .root = null,
+                .lock = 0,
+            };
         }
 
         /// push operation, but only if you are the first item in the stack. if you did not succeed in
@@ -25,18 +31,20 @@ pub fn Stack(comptime T: type) type {
         }
 
         pub fn push(self: *Self, node: *Node) void {
-            var root = @atomicLoad(?*Node, &self.root, AtomicOrder.SeqCst);
-            while (true) {
-                node.next = root;
-                root = @cmpxchgWeak(?*Node, &self.root, root, node, AtomicOrder.SeqCst, AtomicOrder.SeqCst) orelse break;
-            }
+            while (@atomicRmw(u8, &self.lock, builtin.AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst) != 0) {}
+            defer assert(@atomicRmw(u8, &self.lock, builtin.AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst) == 1);
+
+            node.next = self.root;
+            self.root = node;
         }
 
         pub fn pop(self: *Self) ?*Node {
-            var root = @atomicLoad(?*Node, &self.root, AtomicOrder.SeqCst);
-            while (true) {
-                root = @cmpxchgWeak(?*Node, &self.root, root, (root orelse return null).next, AtomicOrder.SeqCst, AtomicOrder.SeqCst) orelse return root;
-            }
+            while (@atomicRmw(u8, &self.lock, builtin.AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst) != 0) {}
+            defer assert(@atomicRmw(u8, &self.lock, builtin.AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst) == 1);
+
+            const root = self.root orelse return null;
+            self.root = root.next;
+            return root;
         }
 
         pub fn isEmpty(self: *Self) bool {
@@ -45,7 +53,7 @@ pub fn Stack(comptime T: type) type {
     };
 }
 
-const std = @import("std");
+const std = @import("../index.zig");
 const Context = struct {
     allocator: *std.mem.Allocator,
     stack: *Stack(i32),
std/event/channel.zig
@@ -12,8 +12,8 @@ pub fn Channel(comptime T: type) type {
     return struct {
         loop: *Loop,
 
-        getters: std.atomic.QueueMpsc(GetNode),
-        putters: std.atomic.QueueMpsc(PutNode),
+        getters: std.atomic.Queue(GetNode),
+        putters: std.atomic.Queue(PutNode),
         get_count: usize,
         put_count: usize,
         dispatch_lock: u8, // TODO make this a bool
@@ -46,8 +46,8 @@ pub fn Channel(comptime T: type) type {
                 .buffer_index = 0,
                 .dispatch_lock = 0,
                 .need_dispatch = 0,
-                .getters = std.atomic.QueueMpsc(GetNode).init(),
-                .putters = std.atomic.QueueMpsc(PutNode).init(),
+                .getters = std.atomic.Queue(GetNode).init(),
+                .putters = std.atomic.Queue(PutNode).init(),
                 .get_count = 0,
                 .put_count = 0,
             });
@@ -81,7 +81,7 @@ pub fn Channel(comptime T: type) type {
                     .next = undefined,
                     .data = handle,
                 };
-                var queue_node = std.atomic.QueueMpsc(PutNode).Node{
+                var queue_node = std.atomic.Queue(PutNode).Node{
                     .data = PutNode{
                         .tick_node = &my_tick_node,
                         .data = data,
@@ -111,7 +111,7 @@ pub fn Channel(comptime T: type) type {
                     .next = undefined,
                     .data = handle,
                 };
-                var queue_node = std.atomic.QueueMpsc(GetNode).Node{
+                var queue_node = std.atomic.Queue(GetNode).Node{
                     .data = GetNode{
                         .ptr = &result,
                         .tick_node = &my_tick_node,
std/event/future.zig
@@ -17,7 +17,7 @@ pub fn Future(comptime T: type) type {
         available: u8, // TODO make this a bool
 
         const Self = this;
-        const Queue = std.atomic.QueueMpsc(promise);
+        const Queue = std.atomic.Queue(promise);
 
         pub fn init(loop: *Loop) Self {
             return Self{
@@ -30,19 +30,19 @@ pub fn Future(comptime T: type) type {
         /// Obtain the value. If it's not available, wait until it becomes
         /// available.
         /// Thread-safe.
-        pub async fn get(self: *Self) T {
+        pub async fn get(self: *Self) *T {
             if (@atomicLoad(u8, &self.available, AtomicOrder.SeqCst) == 1) {
-                return self.data;
+                return &self.data;
             }
             const held = await (async self.lock.acquire() catch unreachable);
-            defer held.release();
+            held.release();
 
-            return self.data;
+            return &self.data;
         }
 
         /// Make the data become available. May be called only once.
-        pub fn put(self: *Self, value: T) void {
-            self.data = value;
+        /// The caller must write the result to the `data` field before calling this.
+        pub fn resolve(self: *Self) void {
             const prev = @atomicRmw(u8, &self.available, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
             assert(prev == 0); // put() called twice
             Lock.Held.release(Lock.Held{ .lock = &self.lock });
@@ -57,7 +57,7 @@ test "std.event.Future" {
     const allocator = &da.allocator;
 
     var loop: Loop = undefined;
-    try loop.initMultiThreaded(allocator);
+    try loop.initSingleThreaded(allocator);
     defer loop.deinit();
 
     const handle = try async<allocator> testFuture(&loop);
@@ -79,9 +79,10 @@ async fn testFuture(loop: *Loop) void {
 }
 
 async fn waitOnFuture(future: *Future(i32)) i32 {
-    return await (async future.get() catch @panic("memory"));
+    return (await (async future.get() catch @panic("memory"))).*;
 }
 
 async fn resolveFuture(future: *Future(i32)) void {
-    future.put(6);
+    future.data = 6;
+    future.resolve();
 }
std/event/lock.zig
@@ -15,7 +15,7 @@ pub const Lock = struct {
     queue: Queue,
     queue_empty_bit: u8, // TODO make this a bool
 
-    const Queue = std.atomic.QueueMpsc(promise);
+    const Queue = std.atomic.Queue(promise);
 
     pub const Held = struct {
         lock: *Lock,
std/event/loop.zig
@@ -9,7 +9,7 @@ const AtomicOrder = builtin.AtomicOrder;
 
 pub const Loop = struct {
     allocator: *mem.Allocator,
-    next_tick_queue: std.atomic.QueueMpsc(promise),
+    next_tick_queue: std.atomic.Queue(promise),
     os_data: OsData,
     final_resume_node: ResumeNode,
     dispatch_lock: u8, // TODO make this a bool
@@ -21,7 +21,7 @@ pub const Loop = struct {
     available_eventfd_resume_nodes: std.atomic.Stack(ResumeNode.EventFd),
     eventfd_resume_nodes: []std.atomic.Stack(ResumeNode.EventFd).Node,
 
-    pub const NextTickNode = std.atomic.QueueMpsc(promise).Node;
+    pub const NextTickNode = std.atomic.Queue(promise).Node;
 
     pub const ResumeNode = struct {
         id: Id,
@@ -77,7 +77,7 @@ pub const Loop = struct {
             .pending_event_count = 0,
             .allocator = allocator,
             .os_data = undefined,
-            .next_tick_queue = std.atomic.QueueMpsc(promise).init(),
+            .next_tick_queue = std.atomic.Queue(promise).init(),
             .dispatch_lock = 1, // start locked so threads go directly into epoll wait
             .extra_threads = undefined,
             .available_eventfd_resume_nodes = std.atomic.Stack(ResumeNode.EventFd).init(),
test/tests.zig
@@ -47,12 +47,13 @@ const test_targets = []TestTarget{
 
 const max_stdout_size = 1 * 1024 * 1024; // 1 MB
 
-pub fn addCompareOutputTests(b: *build.Builder, test_filter: ?[]const u8) *build.Step {
+pub fn addCompareOutputTests(b: *build.Builder, test_filter: ?[]const u8, modes: []const Mode) *build.Step {
     const cases = b.allocator.create(CompareOutputContext{
         .b = b,
         .step = b.step("test-compare-output", "Run the compare output tests"),
         .test_index = 0,
         .test_filter = test_filter,
+        .modes = modes,
     }) catch unreachable;
 
     compare_output.addCases(cases);
@@ -60,12 +61,13 @@ pub fn addCompareOutputTests(b: *build.Builder, test_filter: ?[]const u8) *build
     return cases.step;
 }
 
-pub fn addRuntimeSafetyTests(b: *build.Builder, test_filter: ?[]const u8) *build.Step {
+pub fn addRuntimeSafetyTests(b: *build.Builder, test_filter: ?[]const u8, modes: []const Mode) *build.Step {
     const cases = b.allocator.create(CompareOutputContext{
         .b = b,
         .step = b.step("test-runtime-safety", "Run the runtime safety tests"),
         .test_index = 0,
         .test_filter = test_filter,
+        .modes = modes,
     }) catch unreachable;
 
     runtime_safety.addCases(cases);
@@ -73,12 +75,13 @@ pub fn addRuntimeSafetyTests(b: *build.Builder, test_filter: ?[]const u8) *build
     return cases.step;
 }
 
-pub fn addCompileErrorTests(b: *build.Builder, test_filter: ?[]const u8) *build.Step {
+pub fn addCompileErrorTests(b: *build.Builder, test_filter: ?[]const u8, modes: []const Mode) *build.Step {
     const cases = b.allocator.create(CompileErrorContext{
         .b = b,
         .step = b.step("test-compile-errors", "Run the compile error tests"),
         .test_index = 0,
         .test_filter = test_filter,
+        .modes = modes,
     }) catch unreachable;
 
     compile_errors.addCases(cases);
@@ -99,12 +102,13 @@ pub fn addBuildExampleTests(b: *build.Builder, test_filter: ?[]const u8) *build.
     return cases.step;
 }
 
-pub fn addAssembleAndLinkTests(b: *build.Builder, test_filter: ?[]const u8) *build.Step {
+pub fn addAssembleAndLinkTests(b: *build.Builder, test_filter: ?[]const u8, modes: []const Mode) *build.Step {
     const cases = b.allocator.create(CompareOutputContext{
         .b = b,
         .step = b.step("test-asm-link", "Run the assemble and link tests"),
         .test_index = 0,
         .test_filter = test_filter,
+        .modes = modes,
     }) catch unreachable;
 
     assemble_and_link.addCases(cases);
@@ -173,6 +177,7 @@ pub const CompareOutputContext = struct {
     step: *build.Step,
     test_index: usize,
     test_filter: ?[]const u8,
+    modes: []const Mode,
 
     const Special = enum {
         None,
@@ -423,12 +428,7 @@ pub const CompareOutputContext = struct {
                 self.step.dependOn(&run_and_cmp_output.step);
             },
             Special.None => {
-                for ([]Mode{
-                    Mode.Debug,
-                    Mode.ReleaseSafe,
-                    Mode.ReleaseFast,
-                    Mode.ReleaseSmall,
-                }) |mode| {
+                for (self.modes) |mode| {
                     const annotated_case_name = fmt.allocPrint(self.b.allocator, "{} {} ({})", "compare-output", case.name, @tagName(mode)) catch unreachable;
                     if (self.test_filter) |filter| {
                         if (mem.indexOf(u8, annotated_case_name, filter) == null) continue;
@@ -483,6 +483,7 @@ pub const CompileErrorContext = struct {
     step: *build.Step,
     test_index: usize,
     test_filter: ?[]const u8,
+    modes: []const Mode,
 
     const TestCase = struct {
         name: []const u8,
@@ -673,10 +674,7 @@ pub const CompileErrorContext = struct {
     pub fn addCase(self: *CompileErrorContext, case: *const TestCase) void {
         const b = self.b;
 
-        for ([]Mode{
-            Mode.Debug,
-            Mode.ReleaseFast,
-        }) |mode| {
+        for (self.modes) |mode| {
             const annotated_case_name = fmt.allocPrint(self.b.allocator, "compile-error {} ({})", case.name, @tagName(mode)) catch unreachable;
             if (self.test_filter) |filter| {
                 if (mem.indexOf(u8, annotated_case_name, filter) == null) continue;
build.zig
@@ -91,11 +91,11 @@ pub fn build(b: *Builder) !void {
 
     test_step.dependOn(tests.addPkgTests(b, test_filter, "std/special/compiler_rt/index.zig", "compiler-rt", "Run the compiler_rt tests", modes));
 
-    test_step.dependOn(tests.addCompareOutputTests(b, test_filter));
+    test_step.dependOn(tests.addCompareOutputTests(b, test_filter, modes));
     test_step.dependOn(tests.addBuildExampleTests(b, test_filter));
-    test_step.dependOn(tests.addCompileErrorTests(b, test_filter));
-    test_step.dependOn(tests.addAssembleAndLinkTests(b, test_filter));
-    test_step.dependOn(tests.addRuntimeSafetyTests(b, test_filter));
+    test_step.dependOn(tests.addCompileErrorTests(b, test_filter, modes));
+    test_step.dependOn(tests.addAssembleAndLinkTests(b, test_filter, modes));
+    test_step.dependOn(tests.addRuntimeSafetyTests(b, test_filter, modes));
     test_step.dependOn(tests.addTranslateCTests(b, test_filter));
     test_step.dependOn(tests.addGenHTests(b, test_filter));
     test_step.dependOn(docs_step);
CMakeLists.txt
@@ -432,8 +432,7 @@ set(ZIG_STD_FILES
     "array_list.zig"
     "atomic/index.zig"
     "atomic/int.zig"
-    "atomic/queue_mpmc.zig"
-    "atomic/queue_mpsc.zig"
+    "atomic/queue.zig"
     "atomic/stack.zig"
     "base64.zig"
     "buf_map.zig"