Commit cfe5defd02

Pat Tullmann <pat.github@tullmann.org>
2025-04-03 01:14:50
linux: futex v1 API cleanup
* Use `packed struct` for flags arguments. So, instead of `linux.FUTEX.WAIT` use `.{ .cmd = .WAIT, .private = true }` * rename `futex_wait` and `futex_wake`, which didn't actually specify wait/wake, as `futex_3arg` and `futex_4arg` (as it's the number of parameters that is different; the `op` is whatever is specified). * expose the full six-arg flavor of the syscall (for some of the advanced ops), and add packed structs for their arguments. * Use a `packed union` to support the 4th parameter which is sometimes a `timespec` pointer, and sometimes a `u32`. * Add tests that make sure the structure layout is correct and that the basic argument passing is working (no actual futexes are contended).
1 parent 850655f
Changed files (4)
lib
lib/std/os/linux/test.zig
@@ -207,6 +207,96 @@ test "sysinfo" {
     try expect(info.mem_unit <= std.heap.page_size_max);
 }
 
+// Compile-time layout checks for the futex v1 argument types.
+comptime {
+    // With a zero command, setting only `private` must encode as 128 and
+    // setting only `realtime` must encode as 256.
+    const private_only: linux.FUTEX_OP = .{ .cmd = @enumFromInt(0), .private = true, .realtime = false };
+    const realtime_only: linux.FUTEX_OP = .{ .cmd = @enumFromInt(0), .private = false, .realtime = true };
+    std.debug.assert(@as(u32, @bitCast(private_only)) == 128);
+    std.debug.assert(@as(u32, @bitCast(realtime_only)) == 256);
+
+    // Both members of futex_param4 must share storage: a value written
+    // through `val2` has to read back unchanged through `timeout`.
+    const via_val2: linux.futex_param4 = .{ .val2 = 0xaabbcc };
+    std.debug.assert(0xaabbcc == @intFromPtr(via_val2.timeout));
+}
+
+// Smoke test for the futex v1 wrappers.  Every call operates on a lock that
+// no other thread waits on, so only argument passing and the immediate
+// syscall result are checked.  The first three cases are issued twice: once
+// through the six-argument `futex` and once through the reduced-arity helper.
+test "futex v1" {
+    var lock: std.atomic.Value(u32) = std.atomic.Value(u32).init(1);
+    var rc: usize = 0;
+
+    // No-op wait, lock value is not expected value
+    // (the lock holds 1 while the expected value passed in is 2)
+    rc = linux.futex(&lock.raw, .{ .cmd = .WAIT, .private = true }, 2, .{ .timeout = null }, null, 0);
+    try expectEqual(.AGAIN, linux.E.init(rc));
+
+    rc = linux.futex_4arg(&lock.raw, .{ .cmd = .WAIT, .private = true }, 2, null);
+    try expectEqual(.AGAIN, linux.E.init(rc));
+
+    // Short-fuse wait, timeout kicks in
+    // (expected value 1 matches the lock, and the timeout is 2 nanoseconds)
+    rc = linux.futex(&lock.raw, .{ .cmd = .WAIT, .private = true }, 1, .{ .timeout = &.{ .sec = 0, .nsec = 2 } }, null, 0);
+    try expectEqual(.TIMEDOUT, linux.E.init(rc));
+
+    rc = linux.futex_4arg(&lock.raw, .{ .cmd = .WAIT, .private = true }, 1, &.{ .sec = 0, .nsec = 2 });
+    try expectEqual(.TIMEDOUT, linux.E.init(rc));
+
+    // Wakeup (no waiters)
+    rc = linux.futex(&lock.raw, .{ .cmd = .WAKE, .private = true }, 2, .{ .timeout = null }, null, 0);
+    try expectEqual(0, rc);
+
+    rc = linux.futex_3arg(&lock.raw, .{ .cmd = .WAKE, .private = true }, 2);
+    try expectEqual(0, rc);
+
+    // CMP_REQUEUE - val3 mismatch
+    // (val3 is 99 while the lock holds 1)
+    rc = linux.futex(&lock.raw, .{ .cmd = .CMP_REQUEUE, .private = true }, 2, .{ .val2 = 0 }, null, 99);
+    try expectEqual(.AGAIN, linux.E.init(rc));
+
+    // CMP_REQUEUE - requeue (but no waiters, so ... not much)
+    {
+        const val3 = 1;
+        const wake_nr = 3;
+        const requeue_max = std.math.maxInt(u31);
+        var target_lock: std.atomic.Value(u32) = std.atomic.Value(u32).init(1);
+        // Here the 4th parameter is used as an integer (`val2`) rather than a timeout.
+        rc = linux.futex(&lock.raw, .{ .cmd = .CMP_REQUEUE, .private = true }, wake_nr, .{ .val2 = requeue_max }, &target_lock.raw, val3);
+        try expectEqual(0, rc);
+    }
+
+    // WAKE_OP - just to see if we can construct the arguments ...
+    {
+        var lock2: std.atomic.Value(u32) = std.atomic.Value(u32).init(1);
+        const wake1_nr = 2;
+        const wake2_nr = 3;
+        const wake_op = linux.FUTEX_WAKE_OP{
+            .cmd = .ANDN,
+            .arg_shift = true,
+            .cmp = .LT,
+            .oparg = 4,
+            .cmdarg = 5,
+        };
+
+        // The packed struct is bit-cast into the u32 `val3` argument.
+        rc = linux.futex(&lock.raw, .{ .cmd = .WAKE_OP, .private = true }, wake1_nr, .{ .val2 = wake2_nr }, &lock2.raw, @bitCast(wake_op));
+        try expectEqual(0, rc);
+    }
+
+    // WAIT_BITSET
+    {
+        // val1 return early
+        rc = linux.futex(&lock.raw, .{ .cmd = .WAIT_BITSET, .private = true }, 2, .{ .timeout = null }, null, 0xfff);
+        try expectEqual(.AGAIN, linux.E.init(rc));
+
+        // timeout wait
+        const timeout: linux.timespec = .{ .sec = 0, .nsec = 2 };
+        rc = linux.futex(&lock.raw, .{ .cmd = .WAIT_BITSET, .private = true }, 1, .{ .timeout = &timeout }, null, 0xfff);
+        try expectEqual(.TIMEDOUT, linux.E.init(rc));
+    }
+
+    // WAKE_BITSET
+    {
+        rc = linux.futex(&lock.raw, .{ .cmd = .WAKE_BITSET, .private = true }, 2, .{ .timeout = null }, null, 0xfff000);
+        try expectEqual(0, rc);
+
+        // bitmask must have at least 1 bit set:
+        rc = linux.futex(&lock.raw, .{ .cmd = .WAKE_BITSET, .private = true }, 2, .{ .timeout = null }, null, 0);
+        try expectEqual(.INVAL, linux.E.init(rc));
+    }
+}
+
 test {
     _ = linux.IoUring;
 }
lib/std/os/linux.zig
@@ -673,12 +673,34 @@ pub fn fallocate(fd: i32, mode: i32, offset: i64, length: i64) usize {
     }
 }
 
-pub fn futex_wait(uaddr: *const i32, futex_op: u32, val: i32, timeout: ?*const timespec) usize {
-    return syscall4(.futex, @intFromPtr(uaddr), futex_op, @as(u32, @bitCast(val)), @intFromPtr(timeout));
+/// The 4th parameter to the v1 futex syscall can either be an optional
+/// pointer to a timespec, or a uint32, depending on which "op" is being
+/// performed.
+pub const futex_param4 = extern union {
+    /// Interpretation as an optional timeout; `null` means no timespec is passed.
+    timeout: ?*const timespec,
+    /// On all platforms only the bottom 32-bits of `val2` are relevant.
+    /// This is pointer-sized (`usize`) to match the pointer in the union.
+    val2: usize,
+};
+
+/// The futex v1 syscall in its full six-argument form.  See also the newer
+/// futex2_{wait,wakeup,requeue,waitv} syscalls.
+///
+/// `futex_op` carries a sub-command plus flags.  The sub-command defines
+/// which of the subsequent parameters are relevant.  `val2timeout` is the
+/// 4th syscall argument: either a timeout pointer or an integer, depending
+/// on the sub-command.
+pub fn futex(uaddr: *const anyopaque, futex_op: FUTEX_OP, val: u32, val2timeout: futex_param4, uaddr2: ?*const anyopaque, val3: u32) usize {
+    const op_bits: u32 = @bitCast(futex_op);
+    // The union is always read through `timeout`; this hands the kernel the
+    // raw 4th-argument word whichever member the caller initialized.
+    const param4 = @intFromPtr(val2timeout.timeout);
+    const addr1 = @intFromPtr(uaddr);
+    const addr2 = @intFromPtr(uaddr2);
+    return syscall6(.futex, addr1, op_bits, val, param4, addr2, val3);
+}
+
+/// Three-argument variation of the v1 futex call.  Only suitable for a
+/// futex_op whose command ignores the remaining arguments (e.g., `.WAKE`).
+pub fn futex_3arg(uaddr: *const anyopaque, futex_op: FUTEX_OP, val: u32) usize {
+    const op_bits: u32 = @bitCast(futex_op);
+    const addr = @intFromPtr(uaddr);
+    return syscall3(.futex, addr, op_bits, val);
 }
 
-pub fn futex_wake(uaddr: *const i32, futex_op: u32, val: i32) usize {
-    return syscall3(.futex, @intFromPtr(uaddr), futex_op, @as(u32, @bitCast(val)));
+/// Four-argument variation of the v1 futex call.  Only suitable for a
+/// futex_op whose command ignores the remaining arguments (e.g., `.WAIT`).
+/// A `null` timeout is passed to the kernel as address zero.
+pub fn futex_4arg(uaddr: *const anyopaque, futex_op: FUTEX_OP, val: u32, timeout: ?*const timespec) usize {
+    const op_bits: u32 = @bitCast(futex_op);
+    const addr = @intFromPtr(uaddr);
+    const timeout_addr = @intFromPtr(timeout);
+    return syscall4(.futex, addr, op_bits, val, timeout_addr);
 }
 
 /// Given an array of `futex_waitv`, wait on each uaddr.
@@ -3385,29 +3407,6 @@ pub const FALLOC = struct {
     pub const FL_UNSHARE_RANGE = 0x40;
 };
 
-pub const FUTEX = struct {
-    pub const WAIT = 0;
-    pub const WAKE = 1;
-    pub const FD = 2;
-    pub const REQUEUE = 3;
-    pub const CMP_REQUEUE = 4;
-    pub const WAKE_OP = 5;
-    pub const LOCK_PI = 6;
-    pub const UNLOCK_PI = 7;
-    pub const TRYLOCK_PI = 8;
-    pub const WAIT_BITSET = 9;
-    pub const WAKE_BITSET = 10;
-    pub const WAIT_REQUEUE_PI = 11;
-    pub const CMP_REQUEUE_PI = 12;
-
-    pub const PRIVATE_FLAG = 128;
-
-    pub const CLOCK_REALTIME = 256;
-
-    /// Max numbers of elements in a `futex_waitv` array.
-    pub const WAITV_MAX = 128;
-};
-
 pub const FUTEX2 = struct {
     pub const SIZE_U8 = 0x00;
     pub const SIZE_U16 = 0x01;
@@ -3418,6 +3417,69 @@ pub const FUTEX2 = struct {
     pub const PRIVATE = FUTEX.PRIVATE_FLAG;
 };
 
+/// Futex v1 API commands.  See futex man page for each command's
+/// interpretation of the futex arguments.
+///
+/// This is the 7-bit `cmd` field of `FUTEX_OP`.
+pub const FUTEX_COMMAND = enum(u7) {
+    WAIT = 0,
+    WAKE = 1,
+    FD = 2,
+    REQUEUE = 3,
+    CMP_REQUEUE = 4,
+    WAKE_OP = 5,
+    LOCK_PI = 6,
+    UNLOCK_PI = 7,
+    TRYLOCK_PI = 8,
+    WAIT_BITSET = 9,
+    WAKE_BITSET = 10,
+    WAIT_REQUEUE_PI = 11,
+    CMP_REQUEUE_PI = 12,
+};
+
+/// Futex v1 API command and flags for the `futex_op` parameter.
+///
+/// Bit layout, least significant first: the 7-bit `cmd`, then `private`
+/// (integer value 128), then `realtime` (integer value 256).  The remaining
+/// 23 bits are reserved and default to zero.
+pub const FUTEX_OP = packed struct(u32) {
+    cmd: FUTEX_COMMAND,
+    private: bool,
+    realtime: bool = false, // realtime clock vs. monotonic clock
+    _reserved: u23 = 0,
+};
+
+/// Futex v1 FUTEX_WAKE_OP `val3` operation.
+///
+/// The kernel decodes `val3` as the C macro `FUTEX_OP(op, oparg, cmp, cmparg)`:
+///   (op << 28) | (cmp << 24) | (oparg << 12) | cmparg
+/// Packed struct fields are laid out starting at the least significant bit,
+/// so the fields are declared in the reverse of the macro's argument order.
+pub const FUTEX_WAKE_OP = packed struct(u32) {
+    /// Comparison argument (`cmparg` in the C API).  Bits 0-11.
+    cmdarg: u12,
+    /// Operation argument.  Bits 12-23.
+    oparg: u12,
+    /// Comparison selector.  Bits 24-27.
+    cmp: FUTEX_WAKE_OP_CMP,
+    /// Operation selector.  Bits 28-30.
+    cmd: FUTEX_WAKE_OP_CMD,
+    /// From C API `FUTEX_OP_ARG_SHIFT`:  Use (1 << oparg) as operand.  Bit 31.
+    arg_shift: bool = false,
+};
+
+/// Futex v1 cmd for FUTEX_WAKE_OP `val3` command.
+///
+/// This is the 3-bit `cmd` field of `FUTEX_WAKE_OP`.  In the per-value
+/// notes below, `uaddr2` stands for the futex word being modified.
+pub const FUTEX_WAKE_OP_CMD = enum(u3) {
+    /// uaddr2 = oparg
+    SET = 0,
+    /// uaddr2 += oparg
+    ADD = 1,
+    /// uaddr2 |= oparg
+    OR = 2,
+    /// uaddr2 &= ~oparg
+    ANDN = 3,
+    /// uaddr2 ^= oparg
+    XOR = 4,
+};
+
+/// Futex v1 comparison op for FUTEX_WAKE_OP `val3` cmp.
+///
+/// This is the 4-bit `cmp` field of `FUTEX_WAKE_OP`.
+/// NOTE(review): presumably the comparison is between the old value of the
+/// second futex word and the comparison argument; confirm against futex(2).
+pub const FUTEX_WAKE_OP_CMP = enum(u4) {
+    EQ = 0,
+    NE = 1,
+    LT = 2,
+    LE = 3,
+    GT = 4,
+    GE = 5,
+};
+
+/// Maximum number of elements in a `futex_waitv` array.
+pub const FUTEX2_WAITV_MAX = 128;
+
 pub const PROT = struct {
     /// page can not be accessed
     pub const NONE = 0x0;
lib/std/Thread/Futex.zig
@@ -262,10 +262,10 @@ const LinuxImpl = struct {
             ts.nsec = @as(@TypeOf(ts.nsec), @intCast(timeout_ns % std.time.ns_per_s));
         }
 
-        const rc = linux.futex_wait(
-            @as(*const i32, @ptrCast(&ptr.raw)),
-            linux.FUTEX.PRIVATE_FLAG | linux.FUTEX.WAIT,
-            @as(i32, @bitCast(expect)),
+        const rc = linux.futex_4arg(
+            &ptr.raw,
+            .{ .cmd = .WAIT, .private = true },
+            expect,
             if (timeout != null) &ts else null,
         );
 
@@ -284,10 +284,10 @@ const LinuxImpl = struct {
     }
 
     fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
-        const rc = linux.futex_wake(
-            @as(*const i32, @ptrCast(&ptr.raw)),
-            linux.FUTEX.PRIVATE_FLAG | linux.FUTEX.WAKE,
-            std.math.cast(i32, max_waiters) orelse std.math.maxInt(i32),
+        const rc = linux.futex_3arg(
+            &ptr.raw,
+            .{ .cmd = .WAKE, .private = true },
+            @min(max_waiters, std.math.maxInt(i32)),
         );
 
         switch (linux.E.init(rc)) {
lib/std/Thread.zig
@@ -1539,10 +1539,10 @@ const LinuxThreadImpl = struct {
                 continue;
             }
 
-            switch (linux.E.init(linux.futex_wait(
+            switch (linux.E.init(linux.futex_4arg(
                 &self.thread.child_tid.raw,
-                linux.FUTEX.WAIT,
-                tid,
+                .{ .cmd = .WAIT, .private = false },
+                @bitCast(tid),
                 null,
             ))) {
                 .SUCCESS => continue,