Commit 3b80fde6f4

Andrew Kelley <andrew@ziglang.org>
2025-10-01 23:30:00
std.os.linux: remove sendmmsg workaround
This "fix" is too opinionated to belong here. Better instead to document the pitfalls.
1 parent cde5a51
Changed files (1)
lib
std
lib/std/os/linux.zig
@@ -1,10 +1,8 @@
 //! This file provides the system interface functions for Linux matching those
 //! that are provided by libc, whether or not libc is linked. The following
 //! abstractions are made:
-//! * Work around kernel bugs and limitations. For example, see sendmmsg.
 //! * Implement all the syscalls in the same way that libc functions will
 //!   provide `rename` when only the `renameat` syscall exists.
-//! * Does not support POSIX thread cancellation.
 const std = @import("../std.zig");
 const builtin = @import("builtin");
 const assert = std.debug.assert;
@@ -1836,7 +1834,7 @@ pub fn seteuid(euid: uid_t) usize {
     // id will not be changed. Since uid_t is unsigned, this wraps around to the
     // max value in C.
     comptime assert(@typeInfo(uid_t) == .int and @typeInfo(uid_t).int.signedness == .unsigned);
-    return setresuid(std.math.maxInt(uid_t), euid, std.math.maxInt(uid_t));
+    return setresuid(maxInt(uid_t), euid, maxInt(uid_t));
 }
 
 pub fn setegid(egid: gid_t) usize {
@@ -1847,7 +1845,7 @@ pub fn setegid(egid: gid_t) usize {
     // id will not be changed. Since gid_t is unsigned, this wraps around to the
     // max value in C.
     comptime assert(@typeInfo(uid_t) == .int and @typeInfo(uid_t).int.signedness == .unsigned);
-    return setresgid(std.math.maxInt(gid_t), egid, std.math.maxInt(gid_t));
+    return setresgid(maxInt(gid_t), egid, maxInt(gid_t));
 }
 
 pub fn getresuid(ruid: *uid_t, euid: *uid_t, suid: *uid_t) usize {
@@ -2081,44 +2079,13 @@ pub fn sendmsg(fd: i32, msg: *const msghdr_const, flags: u32) usize {
     }
 }
 
+/// Warning: libc is defined to have incompatible integer types with the
+/// corresponding kernel data structures for this syscall.
+///
+/// Warning: on 64-bit systems, if any message length would exceed `maxInt(i32)`,
+/// number of bytes sent cannot be determined, because the kernel uses `ssize_t`
+/// for `sendmsg` return value but `int` for the corresponding values here.
 pub fn sendmmsg(fd: i32, msgvec: [*]mmsghdr_const, vlen: u32, flags: u32) usize {
-    if (@typeInfo(usize).int.bits > @typeInfo(@typeInfo(mmsghdr).@"struct".fields[1].type).int.bits) {
-        // workaround kernel brokenness:
-        // if adding up all iov_len overflows a i32 then split into multiple calls
-        // see https://www.openwall.com/lists/musl/2014/06/07/5
-        const kvlen = if (vlen > IOV_MAX) IOV_MAX else vlen; // matches kernel
-        var next_unsent: usize = 0;
-        for (msgvec[0..kvlen], 0..) |*msg, i| {
-            var size: i32 = 0;
-            const msg_iovlen = @as(usize, @intCast(msg.hdr.iovlen)); // kernel side this is treated as unsigned
-            for (msg.hdr.iov[0..msg_iovlen]) |iov| {
-                if (iov.len > std.math.maxInt(i32) or @addWithOverflow(size, @as(i32, @intCast(iov.len)))[1] != 0) {
-                    // batch-send all messages up to the current message
-                    if (next_unsent < i) {
-                        const batch_size = i - next_unsent;
-                        const r = syscall4(.sendmmsg, @as(usize, @bitCast(@as(isize, fd))), @intFromPtr(&msgvec[next_unsent]), batch_size, flags);
-                        if (E.init(r) != .SUCCESS) return next_unsent;
-                        if (r < batch_size) return next_unsent + r;
-                    }
-                    // send current message as own packet
-                    const r = sendmsg(fd, &msg.hdr, flags);
-                    if (E.init(r) != .SUCCESS) return r;
-                    // Linux limits the total bytes sent by sendmsg to INT_MAX, so this cast is safe.
-                    msg.len = @as(u32, @intCast(r));
-                    next_unsent = i + 1;
-                    break;
-                }
-                size += @intCast(iov.len);
-            }
-        }
-        if (next_unsent < kvlen or next_unsent == 0) { // want to make sure at least one syscall occurs (e.g. to trigger MSG.EOR)
-            const batch_size = kvlen - next_unsent;
-            const r = syscall4(.sendmmsg, @as(usize, @bitCast(@as(isize, fd))), @intFromPtr(&msgvec[next_unsent]), batch_size, flags);
-            if (E.init(r) != .SUCCESS) return r;
-            return next_unsent + r;
-        }
-        return kvlen;
-    }
     return syscall4(.sendmmsg, @as(usize, @bitCast(@as(isize, fd))), @intFromPtr(msgvec), vlen, flags);
 }
 
@@ -8700,7 +8667,7 @@ pub const PR = enum(i32) {
     pub const SET_MM_MAP = 14;
     pub const SET_MM_MAP_SIZE = 15;
 
-    pub const SET_PTRACER_ANY = std.math.maxInt(c_ulong);
+    pub const SET_PTRACER_ANY = maxInt(c_ulong);
 
     pub const FP_MODE_FR = 1 << 0;
     pub const FP_MODE_FRE = 1 << 1;