Commit 53987c932c

Andrew Kelley <andrew@ziglang.org>
2020-12-18 23:38:38
std.crypto.random: introduce fork safety
Everybody gets what they want!

 * AT_RANDOM is completely ignored.
 * On Linux, MADV_WIPEONFORK is used to provide fork safety.
 * On pthread systems, `pthread_atfork` is used to provide fork safety.
 * For systems that do not have the capability to provide fork safety, the implementation falls back to calling getrandom() every time.
 * If madvise is unavailable or returns an error, or pthread_atfork fails for whatever reason, it falls back to calling getrandom() every time.
 * Applications may choose to opt out of fork safety.
 * Applications may choose to opt in to unconditionally calling getrandom() for every call to std.crypto.random.fillFn.
 * Added `std.meta.globalOption`.
 * Added `std.os.madvise` and related bits.
 * Bumped up the size of the main thread TLS buffer. See the comment there for justification.
 * Simpler hot path in TLS initialization.
1 parent 2b8dcc7
lib/std/c/linux.zig
@@ -106,6 +106,12 @@ pub extern "c" fn prlimit(pid: pid_t, resource: rlimit_resource, new_limit: *con
 pub extern "c" fn posix_memalign(memptr: *?*c_void, alignment: usize, size: usize) c_int;
 pub extern "c" fn malloc_usable_size(?*const c_void) usize;
 
+/// Binding for the C library's `madvise`.
+/// The pointer type requires `addr` to be aligned to `std.mem.page_size`.
+pub extern "c" fn madvise(
+    addr: *align(std.mem.page_size) c_void,
+    length: usize,
+    advice: c_uint,
+) c_int;
+
 pub const pthread_attr_t = extern struct {
     __size: [56]u8,
     __align: c_long,
lib/std/crypto/tlcsprng.zig
@@ -16,47 +16,141 @@ const mem = std.mem;
 /// We use this as a layer of indirection because global const pointers cannot
 /// point to thread-local variables.
 pub var interface = std.rand.Random{ .fillFn = tlsCsprngFill };
-pub threadlocal var csprng_state: std.crypto.core.Gimli = undefined;
-pub threadlocal var csprng_state_initialized = false;
-fn tlsCsprngFill(r: *const std.rand.Random, buf: []u8) void {
+
+// Comptime flag: true for the OS tags listed below, false for every other
+// target. It gates `want_fork_safety`, so targets outside this list never
+// attempt to set up fork protection.
+const os_has_fork = switch (std.Target.current.os.tag) {
+    .dragonfly,
+    .freebsd,
+    .ios,
+    .kfreebsd,
+    .linux,
+    .macos,
+    .netbsd,
+    .openbsd,
+    .solaris,
+    .tvos,
+    .watchos,
+    => true,
+
+    else => false,
+};
+// True when libc is linked and std.c declares `arc4random_buf`; in that case
+// `tlsCsprngFill` delegates to it and the state in this file is not used.
+const os_has_arc4random = std.builtin.link_libc and @hasDecl(std.c, "arc4random_buf");
+// Fork safety is wanted only when the OS has fork, arc4random is not in use,
+// and the root source file has not set `crypto_fork_safety` to false
+// (a missing declaration counts as true).
+const want_fork_safety = os_has_fork and !os_has_arc4random and
+    (std.meta.globalOption("crypto_fork_safety", bool) orelse true);
+// Comptime guess at whether MADV_WIPEONFORK may be usable: asks whether the
+// target is Linux 4.14 or newer, and treats an indeterminate (null) answer as
+// "maybe" so that the runtime madvise call in `tlsCsprngFill` decides.
+const maybe_have_wipe_on_fork = std.Target.current.os.isAtLeast(.linux, .{
+    .major = 4,
+    .minor = 14,
+}) orelse true;
+
+// Per-thread CSPRNG state that must not survive into a forked child.
+// NOTE(review): `uninitialized` is listed first, presumably so that memory
+// zeroed by the fork handling reads back as `.uninitialized` and triggers a
+// re-seed on the next call — confirm.
+const WipeMe = struct {
+    init_state: enum { uninitialized, initialized, failed },
+    gimli: std.crypto.core.Gimli,
+};
+// Page alignment is required when madvise may be applied to `wipe_me`
+// (the call site casts its address to a page-aligned pointer); otherwise the
+// natural alignment of the struct suffices.
+const wipe_align = if (maybe_have_wipe_on_fork) mem.page_size else @alignOf(WipeMe);
+
+// The thread-local instance. `gimli` stays undefined until `initAndFill`
+// seeds it and flips `init_state` to `.initialized`.
+threadlocal var wipe_me: WipeMe align(wipe_align) = .{
+    .gimli = undefined,
+    .init_state = .uninitialized,
+};
+
+/// `fillFn` behind `interface`: fills `buffer` with random bytes.
+/// Sources, in the order they are tried below:
+///  1. libc `arc4random_buf`, when libc is linked and std.c declares it.
+///  2. `fillWithOsEntropy` on every call, when the root source file sets
+///     `crypto_always_getrandom` to true.
+///  3. The thread-local Gimli state in `wipe_me`. On first use, when fork
+///     safety is wanted, it is seeded only after a protection mechanism
+///     (MADV_WIPEONFORK, else pthread_atfork) has been set up successfully;
+///     when fork safety is not wanted it is seeded directly.
+///  4. `fillWithOsEntropy` on every call once `init_state` is `.failed`.
+fn tlsCsprngFill(_: *const std.rand.Random, buffer: []u8) void {
     if (std.builtin.link_libc and @hasDecl(std.c, "arc4random_buf")) {
         // arc4random is already a thread-local CSPRNG.
-        return std.c.arc4random_buf(buf.ptr, buf.len);
+        return std.c.arc4random_buf(buffer.ptr, buffer.len);
     }
-    if (!csprng_state_initialized) {
-        var seed: [seed_len]u8 = undefined;
-        // Because we panic on getrandom() failing, we provide the opportunity
-        // to override the default seed function. This also makes
-        // `std.crypto.random` available on freestanding targets, provided that
-        // the `cryptoRandomSeed` function is provided.
-        if (@hasDecl(root, "cryptoRandomSeed")) {
-            root.cryptoRandomSeed(&seed);
-        } else {
-            defaultSeed(&seed);
-        }
-        init(seed);
+    // Allow applications to decide they would prefer to have every call to
+    // std.crypto.random always make an OS syscall, rather than rely on an
+    // application implementation of a CSPRNG.
+    if (comptime std.meta.globalOption("crypto_always_getrandom", bool) orelse false) {
+        return fillWithOsEntropy(buffer);
     }
-    if (buf.len != 0) {
-        csprng_state.squeeze(buf);
+    switch (wipe_me.init_state) {
+        .uninitialized => {
+            if (want_fork_safety) {
+                if (maybe_have_wipe_on_fork) {
+                    if (std.os.madvise(
+                        @ptrCast([*]align(mem.page_size) u8, &wipe_me),
+                        @sizeOf(@TypeOf(wipe_me)),
+                        std.os.MADV_WIPEONFORK,
+                    )) |_| {
+                        return initAndFill(buffer);
+                    } else |_| if (std.Thread.use_pthreads) {
+                        return setupPthreadAtforkAndFill(buffer);
+                    } else {
+                        // Since we failed to set up fork safety, we fall back to always
+                        // calling getrandom every time.
+                        wipe_me.init_state = .failed;
+                        return fillWithOsEntropy(buffer);
+                    }
+                } else if (std.Thread.use_pthreads) {
+                    return setupPthreadAtforkAndFill(buffer);
+                } else {
+                    // We have no mechanism to provide fork safety, but we want fork safety,
+                    // so we fall back to calling getrandom every time.
+                    wipe_me.init_state = .failed;
+                    return fillWithOsEntropy(buffer);
+                }
+            } else {
+                return initAndFill(buffer);
+            }
+        },
+        .initialized => {
+            return fillWithCsprng(buffer);
+        },
+        .failed => {
+            if (want_fork_safety) {
+                return fillWithOsEntropy(buffer);
+            } else {
+                unreachable;
+            }
+        },
+    }
+}
+
+/// Registers `childAtForkHandler` as the child-side `pthread_atfork` handler,
+/// then seeds the CSPRNG and fills `buffer`. If registration returns nonzero,
+/// marks this thread's state `.failed` and fills `buffer` from the OS instead.
+fn setupPthreadAtforkAndFill(buffer: []u8) void {
+    const failed = std.c.pthread_atfork(null, null, childAtForkHandler) != 0;
+    if (failed) {
+        wipe_me.init_state = .failed;
+        return fillWithOsEntropy(buffer);
     } else {
-        csprng_state.permute();
+        return initAndFill(buffer);
     }
-    mem.set(u8, csprng_state.toSlice()[0..std.crypto.core.Gimli.RATE], 0);
 }
 
-fn defaultSeed(buffer: *[seed_len]u8) void {
-    std.os.getrandom(buffer) catch @panic("getrandom() failed to seed thread-local CSPRNG");
+/// Handler passed to `pthread_atfork` as the child callback: securely zeroes
+/// every byte of the calling thread's `wipe_me`.
+fn childAtForkHandler() callconv(.C) void {
+    const state_bytes = @ptrCast([*]u8, &wipe_me);
+    std.crypto.utils.secureZero(u8, state_bytes[0..@sizeOf(WipeMe)]);
 }
 
-pub const seed_len = 16;
+/// Produces output from the already-seeded thread-local Gimli state.
+/// A zero-length `buffer` still advances the state by one permutation.
+fn fillWithCsprng(buffer: []u8) void {
+    const gimli = &wipe_me.gimli;
+    if (buffer.len == 0) {
+        gimli.permute();
+    } else {
+        gimli.squeeze(buffer);
+    }
+    // Clear the rate portion of the state after every call.
+    // NOTE(review): presumably so earlier output cannot be recovered from a
+    // later snapshot of the state — confirm.
+    mem.set(u8, gimli.toSlice()[0..std.crypto.core.Gimli.RATE], 0);
+}
+
+/// Fills `buffer` via `std.os.getrandom`; panics if that call fails.
+fn fillWithOsEntropy(buffer: []u8) void {
+    if (std.os.getrandom(buffer)) |_| {} else |_| {
+        @panic("getrandom() failed to provide entropy");
+    }
+}
 
-pub fn init(seed: [seed_len]u8) void {
-    var initial_state: [std.crypto.core.Gimli.BLOCKBYTES]u8 = undefined;
-    mem.copy(u8, initial_state[0..seed_len], &seed);
-    mem.set(u8, initial_state[seed_len..], 0);
-    csprng_state = std.crypto.core.Gimli.init(initial_state);
+/// Seeds the thread-local Gimli state, marks it `.initialized`, then fills
+/// `buffer` from it. The seed is a full Gimli block taken from
+/// `root.cryptoRandomSeed` when the root source file declares it, otherwise
+/// from `fillWithOsEntropy`.
+fn initAndFill(buffer: []u8) void {
+    var seed: [std.crypto.core.Gimli.BLOCKBYTES]u8 = undefined;
+    // Because we panic on getrandom() failing, we provide the opportunity
+    // to override the default seed function. This also makes
+    // `std.crypto.random` available on freestanding targets, provided that
+    // the `cryptoRandomSeed` function is provided.
+    if (@hasDecl(root, "cryptoRandomSeed")) {
+        root.cryptoRandomSeed(&seed);
+    } else {
+        fillWithOsEntropy(&seed);
+    }
+
+    wipe_me.gimli = std.crypto.core.Gimli.init(seed);
 
     // This is at the end so that accidental recursive dependencies result
     // in stack overflows instead of invalid random data.
-    csprng_state_initialized = true;
+    wipe_me.init_state = .initialized;
+
+    return fillWithCsprng(buffer);
 }
lib/std/os/bits/linux.zig
@@ -2045,3 +2045,25 @@ pub const rlimit = extern struct {
     /// Hard limit
     max: rlim_t,
 };
+
+// Values for the `advice` argument of madvise.
+pub const MADV_NORMAL = 0;
+pub const MADV_RANDOM = 1;
+pub const MADV_SEQUENTIAL = 2;
+pub const MADV_WILLNEED = 3;
+pub const MADV_DONTNEED = 4;
+pub const MADV_FREE = 8;
+pub const MADV_REMOVE = 9;
+pub const MADV_DONTFORK = 10;
+pub const MADV_DOFORK = 11;
+pub const MADV_MERGEABLE = 12;
+pub const MADV_UNMERGEABLE = 13;
+pub const MADV_HUGEPAGE = 14;
+pub const MADV_NOHUGEPAGE = 15;
+pub const MADV_DONTDUMP = 16;
+pub const MADV_DODUMP = 17;
+pub const MADV_WIPEONFORK = 18;
+pub const MADV_KEEPONFORK = 19;
+pub const MADV_COLD = 20;
+pub const MADV_PAGEOUT = 21;
+pub const MADV_HWPOISON = 100;
+pub const MADV_SOFT_OFFLINE = 101;
lib/std/os/linux/tls.zig
@@ -327,34 +327,43 @@ pub fn prepareTLS(area: []u8) usize {
         if (tls_tp_points_past_tcb) tls_image.data_offset else tls_image.tcb_offset;
 }
 
-var main_thread_tls_buffer: [256]u8 = undefined;
+// The main motivation for the size chosen here is this is how much ends up being
+// requested for the thread local variables of the std.crypto.random implementation.
+// I'm not sure why it ends up being so much; the struct itself is only 64 bytes.
+// I think it has to do with being page aligned and LLVM or LLD is not smart enough
+// to lay out the TLS data in a space conserving way. Anyway I think it's fine
+// because it's less than 3 pages of memory, and putting it in the ELF like this
+// is equivalent to moving the mmap call below into the kernel, avoiding syscall
+// overhead.
+var main_thread_tls_buffer: [0x2100]u8 align(mem.page_size) = undefined;
 
 pub fn initStaticTLS() void {
     initTLS();
 
-    const alloc_tls_area: []u8 = blk: {
-        const full_alloc_size = tls_image.alloc_size + tls_image.alloc_align - 1;
-
+    const tls_area = blk: {
         // Fast path for the common case where the TLS data is really small,
-        // avoid an allocation and use our local buffer
-        if (full_alloc_size < main_thread_tls_buffer.len)
-            break :blk main_thread_tls_buffer[0..];
+        // avoid an allocation and use our local buffer.
+        if (tls_image.alloc_align <= mem.page_size and
+            tls_image.alloc_size <= main_thread_tls_buffer.len)
+        {
+            break :blk main_thread_tls_buffer[0..tls_image.alloc_size];
+        }
 
-        break :blk os.mmap(
+        const alloc_tls_area = os.mmap(
             null,
-            full_alloc_size,
+            tls_image.alloc_size + tls_image.alloc_align - 1,
             os.PROT_READ | os.PROT_WRITE,
             os.MAP_PRIVATE | os.MAP_ANONYMOUS,
             -1,
             0,
         ) catch os.abort();
-    };
 
-    // Make sure the slice is correctly aligned
-    const begin_addr = @ptrToInt(alloc_tls_area.ptr);
-    const begin_aligned_addr = mem.alignForward(begin_addr, tls_image.alloc_align);
-    const start = begin_aligned_addr - begin_addr;
-    const tls_area = alloc_tls_area[start .. start + tls_image.alloc_size];
+        // Make sure the slice is correctly aligned.
+        const begin_addr = @ptrToInt(alloc_tls_area.ptr);
+        const begin_aligned_addr = mem.alignForward(begin_addr, tls_image.alloc_align);
+        const start = begin_aligned_addr - begin_addr;
+        break :blk alloc_tls_area[start .. start + tls_image.alloc_size];
+    };
 
     const tp_value = prepareTLS(tls_area);
     setThreadPointer(tp_value);
lib/std/os/linux.zig
@@ -1351,6 +1351,10 @@ pub fn prlimit(pid: pid_t, resource: rlimit_resource, new_limit: ?*const rlimit,
     );
 }
 
+/// Raw `madvise` system call. Returns the unprocessed syscall result; this
+/// wrapper does not decode it into an error.
+pub fn madvise(address: [*]u8, len: usize, advice: u32) usize {
+    const addr = @ptrToInt(address);
+    return syscall3(.madvise, addr, len, advice);
+}
+
 test "" {
     if (builtin.os.tag == .linux) {
         _ = @import("linux/test.zig");
lib/std/c.zig
@@ -264,6 +264,11 @@ pub extern "c" fn pthread_attr_setguardsize(attr: *pthread_attr_t, guardsize: us
 pub extern "c" fn pthread_attr_destroy(attr: *pthread_attr_t) c_int;
 pub extern "c" fn pthread_self() pthread_t;
 pub extern "c" fn pthread_join(thread: pthread_t, arg_return: ?*?*c_void) c_int;
+/// Binding for the C library's `pthread_atfork`.
+/// Each of the three handlers is optional; pass `null` to omit one.
+pub extern "c" fn pthread_atfork(
+    prepare: ?fn () callconv(.C) void,
+    parent: ?fn () callconv(.C) void,
+    child: ?fn () callconv(.C) void,
+) c_int;
 
 pub extern "c" fn kqueue() c_int;
 pub extern "c" fn kevent(
lib/std/meta.zig
@@ -9,6 +9,7 @@ const debug = std.debug;
 const mem = std.mem;
 const math = std.math;
 const testing = std.testing;
+const root = @import("root");
 
 pub const trait = @import("meta/trait.zig");
 pub const TrailerFlags = @import("meta/trailer_flags.zig").TrailerFlags;
@@ -1085,3 +1086,10 @@ test "Tuple" {
     TupleTester.assertTuple(.{ u32, f16 }, Tuple(&[_]type{ u32, f16 }));
     TupleTester.assertTuple(.{ u32, f16, []const u8, void }, Tuple(&[_]type{ u32, f16, []const u8, void }));
 }
+
+/// Looks up a declaration called `name` in the root source file.
+/// Returns its value coerced to `T`, or `null` when the root source file has
+/// no such declaration.
+/// TODO: https://github.com/ziglang/zig/issues/425
+pub fn globalOption(comptime name: []const u8, comptime T: type) ?T {
+    if (@hasDecl(root, name)) {
+        return @as(T, @field(root, name));
+    }
+    return null;
+}
lib/std/os.zig
@@ -5845,3 +5845,51 @@ pub fn setrlimit(resource: rlimit_resource, limits: rlimit) SetrlimitError!void
         else => |err| return unexpectedErrno(err),
     }
 }
+
+/// Errors that `madvise` can return.
+pub const MadviseError = error{
+    /// advice is MADV_REMOVE, but the specified address range is not a shared writable mapping.
+    AccessDenied,
+    /// advice is MADV_HWPOISON, but the caller does not have the CAP_SYS_ADMIN capability.
+    PermissionDenied,
+    /// A kernel resource was temporarily unavailable.
+    SystemResources,
+    /// One of the following:
+    /// * addr is not page-aligned or length is negative
+    /// * advice is not valid
+    /// * advice is MADV_DONTNEED or MADV_REMOVE and the specified address range
+    ///   includes locked, Huge TLB pages, or VM_PFNMAP pages.
+    /// * advice is MADV_MERGEABLE or MADV_UNMERGEABLE, but the kernel was not
+    ///   configured with CONFIG_KSM.
+    /// * advice is MADV_FREE or MADV_WIPEONFORK but the specified address range
+    ///   includes file, Huge TLB, MAP_SHARED, or VM_PFNMAP ranges.
+    InvalidSyscall,
+    /// (for MADV_WILLNEED) Paging in this area would exceed the process's
+    /// maximum resident set size.
+    WouldExceedMaximumResidentSetSize,
+    /// One of the following:
+    /// * (for MADV_WILLNEED) Not enough memory: paging in failed.
+    /// * Addresses in the specified range are not currently mapped, or
+    ///   are outside the address space of the process.
+    OutOfMemory,
+    /// The madvise syscall is not available on this version and configuration
+    /// of the Linux kernel.
+    MadviseUnavailable,
+    /// The operating system returned an undocumented error code.
+    Unexpected,
+};
+
+/// Give advice about use of memory.
+/// This syscall is optional and is sometimes configured to be disabled.
+/// Give advice about use of memory.
+/// This syscall is optional and is sometimes configured to be disabled.
+///
+/// `ptr` must be page-aligned, which its type enforces. `advice` is one of
+/// the `MADV_*` values. Each errno reported by the system call is translated
+/// to the matching `MadviseError` member; anything not listed goes through
+/// `unexpectedErrno`.
+pub fn madvise(ptr: [*]align(mem.page_size) u8, length: usize, advice: u32) MadviseError!void {
+    switch (errno(system.madvise(ptr, length, advice))) {
+        0 => return,
+        EACCES => return error.AccessDenied,
+        // MadviseError declares PermissionDenied for MADV_HWPOISON without
+        // CAP_SYS_ADMIN; without this arm EPERM would be reported as
+        // error.Unexpected instead.
+        EPERM => return error.PermissionDenied,
+        EAGAIN => return error.SystemResources,
+        EBADF => unreachable, // The map exists, but the area maps something that isn't a file.
+        EINVAL => return error.InvalidSyscall,
+        EIO => return error.WouldExceedMaximumResidentSetSize,
+        ENOMEM => return error.OutOfMemory,
+        ENOSYS => return error.MadviseUnavailable,
+        else => |err| return unexpectedErrno(err),
+    }
+}
lib/std/start.zig
@@ -216,12 +216,6 @@ fn posixCallMainAndExit() noreturn {
             std.os.linux.tls.initStaticTLS();
         }
 
-        if (!@hasDecl(root, "use_AT_RANDOM_auxval") or root.use_AT_RANDOM_auxval) {
-            // Initialize the per-thread CSPRNG since Linux gave us the handy-dandy
-            // AT_RANDOM. This depends on the TLS initialization above.
-            initCryptoSeedFromAuxVal(std.os.linux.getauxval(std.elf.AT_RANDOM));
-        }
-
         // TODO This is disabled because what should we do when linking libc and this code
         // does not execute? And also it's causing a test failure in stack traces in release modes.
 
@@ -257,32 +251,12 @@ fn callMainWithArgs(argc: usize, argv: [*][*:0]u8, envp: [][*:0]u8) u8 {
 }
 
 fn main(c_argc: i32, c_argv: [*][*:0]u8, c_envp: [*:null]?[*:0]u8) callconv(.C) i32 {
-    // By default, we do not attempt to initialize tlcsprng from AT_RANDOM here because
-    // libc owns the start code, not us, and therefore libc owns the random bytes
-    // from AT_RANDOM.
-    if (builtin.os.tag == .linux and
-        @hasDecl(root, "use_AT_RANDOM_auxval") and
-        root.use_AT_RANDOM_auxval)
-    {
-        initCryptoSeedFromAuxVal(std.c.getauxval(std.elf.AT_RANDOM));
-    }
     var env_count: usize = 0;
     while (c_envp[env_count] != null) : (env_count += 1) {}
     const envp = @ptrCast([*][*:0]u8, c_envp)[0..env_count];
     return @call(.{ .modifier = .always_inline }, callMainWithArgs, .{ @intCast(usize, c_argc), c_argv, envp });
 }
 
-fn initCryptoSeedFromAuxVal(addr: usize) void {
-    if (addr == 0) return;
-    // "The address of sixteen bytes containing a random value."
-    const ptr = @intToPtr(*[16]u8, addr);
-    tlcsprng.init(ptr.*);
-    // Clear AT_RANDOM after we use it, otherwise our secure
-    // seed is sitting in memory ready for some other code in the
-    // program to reuse, and hence break our security.
-    std.crypto.utils.secureZero(u8, ptr);
-}
-
 // General error message for a malformed return type
 const bad_main_ret = "expected return type of main to be 'void', '!void', 'noreturn', 'u8', or '!u8'";
 
test/stack_traces.zig
@@ -282,7 +282,7 @@ pub fn addCases(cases: *tests.StackTracesContext) void {
                     \\source.zig:10:8: [address] in main (test)
                     \\    foo();
                     \\       ^
-                    \\start.zig:377:29: [address] in std.start.posixCallMainAndExit (test)
+                    \\start.zig:342:29: [address] in std.start.posixCallMainAndExit (test)
                     \\            return root.main();
                     \\                            ^
                     \\start.zig:163:5: [address] in std.start._start (test)