Commit b7af9edb8a

Andrew Kelley <superjoe30@gmail.com>
2018-04-14 08:12:19
add std.os.createThread
this adds kernel thread support to the standard library for linux. See #174
1 parent fa05cab
Changed files (7)
src/ir.cpp
@@ -18407,6 +18407,7 @@ bool ir_has_side_effects(IrInstruction *instruction) {
         case IrInstructionIdAddImplicitReturnType:
         case IrInstructionIdMergeErrRetTraces:
         case IrInstructionIdMarkErrRetTracePtr:
+        case IrInstructionIdAtomicRmw:
             return true;
 
         case IrInstructionIdPhi:
@@ -18487,7 +18488,6 @@ bool ir_has_side_effects(IrInstruction *instruction) {
         case IrInstructionIdCoroSize:
         case IrInstructionIdCoroSuspend:
         case IrInstructionIdCoroFree:
-        case IrInstructionIdAtomicRmw:
         case IrInstructionIdCoroPromise:
         case IrInstructionIdPromiseResultType:
             return false;
std/fmt/index.zig
@@ -8,25 +8,25 @@ const errol3 = @import("errol/index.zig").errol3;
 
 const max_int_digits = 65;
 
-const State = enum { // TODO put inside format function and make sure the name and debug info is correct
-    Start,
-    OpenBrace,
-    CloseBrace,
-    Integer,
-    IntegerWidth,
-    Float,
-    FloatWidth,
-    Character,
-    Buf,
-    BufWidth,
-};
-
 /// Renders fmt string with args, calling output with slices of bytes.
 /// If `output` returns an error, the error is returned from `format` and
 /// `output` is not called again.
 pub fn format(context: var, comptime Errors: type, output: fn(@typeOf(context), []const u8) Errors!void,
     comptime fmt: []const u8, args: ...) Errors!void
 {
+    const State = enum {
+        Start,
+        OpenBrace,
+        CloseBrace,
+        Integer,
+        IntegerWidth,
+        Float,
+        FloatWidth,
+        Character,
+        Buf,
+        BufWidth,
+    };
+
     comptime var start_index = 0;
     comptime var state = State.Start;
     comptime var next_arg = 0;
std/os/linux/index.zig
@@ -14,6 +14,22 @@ pub const STDIN_FILENO = 0;
 pub const STDOUT_FILENO = 1;
 pub const STDERR_FILENO = 2;
 
+// Operation codes for the `futex_op` argument of the futex syscall
+// (see `futex_wait` in this file, which forwards it to SYS_futex).
+// NOTE(review): values presumably mirror the kernel's futex header; confirm
+// against futex(2) for the targeted kernel.
+pub const FUTEX_WAIT = 0;
+pub const FUTEX_WAKE = 1;
+pub const FUTEX_FD = 2;
+pub const FUTEX_REQUEUE = 3;
+pub const FUTEX_CMP_REQUEUE = 4;
+pub const FUTEX_WAKE_OP = 5;
+pub const FUTEX_LOCK_PI = 6;
+pub const FUTEX_UNLOCK_PI = 7;
+pub const FUTEX_TRYLOCK_PI = 8;
+pub const FUTEX_WAIT_BITSET = 9;
+
+// NOTE(review): presumably a modifier bit to be OR-ed into one of the
+// operation codes above rather than an operation of its own; see futex(2).
+pub const FUTEX_PRIVATE_FLAG = 128;
+
+// NOTE(review): presumably another modifier bit for the operation code;
+// see futex(2) for which operations accept it.
+pub const FUTEX_CLOCK_REALTIME = 256;
+
+
 pub const PROT_NONE      = 0;
 pub const PROT_READ      = 1;
 pub const PROT_WRITE     = 2;
@@ -652,6 +668,10 @@ pub fn fork() usize {
     return syscall0(SYS_fork);
 }
 
+/// Issues the futex system call (SYS_futex) with four arguments.
+/// `uaddr` is the address of the futex word and `futex_op` is one of the
+/// FUTEX_* operation codes. `val` is passed to the kernel with its bits
+/// reinterpreted as a u32, and `timeout` is passed as its address.
+/// Returns the raw syscall result; callers decode it with `getErrno`.
+pub fn futex_wait(uaddr: usize, futex_op: u32, val: i32, timeout: ?&timespec) usize {
+    const val_bits = @bitCast(u32, val);
+    const timeout_addr = @ptrToInt(timeout);
+    return syscall4(SYS_futex, uaddr, futex_op, val_bits, timeout_addr);
+}
+
 pub fn getcwd(buf: &u8, size: usize) usize {
     return syscall2(SYS_getcwd, @ptrToInt(buf), size);
 }
@@ -746,6 +766,16 @@ pub fn openat(dirfd: i32, path: &const u8, flags: usize, mode: usize) usize {
     return syscall4(SYS_openat, usize(dirfd), @ptrToInt(path), flags, mode);
 }
 
+/// Raw five-argument form of the clone system call (SYS_clone).
+/// The two tid pointers are handed to the kernel as plain addresses.
+/// Returns the raw syscall result.
+/// See also `clone` (from the arch-specific include)
+pub fn clone5(flags: usize, child_stack_ptr: usize, parent_tid: &i32, child_tid: &i32, newtls: usize) usize {
+    const parent_tid_addr = @ptrToInt(parent_tid);
+    const child_tid_addr = @ptrToInt(child_tid);
+    return syscall5(SYS_clone, flags, child_stack_ptr, parent_tid_addr, child_tid_addr, newtls);
+}
+
+/// Raw two-argument form of the clone system call (SYS_clone): only the
+/// flags and the child stack pointer are supplied.
+/// Returns the raw syscall result.
+/// See also `clone` (from the arch-specific include)
+pub fn clone2(flags: usize, child_stack_ptr: usize) usize {
+    const rc = syscall2(SYS_clone, flags, child_stack_ptr);
+    return rc;
+}
+
 pub fn close(fd: i32) usize {
     return syscall1(SYS_close, usize(fd));
 }
std/os/linux/x86_64.zig
@@ -443,6 +443,9 @@ pub fn syscall6(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usiz
         : "rcx", "r11");
 }
 
+/// This matches the libc clone function.
+/// Declaration only: on linux/x86_64 the symbol `clone` is exported from
+/// std/special/builtin.zig, where it is implemented in assembly.
+/// In that implementation the child calls `func` with `arg` on the new
+/// `stack` and then issues a syscall with func's return value as its
+/// argument; the caller gets the raw clone syscall result back.
+/// `ptid`, `tls` and `ctid` are forwarded to the kernel unchanged.
+pub extern fn clone(func: extern fn(arg: usize) u8, stack: usize, flags: usize, arg: usize, ptid: &i32, tls: usize, ctid: &i32) usize;
+
 pub nakedcc fn restore_rt() void {
     return asm volatile ("syscall"
         :
std/os/index.zig
@@ -2384,3 +2384,132 @@ pub fn posixGetSockOptConnectError(sockfd: i32) PosixConnectError!void {
         posix.ENOTSOCK => unreachable, // The file descriptor sockfd does not refer to a socket.
     }
 }
+
+/// Handle for a thread started by `spawnThread` / `spawnThreadAllocator`.
+/// `spawnThread` places this struct inside the thread's own stack memory.
+pub const Thread = struct {
+    /// `spawnThread` passes the address of this field to clone as both the
+    /// parent-tid and the child-tid pointer, together with
+    /// CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID. `wait` treats a value of
+    /// 0 as "the thread has finished".
+    pid: i32,
+    /// Allocator that owns `stack`, or null when the caller supplied the stack.
+    allocator: ?&mem.Allocator,
+    /// The memory used as the thread's stack (it also holds this struct).
+    stack: []u8,
+
+    /// Blocks until `pid` reads 0, then frees `stack` if an allocator is set.
+    /// When an allocator is set the free also releases the memory `self`
+    /// lives in, so the handle must not be used after `wait` returns.
+    pub fn wait(self: &const Thread) void {
+        while (true) {
+            const pid_value = self.pid; // TODO atomic load
+            if (pid_value == 0) break;
+            // Sleep on the pid word, passing the value we just observed as
+            // the expected value, then loop to re-read it.
+            const rc = linux.futex_wait(@ptrToInt(&self.pid), linux.FUTEX_WAIT, pid_value, null);
+            switch (linux.getErrno(rc)) {
+                // Success, interruption and EAGAIN all just re-check the pid.
+                0 => continue,
+                posix.EINTR => continue,
+                posix.EAGAIN => continue,
+                else => unreachable,
+            }
+        }
+        if (self.allocator) |a| {
+            a.free(self.stack);
+        }
+    }
+};
+
+/// Errors that `spawnThread` can return.
+pub const SpawnThreadError = error {
+    /// A system-imposed limit on the number of threads was encountered.
+    /// There are a number of limits that may trigger this error:
+    /// *  the RLIMIT_NPROC soft resource limit (set via setrlimit(2)),
+    ///    which limits the number of processes and threads for a real
+    ///    user ID, was reached;
+    /// *  the kernel's system-wide limit on the number of processes and
+    ///    threads, /proc/sys/kernel/threads-max, was reached (see
+    ///    proc(5));
+    /// *  the maximum number of PIDs, /proc/sys/kernel/pid_max, was
+    ///    reached (see proc(5)); or
+    /// *  the PID limit (pids.max) imposed by the cgroup "process number"
+    ///    (PIDs) controller was reached.
+    /// `spawnThread` returns this when clone fails with EAGAIN.
+    ThreadQuotaExceeded,
+
+    /// The kernel cannot allocate sufficient memory to allocate a task structure
+    /// for the child, or to copy those parts of the caller's context that need to
+    /// be copied.
+    /// `spawnThread` returns this when clone fails with ENOMEM.
+    SystemResources,
+
+    /// Presumably what `unexpectedErrorPosix` yields for any errno that
+    /// `spawnThread` does not handle explicitly.
+    Unexpected,
+};
+
+/// Error set of `spawnThreadAllocator`: everything `spawnThread` can return,
+/// plus OutOfMemory for the stack allocation.
+pub const SpawnThreadAllocatorError = SpawnThreadError || error{OutOfMemory};
+
+/// Allocates an 8 MiB stack from `allocator` and starts a thread on it via
+/// `spawnThread`. The allocator is recorded in the returned `Thread`, so
+/// `Thread.wait` frees the stack once the thread has finished.
+/// caller must call wait on the returned thread
+/// fn startFn(@typeOf(context)) T
+/// where T is u8, noreturn, void, or !void
+/// NOTE(review): the wrapper inside `spawnThread` returns startFn's result
+/// directly as a u8; confirm that the non-u8 return types listed above are
+/// actually accepted.
+/// Returns error.OutOfMemory if the stack cannot be allocated, or any
+/// `SpawnThreadError` from `spawnThread`; in the latter case the stack is
+/// released before returning.
+pub fn spawnThreadAllocator(allocator: &mem.Allocator, context: var, comptime startFn: var) SpawnThreadAllocatorError!&Thread {
+    // TODO compile-time call graph analysis to determine stack upper bound
+    // https://github.com/zig-lang/zig/issues/157
+    const default_stack_size = 8 * 1024 * 1024;
+    const stack_bytes = try allocator.alloc(u8, default_stack_size);
+    // If the thread cannot be started nothing else references the stack,
+    // so give it back instead of leaking it.
+    errdefer allocator.free(stack_bytes);
+    const thread = try spawnThread(stack_bytes, context, startFn);
+    thread.allocator = allocator;
+    return thread;
+}
+
+/// Starts a new thread that runs `startFn(context)` on the caller-provided
+/// `stack`, using the arch-specific `posix.clone`.
+/// stack must be big enough to store one Thread and one @typeOf(context), each with default alignment, at the end
+/// fn startFn(@typeOf(context)) T
+/// where T is u8, noreturn, void, or !void
+/// NOTE(review): `threadMain` below returns startFn's result directly as a
+/// u8; confirm that the non-u8 return types listed above are accepted.
+/// caller must call wait on the returned thread
+/// The returned `Thread` lives inside `stack`, and its `allocator` field
+/// is set to null.
+/// Returns ThreadQuotaExceeded on EAGAIN, SystemResources on ENOMEM, and
+/// the result of `unexpectedErrorPosix` for any errno not listed below.
+pub fn spawnThread(stack: []u8, context: var, comptime startFn: var) SpawnThreadError!&Thread {
+    const Context = @typeOf(context);
+    comptime assert(@ArgType(@typeOf(startFn), 0) == Context);
+
+    // Carve space off the high end of the stack, working downwards.
+    var stack_end: usize = @ptrToInt(stack.ptr) + stack.len;
+    // Left undefined for a zero-sized Context; threadMain ignores it then.
+    var arg: usize = undefined;
+    if (@sizeOf(Context) != 0) {
+        // Reserve an aligned slot for a copy of the context and remember
+        // its address as the argument handed to the new thread.
+        stack_end -= @sizeOf(Context);
+        stack_end -= stack_end % @alignOf(Context);
+        assert(stack_end >= @ptrToInt(stack.ptr));
+        const context_ptr = @alignCast(@alignOf(Context), @intToPtr(&Context, stack_end));
+        *context_ptr = context;
+        arg = stack_end;
+    }
+
+    // Reserve an aligned slot for the Thread handle below the context copy.
+    stack_end -= @sizeOf(Thread);
+    stack_end -= stack_end % @alignOf(Thread);
+    assert(stack_end >= @ptrToInt(stack.ptr));
+    const thread_ptr = @alignCast(@alignOf(Thread), @intToPtr(&Thread, stack_end));
+    thread_ptr.stack = stack;
+    thread_ptr.allocator = null;
+
+    // Entry point with the signature `posix.clone` expects; it turns the
+    // address back into the context copy and calls startFn with it.
+    const threadMain = struct {
+        extern fn threadMain(ctx_addr: usize) u8 {
+            if (@sizeOf(Context) == 0) {
+                return startFn({});
+            } else {
+                return startFn(*@intToPtr(&const Context, ctx_addr));
+            }
+        }
+    }.threadMain;
+
+    const flags = posix.CLONE_VM | posix.CLONE_FS | posix.CLONE_FILES | posix.CLONE_SIGHAND
+        | posix.CLONE_THREAD | posix.CLONE_SYSVSEM // | posix.CLONE_SETTLS
+        | posix.CLONE_PARENT_SETTID | posix.CLONE_CHILD_CLEARTID | posix.CLONE_DETACHED;
+    const newtls: usize = 0;
+    // The remaining stack (everything below the Thread slot) becomes the
+    // new thread's stack. &thread_ptr.pid is passed as both the parent-tid
+    // and child-tid pointer; Thread.wait later polls that word for 0.
+    const rc = posix.clone(threadMain, stack_end, flags, arg, &thread_ptr.pid, newtls, &thread_ptr.pid);
+    const err = posix.getErrno(rc);
+    switch (err) {
+        0 => return thread_ptr,
+        posix.EAGAIN => return SpawnThreadError.ThreadQuotaExceeded,
+        posix.EINVAL => unreachable,
+        posix.ENOMEM => return SpawnThreadError.SystemResources,
+        posix.ENOSPC => unreachable,
+        posix.EPERM => unreachable,
+        posix.EUSERS => unreachable,
+        else => return unexpectedErrorPosix(err),
+    }
+}
+
+/// Calls waitpid on `pid` with options 0 and returns the status word it
+/// wrote, retrying for as long as the call is interrupted (EINTR).
+/// Every other errno is treated as unreachable.
+pub fn posixWait(pid: i32) i32 {
+    var wait_status: i32 = undefined;
+    while (true) {
+        const rc = posix.waitpid(pid, &wait_status, 0);
+        const errno = posix.getErrno(rc);
+        if (errno == 0) return wait_status;
+        if (errno == posix.EINTR) continue;
+        // ECHILD: The process specified does not exist. It would be a race condition to handle this error.
+        // EINVAL: The options argument was invalid
+        // Those, and anything else, are considered impossible here.
+        unreachable;
+    }
+}
std/os/test.zig
@@ -6,6 +6,8 @@ const io = std.io;
 const a = std.debug.global_allocator;
 
 const builtin = @import("builtin");
+const AtomicRmwOp = builtin.AtomicRmwOp;
+const AtomicOrder = builtin.AtomicOrder;
 
 test "makePath, put some files in it, deleteTree" {
     if (builtin.os == builtin.Os.windows) {
@@ -40,3 +42,40 @@ test "access file" {
     assert((try os.File.access(a, "os_test_tmp/file.txt", os.default_file_mode)) == true);
     try os.deleteTree(a, "os_test_tmp");
 }
+
+test "spawn threads" {
+    // TODO implement threads on macos and windows
+    if (builtin.os != builtin.Os.linux) return;
+
+    var direct_allocator = std.heap.DirectAllocator.init();
+    defer direct_allocator.deinit();
+    const thread_allocator = &direct_allocator.allocator;
+
+    // Starts at 1; each of the three start2 threads adds 1 to it.
+    var counter: i32 = 1;
+
+    // Two threads whose stacks come from the allocator.
+    const noop_thread = try std.os.spawnThreadAllocator(thread_allocator, {}, start1);
+    const heap_adder = try std.os.spawnThreadAllocator(thread_allocator, &counter, start2);
+
+    // Two threads running on caller-provided stack buffers.
+    var stack_a: [1024]u8 = undefined;
+    var stack_b: [1024]u8 = undefined;
+    const stack_adder_a = try std.os.spawnThread(stack_a[0..], &counter, start2);
+    const stack_adder_b = try std.os.spawnThread(stack_b[0..], &counter, start2);
+
+    noop_thread.wait();
+    stack_adder_a.wait();
+    stack_adder_b.wait();
+    heap_adder.wait();
+
+    assert(counter == 4);
+}
+
+// Thread entry point that takes no context and does nothing; returns 0.
+fn start1(ctx: void) u8 {
+    const exit_code: u8 = 0;
+    return exit_code;
+}
+
+// Thread entry point that atomically adds 1 to the shared integer `ctx`
+// points at (sequentially consistent) and returns 0.
+fn start2(ctx: &i32) u8 {
+    const exit_code: u8 = 0;
+    _ = @atomicRmw(i32, ctx, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
+    return exit_code;
+}
std/special/builtin.zig
@@ -57,11 +57,46 @@ comptime {
     if (builtin.mode != builtin.Mode.ReleaseFast and builtin.os != builtin.Os.windows) {
         @export("__stack_chk_fail", __stack_chk_fail, builtin.GlobalLinkage.Strong);
     }
+    if (builtin.os == builtin.Os.linux and builtin.arch == builtin.Arch.x86_64) {
+        @export("clone", clone, builtin.GlobalLinkage.Strong);
+    }
 }
 extern fn __stack_chk_fail() noreturn {
     @panic("stack smashing detected");
 }
 
+// TODO we should be able to put this directly in std/linux/x86_64.zig but
+// it causes a segfault in release mode. this is a workaround of calling it
+// across .o file boundaries. fix comptime @ptrCast of nakedcc functions.
+//
+// Assembly implementation of the `clone` symbol declared in
+// std/os/linux/x86_64.zig as
+//   clone(func, stack, flags, arg, ptid, tls, ctid) usize
+// Assuming the System V x86_64 calling convention, the arguments arrive in
+// rdi, rsi, rdx, rcx, r8, r9 and 8(%rsp). What the instructions below do:
+//  * load 56 into eax as the syscall number (presumably SYS_clone);
+//  * shuffle registers for the syscall: flags -> rdi, ptid -> rdx,
+//    tls -> r8, ctid -> r10, while func is parked in r9;
+//  * round the stack pointer in rsi down to a multiple of 16, drop it by a
+//    further 8 bytes and store `arg` at that address;
+//  * issue the syscall. A non-zero eax means this is not the child, and
+//    the code simply returns with that value in rax;
+//  * in the child (eax == 0): clear ebp, pop `arg` into rdi, call func,
+//    then issue syscall 60 (presumably SYS_exit) with func's return value
+//    as its argument. `hlt` follows in case that syscall ever returns.
+nakedcc fn clone() void {
+    asm volatile (
+        \\      xor %%eax,%%eax
+        \\      mov $56,%%al
+        \\      mov %%rdi,%%r11
+        \\      mov %%rdx,%%rdi
+        \\      mov %%r8,%%rdx
+        \\      mov %%r9,%%r8
+        \\      mov 8(%%rsp),%%r10
+        \\      mov %%r11,%%r9
+        \\      and $-16,%%rsi
+        \\      sub $8,%%rsi
+        \\      mov %%rcx,(%%rsi)
+        \\      syscall
+        \\      test %%eax,%%eax
+        \\      jnz 1f
+        \\      xor %%ebp,%%ebp
+        \\      pop %%rdi
+        \\      call *%%r9
+        \\      mov %%eax,%%edi
+        \\      xor %%eax,%%eax
+        \\      mov $60,%%al
+        \\      syscall
+        \\      hlt
+        \\1:    ret
+        \\
+    );
+}
+
 const math = @import("../math/index.zig");
 
 export fn fmodf(x: f32, y: f32) f32 { return generic_fmod(f32, x, y); }