Commit be7cacfbbe

LemonBoy <thatlemon@gmail.com>
2019-05-07 00:49:49
Implement stack probes for x86/x86_64
Enabled only on non-Windows systems, since Windows already requires its own stack probes.
1 parent 24ee765
Changed files (5)
src/codegen.cpp
@@ -399,6 +399,15 @@ static void add_uwtable_attr(CodeGen *g, LLVMValueRef fn_val) {
     }
 }
 
+static void add_probe_stack_attr(CodeGen *g, LLVMValueRef fn_val) {
+    // Windows targets are skipped: they already get their own stack probes.
+    bool is_windows = g->zig_target->os == OsWindows;
+    bool is_x86 = g->zig_target->arch == ZigLLVM_x86 || g->zig_target->arch == ZigLLVM_x86_64;
+    if (is_x86 && !is_windows) {
+        addLLVMFnAttrStr(fn_val, "probe-stack", "__zig_probe_stack");
+    }
+}
+
 static LLVMLinkage to_llvm_linkage(GlobalLinkageId id) {
     switch (id) {
         case GlobalLinkageIdInternal:
@@ -587,6 +596,8 @@ static LLVMValueRef fn_llvm_value(CodeGen *g, ZigFn *fn_table_entry) {
                 addLLVMFnAttr(fn_table_entry->llvm_value, "sspstrong");
                 addLLVMFnAttrStr(fn_table_entry->llvm_value, "stack-protector-buffer-size", "4");
             }
+
+            add_probe_stack_attr(g, fn_table_entry->llvm_value);
         }
     } else {
         maybe_import_dll(g, fn_table_entry->llvm_value, linkage);
src/main.cpp
@@ -18,6 +18,12 @@
 
 #include <stdio.h>
 
+// Do-nothing definition of the probe symbol, provided with C linkage so that
+// the compiler can be linked with the zig objects during the bootstrap phase.
+extern "C" void __zig_probe_stack(void)
+{
+}
+
 static int print_error_usage(const char *arg0) {
     fprintf(stderr, "See `%s --help` for detailed usage information\n", arg0);
     return EXIT_FAILURE;
std/special/compiler_rt/stack_probe.zig
@@ -0,0 +1,206 @@
+const builtin = @import("builtin");
+
+// Zig's own stack-probe routine (available only on x86 and x86_64).
+//
+// The probe length is passed in %rax (%eax on i386). The stack pointer is
+// moved down in 0x1000-byte steps (presumably the page size) and the stack is
+// touched after every step; a final step covers the remaining length. The
+// probe length is then added back to the stack pointer and %rcx/%ecx is
+// popped, so both hold their original values when the routine returns.
+pub nakedcc fn zig_probe_stack() void {
+    @setRuntimeSafety(false);
+
+    // Versions of the Linux kernel before 5.1 treat any access below SP as
+    // invalid so let's update it on the go, otherwise we'll get a segfault
+    // instead of triggering the stack growth.
+
+    switch (builtin.arch) {
+        .x86_64 => {
+            // %rax = probe length, %rsp = stack pointer
+            // %rcx is used as the remaining-length counter. The touched slot
+            // is 16(%rsp), presumably the first slot above the saved %rcx and
+            // the return address.
+            asm volatile (
+                \\        push   %%rcx
+                \\        mov    %%rax, %%rcx
+                \\        cmp    $0x1000,%%rcx
+                \\        jb     2f
+                \\ 1:
+                \\        sub    $0x1000,%%rsp
+                \\        orl    $0,16(%%rsp)
+                \\        sub    $0x1000,%%rcx
+                \\        cmp    $0x1000,%%rcx
+                \\        ja     1b
+                \\ 2:
+                \\        sub    %%rcx, %%rsp
+                \\        orl    $0,16(%%rsp)
+                \\        add    %%rax,%%rsp
+                \\        pop    %%rcx
+                \\        ret
+            );
+        },
+        .i386 => {
+            // %eax = probe length, %esp = stack pointer
+            // Same sequence as the x86_64 arm with 32-bit registers; the
+            // touched slot is 8(%esp), presumably the first slot above the
+            // saved %ecx and the return address.
+            asm volatile (
+                \\        push   %%ecx
+                \\        mov    %%eax, %%ecx
+                \\        cmp    $0x1000,%%ecx
+                \\        jb     2f
+                \\ 1:
+                \\        sub    $0x1000,%%esp
+                \\        orl    $0,8(%%esp)
+                \\        sub    $0x1000,%%ecx
+                \\        cmp    $0x1000,%%ecx
+                \\        ja     1b
+                \\ 2:
+                \\        sub    %%ecx, %%esp
+                \\        orl    $0,8(%%esp)
+                \\        add    %%eax,%%esp
+                \\        pop    %%ecx
+                \\        ret
+            );
+        },
+        // No probe code is emitted for any other architecture.
+        else => { }
+    }
+
+    // Both assembly sequences above end with their own `ret`.
+    unreachable;
+}
+
+// Probes the stack without moving the stack pointer (x86 and x86_64 only).
+//
+// %rax/%eax holds the probe length. %rcx/%ecx is loaded with an address above
+// the two saved registers (presumably the caller's stack pointer as it was
+// before the call), walked down in 0x1000-byte steps, and the memory it points
+// at is read with `test` after each step. One final read follows after the
+// remaining length is subtracted. Both registers are pushed on entry and
+// popped before `ret`, so the only stack-pointer changes are those push/pop
+// pairs.
+fn win_probe_stack_only() void {
+    @setRuntimeSafety(false);
+
+    switch (builtin.arch) {
+        .x86_64 => {
+            asm volatile (
+                \\         push   %%rcx
+                \\         push   %%rax
+                \\         cmp    $0x1000,%%rax
+                \\         lea    24(%%rsp),%%rcx
+                \\         jb     1f
+                \\ 2:
+                \\         sub    $0x1000,%%rcx
+                \\         test   %%rcx,(%%rcx)
+                \\         sub    $0x1000,%%rax
+                \\         cmp    $0x1000,%%rax
+                \\         ja     2b
+                \\ 1:
+                \\         sub    %%rax,%%rcx
+                \\         test   %%rcx,(%%rcx)
+                \\         pop    %%rax
+                \\         pop    %%rcx
+                \\         ret
+            );
+        },
+        .i386 => {
+            // Same sequence as the x86_64 arm, using 32-bit registers and a
+            // 12-byte starting offset.
+            asm volatile (
+                \\         push   %%ecx
+                \\         push   %%eax
+                \\         cmp    $0x1000,%%eax
+                \\         lea    12(%%esp),%%ecx
+                \\         jb     1f
+                \\ 2:
+                \\         sub    $0x1000,%%ecx
+                \\         test   %%ecx,(%%ecx)
+                \\         sub    $0x1000,%%eax
+                \\         cmp    $0x1000,%%eax
+                \\         ja     2b
+                \\ 1:
+                \\         sub    %%eax,%%ecx
+                \\         test   %%ecx,(%%ecx)
+                \\         pop    %%eax
+                \\         pop    %%ecx
+                \\         ret
+            );
+        },
+        // No probe code is emitted for any other architecture.
+        else => { }
+    }
+
+    // Both assembly sequences above end with their own `ret`.
+    unreachable;
+}
+
+// Probes the stack and then lowers the stack pointer by the probed length
+// (x86 and x86_64 only).
+//
+// %rax/%eax holds the probe length. The probing walk matches the one in
+// win_probe_stack_only, except that only %rcx/%ecx is saved. Afterwards the
+// stack pointer is set to the last probed address, the saved %rcx/%ecx is
+// reloaded from the old stack, and the return address is pushed onto the new
+// stack top so that `ret` still returns to the caller. The final `sub` leaves
+// the distance between the old return-address slot and the new stack pointer
+// in %rax/%eax.
+fn win_probe_stack_adjust_sp() void {
+    @setRuntimeSafety(false);
+
+    switch (builtin.arch) {
+        .x86_64 => {
+            asm volatile (
+                \\         push   %%rcx
+                \\         cmp    $0x1000,%%rax
+                \\         lea    16(%%rsp),%%rcx
+                \\         jb     1f
+                \\ 2:
+                \\         sub    $0x1000,%%rcx
+                \\         test   %%rcx,(%%rcx)
+                \\         sub    $0x1000,%%rax
+                \\         cmp    $0x1000,%%rax
+                \\         ja     2b
+                \\ 1:
+                \\         sub    %%rax,%%rcx
+                \\         test   %%rcx,(%%rcx)
+                \\
+                \\         lea    8(%%rsp),%%rax
+                \\         mov    %%rcx,%%rsp
+                \\         mov    -8(%%rax),%%rcx
+                \\         push   (%%rax)
+                \\         sub    %%rsp,%%rax
+                \\         ret
+            );
+        },
+        .i386 => {
+            // Same sequence as the x86_64 arm, using 32-bit registers and
+            // 4-byte stack slots.
+            asm volatile (
+                \\         push   %%ecx
+                \\         cmp    $0x1000,%%eax
+                \\         lea    8(%%esp),%%ecx
+                \\         jb     1f
+                \\ 2:
+                \\         sub    $0x1000,%%ecx
+                \\         test   %%ecx,(%%ecx)
+                \\         sub    $0x1000,%%eax
+                \\         cmp    $0x1000,%%eax
+                \\         ja     2b
+                \\ 1:
+                \\         sub    %%eax,%%ecx
+                \\         test   %%ecx,(%%ecx)
+                \\
+                \\         lea    4(%%esp),%%eax
+                \\         mov    %%ecx,%%esp
+                \\         mov    -4(%%eax),%%ecx
+                \\         push   (%%eax)
+                \\         sub    %%esp,%%eax
+                \\         ret
+                );
+        },
+        // No probe code is emitted for any other architecture.
+        else => { },
+    }
+
+    // Both assembly sequences above end with their own `ret`.
+    unreachable;
+}
+
+// Windows has a multitude of stack-probing functions with similar names and
+// slightly different behaviours: some behave as alloca() and update the stack
+// pointer after probing the stack, others do not.
+//
+// Function name        | Adjusts the SP? |
+//                      | x86    | x86_64 |
+// ----------------------------------------
+// _chkstk (_alloca)    | yes    | yes    |
+// __chkstk             | yes    | no     |
+// __chkstk_ms          | no     | no     |
+// ___chkstk (__alloca) | yes    | yes    |
+// ___chkstk_ms         | no     | no     |
+
+// `_chkstk` (`_alloca`): probes the stack and adjusts the stack pointer on
+// both x86 and x86_64 by inlining win_probe_stack_adjust_sp.
+pub nakedcc fn _chkstk() void {
+    @setRuntimeSafety(false);
+    @inlineCall(win_probe_stack_adjust_sp);
+}
+// `__chkstk`: adjusts the stack pointer on i386 but only probes on x86_64;
+// any other architecture is treated as unreachable.
+pub nakedcc fn __chkstk() void {
+    @setRuntimeSafety(false);
+    switch (builtin.arch) {
+        .i386 => @inlineCall(win_probe_stack_adjust_sp),
+        .x86_64 => @inlineCall(win_probe_stack_only),
+        else => unreachable
+    }
+}
+// `___chkstk` (`__alloca`): probes the stack and adjusts the stack pointer on
+// both x86 and x86_64 by inlining win_probe_stack_adjust_sp.
+pub nakedcc fn ___chkstk() void {
+    @setRuntimeSafety(false);
+    @inlineCall(win_probe_stack_adjust_sp);
+}
+// `__chkstk_ms`: probes the stack without adjusting the stack pointer by
+// inlining win_probe_stack_only.
+pub nakedcc fn __chkstk_ms() void {
+    @setRuntimeSafety(false);
+    @inlineCall(win_probe_stack_only);
+}
+// `___chkstk_ms`: probes the stack without adjusting the stack pointer by
+// inlining win_probe_stack_only.
+pub nakedcc fn ___chkstk_ms() void {
+    @setRuntimeSafety(false);
+    @inlineCall(win_probe_stack_only);
+}
std/special/compiler_rt.zig
@@ -1,10 +1,17 @@
 const builtin = @import("builtin");
 const is_test = builtin.is_test;
 
+const stack_probe = @import("compiler_rt/stack_probe.zig");
+
 comptime {
     const linkage = if (is_test) builtin.GlobalLinkage.Internal else builtin.GlobalLinkage.Weak;
     const strong_linkage = if (is_test) builtin.GlobalLinkage.Internal else builtin.GlobalLinkage.Strong;
 
+    switch (builtin.arch) {
+        .i386, .x86_64 => @export("__zig_probe_stack", @import("compiler_rt/stack_probe.zig").zig_probe_stack, linkage),
+        else => { }
+    }
+
     @export("__lesf2", @import("compiler_rt/comparesf2.zig").__lesf2, linkage);
     @export("__ledf2", @import("compiler_rt/comparedf2.zig").__ledf2, linkage);
     @export("__letf2", @import("compiler_rt/comparetf2.zig").__letf2, linkage);
@@ -191,20 +198,20 @@ comptime {
         @export("__aeabi_dcmpun", @import("compiler_rt/comparedf2.zig").__unorddf2, linkage);
     }
     if (builtin.os == builtin.Os.windows) {
+        if (!builtin.link_libc) {
+            @export("_chkstk", @import("compiler_rt/stack_probe.zig")._chkstk, strong_linkage);
+            @export("__chkstk", @import("compiler_rt/stack_probe.zig").__chkstk, strong_linkage);
+            @export("___chkstk", @import("compiler_rt/stack_probe.zig").___chkstk, strong_linkage);
+            @export("__chkstk_ms", @import("compiler_rt/stack_probe.zig").__chkstk_ms, strong_linkage);
+            @export("___chkstk_ms", @import("compiler_rt/stack_probe.zig").___chkstk_ms, strong_linkage);
+        }
+
         switch (builtin.arch) {
             builtin.Arch.i386 => {
-                if (!builtin.link_libc) {
-                    @export("_chkstk", _chkstk, strong_linkage);
-                    @export("__chkstk_ms", __chkstk_ms, linkage);
-                }
                 @export("_aulldiv", @import("compiler_rt/aulldiv.zig")._aulldiv, strong_linkage);
                 @export("_aullrem", @import("compiler_rt/aullrem.zig")._aullrem, strong_linkage);
             },
             builtin.Arch.x86_64 => {
-                if (!builtin.link_libc) {
-                    @export("__chkstk", __chkstk, strong_linkage);
-                    @export("___chkstk_ms", ___chkstk_ms, linkage);
-                }
                 // The "ti" functions must use @Vector(2, u64) parameter types to adhere to the ABI
                 // that LLVM expects compiler-rt to have.
                 @export("__divti3", @import("compiler_rt/divti3.zig").__divti3_windows_x86_64, linkage);
@@ -492,108 +499,6 @@ nakedcc fn __aeabi_memcmp() noreturn {
     unreachable;
 }
 
-// _chkstk (_alloca) routine - probe stack between %esp and (%esp-%eax) in 4k increments,
-// then decrement %esp by %eax.  Preserves all registers except %esp and flags.
-// This routine is windows specific
-// http://msdn.microsoft.com/en-us/library/ms648426.aspx
-nakedcc fn _chkstk() align(4) void {
-    @setRuntimeSafety(false);
-
-    asm volatile (
-        \\         push   %%ecx
-        \\         push   %%eax
-        \\         cmp    $0x1000,%%eax
-        \\         lea    12(%%esp),%%ecx
-        \\         jb     1f
-        \\ 2:
-        \\         sub    $0x1000,%%ecx
-        \\         test   %%ecx,(%%ecx)
-        \\         sub    $0x1000,%%eax
-        \\         cmp    $0x1000,%%eax
-        \\         ja     2b
-        \\ 1:
-        \\         sub    %%eax,%%ecx
-        \\         test   %%ecx,(%%ecx)
-        \\         pop    %%eax
-        \\         pop    %%ecx
-        \\         ret
-    );
-}
-
-nakedcc fn __chkstk() align(4) void {
-    @setRuntimeSafety(false);
-
-    asm volatile (
-        \\        push   %%rcx
-        \\        push   %%rax
-        \\        cmp    $0x1000,%%rax
-        \\        lea    24(%%rsp),%%rcx
-        \\        jb     1f
-        \\2:
-        \\        sub    $0x1000,%%rcx
-        \\        test   %%rcx,(%%rcx)
-        \\        sub    $0x1000,%%rax
-        \\        cmp    $0x1000,%%rax
-        \\        ja     2b
-        \\1:
-        \\        sub    %%rax,%%rcx
-        \\        test   %%rcx,(%%rcx)
-        \\        pop    %%rax
-        \\        pop    %%rcx
-        \\        ret
-    );
-}
-
-// _chkstk routine
-// This routine is windows specific
-// http://msdn.microsoft.com/en-us/library/ms648426.aspx
-nakedcc fn __chkstk_ms() align(4) void {
-    @setRuntimeSafety(false);
-
-    asm volatile (
-        \\         push   %%ecx
-        \\         push   %%eax
-        \\         cmp    $0x1000,%%eax
-        \\         lea    12(%%esp),%%ecx
-        \\         jb     1f
-        \\ 2:
-        \\         sub    $0x1000,%%ecx
-        \\         test   %%ecx,(%%ecx)
-        \\         sub    $0x1000,%%eax
-        \\         cmp    $0x1000,%%eax
-        \\         ja     2b
-        \\ 1:
-        \\         sub    %%eax,%%ecx
-        \\         test   %%ecx,(%%ecx)
-        \\         pop    %%eax
-        \\         pop    %%ecx
-        \\         ret
-    );
-}
-
-nakedcc fn ___chkstk_ms() align(4) void {
-    @setRuntimeSafety(false);
-
-    asm volatile (
-        \\        push   %%rcx
-        \\        push   %%rax
-        \\        cmp    $0x1000,%%rax
-        \\        lea    24(%%rsp),%%rcx
-        \\        jb     1f
-        \\2:
-        \\        sub    $0x1000,%%rcx
-        \\        test   %%rcx,(%%rcx)
-        \\        sub    $0x1000,%%rax
-        \\        cmp    $0x1000,%%rax
-        \\        ja     2b
-        \\1:
-        \\        sub    %%rax,%%rcx
-        \\        test   %%rcx,(%%rcx)
-        \\        pop    %%rax
-        \\        pop    %%rcx
-        \\        ret
-    );
-}
 
 extern fn __divmodsi4(a: i32, b: i32, rem: *i32) i32 {
     @setRuntimeSafety(is_test);
CMakeLists.txt
@@ -644,6 +644,7 @@ set(ZIG_STD_FILES
     "special/build_runner.zig"
     "special/builtin.zig"
     "special/compiler_rt.zig"
+    "special/compiler_rt/stack_probe.zig"
     "special/compiler_rt/arm/aeabi_fcmp.zig"
     "special/compiler_rt/arm/aeabi_dcmp.zig"
     "special/compiler_rt/addXf3.zig"