Commit d0575310dc

Andrew Kelley <andrew@ziglang.org>
2021-06-21 02:01:39
Merge pull request #9168 from LemonBoy/fix-pie
std: Fix PIE startup sequence
1 parent b72d55e
Changed files (9)
lib/std/os/linux/start_pie.zig
@@ -8,33 +8,35 @@ const R_386_RELATIVE = 8;
 const R_ARM_RELATIVE = 23;
 const R_AARCH64_RELATIVE = 1027;
 const R_RISCV_RELATIVE = 3;
+const R_SPARC_RELATIVE = 22; // NOTE(review): no arm of the switch below selects this constant — confirm whether a sparc case is intended.
 
-const ARCH_RELATIVE_RELOC = switch (builtin.cpu.arch) {
+const R_RELATIVE = switch (builtin.cpu.arch) {
     .i386 => R_386_RELATIVE,
     .x86_64 => R_AMD64_RELATIVE,
     .arm => R_ARM_RELATIVE,
     .aarch64 => R_AARCH64_RELATIVE,
     .riscv64 => R_RISCV_RELATIVE,
-    else => @compileError("unsupported architecture"),
+    else => @compileError("Missing R_RELATIVE definition for this target"),
 };
 
-// Just a convoluted (but necessary) way to obtain the address of the _DYNAMIC[]
-// vector as PC-relative so that we can use it before any relocation is applied
+// Obtain a pointer to the _DYNAMIC array.
+// Its address must be computed PC-relatively so that no relocation is needed,
+// since relocations have not been applied yet at this point.
 fn getDynamicSymbol() [*]elf.Dyn {
-    const addr = switch (builtin.cpu.arch) {
+    return switch (builtin.cpu.arch) {
         .i386 => asm volatile (
             \\ .weak _DYNAMIC
             \\ .hidden _DYNAMIC
             \\ call 1f
             \\ 1: pop %[ret]
             \\ lea _DYNAMIC-1b(%[ret]), %[ret]
-            : [ret] "=r" (-> usize)
+            : [ret] "=r" (-> [*]elf.Dyn)
         ),
         .x86_64 => asm volatile (
             \\ .weak _DYNAMIC
             \\ .hidden _DYNAMIC
             \\ lea _DYNAMIC(%%rip), %[ret]
-            : [ret] "=r" (-> usize)
+            : [ret] "=r" (-> [*]elf.Dyn)
         ),
         // Work around the limited offset range of `ldr`
         .arm => asm volatile (
@@ -45,7 +47,7 @@ fn getDynamicSymbol() [*]elf.Dyn {
             \\ b 2f
             \\ 1: .word _DYNAMIC-1b
             \\ 2:
-            : [ret] "=r" (-> usize)
+            : [ret] "=r" (-> [*]elf.Dyn)
         ),
         // A simple `adr` is not enough as it has a limited offset range
         .aarch64 => asm volatile (
@@ -53,61 +55,39 @@ fn getDynamicSymbol() [*]elf.Dyn {
             \\ .hidden _DYNAMIC
             \\ adrp %[ret], _DYNAMIC
             \\ add %[ret], %[ret], #:lo12:_DYNAMIC
-            : [ret] "=r" (-> usize)
+            : [ret] "=r" (-> [*]elf.Dyn)
         ),
         .riscv64 => asm volatile (
             \\ .weak _DYNAMIC
             \\ .hidden _DYNAMIC
             \\ lla %[ret], _DYNAMIC
-            : [ret] "=r" (-> usize)
+            : [ret] "=r" (-> [*]elf.Dyn)
         ),
-        else => @compileError("???"),
+        else => {
+            @compileError("PIE startup is not yet supported for this target!");
+        },
     };
-    return @intToPtr([*]elf.Dyn, addr);
 }
 
-pub fn apply_relocations() void {
+pub fn relocate(phdrs: []elf.Phdr) void {
     @setRuntimeSafety(false);
 
     const dynv = getDynamicSymbol();
-    const auxv = std.os.linux.elf_aux_maybe.?;
-    var at_phent: usize = undefined;
-    var at_phnum: usize = undefined;
-    var at_phdr: usize = undefined;
-    var at_hwcap: usize = undefined;
-
-    {
-        var i: usize = 0;
-        while (auxv[i].a_type != std.elf.AT_NULL) : (i += 1) {
-            switch (auxv[i].a_type) {
-                elf.AT_PHENT => at_phent = auxv[i].a_un.a_val,
-                elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
-                elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
-                else => continue,
-            }
-        }
-    }
-
-    // Sanity check
-    assert(at_phent == @sizeOf(elf.Phdr));
-
-    // Search the TLS section
-    const phdrs = (@intToPtr([*]elf.Phdr, at_phdr))[0..at_phnum];
-
-    const base_addr = blk: {
+    // Recover the delta applied by the loader by comparing the effective and
+    // the theoretical load addresses for the `_DYNAMIC` symbol.
+    const base_addr = base: {
         for (phdrs) |*phdr| {
-            if (phdr.p_type == elf.PT_DYNAMIC) {
-                break :blk @ptrToInt(&dynv[0]) - phdr.p_vaddr;
-            }
+            if (phdr.p_type != elf.PT_DYNAMIC) continue;
+            break :base @ptrToInt(dynv) - phdr.p_vaddr;
         }
-        unreachable;
+        // This is not supposed to happen for well-formed binaries.
+        std.os.abort();
     };
 
     var rel_addr: usize = 0;
     var rela_addr: usize = 0;
     var rel_size: usize = 0;
     var rela_size: usize = 0;
-
     {
         var i: usize = 0;
         while (dynv[i].d_tag != elf.DT_NULL) : (i += 1) {
@@ -121,18 +101,18 @@ pub fn apply_relocations() void {
         }
     }
 
-    // Perform the relocations
+    // Apply the relocations.
     if (rel_addr != 0) {
         const rel = std.mem.bytesAsSlice(elf.Rel, @intToPtr([*]u8, rel_addr)[0..rel_size]);
         for (rel) |r| {
-            if (r.r_type() != ARCH_RELATIVE_RELOC) continue;
+            if (r.r_type() != R_RELATIVE) continue;
             @intToPtr(*usize, base_addr + r.r_offset).* += base_addr;
         }
     }
     if (rela_addr != 0) {
         const rela = std.mem.bytesAsSlice(elf.Rela, @intToPtr([*]u8, rela_addr)[0..rela_size]);
         for (rela) |r| {
-            if (r.r_type() != ARCH_RELATIVE_RELOC) continue;
+            if (r.r_type() != R_RELATIVE) continue;
             @intToPtr(*usize, base_addr + r.r_offset).* += base_addr + @bitCast(usize, r.r_addend);
         }
     }
lib/std/os/linux/tls.zig
@@ -190,53 +190,18 @@ pub fn setThreadPointer(addr: usize) void {
     }
 }
 
-fn initTLS() void {
+fn initTLS(phdrs: []elf.Phdr) void {
     var tls_phdr: ?*elf.Phdr = null;
     var img_base: usize = 0;
 
-    const auxv = std.os.linux.elf_aux_maybe.?;
-    var at_phent: usize = undefined;
-    var at_phnum: usize = undefined;
-    var at_phdr: usize = undefined;
-    var at_hwcap: usize = undefined;
-
-    var i: usize = 0;
-    while (auxv[i].a_type != std.elf.AT_NULL) : (i += 1) {
-        switch (auxv[i].a_type) {
-            elf.AT_PHENT => at_phent = auxv[i].a_un.a_val,
-            elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
-            elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
-            elf.AT_HWCAP => at_hwcap = auxv[i].a_un.a_val,
-            else => continue,
-        }
-    }
-
-    // Sanity check
-    assert(at_phent == @sizeOf(elf.Phdr));
-
-    // Find the TLS section
-    const phdrs = (@intToPtr([*]elf.Phdr, at_phdr))[0..at_phnum];
-
     for (phdrs) |*phdr| {
         switch (phdr.p_type) {
-            elf.PT_PHDR => img_base = at_phdr - phdr.p_vaddr,
+            elf.PT_PHDR => img_base = @ptrToInt(phdrs.ptr) - phdr.p_vaddr,
             elf.PT_TLS => tls_phdr = phdr,
             else => {},
         }
     }
 
-    // ARMv6 targets (and earlier) have no support for TLS in hardware
-    // FIXME: Elide the check for targets >= ARMv7 when the target feature API
-    // becomes less verbose (and more usable).
-    if (comptime native_arch.isARM()) {
-        if (at_hwcap & std.os.linux.HWCAP_TLS == 0) {
-            // FIXME: Make __aeabi_read_tp call the kernel helper kuser_get_tls
-            // For the time being use a simple abort instead of a @panic call to
-            // keep the binary bloat under control.
-            std.os.abort();
-        }
-    }
-
     var tls_align_factor: usize = undefined;
     var tls_data: []const u8 = undefined;
     var tls_data_alloc_size: usize = undefined;
@@ -344,8 +309,8 @@ pub fn prepareTLS(area: []u8) usize {
 // overhead.
 var main_thread_tls_buffer: [0x2100]u8 align(mem.page_size) = undefined;
 
-pub fn initStaticTLS() void {
-    initTLS();
+pub fn initStaticTLS(phdrs: []elf.Phdr) void {
+    initTLS(phdrs);
 
     const tls_area = blk: {
         // Fast path for the common case where the TLS data is really small,
lib/std/os/linux.zig
@@ -34,6 +34,7 @@ pub usingnamespace switch (native_arch) {
 };
 pub usingnamespace @import("bits.zig");
 pub const tls = @import("linux/tls.zig");
+pub const pie = @import("linux/start_pie.zig");
 pub const BPF = @import("linux/bpf.zig");
 pub usingnamespace @import("linux/io_uring.zig");
 
lib/std/start.zig
@@ -10,6 +10,7 @@ const std = @import("std.zig");
 const builtin = @import("builtin");
 const assert = std.debug.assert;
 const uefi = std.os.uefi;
+const elf = std.elf;
 const tlcsprng = @import("crypto/tlcsprng.zig");
 const native_arch = builtin.cpu.arch;
 const native_os = builtin.os.tag;
@@ -281,49 +282,60 @@ fn posixCallMainAndExit() noreturn {
 
     if (native_os == .linux) {
         // Find the beginning of the auxiliary vector
-        const auxv = @ptrCast([*]std.elf.Auxv, @alignCast(@alignOf(usize), envp.ptr + envp_count + 1));
+        const auxv = @ptrCast([*]elf.Auxv, @alignCast(@alignOf(usize), envp.ptr + envp_count + 1));
         std.os.linux.elf_aux_maybe = auxv;
 
-        // Do this as early as possible, the aux vector is needed
+        var at_hwcap: usize = 0;
+        const phdrs = init: {
+            var i: usize = 0;
+            var at_phdr: usize = 0;
+            var at_phnum: usize = 0;
+            while (auxv[i].a_type != elf.AT_NULL) : (i += 1) {
+                switch (auxv[i].a_type) {
+                    elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
+                    elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
+                    elf.AT_HWCAP => at_hwcap = auxv[i].a_un.a_val,
+                    else => continue,
+                }
+            }
+            break :init @intToPtr([*]elf.Phdr, at_phdr)[0..at_phnum];
+        };
+
+        // Apply the initial relocations as early as possible in the startup
+        // process.
         if (builtin.position_independent_executable) {
-            @import("os/linux/start_pie.zig").apply_relocations();
+            std.os.linux.pie.relocate(phdrs);
         }
 
-        // Initialize the TLS area. We do a runtime check here to make sure
-        // this code is truly being statically executed and not inside a dynamic
-        // loader, otherwise this would clobber the thread ID register.
-        const is_dynamic = @import("dynamic_library.zig").get_DYNAMIC() != null;
-        if (!is_dynamic) {
-            std.os.linux.tls.initStaticTLS();
+        // ARMv6 targets (and earlier) have no support for TLS in hardware.
+        // FIXME: Elide the check for targets >= ARMv7 when the target feature API
+        // becomes less verbose (and more usable).
+        if (comptime native_arch.isARM()) {
+            if (at_hwcap & std.os.linux.HWCAP_TLS == 0) {
+                // FIXME: Make __aeabi_read_tp call the kernel helper kuser_get_tls
+                // For the time being use a simple abort instead of a @panic call to
+                // keep the binary bloat under control.
+                std.os.abort();
+            }
         }
 
+        // Initialize the TLS area.
+        std.os.linux.tls.initStaticTLS(phdrs);
+
         // The way Linux executables represent stack size is via the PT_GNU_STACK
         // program header. However the kernel does not recognize it; it always gives 8 MiB.
         // Here we look for the stack size in our program headers and use setrlimit
         // to ask for more stack space.
-        {
-            var i: usize = 0;
-            var at_phdr: usize = undefined;
-            var at_phnum: usize = undefined;
-            while (auxv[i].a_type != std.elf.AT_NULL) : (i += 1) {
-                switch (auxv[i].a_type) {
-                    std.elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
-                    std.elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
-                    else => continue,
-                }
-            }
-            expandStackSize(at_phdr, at_phnum);
-        }
+        expandStackSize(phdrs);
     }
 
     std.os.exit(@call(.{ .modifier = .always_inline }, callMainWithArgs, .{ argc, argv, envp }));
 }
 
-fn expandStackSize(at_phdr: usize, at_phnum: usize) void {
-    const phdrs = (@intToPtr([*]std.elf.Phdr, at_phdr))[0..at_phnum];
+fn expandStackSize(phdrs: []elf.Phdr) void {
     for (phdrs) |*phdr| {
         switch (phdr.p_type) {
-            std.elf.PT_GNU_STACK => {
+            elf.PT_GNU_STACK => {
                 const wanted_stack_size = phdr.p_memsz;
                 assert(wanted_stack_size % std.mem.page_size == 0);
 
@@ -362,9 +374,10 @@ fn main(c_argc: i32, c_argv: [*][*:0]u8, c_envp: [*:null]?[*:0]u8) callconv(.C)
     const envp = @ptrCast([*][*:0]u8, c_envp)[0..env_count];
 
     if (builtin.os.tag == .linux) {
-        const at_phdr = std.c.getauxval(std.elf.AT_PHDR);
-        const at_phnum = std.c.getauxval(std.elf.AT_PHNUM);
-        expandStackSize(at_phdr, at_phnum);
+        const at_phdr = std.c.getauxval(elf.AT_PHDR);
+        const at_phnum = std.c.getauxval(elf.AT_PHNUM);
+        const phdrs = (@intToPtr([*]elf.Phdr, at_phdr))[0..at_phnum];
+        expandStackSize(phdrs);
     }
 
     return @call(.{ .modifier = .always_inline }, callMainWithArgs, .{ @intCast(usize, c_argc), c_argv, envp });
test/stage2/darwin.zig
@@ -14,7 +14,7 @@ pub fn addCases(ctx: *TestContext) !void {
         {
             var case = ctx.exe("hello world with updates", target);
             case.addError("", &[_][]const u8{
-                ":84:9: error: struct 'test_case.test_case' has no member named 'main'",
+                ":85:9: error: struct 'test_case.test_case' has no member named 'main'",
             });
 
             // Incorrect return type
test/stage2/test.zig
@@ -24,7 +24,7 @@ pub fn addCases(ctx: *TestContext) !void {
         var case = ctx.exe("hello world with updates", linux_x64);
 
         case.addError("", &[_][]const u8{
-            ":84:9: error: struct 'test_case.test_case' has no member named 'main'",
+            ":85:9: error: struct 'test_case.test_case' has no member named 'main'",
         });
 
         // Incorrect return type
test/standalone/pie/build.zig
@@ -0,0 +1,12 @@
+const Builder = @import("std").build.Builder;
+
+pub fn build(b: *Builder) void {
+    const main = b.addTest("main.zig"); // Test artifact built from main.zig.
+    main.setBuildMode(b.standardReleaseOptions());
+    main.pie = true; // Request a position-independent executable.
+
+    const test_step = b.step("test", "Test the program");
+    test_step.dependOn(&main.step);
+
+    b.default_step.dependOn(test_step); // Make the default step depend on the "test" step.
+}
test/standalone/pie/main.zig
@@ -0,0 +1,15 @@
+const std = @import("std");
+const elf = std.elf;
+
+threadlocal var foo: u8 = 42;
+
+test "Check ELF header" {
+    // PIE executables are marked as ET_DYN, regular executables as ET_EXEC.
+    const header = @intToPtr(*elf.Ehdr, std.process.getBaseAddress()); // View the base address as an ELF header.
+    try std.testing.expectEqual(elf.ET.DYN, header.e_type);
+}
+
+test "TLS is initialized" {
+    // The startup code must have initialized TLS: the threadlocal `foo` has to read back its initial value.
+    try std.testing.expectEqual(@as(u8, 42), foo);
+}
test/standalone.zig
@@ -31,4 +31,9 @@ pub fn addCases(cases: *tests.StandaloneContext) void {
         cases.addBuildFile("test/stage1/c_abi/build.zig", .{});
     }
     cases.addBuildFile("test/standalone/c_compiler/build.zig", .{ .build_modes = true, .cross_targets = true });
+
+    // Try to build and run a PIE executable.
+    if (std.Target.current.os.tag == .linux) {
+        cases.addBuildFile("test/standalone/pie/build.zig", .{});
+    }
 }