Commit d8ab301aa8
Changed files (5)
std
std/os/linux/tls.zig
@@ -0,0 +1,242 @@
+const std = @import("std");
+const mem = std.mem;
+const posix = std.posix;
+const elf = std.elf;
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+
+// This file implements the two TLS variants [1] used by ELF-based systems.
+//
+// The variant I has the following layout in memory:
+// -------------------------------------------------------
+// | DTV | Zig | DTV | Alignment | TLS |
+// | storage | thread data | pointer | | block |
+// ------------------------^------------------------------
+// `-- The thread pointer register points here
+//
+// In this case we allocate additional space for our control structure that's
+// placed _before_ the DTV pointer together with the DTV.
+//
+// NOTE: Some systems such as power64 or mips use this variant with a twist: the
+// alignment is not present and the tp and DTV addresses are offset by a
+// constant.
+//
+// On the other hand the variant II has the following layout in memory:
+// ---------------------------------------
+// | TLS | TCB | Zig | DTV |
+// | block | | thread data | storage |
+// --------^------------------------------
+// `-- The thread pointer register points here
+//
+// The structure of the TCB is not defined by the ABI so we reserve enough space
+// for a single pointer as some architectures such as i386 and x86_64 need a
+// pointer to the TCB block itself at the address pointed to by the tp.
+//
+// In this case the control structure and DTV are placed one after another right
+// after the TLS block data.
+//
+// At the moment the DTV is very simple since we only support static TLS, all we
+// need is a two word vector to hold the number of entries (1) and the address
+// of the first TLS block.
+//
+// [1] https://www.akkadia.org/drepper/tls.pdf
+
+// Identifies which of the two TLS memory layouts described at the top of this
+// file is in use.
+const TLSVariant = enum {
+ // The TLS block is placed after the thread pointer
+ VariantI,
+ // The TLS block is placed before the thread pointer
+ VariantII,
+};
+
+// TLS layout used by the target architecture. Evaluating this constant for an
+// architecture that is not listed below is a compile error.
+const tls_variant = switch (builtin.arch) {
+    .i386, .x86_64 => TLSVariant.VariantII,
+    .aarch64, .aarch64_be, .arm, .armeb => TLSVariant.VariantI,
+    else => @compileError("undefined tls_variant for this architecture"),
+};
+
+// Number of bytes reserved for the Thread Control Block; architectures that are
+// not listed reserve nothing
+const tls_tcb_size = switch (builtin.arch) {
+    .x86_64, .i386 => @sizeOf(usize),
+    // ARM EABI mandates enough space for two pointers: the first one points to
+    // the DTV while the second one is unspecified but reserved
+    .aarch64, .aarch64_be, .arm, .armeb => @sizeOf(usize) * 2,
+    else => 0,
+};
+
+// When true the space reserved for the TCB is rounded up to the p_align of the
+// TLS segment
+const tls_tcb_align_size = switch (builtin.arch) {
+    .aarch64, .aarch64_be, .arm, .armeb => true,
+    else => false,
+};
+
+// Reject inconsistent architecture-specific parameters at compile time
+comptime {
+    // tls_variant is only inspected when the TCB size has to be aligned
+    if (tls_tcb_align_size) {
+        if (tls_variant != TLSVariant.VariantI) {
+            @compileError("tls_tcb_align_size is only meaningful for variant I TLS");
+        }
+    }
+}
+
+// Some architectures add some offset to the tp and dtv addresses in order to
+// make the generated code more efficient
+
+// Constant added by copyTLS to the TCB address to obtain the value for the
+// thread pointer register. No architecture overrides the default of zero yet.
+const tls_tp_offset = switch (builtin.arch) {
+ else => 0,
+};
+
+// Constant added by copyTLS to the TLS block address stored in the DTV. No
+// architecture overrides the default of zero yet.
+const tls_dtv_offset = switch (builtin.arch) {
+ else => 0,
+};
+
+// Per-thread storage for Zig's use
+// The struct has no fields at the moment; initTLS reserves @sizeOf(CustomData)
+// bytes for it ("Zig thread data" in the layout diagrams at the top of this
+// file).
+const CustomData = packed struct {
+};
+
+// Dynamic Thread Vector
+// Holds a single entry since only static TLS is supported.
+const DTV = packed struct {
+ // Number of TLS blocks in the vector, copyTLS always stores 1
+ entries: usize,
+ // Address of the TLS block, stored by copyTLS with tls_dtv_offset added
+ tls_block: [1]usize,
+};
+
+// Holds all the information about the process TLS image
+// The *_offset fields are byte offsets from the start of a TLS area of
+// alloc_size bytes, as computed by initTLS.
+const TLSImage = struct {
+ // Initialization image of the TLS segment (p_filesz bytes), copied into every
+ // new TLS area by copyTLS
+ data_src: []u8,
+ // Total number of bytes needed for one TLS area, control structures included
+ alloc_size: usize,
+ // Offset of the Thread Control Block
+ tcb_offset: usize,
+ // Offset of the DTV
+ dtv_offset: usize,
+ // Offset of the TLS block data
+ data_offset: usize,
+};
+
+// Description of the process TLS image. Filled in by initTLS, remains null when
+// no PT_TLS program header is found.
+pub var tls_image: ?TLSImage = null;
+
+// Installs `addr` as the thread pointer of the calling thread.
+// Only x86_64 and aarch64 are implemented, any other target is rejected at
+// compile time.
+pub fn setThreadPointer(addr: usize) void {
+    const linux = std.os.linux;
+
+    switch (builtin.arch) {
+        .aarch64 => {
+            // Write the value straight into the tpidr_el0 register
+            asm volatile (
+                \\ msr tpidr_el0, %[addr]
+                : : [addr] "r" (addr)
+            );
+        },
+        .x86_64 => {
+            // Ask the kernel to update the FS register through arch_prctl
+            const ARCH_SET_FS = 0x1002;
+            const status = linux.syscall2(linux.SYS_arch_prctl, ARCH_SET_FS, addr);
+            // arch_prctl is documented to never fail
+            assert(status == 0);
+        },
+        else => @compileError("Unsupported architecture"),
+    }
+}
+
+// Locates the PT_TLS program header of the running image through the auxiliary
+// vector and, when there is one, computes the layout of a per-thread TLS area
+// and stores it in `tls_image`. `tls_image` is not modified when the image has
+// no PT_TLS program header.
+//
+// Panics if `std.os.linux_elf_aux_maybe` has not been set or if the auxiliary
+// vector carries no AT_PHDR entry.
+pub fn initTLS() void {
+    var tls_phdr: ?*elf.Phdr = null;
+    var img_base: usize = 0;
+
+    if (std.os.linux_elf_aux_maybe) |auxv| {
+        // Start from zero rather than `undefined`: if one of these entries is
+        // missing from the vector the checks below fail in a predictable way
+        // instead of reading uninitialized values
+        var at_phent: usize = 0;
+        var at_phnum: usize = 0;
+        var at_phdr: usize = 0;
+
+        var i: usize = 0;
+        while (auxv[i].a_type != std.elf.AT_NULL) : (i += 1) {
+            switch (auxv[i].a_type) {
+                elf.AT_PHENT => at_phent = auxv[i].a_un.a_val,
+                elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
+                elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
+                else => continue,
+            }
+        }
+
+        // Sanity check, this also catches a missing AT_PHENT entry
+        assert(at_phent == @sizeOf(elf.Phdr));
+
+        // A null address cannot be turned into a program header pointer
+        if (at_phdr == 0) @panic("no AT_PHDR entry in the auxv vector!");
+
+        // Search the TLS segment
+        const phdrs = (@intToPtr([*]elf.Phdr, at_phdr))[0..at_phnum];
+
+        for (phdrs) |*phdr| {
+            switch (phdr.p_type) {
+                // The difference between where the program headers are and
+                // where they were linked to be gives the image load offset
+                elf.PT_PHDR => img_base = at_phdr - phdr.p_vaddr,
+                elf.PT_TLS => tls_phdr = phdr,
+                else => continue,
+            }
+        }
+    } else {
+        @panic("no auxv vector available!");
+    }
+
+    if (tls_phdr) |phdr| {
+        // Offsets into the allocated TLS area
+        var tcb_offset: usize = undefined;
+        var dtv_offset: usize = undefined;
+        var data_offset: usize = undefined;
+        // Computed for completeness, not stored in TLSImage at the moment
+        var thread_data_offset: usize = undefined;
+        // Compute the total size of the ABI-specific data plus our own control
+        // structures
+        const alloc_size = switch (tls_variant) {
+            // DTV, Zig thread data, padding, TCB and finally the TLS block
+            .VariantI => blk: {
+                var l: usize = 0;
+                dtv_offset = l;
+                l += @sizeOf(DTV);
+                thread_data_offset = l;
+                l += @sizeOf(CustomData);
+                l = mem.alignForward(l, phdr.p_align);
+                tcb_offset = l;
+                if (tls_tcb_align_size) {
+                    l += mem.alignForward(tls_tcb_size, phdr.p_align);
+                } else {
+                    l += tls_tcb_size;
+                }
+                data_offset = l;
+                l += phdr.p_memsz;
+                break :blk l;
+            },
+            // TLS block, padding, TCB, Zig thread data and finally the DTV
+            .VariantII => blk: {
+                var l: usize = 0;
+                data_offset = l;
+                l += phdr.p_memsz;
+                l = mem.alignForward(l, phdr.p_align);
+                tcb_offset = l;
+                l += tls_tcb_size;
+                thread_data_offset = l;
+                l += @sizeOf(CustomData);
+                dtv_offset = l;
+                l += @sizeOf(DTV);
+                break :blk l;
+            }
+        };
+
+        tls_image = TLSImage{
+            // Only p_filesz bytes are backed by the file, copyTLS zeroes the rest
+            .data_src = @intToPtr([*]u8, phdr.p_vaddr + img_base)[0..phdr.p_filesz],
+            .alloc_size = alloc_size,
+            .tcb_offset = tcb_offset,
+            .dtv_offset = dtv_offset,
+            .data_offset = data_offset,
+        };
+    }
+}
+
+// Initializes a TLS area of `tls_image.alloc_size` bytes starting at `addr`:
+// the area is zeroed, the DTV and the TCB are filled in and the TLS
+// initialization image is copied into place.
+// Returns the value to load into the thread pointer register.
+// Panics if `tls_image` is null.
+pub fn copyTLS(addr: usize) usize {
+    const image = tls_image orelse @panic("copyTLS called with no TLS section!");
+
+    const dtv_addr = addr + image.dtv_offset;
+    const tcb_addr = addr + image.tcb_offset;
+    const block_addr = addr + image.data_offset;
+
+    // Start from a clean slate, this also zeroes the part of the TLS block that
+    // is not covered by the initialization image
+    @memset(@intToPtr([*]u8, addr), 0, image.alloc_size);
+
+    // The DTV has a single entry describing the only TLS block
+    const dtv = @intToPtr(*DTV, dtv_addr);
+    dtv.entries = 1;
+    dtv.tls_block[0] = block_addr + tls_dtv_offset;
+
+    // The first word of the TCB points to the DTV for variant I and to the TCB
+    // itself for variant II
+    @intToPtr(*usize, tcb_addr).* = switch (tls_variant) {
+        .VariantI => dtv_addr,
+        .VariantII => tcb_addr,
+    };
+
+    // Bring in the initial values of the thread-local variables
+    @memcpy(@intToPtr([*]u8, block_addr), image.data_src.ptr, image.data_src.len);
+
+    // The tp register value is the TCB address plus the per-arch correction
+    return tcb_addr + tls_tp_offset;
+}
+
+// Statically allocated storage handed out by allocateTLS
+var main_thread_tls_buffer: [64]u8 align(32) = undefined;
+
+// Returns the address of a memory area able to hold `size` bytes.
+// Every call returns the address of the same static buffer, hence `size` must
+// not exceed its length.
+pub fn allocateTLS(size: usize) usize {
+    // A request that uses the buffer up to its last byte still fits
+    assert(size <= main_thread_tls_buffer.len);
+    return @ptrToInt(&main_thread_tls_buffer);
+}
std/os/linux.zig
@@ -3,6 +3,7 @@ const assert = std.debug.assert;
const builtin = @import("builtin");
const maxInt = std.math.maxInt;
const elf = std.elf;
+pub const tls = @import("linux/tls.zig");
const vdso = @import("linux/vdso.zig");
const dl = @import("../dynamic_library.zig");
pub use switch (builtin.arch) {
std/special/bootstrap.zig
@@ -67,24 +67,19 @@ fn posixCallMainAndExit() noreturn {
var envp_count: usize = 0;
while (envp_optional[envp_count]) |_| : (envp_count += 1) {}
const envp = @ptrCast([*][*]u8, envp_optional)[0..envp_count];
+
if (builtin.os == builtin.Os.linux) {
- // Scan auxiliary vector.
const auxv = @ptrCast([*]std.elf.Auxv, envp.ptr + envp_count + 1);
std.os.linux_elf_aux_maybe = auxv;
- var i: usize = 0;
- var at_phdr: usize = 0;
- var at_phnum: usize = 0;
- var at_phent: usize = 0;
- while (auxv[i].a_un.a_val != 0) : (i += 1) {
- switch (auxv[i].a_type) {
- std.elf.AT_PAGESZ => assert(auxv[i].a_un.a_val == std.os.page_size),
- std.elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
- std.elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
- std.elf.AT_PHENT => at_phent = auxv[i].a_un.a_val,
- else => {},
+
+ std.os.linux.tls.initTLS();
+ if (!builtin.single_threaded) {
+ if (std.os.linux.tls.tls_image) |tls_img| {
+ const tls_addr = std.os.linux.tls.allocateTLS(tls_img.alloc_size);
+ const tp = std.os.linux.tls.copyTLS(tls_addr);
+ std.os.linux.tls.setThreadPointer(tp);
}
}
- if (!builtin.single_threaded) linuxInitializeThreadLocalStorage(at_phdr, at_phnum, at_phent);
}
std.os.posix.exit(callMainWithArgs(argc, argv, envp));
@@ -140,50 +135,3 @@ inline fn callMain() u8 {
const main_thread_tls_align = 32;
var main_thread_tls_bytes: [64]u8 align(main_thread_tls_align) = [1]u8{0} ** 64;
-
-fn linuxInitializeThreadLocalStorage(at_phdr: usize, at_phnum: usize, at_phent: usize) void {
- var phdr_addr = at_phdr;
- var n = at_phnum;
- var base: usize = 0;
- while (n != 0) : ({
- n -= 1;
- phdr_addr += at_phent;
- }) {
- const phdr = @intToPtr(*std.elf.Phdr, phdr_addr);
- // TODO look for PT_DYNAMIC when we have https://github.com/ziglang/zig/issues/1917
- switch (phdr.p_type) {
- std.elf.PT_PHDR => base = at_phdr - phdr.p_vaddr,
- std.elf.PT_TLS => std.os.linux_tls_phdr = phdr,
- else => continue,
- }
- }
- const tls_phdr = std.os.linux_tls_phdr orelse return;
- std.os.linux_tls_img_src = @intToPtr([*]const u8, base + tls_phdr.p_vaddr);
- const end_addr = @ptrToInt(&main_thread_tls_bytes) + tls_phdr.p_memsz;
- const max_end_addr = @ptrToInt(&main_thread_tls_bytes) + main_thread_tls_bytes.len;
- assert(max_end_addr >= end_addr + @sizeOf(usize)); // not enough preallocated Thread Local Storage
- assert(main_thread_tls_align >= tls_phdr.p_align); // preallocated Thread Local Storage not aligned enough
- @memcpy(&main_thread_tls_bytes, std.os.linux_tls_img_src, tls_phdr.p_filesz);
- const end_ptr = @intToPtr(*usize, end_addr);
- end_ptr.* = end_addr;
- linuxSetThreadArea(end_addr);
-}
-
-fn linuxSetThreadArea(addr: usize) void {
- switch (builtin.arch) {
- builtin.Arch.x86_64 => {
- const ARCH_SET_FS = 0x1002;
- const rc = std.os.linux.syscall2(std.os.linux.SYS_arch_prctl, ARCH_SET_FS, addr);
- // acrh_prctl is documented to never fail
- assert(rc == 0);
- },
- builtin.Arch.aarch64 => {
- asm volatile (
- \\ msr tpidr_el0,x0
- \\ mov w0,#0
- \\ ret
- );
- },
- else => @compileError("Unsupported architecture"),
- }
-}
std/os.zig
@@ -3126,9 +3126,6 @@ pub const SpawnThreadError = error{
Unexpected,
};
-pub var linux_tls_phdr: ?*std.elf.Phdr = null;
-pub var linux_tls_img_src: [*]const u8 = undefined; // defined if linux_tls_phdr is
-
/// caller must call wait on the returned thread
/// fn startFn(@typeOf(context)) T
/// where T is u8, noreturn, void, or !void
@@ -3238,12 +3235,10 @@ pub fn spawnThread(context: var, comptime startFn: var) SpawnThreadError!*Thread
}
// Finally, the Thread Local Storage, if any.
if (!Thread.use_pthreads) {
- if (linux_tls_phdr) |tls_phdr| {
- l = mem.alignForward(l, tls_phdr.p_align);
+ if (linux.tls.tls_image) |tls_img| {
+ l = mem.alignForward(l, @alignOf(usize));
tls_start_offset = l;
- l += tls_phdr.p_memsz;
- // the fs register address
- l += @sizeOf(usize);
+ l += tls_img.alloc_size;
}
}
break :blk l;
@@ -3284,10 +3279,8 @@ pub fn spawnThread(context: var, comptime startFn: var) SpawnThreadError!*Thread
posix.CLONE_THREAD | posix.CLONE_SYSVSEM | posix.CLONE_PARENT_SETTID | posix.CLONE_CHILD_CLEARTID |
posix.CLONE_DETACHED;
var newtls: usize = undefined;
- if (linux_tls_phdr) |tls_phdr| {
- @memcpy(@intToPtr([*]u8, mmap_addr + tls_start_offset), linux_tls_img_src, tls_phdr.p_filesz);
- newtls = mmap_addr + mmap_len - @sizeOf(usize);
- @intToPtr(*usize, newtls).* = newtls;
+ if (linux.tls.tls_image) |tls_img| {
+ newtls = linux.tls.copyTLS(mmap_addr + tls_start_offset);
flags |= posix.CLONE_SETTLS;
}
const rc = posix.clone(MainFuncs.linuxThreadMain, mmap_addr + stack_end_offset, flags, arg, &thread_ptr.data.handle, newtls, &thread_ptr.data.handle);
CMakeLists.txt
@@ -611,6 +611,7 @@ set(ZIG_STD_FILES
"os/linux.zig"
"os/linux/arm64.zig"
"os/linux/errno.zig"
+ "os/linux/tls.zig"
"os/linux/vdso.zig"
"os/linux/x86_64.zig"
"os/netbsd.zig"