Commit ab48934e9c

Ben Noordhuis <info@bnoordhuis.nl>
2018-02-19 23:06:54
add support for stack traces on macosx
Add basic address->symbol resolution support. Uses symtab data from the MachO image, not external dSYM data; that's left as a future exercise. The net effect is that we can now map addresses to function names but not much more. File names and line number data will have to wait until a future pull request. Partially fixes #434.
1 parent bde15cf
std/debug/index.zig
@@ -5,6 +5,7 @@ const io = std.io;
 const os = std.os;
 const elf = std.elf;
 const DW = std.dwarf;
+const macho = std.macho;
 const ArrayList = std.ArrayList;
 const builtin = @import("builtin");
 
@@ -180,43 +181,57 @@ pub fn writeCurrentStackTrace(out_stream: var, allocator: &mem.Allocator,
 }
 
 fn printSourceAtAddress(debug_info: &ElfStackTrace, out_stream: var, address: usize) !void {
-    if (builtin.os == builtin.Os.windows) {
-        return error.UnsupportedDebugInfo;
-    }
     // TODO we really should be able to convert @sizeOf(usize) * 2 to a string literal
     // at compile time. I'll call it issue #313
     const ptr_hex = if (@sizeOf(usize) == 4) "0x{x8}" else "0x{x16}";
 
-    const compile_unit = findCompileUnit(debug_info, address) catch {
-        try out_stream.print("???:?:?: " ++ DIM ++ ptr_hex ++ " in ??? (???)" ++ RESET ++ "\n    ???\n\n",
-            address);
-        return;
-    };
-    const compile_unit_name = try compile_unit.die.getAttrString(debug_info, DW.AT_name);
-    if (getLineNumberInfo(debug_info, compile_unit, address - 1)) |line_info| {
-        defer line_info.deinit();
-        try out_stream.print(WHITE ++ "{}:{}:{}" ++ RESET ++ ": " ++
-            DIM ++ ptr_hex ++ " in ??? ({})" ++ RESET ++ "\n",
-            line_info.file_name, line_info.line, line_info.column,
-            address, compile_unit_name);
-        if (printLineFromFile(debug_info.allocator(), out_stream, line_info)) {
-            if (line_info.column == 0) {
-                try out_stream.write("\n");
-            } else {
-                {var col_i: usize = 1; while (col_i < line_info.column) : (col_i += 1) {
-                    try out_stream.writeByte(' ');
-                }}
-                try out_stream.write(GREEN ++ "^" ++ RESET ++ "\n");
+    switch (builtin.os) {
+        builtin.Os.windows => return error.UnsupportedDebugInfo,
+        builtin.Os.macosx => {
+            // TODO(bnoordhuis) It's theoretically possible to obtain the
+            // compilation unit from the symtab but it's not that useful
+            // in practice because the compiler dumps everything in a single
+            // object file.  Future improvement: use external dSYM data when
+            // available.
+            const unknown = macho.Symbol { .name = "???", .address = address };
+            const symbol = debug_info.symbol_table.search(address) ?? &unknown;
+            try out_stream.print(WHITE ++ "{}" ++ RESET ++ ": " ++
+                DIM ++ ptr_hex ++ " in ??? (???)" ++ RESET ++ "\n",
+                symbol.name, address);
+        },
+        else => {
+            const compile_unit = findCompileUnit(debug_info, address) catch {
+                try out_stream.print("???:?:?: " ++ DIM ++ ptr_hex ++ " in ??? (???)" ++ RESET ++ "\n    ???\n\n",
+                    address);
+                return;
+            };
+            const compile_unit_name = try compile_unit.die.getAttrString(debug_info, DW.AT_name);
+            if (getLineNumberInfo(debug_info, compile_unit, address - 1)) |line_info| {
+                defer line_info.deinit();
+                try out_stream.print(WHITE ++ "{}:{}:{}" ++ RESET ++ ": " ++
+                    DIM ++ ptr_hex ++ " in ??? ({})" ++ RESET ++ "\n",
+                    line_info.file_name, line_info.line, line_info.column,
+                    address, compile_unit_name);
+                if (printLineFromFile(debug_info.allocator(), out_stream, line_info)) {
+                    if (line_info.column == 0) {
+                        try out_stream.write("\n");
+                    } else {
+                        {var col_i: usize = 1; while (col_i < line_info.column) : (col_i += 1) {
+                            try out_stream.writeByte(' ');
+                        }}
+                        try out_stream.write(GREEN ++ "^" ++ RESET ++ "\n");
+                    }
+                } else |err| switch (err) {
+                    error.EndOfFile => {},
+                    else => return err,
+                }
+            } else |err| switch (err) {
+                error.MissingDebugInfo, error.InvalidDebugInfo => {
+                    try out_stream.print(ptr_hex ++ " in ??? ({})\n", address, compile_unit_name);
+                },
+                else => return err,
             }
-        } else |err| switch (err) {
-            error.EndOfFile => {},
-            else => return err,
-        }
-    } else |err| switch (err) {
-        error.MissingDebugInfo, error.InvalidDebugInfo => {
-            try out_stream.print(ptr_hex ++ " in ??? ({})\n", address, compile_unit_name);
         },
-        else => return err,
     }
 }
 
@@ -249,12 +264,22 @@ pub fn openSelfDebugInfo(allocator: &mem.Allocator) !&ElfStackTrace {
             try scanAllCompileUnits(st);
             return st;
         },
+        builtin.ObjectFormat.macho => {
+            var exe_file = try os.openSelfExe();
+            defer exe_file.close();
+
+            const st = try allocator.create(ElfStackTrace);
+            errdefer allocator.destroy(st);
+
+            *st = ElfStackTrace {
+                .symbol_table = try macho.loadSymbols(allocator, &io.FileInStream.init(&exe_file)),
+            };
+
+            return st;
+        },
         builtin.ObjectFormat.coff => {
             return error.TodoSupportCoffDebugInfo;
         },
-        builtin.ObjectFormat.macho => {
-            return error.TodoSupportMachoDebugInfo;
-        },
         builtin.ObjectFormat.wasm => {
             return error.TodoSupportCOFFDebugInfo;
         },
@@ -297,31 +322,40 @@ fn printLineFromFile(allocator: &mem.Allocator, out_stream: var, line_info: &con
     }
 }
 
-pub const ElfStackTrace = struct {
-    self_exe_file: os.File,
-    elf: elf.Elf,
-    debug_info: &elf.SectionHeader,
-    debug_abbrev: &elf.SectionHeader,
-    debug_str: &elf.SectionHeader,
-    debug_line: &elf.SectionHeader,
-    debug_ranges: ?&elf.SectionHeader,
-    abbrev_table_list: ArrayList(AbbrevTableHeader),
-    compile_unit_list: ArrayList(CompileUnit),
-
-    pub fn allocator(self: &const ElfStackTrace) &mem.Allocator {
-        return self.abbrev_table_list.allocator;
-    }
+pub const ElfStackTrace = switch (builtin.os) {
+    builtin.Os.macosx => struct {
+        symbol_table: macho.SymbolTable,
 
-    pub fn readString(self: &ElfStackTrace) ![]u8 {
-        var in_file_stream = io.FileInStream.init(&self.self_exe_file);
-        const in_stream = &in_file_stream.stream;
-        return readStringRaw(self.allocator(), in_stream);
-    }
+        pub fn close(self: &ElfStackTrace) void {
+            self.symbol_table.deinit();
+        }
+    },
+    else => struct {
+        self_exe_file: os.File,
+        elf: elf.Elf,
+        debug_info: &elf.SectionHeader,
+        debug_abbrev: &elf.SectionHeader,
+        debug_str: &elf.SectionHeader,
+        debug_line: &elf.SectionHeader,
+        debug_ranges: ?&elf.SectionHeader,
+        abbrev_table_list: ArrayList(AbbrevTableHeader),
+        compile_unit_list: ArrayList(CompileUnit),
+
+        pub fn allocator(self: &const ElfStackTrace) &mem.Allocator {
+            return self.abbrev_table_list.allocator;
+        }
 
-    pub fn close(self: &ElfStackTrace) void {
-        self.self_exe_file.close();
-        self.elf.close();
-    }
+        pub fn readString(self: &ElfStackTrace) ![]u8 {
+            var in_file_stream = io.FileInStream.init(&self.self_exe_file);
+            const in_stream = &in_file_stream.stream;
+            return readStringRaw(self.allocator(), in_stream);
+        }
+
+        pub fn close(self: &ElfStackTrace) void {
+            self.self_exe_file.close();
+            self.elf.close();
+        }
+    },
 };
 
 const PcRange = struct {
std/index.zig
@@ -21,6 +21,7 @@ pub const endian = @import("endian.zig");
 pub const fmt = @import("fmt/index.zig");
 pub const heap = @import("heap.zig");
 pub const io = @import("io.zig");
+pub const macho = @import("macho.zig");
 pub const math = @import("math/index.zig");
 pub const mem = @import("mem.zig");
 pub const net = @import("net.zig");
@@ -51,6 +52,7 @@ test "std" {
     _ = @import("endian.zig");
     _ = @import("fmt/index.zig");
     _ = @import("io.zig");
+    _ = @import("macho.zig");
     _ = @import("math/index.zig");
     _ = @import("mem.zig");
     _ = @import("heap.zig");
std/macho.zig
@@ -0,0 +1,177 @@
+const builtin = @import("builtin");
+const std = @import("index.zig");
+const io = std.io;
+const mem = std.mem;
+
+const MH_MAGIC_64 = 0xFEEDFACF; // Expected MachHeader64.magic; loadSymbols() rejects any other value.
+const MH_PIE = 0x200000; // Bit tested in MachHeader64.flags to decide whether a PIE slide is computed.
+const LC_SYMTAB = 2; // LoadCommand.cmd value of the command that is followed by a SymtabCommand.
+
+const MachHeader64 = packed struct { // Header that loadSymbols() reads from offset 0 of the executable.
+    magic: u32, // Must equal MH_MAGIC_64, otherwise loading fails.
+    cputype: u32,
+    cpusubtype: u32,
+    filetype: u32,
+    ncmds: u32, // Number of load commands walked while searching for LC_SYMTAB.
+    sizeofcmds: u32,
+    flags: u32, // Checked for the MH_PIE bit.
+    reserved: u32,
+};
+
+const LoadCommand = packed struct { // Prefix read at the start of every load command.
+    cmd: u32, // Command type; loadSymbols() looks for LC_SYMTAB.
+    cmdsize: u32, // Added to the file position to reach the next load command.
+};
+
+const SymtabCommand = packed struct { // Read right after the LoadCommand prefix of an LC_SYMTAB command.
+    symoff: u32, // File offset loadSymbols() seeks to before reading the Nlist64 array.
+    nsyms: u32, // Number of Nlist64 entries to read.
+    stroff: u32, // File offset loadSymbols() seeks to before reading the string table.
+    strsize: u32, // Size of the string table in bytes.
+};
+
+const Nlist64 = packed struct { // One raw symbol table entry as read from the file.
+    n_strx: u32, // Start index of the NUL-terminated name inside the string table.
+    n_type: u8,
+    n_sect: u8,
+    n_desc: u16, // isSymbol() only accepts entries where this is 0.
+    n_value: u64, // Used as the symbol address; isSymbol() skips entries where this is 0.
+};
+
+// A named address.  loadSymbols() produces one of these for every accepted
+// symbol table entry.
+pub const Symbol = struct {
+    // Symbol name; loadSymbols() stores slices of the string table here.
+    name: []const u8,
+    // Address the symbol starts at.
+    address: u64,
+
+    // Ordering predicate: true when `a` starts at a lower address than `b`.
+    fn addressLessThan(a: &const Symbol, b: &const Symbol) bool {
+        return b.address > a.address;
+    }
+};
+
+// Symbols sorted by ascending address together with the string table their
+// names point into.  The last entry of `symbols` is a sentinel whose address
+// marks the end of the last real symbol; see loadSymbols().
+pub const SymbolTable = struct {
+    allocator: &mem.Allocator,
+    symbols: []const Symbol,
+    strings: []const u8,
+
+    // Frees both slices through `allocator` and resets them to empty slices.
+    //
+    // Doubles as an eyecatcher to calculate the PIE slide, see loadSymbols().
+    // Ideally we'd use _mh_execute_header because it's always at 0x100000000
+    // in the image but as it's located in a different section than executable
+    // code, its displacement is different.  loadSymbols() looks this function
+    // up by its symbol name, so do not rename it without updating that lookup.
+    pub fn deinit(self: &SymbolTable) void {
+        self.allocator.free(self.symbols);
+        self.symbols = []const Symbol {};
+
+        self.allocator.free(self.strings);
+        self.strings = []const u8 {};
+    }
+
+    // Binary search for the symbol whose range [symbol.address, next.address)
+    // contains `address`.  Returns null when no range matches.
+    pub fn search(self: &const SymbolTable, address: usize) ?&const Symbol {
+        // Without at least one symbol plus the sentinel there is nothing to
+        // match.  This also keeps `len - 1` below from underflowing on an
+        // empty table, e.g. after deinit().
+        if (self.symbols.len < 2) return null;
+
+        var min: usize = 0;
+        var max: usize = self.symbols.len - 1;  // Exclude sentinel.
+        while (min < max) {
+            // mid < max always holds here, so mid + 1 is a valid index.
+            const mid = min + (max - min) / 2;
+            const curr = &self.symbols[mid];
+            const next = &self.symbols[mid + 1];
+            if (address >= next.address) {
+                min = mid + 1;
+            } else if (address < curr.address) {
+                max = mid;
+            } else {
+                return curr;
+            }
+        }
+        return null;
+    }
+};
+
+// Reads the symbol table of the 64-bit Mach-O image behind `in` and returns
+// it sorted by ascending address, with a sentinel entry appended after the
+// last symbol.  The caller owns the result and releases it with deinit().
+//
+// Returns error.MissingDebugInfo when the magic number does not match, when
+// there is no LC_SYMTAB load command, when no entry passes isSymbol(), or
+// when a symbol name is not NUL-terminated inside the string table.  Errors
+// from seeking, reading and allocating are passed through.
+pub fn loadSymbols(allocator: &mem.Allocator, in: &io.FileInStream) !SymbolTable {
+    var file = in.file;
+    try file.seekTo(0);
+
+    var hdr: MachHeader64 = undefined;
+    try readNoEof(in, &hdr);
+    if (hdr.magic != MH_MAGIC_64) return error.MissingDebugInfo;
+    const is_pie = MH_PIE == (hdr.flags & MH_PIE);
+
+    // Walk the load commands that follow the header until LC_SYMTAB turns up.
+    var pos: usize = @sizeOf(@typeOf(hdr));
+    var ncmd: u32 = hdr.ncmds;
+    while (ncmd != 0) : (ncmd -= 1) {
+        try file.seekTo(pos);
+        var lc: LoadCommand = undefined;
+        try readNoEof(in, &lc);
+        if (lc.cmd == LC_SYMTAB) break;
+        pos += lc.cmdsize;
+    } else {
+        return error.MissingDebugInfo;
+    }
+
+    // The stream is now positioned just past the LoadCommand prefix.
+    var cmd: SymtabCommand = undefined;
+    try readNoEof(in, &cmd);
+
+    // An empty symbol table cannot yield any symbol; bail out before
+    // allocating and reading zero-length buffers.
+    if (cmd.nsyms == 0) return error.MissingDebugInfo;
+
+    try file.seekTo(cmd.symoff);
+    var syms = try allocator.alloc(Nlist64, cmd.nsyms);
+    defer allocator.free(syms);
+    try readNoEof(in, syms);
+
+    try file.seekTo(cmd.stroff);
+    var strings = try allocator.alloc(u8, cmd.strsize);
+    errdefer allocator.free(strings);
+    try in.stream.readNoEof(strings);
+
+    // First pass: count the entries worth keeping so the result can be
+    // allocated in one go.
+    var nsyms: usize = 0;
+    for (syms) |sym| if (isSymbol(sym)) nsyms += 1;
+    if (nsyms == 0) return error.MissingDebugInfo;
+
+    var symbols = try allocator.alloc(Symbol, nsyms + 1);  // Room for sentinel.
+    errdefer allocator.free(symbols);
+
+    var pie_slide: usize = 0;
+    var nsym: usize = 0;
+    for (syms) |sym| {
+        if (!isSymbol(sym)) continue;
+        const start = sym.n_strx;
+        // A name that starts past the end of the string table or lacks its
+        // terminating NUL means the image is malformed; report it instead of
+        // crashing the process that is trying to print a stack trace.
+        const end = mem.indexOfScalarPos(u8, strings, start, 0) ?? return error.MissingDebugInfo;
+        const name = strings[start..end];
+        const address = sym.n_value;
+        symbols[nsym] = Symbol { .name = name, .address = address };
+        nsym += 1;
+        // The difference between where SymbolTable.deinit lives right now and
+        // where the image says it is gives the slide applied at load time.
+        if (is_pie and mem.eql(u8, name, "_SymbolTable_deinit")) {
+            pie_slide = @ptrToInt(SymbolTable.deinit) - address;
+        }
+    }
+
+    // Effectively a no-op, lld emits symbols in ascending order.
+    std.sort.insertionSort(Symbol, symbols[0..nsyms], Symbol.addressLessThan);
+
+    // Insert the sentinel.  Since we don't know where the last function ends,
+    // we arbitrarily limit it to the start address + 4 KB.
+    const top = symbols[nsyms - 1].address + 4096;
+    symbols[nsyms] = Symbol { .name = "", .address = top };
+
+    // Shift every address, sentinel included, to its run-time location.
+    if (pie_slide != 0) {
+        for (symbols) |*symbol| symbol.address += pie_slide;
+    }
+
+    return SymbolTable {
+        .allocator = allocator,
+        .symbols = symbols,
+        .strings = strings,
+    };
+}
+
+// Fills `sink` with raw bytes read from `in`.  `sink` is either a []Nlist64
+// slice, in which case every element is filled, or a pointer to a single
+// value, in which case @sizeOf that value is read.  Any error returned by
+// in.stream.readNoEof() is passed through.
+fn readNoEof(in: &io.FileInStream, sink: var) !void {
+    if (@typeOf(sink) == []Nlist64) {
+        // &sink[0] is out of bounds for an empty slice, and there is nothing
+        // to read into it anyway.
+        if (sink.len == 0) return;
+        const T = @typeOf(sink[0]);
+        const len = @sizeOf(T) * sink.len;
+        const bytes = @ptrCast(&u8, &sink[0]);
+        return in.stream.readNoEof(bytes[0..len]);
+    } else {
+        const T = @typeOf(*sink);
+        const len = @sizeOf(T);
+        const bytes = @ptrCast(&u8, sink);
+        return in.stream.readNoEof(bytes[0..len]);
+    }
+}
+
+// Filter used by loadSymbols(): an entry is kept only when its n_value is
+// non-zero and its n_desc is zero.
+fn isSymbol(sym: &const Nlist64) bool {
+    if (sym.n_value == 0) return false;
+    return sym.n_desc == 0;
+}
CMakeLists.txt
@@ -386,6 +386,7 @@ set(ZIG_STD_FILES
     "index.zig"
     "io.zig"
     "linked_list.zig"
+    "macho.zig"
     "math/acos.zig"
     "math/acosh.zig"
     "math/asin.zig"