Commit 94354aa6aa

kcbanner <kcbanner@gmail.com>
2023-07-08 22:39:38
macho: add unwindFrame which can unwind stack frames using the __unwind_info section; dwarf: fixup missing error
1 parent d226b74
Changed files (6)
lib
test
standalone
dwarf_unwinding
lib/std/dwarf/abi.zig
@@ -45,15 +45,6 @@ pub fn spRegNum(reg_context: RegisterContext) u8 {
     };
 }
 
-fn RegBytesReturnType(comptime ContextPtrType: type) type {
-    const info = @typeInfo(ContextPtrType);
-    if (info != .Pointer or info.Pointer.child != std.debug.ThreadContext) {
-        @compileError("Expected a pointer to std.debug.ThreadContext, got " ++ @typeName(@TypeOf(ContextPtrType)));
-    }
-
-    return if (info.Pointer.is_const) return []const u8 else []u8;
-}
-
 pub const RegisterContext = struct {
     eh_frame: bool,
     is_macho: bool,
@@ -63,9 +54,47 @@ pub const AbiError = error{
     InvalidRegister,
     UnimplementedArch,
     UnimplementedOs,
+    RegisterContextRequired,
     ThreadContextNotSupported,
 };
 
+/// Computes the return type of `regValueNative`: a single-item pointer to `T`.
+///
+/// Constness, volatility, allowzero-ness, alignment and address space are copied from the
+/// byte-slice type that `RegBytesReturnType(ContextPtrType)` produces, so the resulting
+/// pointer is const exactly when that slice type is const.
+fn RegValueReturnType(comptime ContextPtrType: type, comptime T: type) type {
+    const reg_bytes_type = comptime RegBytesReturnType(ContextPtrType);
+    const info = @typeInfo(reg_bytes_type).Pointer;
+    return @Type(.{
+        .Pointer = .{
+            // Single-item pointer to `T` instead of a slice of bytes.
+            .size = .One,
+            .is_const = info.is_const,
+            .is_volatile = info.is_volatile,
+            .is_allowzero = info.is_allowzero,
+            .alignment = info.alignment,
+            .address_space = info.address_space,
+            .child = T,
+            .sentinel = null,
+        },
+    });
+}
+
+/// Returns a pointer to the storage of register `reg_number` inside `thread_context_ptr`,
+/// reinterpreted as a value of type `T`.
+///
+/// The register's bytes are obtained from `regBytes`; any error it returns is propagated.
+/// Returns `error.IncompatibleRegisterSize` when `@sizeOf(T)` does not equal the length of
+/// the byte slice `regBytes` returned for this register.
+pub fn regValueNative(
+    comptime T: type,
+    thread_context_ptr: anytype,
+    reg_number: u8,
+    reg_context: ?RegisterContext,
+) !RegValueReturnType(@TypeOf(thread_context_ptr), T) {
+    const reg_bytes = try regBytes(thread_context_ptr, reg_number, reg_context);
+    // Only an exact size match can be viewed as a `T` without truncating or over-reading.
+    if (@sizeOf(T) != reg_bytes.len) return error.IncompatibleRegisterSize;
+    return mem.bytesAsValue(T, reg_bytes[0..@sizeOf(T)]);
+}
+
+/// Maps a pointer-to-`std.debug.ThreadContext` type to the byte-slice type used to expose
+/// register storage: `[]const u8` for a const pointer, `[]u8` otherwise.
+///
+/// Any other type is rejected with a compile error that names the offending type.
+fn RegBytesReturnType(comptime ContextPtrType: type) type {
+    const info = @typeInfo(ContextPtrType);
+    if (info != .Pointer or info.Pointer.child != std.debug.ThreadContext) {
+        // Name the rejected type itself; `@TypeOf(ContextPtrType)` would always be `type`.
+        @compileError("Expected a pointer to std.debug.ThreadContext, got " ++ @typeName(ContextPtrType));
+    }
+
+    return if (info.Pointer.is_const) []const u8 else []u8;
+}
+
 /// Returns a slice containing the backing storage for `reg_number`.
 ///
 /// `reg_context` describes in what context the register number is used, as it can have different
lib/std/debug.zig
@@ -623,11 +623,15 @@ pub const StackIterator = struct {
         const module = try self.debug_info.?.getModuleForAddress(self.dwarf_context.pc);
         switch (native_os) {
             .macos, .ios, .watchos, .tvos => {
-                const o_file_info = try module.getOFileInfoForAddress(self.debug_info.?.allocator, self.dwarf_context.pc);
-                if (o_file_info.unwind_info == null) return error.MissingUnwindInfo;
-
-                // TODO: Unwind using __unwind_info,
-                unreachable;
+                // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding
+                // via DWARF before attempting to use the compact unwind info will produce incorrect results.
+                if (module.unwind_info) |unwind_info| {
+                    if (macho.unwindFrame(&self.dwarf_context, unwind_info, module.base_address)) |return_address| {
+                        return return_address;
+                    } else |err| {
+                        if (err != error.RequiresDWARFUnwind) return err;
+                    }
+                } else return error.MissingUnwindInfo;
             },
             else => {},
         }
@@ -1236,7 +1240,16 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn
         .ncmds = hdr.ncmds,
         .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds],
     };
+    var unwind_info: ?[]const u8 = null;
     const symtab = while (it.next()) |cmd| switch (cmd.cmd()) {
+        .SEGMENT_64 => {
+            for (cmd.getSections()) |sect| {
+                if (std.mem.eql(u8, "__TEXT", sect.segName()) and mem.eql(u8, "__unwind_info", sect.sectName())) {
+                    unwind_info = try chopSlice(mapped_mem, sect.offset, sect.size);
+                    break;
+                }
+            }
+        },
         .SYMTAB => break cmd.cast(macho.symtab_command).?,
         else => {},
     } else return error.MissingDebugInfo;
@@ -1346,6 +1359,7 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn
         .ofiles = ModuleDebugInfo.OFileTable.init(allocator),
         .symbols = symbols,
         .strings = strings,
+        .unwind_info = unwind_info,
     };
 }
 
@@ -1886,12 +1900,13 @@ pub const ModuleDebugInfo = switch (native_os) {
         symbols: []const MachoSymbol,
         strings: [:0]const u8,
         ofiles: OFileTable,
+        // Backed by mapped_memory
+        unwind_info: ?[]const u8,
 
         const OFileTable = std.StringHashMap(OFileInfo);
         const OFileInfo = struct {
             di: DW.DwarfInfo,
             addr_table: std.StringHashMap(u64),
-            unwind_info: ?[]const u8,
         };
 
         fn deinit(self: *@This(), allocator: mem.Allocator) void {
@@ -1949,24 +1964,21 @@ pub const ModuleDebugInfo = switch (native_os) {
                 addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value);
             }
 
-            var unwind_info: ?[]const u8 = null;
             var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array;
             for (segcmd.?.getSections()) |sect| {
-                if (std.mem.eql(u8, "__TEXT", sect.segName()) and mem.eql(u8, "__unwind_info", sect.sectName())) {
-                    unwind_info = try chopSlice(mapped_mem, sect.offset, sect.size);
-                } else if (std.mem.eql(u8, "__DWARF", sect.segName())) {
-                    var section_index: ?usize = null;
-                    inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| {
-                        if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i;
-                    }
-                    if (section_index == null) continue;
+                if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue;
 
-                    const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size);
-                    sections[section_index.?] = .{
-                        .data = section_bytes,
-                        .owned = false,
-                    };
+                var section_index: ?usize = null;
+                inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| {
+                    if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i;
                 }
+                if (section_index == null) continue;
+
+                const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size);
+                sections[section_index.?] = .{
+                    .data = section_bytes,
+                    .owned = false,
+                };
             }
 
             const missing_debug_info =
@@ -1986,7 +1998,6 @@ pub const ModuleDebugInfo = switch (native_os) {
             var info = OFileInfo{
                 .di = di,
                 .addr_table = addr_table,
-                .unwind_info = unwind_info,
             };
 
             // Add the debug info to the cache
lib/std/dwarf.zig
@@ -1641,7 +1641,6 @@ pub const DwarfInfo = struct {
         // instead of the actual base address of the module. When using .eh_frame_hdr, PC can be used directly
         // as pointers will be decoded relative to the already-mapped .eh_frame.
         var mapped_pc: usize = undefined;
-
         if (di.eh_frame_hdr) |header| {
             const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null;
             mapped_pc = context.pc;
@@ -1657,16 +1656,12 @@ pub const DwarfInfo = struct {
             mapped_pc = context.pc - module_base_address;
             const index = std.sort.binarySearch(FrameDescriptionEntry, mapped_pc, di.fde_list.items, {}, struct {
                 pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) std.math.Order {
-                    if (pc < mid_item.pc_begin) {
-                        return .lt;
-                    } else {
-                        const range_end = mid_item.pc_begin + mid_item.pc_range;
-                        if (pc < range_end) {
-                            return .eq;
-                        }
+                    if (pc < mid_item.pc_begin) return .lt;
 
-                        return .gt;
-                    }
+                    const range_end = mid_item.pc_begin + mid_item.pc_range;
+                    if (pc < range_end) return .eq;
+
+                    return .gt;
                 }
             }.compareFn);
 
@@ -2000,6 +1995,7 @@ pub const ExceptionFrameHeader = struct {
             }
         }
 
+        if (len == 0) return badDwarf();
         try stream.seekTo(left * entry_size);
 
         // Read past the pc_begin field of the entry
lib/std/macho.zig
@@ -2064,3 +2064,315 @@ pub const UNWIND_ARM64_FRAME_D14_D15_PAIR: u32 = 0x00000800;
 
 pub const UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK: u32 = 0x00FFF000;
 pub const UNWIND_ARM64_DWARF_SECTION_OFFSET: u32 = 0x00FFFFFF;
+
+/// Bit-level view of a 32-bit compact unwind encoding as read from `__unwind_info`.
+///
+/// Packed struct fields are laid out starting at the least significant bit, so `value`
+/// occupies the low 24 bits, followed by `mode`, `personality_index`, `has_lsda` and
+/// `start` in the most significant bit. Both `value` and `mode` are untagged unions: the
+/// target architecture selects the arch member, and `mode` selects which `value` layout applies.
+pub const CompactUnwindEncoding = packed struct(u32) {
+    /// Mode-specific payload (24 bits).
+    value: packed union {
+        x86_64: packed union {
+            /// Payload layout used by `unwindFrame` for the `RBP_FRAME` mode.
+            frame: packed struct(u24) {
+                // Compact register numbers of the saved registers; `unwindFrame` treats 0 as "no register".
+                reg4: u3,
+                reg3: u3,
+                reg2: u3,
+                reg1: u3,
+                reg0: u3,
+                unused: u1 = 0,
+                /// Distance below the frame pointer of the saved register area; `unwindFrame`
+                /// scales it by `@sizeOf(usize)`.
+                frame_offset: u8,
+            },
+            /// Payload layout used by `unwindFrame` for the `STACK_IMMD` mode.
+            frameless: packed struct(u24) {
+                /// Permutation number that `unwindFrame` decodes into the order of saved registers.
+                stack_reg_permutation: u10,
+                /// Number of saved registers.
+                stack_reg_count: u3,
+                stack_adjust: u3,
+                /// Stack size; `unwindFrame` scales it by `@sizeOf(usize)`.
+                stack_size: u8,
+            },
+            dwarf: u24,
+        },
+        arm64: packed union {
+            frame: packed struct(u24) {
+                // One flag bit per register pair.
+                x_reg_pairs: packed struct {
+                    x19_x20: u1,
+                    x21_x22: u1,
+                    x23_x24: u1,
+                    x25_x26: u1,
+                    x27_x28: u1,
+                },
+                d_reg_pairs: packed struct {
+                    d8_d9: u1,
+                    d10_d11: u1,
+                    d12_d13: u1,
+                    d14_d15: u1,
+                },
+                unused: u15,
+            },
+            frameless: packed struct(u24) {
+                unused: u12 = 0,
+                stack_size: u12,
+            },
+            dwarf: u24,
+        },
+    },
+    /// Unwind mode; interpret through the member matching the target architecture.
+    mode: packed union {
+        x86_64: UNWIND_X86_64_MODE,
+        arm64: UNWIND_ARM64_MODE,
+    },
+    personality_index: u2,
+    has_lsda: u1,
+    start: u1,
+};
+
+/// Returns the DWARF register number for an x86_64 register number found in compact unwind info
+///
+/// Only the values 1 through 6 name a register; any other value (0 or 7) yields
+/// `error.InvalidUnwindRegisterNumber`. Callers in `unwindFrame` skip 0 before calling this.
+fn dwarfRegNumber(unwind_reg_number: u3) !u8 {
+    return switch (unwind_reg_number) {
+        1 => 3, // RBX
+        2 => 12, // R12
+        3 => 13, // R13
+        4 => 14, // R14
+        5 => 15, // R15
+        6 => 6, // RBP
+        else => error.InvalidUnwindRegisterNumber,
+    };
+}
+
+const dwarf = std.dwarf;
+const abi = dwarf.abi;
+
+/// Unwinds one stack frame using the compact unwind info in `unwind_info`, the contents of a
+/// module's `__unwind_info` section.
+///
+/// The entry covering `context.pc - module_base_address` is located by binary-searching the
+/// first-level index and then the regular or compressed second-level page it refers to.
+/// On x86_64 the `RBP_FRAME` and `STACK_IMMD` encodings are applied to `context.thread_context`:
+/// the saved registers, the stack pointer and the instruction pointer are overwritten with the
+/// caller's values.
+///
+/// Returns the return address of the unwound frame. `context.pc` is set to that address
+/// minus one (left unchanged at zero).
+///
+/// Errors:
+/// - `error.RequiresDWARFUnwind`: the encoding's mode is `DWARF`; the caller is expected to fall back.
+/// - `error.MissingUnwindInfo`, `error.NoUnwindInfo`: no usable entry covers the address.
+/// - `error.InvalidUnwindInfo`: the section contents are inconsistent, or refer to stack memory
+///   that `context.isValidMemory` rejects.
+/// - `error.UnimplementedUnwindEncoding`, `error.UnimplementedArch`: mode or CPU not handled here.
+/// - Errors from `abi.regValueNative` and `dwarfRegNumber` are propagated.
+pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, module_base_address: usize) !usize {
+    const header = mem.bytesAsValue(
+        unwind_info_section_header,
+        unwind_info[0..@sizeOf(unwind_info_section_header)],
+    );
+    const indices = mem.bytesAsSlice(
+        unwind_info_section_header_index_entry,
+        unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(unwind_info_section_header_index_entry)],
+    );
+    if (indices.len == 0) return error.MissingUnwindInfo;
+
+    // Offsets stored in the section are relative to the module's base address.
+    const mapped_pc = context.pc - module_base_address;
+    const second_level_index = blk: {
+        // Binary search for the last index entry whose functionOffset is <= mapped_pc.
+        var left: usize = 0;
+        var len: usize = indices.len;
+
+        while (len > 1) {
+            const mid = left + len / 2;
+            const offset = indices[mid].functionOffset;
+            if (mapped_pc < offset) {
+                len /= 2;
+            } else {
+                left = mid;
+                if (mapped_pc == offset) break;
+                len -= len / 2;
+            }
+        }
+
+        // Last index is a sentinel containing the highest address as its functionOffset
+        if (len == 0 or indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo;
+        break :blk &indices[left];
+    };
+
+    const common_encodings = mem.bytesAsSlice(
+        compact_unwind_encoding_t,
+        unwind_info[header.commonEncodingsArraySectionOffset..][0 .. header.commonEncodingsArrayCount * @sizeOf(compact_unwind_encoding_t)],
+    );
+
+    const start_offset = second_level_index.secondLevelPagesSectionOffset;
+    // The second-level page starts with a kind field that selects its layout.
+    const kind = mem.bytesAsValue(
+        UNWIND_SECOND_LEVEL,
+        unwind_info[start_offset..][0..@sizeOf(UNWIND_SECOND_LEVEL)],
+    );
+    const raw_encoding = switch (kind.*) {
+        .REGULAR => blk: {
+            const page_header = mem.bytesAsValue(
+                unwind_info_regular_second_level_page_header,
+                unwind_info[start_offset..][0..@sizeOf(unwind_info_regular_second_level_page_header)],
+            );
+
+            const entries = mem.bytesAsSlice(
+                unwind_info_regular_second_level_entry,
+                unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(unwind_info_regular_second_level_entry)],
+            );
+            if (entries.len == 0) return error.InvalidUnwindInfo;
+
+            var left: usize = 0;
+            var len: usize = entries.len;
+            while (len > 1) {
+                const mid = left + len / 2;
+                const offset = entries[mid].functionOffset;
+                if (mapped_pc < offset) {
+                    len /= 2;
+                } else {
+                    left = mid;
+                    if (mapped_pc == offset) break;
+                    len -= len / 2;
+                }
+            }
+
+            if (len == 0) return error.InvalidUnwindInfo;
+            break :blk entries[left].encoding;
+        },
+        .COMPRESSED => blk: {
+            const page_header = mem.bytesAsValue(
+                unwind_info_compressed_second_level_page_header,
+                unwind_info[start_offset..][0..@sizeOf(unwind_info_compressed_second_level_page_header)],
+            );
+
+            const entries = mem.bytesAsSlice(
+                UnwindInfoCompressedEntry,
+                unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(UnwindInfoCompressedEntry)],
+            );
+            if (entries.len == 0) return error.InvalidUnwindInfo;
+
+            var left: usize = 0;
+            var len: usize = entries.len;
+            while (len > 1) {
+                const mid = left + len / 2;
+                // Compressed entries store offsets relative to the first-level entry's functionOffset.
+                const offset = second_level_index.functionOffset + entries[mid].funcOffset;
+                if (mapped_pc < offset) {
+                    len /= 2;
+                } else {
+                    left = mid;
+                    if (mapped_pc == offset) break;
+                    len -= len / 2;
+                }
+            }
+
+            if (len == 0) return error.InvalidUnwindInfo;
+            const entry = entries[left];
+            // Indices below the common count refer to the shared table, the rest to the page-local one.
+            if (entry.encodingIndex < header.commonEncodingsArrayCount) {
+                if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo;
+                break :blk common_encodings[entry.encodingIndex];
+            } else {
+                const local_index = try std.math.sub(
+                    u8,
+                    entry.encodingIndex,
+                    std.math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo,
+                );
+                const local_encodings = mem.bytesAsSlice(
+                    compact_unwind_encoding_t,
+                    unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. page_header.encodingsCount * @sizeOf(compact_unwind_encoding_t)],
+                );
+                if (local_index >= local_encodings.len) return error.InvalidUnwindInfo;
+                break :blk local_encodings[local_index];
+            }
+        },
+        else => return error.InvalidUnwindInfo,
+    };
+
+    if (raw_encoding == 0) return error.NoUnwindInfo;
+    const reg_context = dwarf.abi.RegisterContext{
+        .eh_frame = false,
+        .is_macho = true,
+    };
+
+    const encoding: CompactUnwindEncoding = @bitCast(raw_encoding);
+    const new_ip = switch (builtin.cpu.arch) {
+        .x86_64 => switch (encoding.mode.x86_64) {
+            .OLD => return error.UnimplementedUnwindEncoding,
+            .RBP_FRAME => blk: {
+                const regs: [5]u3 = .{
+                    encoding.value.x86_64.frame.reg0,
+                    encoding.value.x86_64.frame.reg1,
+                    encoding.value.x86_64.frame.reg2,
+                    encoding.value.x86_64.frame.reg3,
+                    encoding.value.x86_64.frame.reg4,
+                };
+
+                // Widen before scaling: the multiplication would otherwise be performed in u8
+                // and overflow for any frame_offset of 32 or more.
+                const frame_offset = @as(usize, encoding.value.x86_64.frame.frame_offset) * @sizeOf(usize);
+                var max_reg: usize = 0;
+                inline for (regs, 0..) |reg, i| {
+                    if (reg > 0) max_reg = i;
+                }
+
+                const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*;
+                const new_sp = fp + 2 * @sizeOf(usize);
+
+                // Verify the stack range we're about to read register values from is valid
+                if (!context.isValidMemory(new_sp) or !context.isValidMemory(fp - frame_offset + max_reg * @sizeOf(usize))) return error.InvalidUnwindInfo;
+
+                // The return address is read from just above the frame pointer, and the
+                // caller's frame pointer from the address the frame pointer holds.
+                const ip_ptr = fp + @sizeOf(usize);
+                const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*;
+                const new_fp = @as(*const usize, @ptrFromInt(fp)).*;
+
+                (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp;
+                (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp;
+                (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip;
+
+                for (regs, 0..) |reg, i| {
+                    if (reg == 0) continue;
+                    const addr = fp - frame_offset + i * @sizeOf(usize);
+                    const reg_number = try dwarfRegNumber(reg);
+                    (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*;
+                }
+
+                break :blk new_ip;
+            },
+            .STACK_IMMD => blk: {
+                const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*;
+
+                // Decode Lehmer-coded sequence of registers.
+                // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h
+
+                // Decode the variable-based permutation number into its digits. Each digit represents
+                // an index into the list of register numbers that weren't yet used in the sequence at
+                // the time the digit was added.
+                const reg_count = encoding.value.x86_64.frameless.stack_reg_count;
+                // Only six registers can be encoded; a count of 7 would index out of bounds below.
+                if (reg_count > 6) return error.InvalidUnwindInfo;
+                const ip_ptr = if (reg_count > 0) reg_blk: {
+                    var digits: [6]u3 = undefined;
+                    var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation;
+                    var base: usize = 2;
+                    for (0..reg_count) |i| {
+                        const div = accumulator / base;
+                        digits[digits.len - 1 - i] = @intCast(accumulator - base * div);
+                        accumulator = div;
+                        base += 1;
+                    }
+
+                    const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 };
+                    var registers: [reg_numbers.len]u3 = undefined;
+                    var used_indices = [_]bool{false} ** reg_numbers.len;
+                    for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| {
+                        var unused_count: u8 = 0;
+                        const unused_index = for (used_indices, 0..) |used, index| {
+                            if (!used) {
+                                if (target_unused_index == unused_count) break index;
+                                unused_count += 1;
+                            }
+                        } else {
+                            // The digit comes from section data; an out-of-range digit is
+                            // malformed input, not a programmer error.
+                            return error.InvalidUnwindInfo;
+                        };
+
+                        registers[i] = reg_numbers[unused_index];
+                        used_indices[unused_index] = true;
+                    }
+
+                    // Checked subtraction: a stack_size too small for the saved registers plus
+                    // the return address is malformed input and must not underflow.
+                    const saved_regs_offset = std.math.sub(
+                        usize,
+                        encoding.value.x86_64.frameless.stack_size,
+                        @as(usize, reg_count) + 1,
+                    ) catch return error.InvalidUnwindInfo;
+                    var reg_addr = sp + saved_regs_offset * @sizeOf(usize);
+                    if (!context.isValidMemory(reg_addr)) return error.InvalidUnwindInfo;
+                    for (0..reg_count) |i| {
+                        const reg_number = try dwarfRegNumber(registers[i]);
+                        (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*;
+                        reg_addr += @sizeOf(usize);
+                    }
+
+                    break :reg_blk reg_addr;
+                } else sp + (std.math.sub(usize, encoding.value.x86_64.frameless.stack_size, 1) catch return error.InvalidUnwindInfo) * @sizeOf(usize);
+
+                const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*;
+                const new_sp = ip_ptr + @sizeOf(usize);
+                if (!context.isValidMemory(new_sp)) return error.InvalidUnwindInfo;
+
+                (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp;
+                (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip;
+
+                break :blk new_ip;
+            },
+            .STACK_IND => {
+                return error.UnimplementedUnwindEncoding; // TODO
+            },
+            .DWARF => return error.RequiresDWARFUnwind,
+        },
+        // The mode bits must be read through the arm64 member: the two architectures assign
+        // different numeric values to their modes.
+        .aarch64 => switch (encoding.mode.arm64) {
+            .DWARF => return error.RequiresDWARFUnwind,
+            else => return error.UnimplementedUnwindEncoding,
+        },
+        else => return error.UnimplementedArch,
+    };
+
+    context.pc = new_ip;
+    // Step back one byte from the return address (skipped when it is zero).
+    if (context.pc > 0) context.pc -= 1;
+    return new_ip;
+}
test/standalone/dwarf_unwinding/build.zig
@@ -16,6 +16,7 @@ pub fn build(b: *std.Build) void {
             .optimize = optimize,
         });
 
+        if (target.isDarwin()) exe.unwind_tables = true;
         exe.omit_frame_pointer = true;
 
         const run_cmd = b.addRunArtifact(exe);
@@ -43,6 +44,7 @@ pub fn build(b: *std.Build) void {
             .optimize = optimize,
         });
 
+        if (target.isDarwin()) exe.unwind_tables = true;
         exe.omit_frame_pointer = true;
         exe.linkLibrary(c_shared_lib);
 
test/standalone/dwarf_unwinding/zig_unwind.zig
@@ -1,4 +1,5 @@
 const std = @import("std");
+const builtin = @import("builtin");
 const debug = std.debug;
 const testing = std.testing;
 
@@ -18,6 +19,24 @@ noinline fn frame3(expected: *[4]usize, unwound: *[4]usize) void {
 }
 
 noinline fn frame2(expected: *[4]usize, unwound: *[4]usize) void {
+    // On macOS, overwrite several registers before continuing so this frame has to restore them.
+    if (builtin.os.tag == .macos) {
+        // Exercise different __unwind_info encodings by forcing some registers to be restored
+        switch (builtin.cpu.arch) {
+            .x86_64 => {
+                // Writes a constant into rbx, r12-r15 and rbp and lists each as a clobber.
+                asm volatile (
+                    \\movq $3, %%rbx
+                    \\movq $12, %%r12
+                    \\movq $13, %%r13
+                    \\movq $14, %%r14
+                    \\movq $15, %%r15
+                    \\movq $6, %%rbp
+                    ::: "rbx", "r12", "r13", "r14", "r15", "rbp");
+            },
+            .aarch64 => {},
+            else => {},
+        }
+    }
+
     // Record this frame's return address, then descend into frame3.
     expected[1] = @returnAddress();
     frame3(expected, unwound);
 }