master
/// The whole file that was passed to `load`, memory-mapped. Unmapped by `deinit`.
mapped_memory: []align(std.heap.page_size_min) const u8,
/// Symbols collected by `load`, sorted in ascending `addr` order so that `Symbol.find` can binary
/// search them. Allocated with the `gpa` given to `load` and freed by `deinit`.
symbols: []const Symbol,
/// The symbol string table. This is a slice into `mapped_memory`, so it is only valid until
/// `deinit`. `Symbol.strx` and `Symbol.ofile` are byte offsets into it.
strings: []const u8,
/// The `vmaddr` recorded in the `__TEXT` segment load command.
text_vmaddr: u64,

/// Key is index into `strings` of the file path.
/// Populated lazily by `getDwarfForAddress`; a failed load is cached as an error value so that it
/// is not retried on every lookup.
ofiles: std.AutoArrayHashMapUnmanaged(u32, Error!OFile),
8
/// Every error that loading a Mach-O file, or one of the object files it references, can produce.
pub const Error = error{
    /// The file (or an archive containing it) is malformed or is not a 64-bit Mach-O image.
    InvalidMachO,
    /// The DWARF sections of an object file could not be parsed.
    InvalidDwarf,
    /// The file does not exist, or it lacks the symbols/sections/architecture that were requested.
    MissingDebugInfo,
    /// The file uses a format this code does not handle (currently: 64-bit universal binaries).
    UnsupportedDebugInfo,
    /// Opening, measuring, or memory-mapping a file failed.
    ReadFailed,
    /// An allocation failed.
    OutOfMemory,
};
17
/// Releases everything owned by `mf`: each successfully loaded object file (its mapping, DWARF
/// state and symbol-name index), the object file cache itself, the symbol slice, and finally the
/// mapping of the main file. `gpa` must be the allocator that was used with `load` and
/// `getDwarfForAddress`.
pub fn deinit(mf: *MachOFile, gpa: Allocator) void {
    for (mf.ofiles.values()) |*cached| {
        if (cached.*) |*of| {
            posix.munmap(of.mapped_memory);
            of.dwarf.deinit(gpa);
            of.symbols_by_name.deinit(gpa);
        } else |_| {
            // A cached load failure owns no resources.
        }
    }
    mf.ofiles.deinit(gpa);

    gpa.free(mf.symbols);
    posix.munmap(mf.mapped_memory);
}
29
/// Memory-maps the file at `path` and builds a `MachOFile` from its symbol table.
///
/// `path` may name either a 64-bit Mach-O image or a universal ("fat") binary, in which case the
/// image for `arch` is selected. `arch` must be `.x86_64` or `.aarch64`; anything else is a
/// programmer error.
///
/// On success the result owns the file mapping and a symbol slice allocated with `gpa`; release
/// both with `deinit`.
///
/// Errors:
/// * `error.InvalidMachO`: the file is truncated, has an unknown magic, contains offsets or sizes
///   that point outside the file, or has an inconsistent sequence of STAB entries.
/// * `error.MissingDebugInfo`: the file does not exist, the universal binary has no image for
///   `arch`, a required load command is absent, or there are no usable symbols.
/// * `error.UnsupportedDebugInfo`: the file is a 64-bit universal binary.
/// * `error.ReadFailed`, `error.OutOfMemory`: I/O or allocation failure.
pub fn load(gpa: Allocator, path: []const u8, arch: std.Target.Cpu.Arch) Error!MachOFile {
    switch (arch) {
        .x86_64, .aarch64 => {},
        else => unreachable,
    }

    const all_mapped_memory = try mapDebugInfoFile(path);
    errdefer posix.munmap(all_mapped_memory);

    // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal
    // binary": a simple file format which contains Mach-O binaries for multiple targets. For
    // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images
    // for both ARM64 macOS and x86_64 macOS.
    if (all_mapped_memory.len < 4) return error.InvalidMachO;
    const magic = std.mem.readInt(u32, all_mapped_memory.ptr[0..4], .little);

    // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`.
    const mapped_macho = switch (magic) {
        macho.MH_MAGIC_64 => all_mapped_memory,

        macho.FAT_CIGAM => mapped_macho: {
            // This is the universal binary format (aka a "fat binary").
            var fat_r: Io.Reader = .fixed(all_mapped_memory);
            const hdr = fat_r.takeStruct(macho.fat_header, .big) catch |err| switch (err) {
                error.ReadFailed => unreachable,
                error.EndOfStream => return error.InvalidMachO,
            };
            const want_cpu_type = switch (arch) {
                .x86_64 => macho.CPU_TYPE_X86_64,
                .aarch64 => macho.CPU_TYPE_ARM64,
                else => unreachable,
            };
            for (0..hdr.nfat_arch) |_| {
                const fat_arch = fat_r.takeStruct(macho.fat_arch, .big) catch |err| switch (err) {
                    error.ReadFailed => unreachable,
                    error.EndOfStream => return error.InvalidMachO,
                };
                if (fat_arch.cputype != want_cpu_type) continue;
                // `offset` and `size` are both 32-bit and come straight from the file, so their
                // sum is computed in 64 bits to keep a hostile header from overflowing the check.
                const fat_end = @as(u64, fat_arch.offset) + fat_arch.size;
                if (fat_end > all_mapped_memory.len) return error.InvalidMachO;
                break :mapped_macho all_mapped_memory[fat_arch.offset..][0..fat_arch.size];
            }
            // `arch` was not present in the fat binary.
            return error.MissingDebugInfo;
        },

        // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It
        // will be fairly easy to add support here if necessary; it's very similar to above.
        macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo,

        else => return error.InvalidMachO,
    };

    var r: Io.Reader = .fixed(mapped_macho);
    const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) {
        error.ReadFailed => unreachable,
        error.EndOfStream => return error.InvalidMachO,
    };

    if (hdr.magic != macho.MH_MAGIC_64)
        return error.InvalidMachO;

    const symtab: macho.symtab_command, const text_vmaddr: u64 = lcs: {
        var it: macho.LoadCommandIterator = try .init(&hdr, mapped_macho[@sizeOf(macho.mach_header_64)..]);
        var symtab: ?macho.symtab_command = null;
        var text_vmaddr: ?u64 = null;
        while (try it.next()) |cmd| switch (cmd.hdr.cmd) {
            .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidMachO,
            .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| {
                if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue;
                text_vmaddr = seg_cmd.vmaddr;
            },
            else => {},
        };
        break :lcs .{
            symtab orelse return error.MissingDebugInfo,
            text_vmaddr orelse return error.MissingDebugInfo,
        };
    };

    // The string table without its final byte. Every offset and size in `symtab` is untrusted
    // file data, so validate that the slice lies inside the mapping before creating it. An empty
    // string table is tolerated (saturating subtraction); any symbol that then names a string is
    // rejected by the per-symbol checks below.
    const strings_len = symtab.strsize -| 1;
    if (symtab.stroff > mapped_macho.len) return error.InvalidMachO;
    if (strings_len > mapped_macho.len - symtab.stroff) return error.InvalidMachO;
    const strings = mapped_macho[symtab.stroff..][0..strings_len];

    // Likewise make sure the symbol table fits in the file. Checking `nsyms` up front also keeps a
    // bogus count from driving a huge allocation below.
    if (symtab.symoff > mapped_macho.len) return error.InvalidMachO;
    if (symtab.nsyms > (mapped_macho.len - symtab.symoff) / @sizeOf(macho.nlist_64)) return error.InvalidMachO;

    var symbols: std.ArrayList(Symbol) = try .initCapacity(gpa, symtab.nsyms);
    defer symbols.deinit(gpa);

    // This map is temporary; it is used only to detect duplicates here. This is
    // necessary because we prefer to use STAB ("symbolic debugging table") symbols,
    // but they might not be present, so we track normal symbols too.
    // Indices match 1-1 with those of `symbols`.
    var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty;
    defer symbol_names.deinit(gpa);
    try symbol_names.ensureUnusedCapacity(gpa, symtab.nsyms);

    // `ofile` and `last_sym` are only read in states that are reachable after they were written.
    var ofile: u32 = undefined;
    var last_sym: Symbol = undefined;
    var state: enum {
        init,
        oso_open,
        oso_close,
        bnsym,
        fun_strx,
        fun_size,
        ensym,
    } = .init;

    var sym_r: Io.Reader = .fixed(mapped_macho[symtab.symoff..]);
    for (0..symtab.nsyms) |_| {
        const sym = sym_r.takeStruct(macho.nlist_64, .little) catch |err| switch (err) {
            error.ReadFailed => unreachable,
            error.EndOfStream => return error.InvalidMachO,
        };
        if (sym.n_type.bits.is_stab == 0) {
            if (sym.n_strx == 0) continue;
            switch (sym.n_type.bits.type) {
                .undf, .pbud, .indr, .abs, _ => continue,
                .sect => {
                    // The name offset must point into the string table.
                    if (sym.n_strx > strings.len) return error.InvalidMachO;
                    const name = std.mem.sliceTo(strings[sym.n_strx..], 0);
                    const gop = symbol_names.getOrPutAssumeCapacity(name);
                    if (!gop.found_existing) {
                        assert(gop.index == symbols.items.len);
                        symbols.appendAssumeCapacity(.{
                            .strx = sym.n_strx,
                            .addr = sym.n_value,
                            .ofile = Symbol.unknown_ofile,
                        });
                    }
                },
            }
            continue;
        }

        // TODO handle globals N_GSYM, and statics N_STSYM
        switch (sym.n_type.stab) {
            .oso => switch (state) {
                .init, .oso_close => {
                    state = .oso_open;
                    ofile = sym.n_strx;
                },
                else => return error.InvalidMachO,
            },
            .bnsym => switch (state) {
                .oso_open, .ensym => {
                    state = .bnsym;
                    last_sym = .{
                        .strx = 0,
                        .addr = sym.n_value,
                        .ofile = ofile,
                    };
                },
                else => return error.InvalidMachO,
            },
            .fun => switch (state) {
                // The first N_FUN of a pair carries the function name...
                .bnsym => {
                    state = .fun_strx;
                    last_sym.strx = sym.n_strx;
                },
                // ...and the second one is only used to advance the state machine.
                .fun_strx => {
                    state = .fun_size;
                },
                else => return error.InvalidMachO,
            },
            .ensym => switch (state) {
                .fun_size => {
                    state = .ensym;
                    if (last_sym.strx != 0) {
                        if (last_sym.strx > strings.len) return error.InvalidMachO;
                        const name = std.mem.sliceTo(strings[last_sym.strx..], 0);
                        const gop = symbol_names.getOrPutAssumeCapacity(name);
                        if (!gop.found_existing) {
                            assert(gop.index == symbols.items.len);
                            symbols.appendAssumeCapacity(last_sym);
                        } else {
                            // A STAB entry knows its object file, so it replaces a plain symbol
                            // of the same name.
                            symbols.items[gop.index] = last_sym;
                        }
                    }
                },
                else => return error.InvalidMachO,
            },
            .so => switch (state) {
                .init, .oso_close => {},
                .oso_open, .ensym => {
                    state = .oso_close;
                },
                else => return error.InvalidMachO,
            },
            else => {},
        }
    }

    switch (state) {
        .init => {
            // Missing STAB symtab entries is still okay, unless there were also no normal symbols.
            if (symbols.items.len == 0) return error.MissingDebugInfo;
        },
        .oso_close => {},
        else => return error.InvalidMachO, // corrupted STAB entries in symtab
    }

    const symbols_slice = try symbols.toOwnedSlice(gpa);
    errdefer gpa.free(symbols_slice);

    // Even though lld emits symbols in ascending order, this debug code
    // should work for programs linked in any valid way.
    // This sort is so that we can binary search later.
    mem.sort(Symbol, symbols_slice, {}, Symbol.addressLessThan);

    return .{
        .mapped_memory = all_mapped_memory,
        .symbols = symbols_slice,
        .strings = strings,
        .ofiles = .empty,
        .text_vmaddr = text_vmaddr,
    };
}
/// Finds the debug information covering `vaddr`.
///
/// Looks up the symbol containing `vaddr`, loads (or fetches from the `ofiles` cache) the object
/// file that symbol came from, and returns that object file's `Dwarf` together with `vaddr`
/// translated into the object file's own address space.
///
/// `gpa` is used for the cache and for everything the object file load allocates; it must be the
/// allocator later passed to `deinit`.
///
/// Returns `error.MissingDebugInfo` when no symbol covers `vaddr`, when the symbol has no
/// associated object file, or when the object file does not define the symbol. A failed object
/// file load is cached and returned again on later calls.
///
/// NOTE(review): the returned `*Dwarf` points into the storage of `mf.ofiles`; presumably it is
/// invalidated if a later call grows that map — confirm before holding it across calls.
pub fn getDwarfForAddress(mf: *MachOFile, gpa: Allocator, vaddr: u64) !struct { *Dwarf, u64 } {
    const symbol = Symbol.find(mf.symbols, vaddr) orelse return error.MissingDebugInfo;

    if (symbol.ofile == Symbol.unknown_ofile) return error.MissingDebugInfo;

    // `symbol.ofile` is a string table offset copied verbatim from the file. Reject an offset
    // outside the table here, before touching the cache, so that no map entry is left unset.
    if (symbol.ofile > mf.strings.len) return error.InvalidMachO;

    // offset of `address` from start of `symbol`
    const address_symbol_offset = vaddr - symbol.addr;

    // Take the symbol name from the N_FUN STAB entry, we're going to
    // use it if we fail to find the DWARF infos
    const stab_symbol = mem.sliceTo(mf.strings[symbol.strx..], 0);

    const gop = try mf.ofiles.getOrPut(gpa, symbol.ofile);
    if (!gop.found_existing) {
        const name = mem.sliceTo(mf.strings[symbol.ofile..], 0);
        // Store the result even when it is an error, so the load is attempted only once.
        gop.value_ptr.* = loadOFile(gpa, name);
    }
    const of = &(gop.value_ptr.* catch |err| return err);

    const symbol_index = of.symbols_by_name.getKeyAdapted(
        @as([]const u8, stab_symbol),
        @as(OFile.SymbolAdapter, .{ .strtab = of.strtab, .symtab_raw = of.symtab_raw }),
    ) orelse return error.MissingDebugInfo;

    const symbol_ofile_vaddr = vaddr: {
        // `symtab_raw` is the on-disk (little-endian) representation.
        var sym = of.symtab_raw[symbol_index];
        if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym);
        break :vaddr sym.n_value;
    };

    return .{ &of.dwarf, symbol_ofile_vaddr + address_symbol_offset };
}
/// Returns the name of the symbol whose address range contains `vaddr`, or
/// `error.MissingDebugInfo` if `vaddr` lies before every known symbol (or there are none).
/// The returned slice points into `mf.strings`.
pub fn lookupSymbolName(mf: *MachOFile, vaddr: u64) error{MissingDebugInfo}![]const u8 {
    if (Symbol.find(mf.symbols, vaddr)) |found| {
        const name_and_rest = mf.strings[found.strx..];
        return mem.sliceTo(name_and_rest, 0);
    }
    return error.MissingDebugInfo;
}
278
/// A loaded object file: its mapping, parsed DWARF state, and an index of its symbols by name.
const OFile = struct {
    /// The mapping that backs `strtab`, `symtab_raw` and the DWARF section data.
    mapped_memory: []align(std.heap.page_size_min) const u8,
    dwarf: Dwarf,
    /// The object file's string table.
    strtab: []const u8,
    /// The object file's symbol table exactly as stored in the file (little-endian, unaligned).
    symtab_raw: []align(1) const macho.nlist_64,
    /// All named symbols in `symtab_raw`. Stored `u32` key is the index into `symtab_raw`. Accessed
    /// through `SymbolAdapter`, so that the symbol name is used as the logical key.
    symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true),

    /// Hash map adapter that lets `symbols_by_name` be queried with a symbol name even though it
    /// stores symbol table indices.
    const SymbolAdapter = struct {
        strtab: []const u8,
        symtab_raw: []align(1) const macho.nlist_64,

        /// Hashes the name itself; the adapter's tables are not needed for this.
        pub fn hash(_: SymbolAdapter, sym_name: []const u8) u32 {
            const full_hash = std.hash.Wyhash.hash(0, sym_name);
            return @truncate(full_hash);
        }

        /// Compares `wanted_name` against the name of the symbol stored at `stored_index`.
        pub fn eql(ctx: SymbolAdapter, wanted_name: []const u8, stored_index: u32, _: usize) bool {
            var stored = ctx.symtab_raw[stored_index];
            // The raw table is little-endian; convert on big-endian hosts before reading fields.
            if (builtin.cpu.arch.endian() != .little) {
                std.mem.byteSwapAllFields(macho.nlist_64, &stored);
            }
            const stored_name = mem.sliceTo(ctx.strtab[stored.n_strx..], 0);
            return mem.eql(u8, wanted_name, stored_name);
        }
    };
};
304
/// One entry of the address-sorted symbol list built by `load`.
const Symbol = struct {
    /// Offset of the symbol's name in the string table.
    strx: u32,
    /// Address of the symbol.
    addr: u64,
    /// Value may be `unknown_ofile`.
    ofile: u32,

    /// Marks a symbol for which no object file is known.
    const unknown_ofile = std.math.maxInt(u32);

    /// Ordering used to sort symbols by ascending address.
    fn addressLessThan(context: void, lhs: Symbol, rhs: Symbol) bool {
        _ = context;
        return lhs.addr < rhs.addr;
    }

    /// Returns the last symbol whose `addr` is less than or equal to `address`, or `null` if
    /// `symbols` is empty or `address` is below the first symbol.
    ///
    /// `address` is a `u64`, matching `Symbol.addr`, so that lookups do not depend on the width of
    /// the host's `usize`.
    ///
    /// Assumes that `symbols` is sorted in order of ascending `addr`.
    fn find(symbols: []const Symbol, address: u64) ?*const Symbol {
        if (symbols.len == 0) return null; // no potential match
        if (address < symbols[0].addr) return null; // address is before the lowest-address symbol
        // Binary search. Invariant: `symbols[left].addr <= address`, and the answer lies in
        // `symbols[left..][0..len]`.
        var left: usize = 0;
        var len: usize = symbols.len;
        while (len > 1) {
            const mid = left + len / 2;
            if (address < symbols[mid].addr) {
                len /= 2;
            } else {
                left = mid;
                len -= len / 2;
            }
        }
        return &symbols[left];
    }

    test find {
        const symbols: []const Symbol = &.{
            .{ .addr = 100, .strx = undefined, .ofile = undefined },
            .{ .addr = 200, .strx = undefined, .ofile = undefined },
            .{ .addr = 300, .strx = undefined, .ofile = undefined },
        };

        try testing.expectEqual(null, find(symbols, 0));
        try testing.expectEqual(null, find(symbols, 99));
        try testing.expectEqual(&symbols[0], find(symbols, 100).?);
        try testing.expectEqual(&symbols[0], find(symbols, 150).?);
        try testing.expectEqual(&symbols[0], find(symbols, 199).?);

        try testing.expectEqual(&symbols[1], find(symbols, 200).?);
        try testing.expectEqual(&symbols[1], find(symbols, 250).?);
        try testing.expectEqual(&symbols[1], find(symbols, 299).?);

        try testing.expectEqual(&symbols[2], find(symbols, 300).?);
        try testing.expectEqual(&symbols[2], find(symbols, 301).?);
        try testing.expectEqual(&symbols[2], find(symbols, 5000).?);
    }
};
// Reference `Symbol` so that the `test` declared inside it is included when this file is tested.
test {
    _ = Symbol;
}
358
/// Maps and parses the object file named `o_file_name`.
///
/// `o_file_name` is either a plain path to an object file, or has the form
/// `path/to/archive.a(entry.o)`, in which case the archive is mapped and searched for the member
/// called `entry.o`.
///
/// On success the returned `OFile` owns the file mapping, the DWARF state and the symbol-name
/// index (the latter two allocated with `gpa`). Its `strtab`, `symtab_raw` and section data are
/// slices into the mapping.
///
/// All offsets, sizes and counts read from the file are validated against the mapping before they
/// are used; a file that fails validation yields `error.InvalidMachO`. A missing file, archive
/// member, load command or required DWARF section yields `error.MissingDebugInfo`.
fn loadOFile(gpa: Allocator, o_file_name: []const u8) !OFile {
    const all_mapped_memory, const mapped_ofile = map: {
        const open_paren = paren: {
            if (std.mem.endsWith(u8, o_file_name, ")")) {
                if (std.mem.findScalarLast(u8, o_file_name, '(')) |i| {
                    break :paren i;
                }
            }
            // Not an archive, just a normal path to a .o file
            const m = try mapDebugInfoFile(o_file_name);
            break :map .{ m, m };
        };

        // We have the form 'path/to/archive.a(entry.o)'. Map the archive and find the object file in question.

        const archive_path = o_file_name[0..open_paren];
        const target_name_in_archive = o_file_name[open_paren + 1 .. o_file_name.len - 1];
        const mapped_archive = try mapDebugInfoFile(archive_path);
        errdefer posix.munmap(mapped_archive);

        var ar_reader: Io.Reader = .fixed(mapped_archive);
        const ar_magic = ar_reader.take(8) catch return error.InvalidMachO;
        if (!std.mem.eql(u8, ar_magic, "!<arch>\n")) return error.InvalidMachO;
        while (true) {
            // Reaching the end of the archive means the requested member does not exist.
            if (ar_reader.seek == ar_reader.buffer.len) return error.MissingDebugInfo;

            // Member header: name, four fields that are skipped here, size, end marker.
            const raw_name = ar_reader.takeArray(16) catch return error.InvalidMachO;
            ar_reader.discardAll(12 + 6 + 6 + 8) catch return error.InvalidMachO;
            const raw_size = ar_reader.takeArray(10) catch return error.InvalidMachO;
            const file_magic = ar_reader.takeArray(2) catch return error.InvalidMachO;
            if (!std.mem.eql(u8, file_magic, "`\n")) return error.InvalidMachO;

            const size = std.fmt.parseInt(u32, mem.sliceTo(raw_size, ' '), 10) catch return error.InvalidMachO;
            const raw_data = ar_reader.take(size) catch return error.InvalidMachO;

            const entry_name: []const u8, const entry_contents: []const u8 = entry: {
                if (!std.mem.startsWith(u8, raw_name, "#1/")) {
                    break :entry .{ mem.sliceTo(raw_name, '/'), raw_data };
                }
                // Extended name: "#1/<len>" means the first `len` bytes of the member data hold
                // the (possibly NUL-padded) name, followed by the actual contents.
                const len = std.fmt.parseInt(u32, mem.sliceTo(raw_name[3..], ' '), 10) catch return error.InvalidMachO;
                if (len > size) return error.InvalidMachO;
                break :entry .{ mem.sliceTo(raw_data[0..len], 0), raw_data[len..] };
            };

            if (std.mem.eql(u8, entry_name, target_name_in_archive)) {
                break :map .{ mapped_archive, entry_contents };
            }

            // Member headers start on even offsets: an odd-sized member is followed by one
            // padding byte which is not counted in its size field.
            if (size % 2 != 0 and ar_reader.seek != ar_reader.buffer.len) {
                ar_reader.discardAll(1) catch return error.InvalidMachO;
            }
        }
    };
    errdefer posix.munmap(all_mapped_memory);

    var r: Io.Reader = .fixed(mapped_ofile);
    const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) {
        error.ReadFailed => unreachable,
        error.EndOfStream => return error.InvalidMachO,
    };
    if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidMachO;

    const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: {
        var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null;
        var symtab_cmd: ?macho.symtab_command = null;
        var it: macho.LoadCommandIterator = try .init(&hdr, mapped_ofile[@sizeOf(macho.mach_header_64)..]);
        while (try it.next()) |lc| switch (lc.hdr.cmd) {
            .SEGMENT_64 => seg_cmd = lc,
            .SYMTAB => symtab_cmd = lc.cast(macho.symtab_command) orelse return error.InvalidMachO,
            else => {},
        };
        break :cmds .{
            seg_cmd orelse return error.MissingDebugInfo,
            symtab_cmd orelse return error.MissingDebugInfo,
        };
    };

    // The string table must be non-empty, lie inside the file, and end in a NUL byte. The checks
    // are written as subtractions from the file length so that they cannot overflow no matter
    // what the file claims.
    if (symtab_cmd.strsize == 0) return error.InvalidMachO;
    if (symtab_cmd.stroff > mapped_ofile.len) return error.InvalidMachO;
    if (symtab_cmd.strsize > mapped_ofile.len - symtab_cmd.stroff) return error.InvalidMachO;
    const strtab_with_nul = mapped_ofile[symtab_cmd.stroff..][0..symtab_cmd.strsize];
    if (strtab_with_nul[strtab_with_nul.len - 1] != 0) return error.InvalidMachO;
    const strtab = strtab_with_nul[0 .. strtab_with_nul.len - 1];

    // Same for the symbol table; the byte count is computed with overflow detection.
    const n_sym_bytes = std.math.mul(usize, symtab_cmd.nsyms, @sizeOf(macho.nlist_64)) catch return error.InvalidMachO;
    if (symtab_cmd.symoff > mapped_ofile.len) return error.InvalidMachO;
    if (n_sym_bytes > mapped_ofile.len - symtab_cmd.symoff) return error.InvalidMachO;
    const symtab_raw: []align(1) const macho.nlist_64 = @ptrCast(mapped_ofile[symtab_cmd.symoff..][0..n_sym_bytes]);

    // TODO handle tentative (common) symbols
    var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty;
    // On success the map is moved into the result below, which leaves this one empty.
    defer symbols_by_name.deinit(gpa);
    try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab_raw.len));
    for (symtab_raw, 0..) |sym_raw, sym_index| {
        var sym = sym_raw;
        if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym);
        if (sym.n_strx == 0) continue;
        switch (sym.n_type.bits.type) {
            .undf => continue, // includes tentative symbols
            .abs => continue,
            else => {},
        }
        // Only symbols whose name offset is inside the string table may enter the index; the
        // adapter later slices `strtab` at this offset without further checks.
        if (sym.n_strx > strtab.len) return error.InvalidMachO;
        const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0);
        const gop = symbols_by_name.getOrPutAssumeCapacityAdapted(
            @as([]const u8, sym_name),
            @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab_raw = symtab_raw }),
        );
        if (gop.found_existing) return error.InvalidMachO;
        gop.key_ptr.* = @intCast(sym_index);
    }

    var sections: Dwarf.SectionArray = @splat(null);
    for (seg_cmd.getSections()) |sect_raw| {
        var sect = sect_raw;
        if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.section_64, &sect);

        if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue;

        // Match the Mach-O section name ("__debug_info", ...) against the known DWARF sections.
        const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| {
            if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i;
        } else continue;

        if (sect.offset > mapped_ofile.len) return error.InvalidMachO;
        if (sect.size > mapped_ofile.len - sect.offset) return error.InvalidMachO;
        const section_bytes = mapped_ofile[sect.offset..][0..@intCast(sect.size)];
        sections[section_index] = .{
            .data = section_bytes,
            .owned = false,
        };
    }

    if (sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or
        sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or
        sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or
        sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null)
    {
        return error.MissingDebugInfo;
    }

    var dwarf: Dwarf = .{ .sections = sections };
    errdefer dwarf.deinit(gpa);
    dwarf.open(gpa, .little) catch |err| switch (err) {
        error.InvalidDebugInfo,
        error.EndOfStream,
        error.Overflow,
        error.StreamTooLong,
        => return error.InvalidDwarf,

        error.MissingDebugInfo,
        error.ReadFailed,
        error.OutOfMemory,
        => |e| return e,
    };

    return .{
        .mapped_memory = all_mapped_memory,
        .dwarf = dwarf,
        .strtab = strtab,
        .symtab_raw = symtab_raw,
        .symbols_by_name = symbols_by_name.move(),
    };
}
512
/// Opens the file at `path` (relative to the current working directory) and maps its entire
/// contents read-only with `mmap`. The file handle is closed before returning; the caller releases
/// the mapping with `posix.munmap`.
///
/// Returns `error.MissingDebugInfo` if the file does not exist and `error.ReadFailed` for every
/// other failure (opening, querying the size, a size that does not fit in `usize`, mapping).
fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 {
    const file = std.fs.cwd().openFile(path, .{}) catch |err| {
        if (err == error.FileNotFound) return error.MissingDebugInfo;
        return error.ReadFailed;
    };
    defer file.close();

    const end_pos = file.getEndPos() catch return error.ReadFailed;
    const file_len = std.math.cast(usize, end_pos) orelse return error.ReadFailed;

    const mapping = posix.mmap(
        null,
        file_len,
        posix.PROT.READ,
        .{ .TYPE = .SHARED },
        file.handle,
        0,
    ) catch return error.ReadFailed;
    return mapping;
}
535
536const std = @import("std");
537const Allocator = std.mem.Allocator;
538const Dwarf = std.debug.Dwarf;
539const Io = std.Io;
540const assert = std.debug.assert;
541const posix = std.posix;
542const macho = std.macho;
543const mem = std.mem;
544const testing = std.testing;
545
546const builtin = @import("builtin");
547
548const MachOFile = @This();