master
  1//! A helper type for loading an ELF file and collecting its DWARF debug information, unwind
  2//! information, and symbol table.
  3
  4is_64: bool,
  5endian: Endian,
  6
  7/// This is `null` iff any of the required DWARF sections were missing. `ElfFile.load` does *not*
  8/// call `Dwarf.open`, `Dwarf.scanAllFunctions`, etc; that is the caller's responsibility.
  9dwarf: ?Dwarf,
 10
 11/// If non-`null`, describes the `.eh_frame` section, which can be used with `Dwarf.Unwind`.
 12eh_frame: ?UnwindSection,
 13/// If non-`null`, describes the `.debug_frame` section, which can be used with `Dwarf.Unwind`.
 14debug_frame: ?UnwindSection,
 15
 16/// If non-`null`, this is the contents of the `.strtab` section.
 17strtab: ?[]const u8,
 18/// If non-`null`, describes the `.symtab` section.
 19symtab: ?SymtabSection,
 20
 21/// Binary search table lazily populated by `searchSymtab`.
 22symbol_search_table: ?[]usize,
 23
 24/// The memory-mapped ELF file, which is referenced by `dwarf`. This field is here only so that
 25/// this memory can be unmapped by `ElfFile.deinit`.
 26mapped_file: []align(std.heap.page_size_min) const u8,
 27/// Sometimes, debug info is stored separately to the main ELF file. In that case, `mapped_file`
 28/// is the mapped ELF binary, and `mapped_debug_file` is the mapped debug info file. Both must
 29/// be unmapped by `ElfFile.deinit`.
 30mapped_debug_file: ?[]align(std.heap.page_size_min) const u8,
 31
 32arena: std.heap.ArenaAllocator.State,
 33
 34pub const UnwindSection = struct {
 35    vaddr: u64,
 36    bytes: []const u8,
 37};
 38pub const SymtabSection = struct {
 39    entry_size: u64,
 40    bytes: []const u8,
 41};
 42
 43pub const DebugInfoSearchPaths = struct {
 44    /// The location of a debuginfod client directory, which acts as a search path for build IDs. If
 45    /// given, we can load from this directory opportunistically, but make no effort to populate it.
 46    /// To avoid allocation when building the search paths, this is given as two components which
 47    /// will be concatenated.
 48    debuginfod_client: ?[2][]const u8,
 49    /// All "global debug directories" on the system. These are used as search paths for both debug
 50    /// links and build IDs. On typical systems this is just "/usr/lib/debug".
 51    global_debug: []const []const u8,
 52    /// The path to the dirname of the ELF file, which acts as a search path for debug links.
 53    exe_dir: ?[]const u8,
 54
 55    pub const none: DebugInfoSearchPaths = .{
 56        .debuginfod_client = null,
 57        .global_debug = &.{},
 58        .exe_dir = null,
 59    };
 60
 61    pub fn native(exe_path: []const u8) DebugInfoSearchPaths {
 62        return .{
 63            .debuginfod_client = p: {
 64                if (std.posix.getenv("DEBUGINFOD_CACHE_PATH")) |p| {
 65                    break :p .{ p, "" };
 66                }
 67                if (std.posix.getenv("XDG_CACHE_HOME")) |cache_path| {
 68                    break :p .{ cache_path, "/debuginfod_client" };
 69                }
 70                if (std.posix.getenv("HOME")) |home_path| {
 71                    break :p .{ home_path, "/.cache/debuginfod_client" };
 72                }
 73                break :p null;
 74            },
 75            .global_debug = &.{
 76                "/usr/lib/debug",
 77            },
 78            .exe_dir = std.fs.path.dirname(exe_path) orelse ".",
 79        };
 80    }
 81};
 82
 83pub fn deinit(ef: *ElfFile, gpa: Allocator) void {
 84    if (ef.dwarf) |*dwarf| dwarf.deinit(gpa);
 85    if (ef.symbol_search_table) |t| gpa.free(t);
 86    var arena = ef.arena.promote(gpa);
 87    arena.deinit();
 88
 89    std.posix.munmap(ef.mapped_file);
 90    if (ef.mapped_debug_file) |m| std.posix.munmap(m);
 91
 92    ef.* = undefined;
 93}
 94
 95pub const LoadError = error{
 96    OutOfMemory,
 97    Overflow,
 98    TruncatedElfFile,
 99    InvalidCompressedSection,
100    InvalidElfMagic,
101    InvalidElfVersion,
102    InvalidElfClass,
103    InvalidElfEndian,
104    // The remaining errors all occur when attemping to stat or mmap a file.
105    SystemResources,
106    MemoryMappingNotSupported,
107    AccessDenied,
108    LockedMemoryLimitExceeded,
109    ProcessFdQuotaExceeded,
110    SystemFdQuotaExceeded,
111    Streaming,
112    Canceled,
113    Unexpected,
114};
115
116pub fn load(
117    gpa: Allocator,
118    elf_file: std.fs.File,
119    opt_build_id: ?[]const u8,
120    di_search_paths: *const DebugInfoSearchPaths,
121) LoadError!ElfFile {
122    var arena_instance: std.heap.ArenaAllocator = .init(gpa);
123    errdefer arena_instance.deinit();
124    const arena = arena_instance.allocator();
125
126    var result = loadInner(arena, elf_file, null) catch |err| switch (err) {
127        error.CrcMismatch => unreachable, // we passed crc as null
128        else => |e| return e,
129    };
130    errdefer std.posix.munmap(result.mapped_mem);
131
132    // `loadInner` did most of the work, but we might need to load an external debug info file
133
134    const di_mapped_mem: ?[]align(std.heap.page_size_min) const u8 = load_di: {
135        if (result.sections.get(.debug_info) != null and
136            result.sections.get(.debug_abbrev) != null and
137            result.sections.get(.debug_str) != null and
138            result.sections.get(.debug_line) != null)
139        {
140            // The info is already loaded from this file alone!
141            break :load_di null;
142        }
143
144        // We're missing some debug info---let's try and load it from a separate file.
145
146        build_id: {
147            const build_id = opt_build_id orelse break :build_id;
148            if (build_id.len < 3) break :build_id;
149
150            for (di_search_paths.global_debug) |global_debug| {
151                if (try loadSeparateDebugFile(arena, &result, null, "{s}/.build-id/{x}/{x}.debug", .{
152                    global_debug,
153                    build_id[0..1],
154                    build_id[1..],
155                })) |mapped| break :load_di mapped;
156            }
157
158            if (di_search_paths.debuginfod_client) |components| {
159                if (try loadSeparateDebugFile(arena, &result, null, "{s}{s}/{x}/debuginfo", .{
160                    components[0],
161                    components[1],
162                    build_id,
163                })) |mapped| break :load_di mapped;
164            }
165        }
166
167        debug_link: {
168            const section = result.sections.get(.gnu_debuglink) orelse break :debug_link;
169            const debug_filename = std.mem.sliceTo(section.bytes, 0);
170            const crc_offset = std.mem.alignForward(usize, debug_filename.len + 1, 4);
171            if (section.bytes.len < crc_offset + 4) break :debug_link;
172            const debug_crc = std.mem.readInt(u32, section.bytes[crc_offset..][0..4], result.endian);
173
174            const exe_dir = di_search_paths.exe_dir orelse break :debug_link;
175
176            if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/{s}", .{
177                exe_dir,
178                debug_filename,
179            })) |mapped| break :load_di mapped;
180            if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/.debug/{s}", .{
181                exe_dir,
182                debug_filename,
183            })) |mapped| break :load_di mapped;
184            for (di_search_paths.global_debug) |global_debug| {
185                // This looks like a bug; it isn't. They really do embed the absolute path to the
186                // exe's dirname, *under* the global debug path.
187                if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/{s}/{s}", .{
188                    global_debug,
189                    exe_dir,
190                    debug_filename,
191                })) |mapped| break :load_di mapped;
192            }
193        }
194
195        break :load_di null;
196    };
197    errdefer comptime unreachable;
198
199    return .{
200        .is_64 = result.is_64,
201        .endian = result.endian,
202        .dwarf = dwarf: {
203            if (result.sections.get(.debug_info) == null or
204                result.sections.get(.debug_abbrev) == null or
205                result.sections.get(.debug_str) == null or
206                result.sections.get(.debug_line) == null)
207            {
208                break :dwarf null; // debug info not present
209            }
210            var sections: Dwarf.SectionArray = @splat(null);
211            inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields) |f| {
212                if (result.sections.get(@field(Section.Id, f.name))) |s| {
213                    sections[f.value] = .{ .data = s.bytes, .owned = false };
214                }
215            }
216            break :dwarf .{ .sections = sections };
217        },
218        .eh_frame = if (result.sections.get(.eh_frame)) |s| .{
219            .vaddr = s.header.sh_addr,
220            .bytes = s.bytes,
221        } else null,
222        .debug_frame = if (result.sections.get(.debug_frame)) |s| .{
223            .vaddr = s.header.sh_addr,
224            .bytes = s.bytes,
225        } else null,
226        .strtab = if (result.sections.get(.strtab)) |s| s.bytes else null,
227        .symtab = if (result.sections.get(.symtab)) |s| .{
228            .entry_size = s.header.sh_entsize,
229            .bytes = s.bytes,
230        } else null,
231        .symbol_search_table = null,
232        .mapped_file = result.mapped_mem,
233        .mapped_debug_file = di_mapped_mem,
234        .arena = arena_instance.state,
235    };
236}
237
238pub fn searchSymtab(ef: *ElfFile, gpa: Allocator, vaddr: u64) error{
239    NoSymtab,
240    NoStrtab,
241    BadSymtab,
242    OutOfMemory,
243}!std.debug.Symbol {
244    const symtab = ef.symtab orelse return error.NoSymtab;
245    const strtab = ef.strtab orelse return error.NoStrtab;
246
247    if (symtab.bytes.len % symtab.entry_size != 0) return error.BadSymtab;
248
249    const swap_endian = ef.endian != @import("builtin").cpu.arch.endian();
250
251    switch (ef.is_64) {
252        inline true, false => |is_64| {
253            const Sym = if (is_64) elf.Elf64_Sym else elf.Elf32_Sym;
254            if (symtab.entry_size != @sizeOf(Sym)) return error.BadSymtab;
255            const symbols: []align(1) const Sym = @ptrCast(symtab.bytes);
256            if (ef.symbol_search_table == null) {
257                ef.symbol_search_table = try buildSymbolSearchTable(gpa, ef.endian, Sym, symbols);
258            }
259            const search_table = ef.symbol_search_table.?;
260            const SearchContext = struct {
261                swap_endian: bool,
262                target: u64,
263                symbols: []align(1) const Sym,
264                fn predicate(ctx: @This(), sym_index: usize) bool {
265                    // We need to return `true` for the first N items, then `false` for the rest --
266                    // the index we'll get out is the first `false` one. So, we'll return `true` iff
267                    // the target address is after the *end* of this symbol. This synchronizes with
268                    // the logic in `buildSymbolSearchTable` which sorts by *end* address.
269                    var sym = ctx.symbols[sym_index];
270                    if (ctx.swap_endian) std.mem.byteSwapAllFields(Sym, &sym);
271                    const sym_end = sym.st_value + sym.st_size;
272                    return ctx.target >= sym_end;
273                }
274            };
275            const sym_index_index = std.sort.partitionPoint(usize, search_table, @as(SearchContext, .{
276                .swap_endian = swap_endian,
277                .target = vaddr,
278                .symbols = symbols,
279            }), SearchContext.predicate);
280            if (sym_index_index == search_table.len) return .unknown;
281            var sym = symbols[search_table[sym_index_index]];
282            if (swap_endian) std.mem.byteSwapAllFields(Sym, &sym);
283            if (vaddr < sym.st_value or vaddr >= sym.st_value + sym.st_size) return .unknown;
284            return .{
285                .name = std.mem.sliceTo(strtab[sym.st_name..], 0),
286                .compile_unit_name = null,
287                .source_location = null,
288            };
289        },
290    }
291}
292
293fn buildSymbolSearchTable(gpa: Allocator, endian: Endian, comptime Sym: type, symbols: []align(1) const Sym) error{
294    OutOfMemory,
295    BadSymtab,
296}![]usize {
297    var result: std.ArrayList(usize) = .empty;
298    defer result.deinit(gpa);
299
300    const swap_endian = endian != @import("builtin").cpu.arch.endian();
301
302    for (symbols, 0..) |sym_orig, sym_index| {
303        var sym = sym_orig;
304        if (swap_endian) std.mem.byteSwapAllFields(Sym, &sym);
305        if (sym.st_name == 0) continue;
306        if (sym.st_shndx == elf.SHN_UNDEF) continue;
307        try result.append(gpa, sym_index);
308    }
309
310    const SortContext = struct {
311        swap_endian: bool,
312        symbols: []align(1) const Sym,
313        fn lessThan(ctx: @This(), lhs_sym_index: usize, rhs_sym_index: usize) bool {
314            // We sort by *end* address, not start address. This matches up with logic in `searchSymtab`.
315            var lhs_sym = ctx.symbols[lhs_sym_index];
316            var rhs_sym = ctx.symbols[rhs_sym_index];
317            if (ctx.swap_endian) {
318                std.mem.byteSwapAllFields(Sym, &lhs_sym);
319                std.mem.byteSwapAllFields(Sym, &rhs_sym);
320            }
321            const lhs_val = lhs_sym.st_value + lhs_sym.st_size;
322            const rhs_val = rhs_sym.st_value + rhs_sym.st_size;
323            return lhs_val < rhs_val;
324        }
325    };
326    std.mem.sort(usize, result.items, @as(SortContext, .{
327        .swap_endian = swap_endian,
328        .symbols = symbols,
329    }), SortContext.lessThan);
330
331    return result.toOwnedSlice(gpa);
332}
333
334/// Only used locally, during `load`.
335const Section = struct {
336    header: elf.Elf64_Shdr,
337    bytes: []const u8,
338    const Id = enum {
339        // DWARF sections: see `Dwarf.Section.Id`.
340        debug_info,
341        debug_abbrev,
342        debug_str,
343        debug_str_offsets,
344        debug_line,
345        debug_line_str,
346        debug_ranges,
347        debug_loclists,
348        debug_rnglists,
349        debug_addr,
350        debug_names,
351        // Then anything else we're interested in.
352        gnu_debuglink,
353        eh_frame,
354        debug_frame,
355        symtab,
356        strtab,
357    };
358    const Array = std.enums.EnumArray(Section.Id, ?Section);
359};
360
361fn loadSeparateDebugFile(arena: Allocator, main_loaded: *LoadInnerResult, opt_crc: ?u32, comptime fmt: []const u8, args: anytype) Allocator.Error!?[]align(std.heap.page_size_min) const u8 {
362    const path = try std.fmt.allocPrint(arena, fmt, args);
363    const elf_file = std.fs.cwd().openFile(path, .{}) catch return null;
364    defer elf_file.close();
365
366    const result = loadInner(arena, elf_file, opt_crc) catch |err| switch (err) {
367        error.OutOfMemory => |e| return e,
368        error.CrcMismatch => return null,
369        else => return null,
370    };
371    errdefer comptime unreachable;
372
373    const have_debug_sections = inline for (@as([]const []const u8, &.{
374        "debug_info",
375        "debug_abbrev",
376        "debug_str",
377        "debug_line",
378    })) |name| {
379        const s = @field(Section.Id, name);
380        if (main_loaded.sections.get(s) == null and result.sections.get(s) == null) {
381            break false;
382        }
383    } else true;
384
385    if (result.is_64 != main_loaded.is_64 or
386        result.endian != main_loaded.endian or
387        !have_debug_sections)
388    {
389        std.posix.munmap(result.mapped_mem);
390        return null;
391    }
392
393    inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields) |f| {
394        const id = @field(Section.Id, f.name);
395        if (main_loaded.sections.get(id) == null) {
396            main_loaded.sections.set(id, result.sections.get(id));
397        }
398    }
399
400    return result.mapped_mem;
401}
402
403const LoadInnerResult = struct {
404    is_64: bool,
405    endian: Endian,
406    sections: Section.Array,
407    mapped_mem: []align(std.heap.page_size_min) const u8,
408};
409fn loadInner(
410    arena: Allocator,
411    elf_file: std.fs.File,
412    opt_crc: ?u32,
413) (LoadError || error{ CrcMismatch, Streaming, Canceled })!LoadInnerResult {
414    const mapped_mem: []align(std.heap.page_size_min) const u8 = mapped: {
415        const file_len = std.math.cast(
416            usize,
417            elf_file.getEndPos() catch |err| switch (err) {
418                error.PermissionDenied => unreachable, // not asking for PROT_EXEC
419                else => |e| return e,
420            },
421        ) orelse return error.Overflow;
422
423        break :mapped std.posix.mmap(
424            null,
425            file_len,
426            std.posix.PROT.READ,
427            .{ .TYPE = .SHARED },
428            elf_file.handle,
429            0,
430        ) catch |err| switch (err) {
431            error.MappingAlreadyExists => unreachable, // not using FIXED_NOREPLACE
432            error.PermissionDenied => unreachable, // not asking for PROT_EXEC
433            else => |e| return e,
434        };
435    };
436
437    if (opt_crc) |crc| {
438        if (std.hash.crc.Crc32.hash(mapped_mem) != crc) {
439            return error.CrcMismatch;
440        }
441    }
442    errdefer std.posix.munmap(mapped_mem);
443
444    var fr: std.Io.Reader = .fixed(mapped_mem);
445
446    const header = elf.Header.read(&fr) catch |err| switch (err) {
447        error.ReadFailed => unreachable,
448        error.EndOfStream => return error.TruncatedElfFile,
449
450        error.InvalidElfMagic,
451        error.InvalidElfVersion,
452        error.InvalidElfClass,
453        error.InvalidElfEndian,
454        => |e| return e,
455    };
456    const endian = header.endian;
457
458    const shstrtab_shdr_off = try std.math.add(
459        u64,
460        header.shoff,
461        try std.math.mul(u64, header.shstrndx, header.shentsize),
462    );
463    fr.seek = std.math.cast(usize, shstrtab_shdr_off) orelse return error.Overflow;
464    const shstrtab: []const u8 = if (header.is_64) shstrtab: {
465        const shdr = fr.takeStruct(elf.Elf64_Shdr, endian) catch return error.TruncatedElfFile;
466        if (shdr.sh_offset + shdr.sh_size > mapped_mem.len) return error.TruncatedElfFile;
467        break :shstrtab mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)];
468    } else shstrtab: {
469        const shdr = fr.takeStruct(elf.Elf32_Shdr, endian) catch return error.TruncatedElfFile;
470        if (shdr.sh_offset + shdr.sh_size > mapped_mem.len) return error.TruncatedElfFile;
471        break :shstrtab mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)];
472    };
473
474    var sections: Section.Array = .initFill(null);
475
476    var it = header.iterateSectionHeadersBuffer(mapped_mem);
477    while (it.next() catch return error.TruncatedElfFile) |shdr| {
478        if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue;
479        if (shdr.sh_name > shstrtab.len) return error.TruncatedElfFile;
480        const name = std.mem.sliceTo(shstrtab[@intCast(shdr.sh_name)..], 0);
481
482        const section_id: Section.Id = inline for (@typeInfo(Section.Id).@"enum".fields) |s| {
483            if (std.mem.eql(u8, "." ++ s.name, name)) {
484                break @enumFromInt(s.value);
485            }
486        } else continue;
487
488        if (sections.get(section_id) != null) continue;
489
490        if (shdr.sh_offset + shdr.sh_size > mapped_mem.len) return error.TruncatedElfFile;
491        const raw_section_bytes = mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)];
492        const section_bytes: []const u8 = bytes: {
493            if ((shdr.sh_flags & elf.SHF_COMPRESSED) == 0) break :bytes raw_section_bytes;
494
495            var section_reader: std.Io.Reader = .fixed(raw_section_bytes);
496            const ch_type: elf.COMPRESS, const ch_size: u64 = if (header.is_64) ch: {
497                const chdr = section_reader.takeStruct(elf.Elf64_Chdr, endian) catch return error.InvalidCompressedSection;
498                break :ch .{ chdr.ch_type, chdr.ch_size };
499            } else ch: {
500                const chdr = section_reader.takeStruct(elf.Elf32_Chdr, endian) catch return error.InvalidCompressedSection;
501                break :ch .{ chdr.ch_type, chdr.ch_size };
502            };
503            if (ch_type != .ZLIB) {
504                // The compression algorithm is unsupported, but don't make that a hard error; the
505                // file might still be valid, and we might still be okay without this section.
506                continue;
507            }
508
509            const buf = try arena.alloc(u8, std.math.cast(usize, ch_size) orelse return error.Overflow);
510            var fw: std.Io.Writer = .fixed(buf);
511            var decompress: std.compress.flate.Decompress = .init(&section_reader, .zlib, &.{});
512            const n = decompress.reader.streamRemaining(&fw) catch |err| switch (err) {
513                // If a write failed, then `buf` filled up, so `ch_size` was incorrect
514                error.WriteFailed => return error.InvalidCompressedSection,
515                // If a read failed, flate expected the section to have more data
516                error.ReadFailed => return error.InvalidCompressedSection,
517            };
518            // It's also an error if the data is shorter than expected.
519            if (n != buf.len) return error.InvalidCompressedSection;
520            break :bytes buf;
521        };
522        sections.set(section_id, .{ .header = shdr, .bytes = section_bytes });
523    }
524
525    return .{
526        .is_64 = header.is_64,
527        .endian = endian,
528        .sections = sections,
529        .mapped_mem = mapped_mem,
530    };
531}
532
533const std = @import("std");
534const Endian = std.builtin.Endian;
535const Dwarf = std.debug.Dwarf;
536const ElfFile = @This();
537const Allocator = std.mem.Allocator;
538const elf = std.elf;