master
   1//! Implements parsing, decoding, and caching of DWARF information.
   2//!
   3//! This API makes no assumptions about the relationship between the host and
   4//! the target being debugged. In other words, any DWARF information can be used
   5//! from any host via this API. Note, however, that the limits of 32-bit
   6//! addressing can cause very large 64-bit binaries to be impossible to open on
   7//! 32-bit hosts.
   8//!
   9//! For unopinionated types and bits, see `std.dwarf`.
  10
  11const std = @import("../std.zig");
  12const Allocator = std.mem.Allocator;
  13const mem = std.mem;
  14const DW = std.dwarf;
  15const AT = DW.AT;
  16const FORM = DW.FORM;
  17const Format = DW.Format;
  18const RLE = DW.RLE;
  19const UT = DW.UT;
  20const assert = std.debug.assert;
  21const cast = std.math.cast;
  22const maxInt = std.math.maxInt;
  23const ArrayList = std.ArrayList;
  24const Endian = std.builtin.Endian;
  25const Reader = std.Io.Reader;
  26
  27const Dwarf = @This();
  28
  29pub const expression = @import("Dwarf/expression.zig");
  30pub const Unwind = @import("Dwarf/Unwind.zig");
  31pub const SelfUnwinder = @import("Dwarf/SelfUnwinder.zig");
  32
  33/// Useful to temporarily enable while working on this file.
  34const debug_debug_mode = false;
  35
  36sections: SectionArray = @splat(null),
  37
  38/// Filled later by the initializer
  39abbrev_table_list: ArrayList(Abbrev.Table) = .empty,
  40/// Filled later by the initializer
  41compile_unit_list: ArrayList(CompileUnit) = .empty,
  42/// Filled later by the initializer
  43func_list: ArrayList(Func) = .empty,
  44
  45/// Populated by `populateRanges`.
  46ranges: ArrayList(Range) = .empty,
  47
  48pub const Range = struct {
  49    start: u64,
  50    end: u64,
  51    /// Index into `compile_unit_list`.
  52    compile_unit_index: usize,
  53};
  54
  55pub const Section = struct {
  56    data: []const u8,
  57    /// If `data` is owned by this Dwarf.
  58    owned: bool,
  59
  60    pub const Id = enum {
  61        debug_info,
  62        debug_abbrev,
  63        debug_str,
  64        debug_str_offsets,
  65        debug_line,
  66        debug_line_str,
  67        debug_ranges,
  68        debug_loclists,
  69        debug_rnglists,
  70        debug_addr,
  71        debug_names,
  72    };
  73};
  74
  75pub const Abbrev = struct {
  76    code: u64,
  77    tag_id: u64,
  78    has_children: bool,
  79    attrs: []Attr,
  80
  81    fn deinit(abbrev: *Abbrev, gpa: Allocator) void {
  82        gpa.free(abbrev.attrs);
  83        abbrev.* = undefined;
  84    }
  85
  86    const Attr = struct {
  87        id: u64,
  88        form_id: u64,
  89        /// Only valid if form_id is .implicit_const
  90        payload: i64,
  91    };
  92
  93    const Table = struct {
  94        // offset from .debug_abbrev
  95        offset: u64,
  96        abbrevs: []Abbrev,
  97
  98        fn deinit(table: *Table, gpa: Allocator) void {
  99            for (table.abbrevs) |*abbrev| {
 100                abbrev.deinit(gpa);
 101            }
 102            gpa.free(table.abbrevs);
 103            table.* = undefined;
 104        }
 105
 106        fn get(table: *const Table, abbrev_code: u64) ?*const Abbrev {
 107            return for (table.abbrevs) |*abbrev| {
 108                if (abbrev.code == abbrev_code) break abbrev;
 109            } else null;
 110        }
 111    };
 112};
 113
 114pub const CompileUnit = struct {
 115    version: u16,
 116    format: Format,
 117    addr_size_bytes: u8,
 118    die: Die,
 119    pc_range: ?PcRange,
 120
 121    str_offsets_base: usize,
 122    addr_base: usize,
 123    rnglists_base: usize,
 124    loclists_base: usize,
 125    frame_base: ?*const FormValue,
 126
 127    src_loc_cache: ?SrcLocCache,
 128
 129    pub const SrcLocCache = struct {
 130        line_table: LineTable,
 131        directories: []const FileEntry,
 132        files: []FileEntry,
 133        version: u16,
 134
 135        pub const LineTable = std.AutoArrayHashMapUnmanaged(u64, LineEntry);
 136
 137        pub const LineEntry = struct {
 138            line: u32,
 139            column: u32,
 140            /// Offset by 1 depending on whether Dwarf version is >= 5.
 141            file: u32,
 142
 143            pub const invalid: LineEntry = .{
 144                .line = undefined,
 145                .column = undefined,
 146                .file = std.math.maxInt(u32),
 147            };
 148
 149            pub fn isInvalid(le: LineEntry) bool {
 150                return le.file == invalid.file;
 151            }
 152        };
 153
 154        pub fn findSource(slc: *const SrcLocCache, address: u64) !LineEntry {
 155            const index = std.sort.upperBound(u64, slc.line_table.keys(), address, struct {
 156                fn order(context: u64, item: u64) std.math.Order {
 157                    return std.math.order(context, item);
 158                }
 159            }.order);
 160            if (index == 0) return missing();
 161            return slc.line_table.values()[index - 1];
 162        }
 163    };
 164};
 165
 166pub const FormValue = union(enum) {
 167    addr: u64,
 168    addrx: u64,
 169    block: []const u8,
 170    udata: u64,
 171    data16: *const [16]u8,
 172    sdata: i64,
 173    exprloc: []const u8,
 174    flag: bool,
 175    sec_offset: u64,
 176    ref: u64,
 177    ref_addr: u64,
 178    string: [:0]const u8,
 179    strp: u64,
 180    strx: u64,
 181    line_strp: u64,
 182    loclistx: u64,
 183    rnglistx: u64,
 184
 185    fn getString(fv: FormValue, di: Dwarf) ![:0]const u8 {
 186        switch (fv) {
 187            .string => |s| return s,
 188            .strp => |off| return di.getString(off),
 189            .line_strp => |off| return di.getLineString(off),
 190            else => return bad(),
 191        }
 192    }
 193
 194    fn getUInt(fv: FormValue, comptime U: type) !U {
 195        return switch (fv) {
 196            inline .udata,
 197            .sdata,
 198            .sec_offset,
 199            => |c| cast(U, c) orelse bad(),
 200            else => bad(),
 201        };
 202    }
 203};
 204
 205pub const Die = struct {
 206    tag_id: u64,
 207    has_children: bool,
 208    attrs: []Attr,
 209
 210    const Attr = struct {
 211        id: u64,
 212        value: FormValue,
 213    };
 214
 215    fn deinit(self: *Die, gpa: Allocator) void {
 216        gpa.free(self.attrs);
 217        self.* = undefined;
 218    }
 219
 220    fn getAttr(self: *const Die, id: u64) ?*const FormValue {
 221        for (self.attrs) |*attr| {
 222            if (attr.id == id) return &attr.value;
 223        }
 224        return null;
 225    }
 226
 227    fn getAttrAddr(
 228        self: *const Die,
 229        di: *const Dwarf,
 230        endian: Endian,
 231        id: u64,
 232        compile_unit: *const CompileUnit,
 233    ) error{ InvalidDebugInfo, MissingDebugInfo }!u64 {
 234        const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
 235        return switch (form_value.*) {
 236            .addr => |value| value,
 237            .addrx => |index| di.readDebugAddr(endian, compile_unit, index),
 238            else => bad(),
 239        };
 240    }
 241
 242    fn getAttrSecOffset(self: *const Die, id: u64) !u64 {
 243        const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
 244        return form_value.getUInt(u64);
 245    }
 246
 247    fn getAttrUnsignedLe(self: *const Die, id: u64) !u64 {
 248        const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
 249        return switch (form_value.*) {
 250            .Const => |value| value.asUnsignedLe(),
 251            else => bad(),
 252        };
 253    }
 254
 255    fn getAttrRef(self: *const Die, id: u64, unit_offset: u64, unit_len: u64) !u64 {
 256        const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
 257        return switch (form_value.*) {
 258            .ref => |offset| if (offset < unit_len) unit_offset + offset else bad(),
 259            .ref_addr => |addr| addr,
 260            else => bad(),
 261        };
 262    }
 263
 264    pub fn getAttrString(
 265        self: *const Die,
 266        di: *Dwarf,
 267        endian: Endian,
 268        id: u64,
 269        opt_str: ?[]const u8,
 270        compile_unit: *const CompileUnit,
 271    ) error{ InvalidDebugInfo, MissingDebugInfo }![]const u8 {
 272        const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
 273        switch (form_value.*) {
 274            .string => |value| return value,
 275            .strp => |offset| return di.getString(offset),
 276            .strx => |index| {
 277                const debug_str_offsets = di.section(.debug_str_offsets) orelse return bad();
 278                if (compile_unit.str_offsets_base == 0) return bad();
 279                switch (compile_unit.format) {
 280                    .@"32" => {
 281                        const byte_offset = compile_unit.str_offsets_base + 4 * index;
 282                        if (byte_offset + 4 > debug_str_offsets.len) return bad();
 283                        const offset = mem.readInt(u32, debug_str_offsets[@intCast(byte_offset)..][0..4], endian);
 284                        return getStringGeneric(opt_str, offset);
 285                    },
 286                    .@"64" => {
 287                        const byte_offset = compile_unit.str_offsets_base + 8 * index;
 288                        if (byte_offset + 8 > debug_str_offsets.len) return bad();
 289                        const offset = mem.readInt(u64, debug_str_offsets[@intCast(byte_offset)..][0..8], endian);
 290                        return getStringGeneric(opt_str, offset);
 291                    },
 292                }
 293            },
 294            .line_strp => |offset| return di.getLineString(offset),
 295            else => return bad(),
 296        }
 297    }
 298};
 299
 300const num_sections = std.enums.directEnumArrayLen(Section.Id, 0);
 301pub const SectionArray = [num_sections]?Section;
 302
 303pub const OpenError = ScanError;
 304
 305/// Initialize DWARF info. The caller has the responsibility to initialize most
 306/// the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the
 307/// main binary file (not the secondary debug info file).
 308pub fn open(d: *Dwarf, gpa: Allocator, endian: Endian) OpenError!void {
 309    try d.scanAllFunctions(gpa, endian);
 310    try d.scanAllCompileUnits(gpa, endian);
 311}
 312
 313const PcRange = struct {
 314    start: u64,
 315    end: u64,
 316};
 317
 318const Func = struct {
 319    pc_range: ?PcRange,
 320    name: ?[]const u8,
 321};
 322
 323pub fn section(di: Dwarf, dwarf_section: Section.Id) ?[]const u8 {
 324    return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.data else null;
 325}
 326
 327pub fn deinit(di: *Dwarf, gpa: Allocator) void {
 328    for (di.sections) |opt_section| {
 329        if (opt_section) |s| if (s.owned) gpa.free(s.data);
 330    }
 331    for (di.abbrev_table_list.items) |*abbrev| {
 332        abbrev.deinit(gpa);
 333    }
 334    di.abbrev_table_list.deinit(gpa);
 335    for (di.compile_unit_list.items) |*cu| {
 336        if (cu.src_loc_cache) |*slc| {
 337            slc.line_table.deinit(gpa);
 338            gpa.free(slc.directories);
 339            gpa.free(slc.files);
 340        }
 341        cu.die.deinit(gpa);
 342    }
 343    di.compile_unit_list.deinit(gpa);
 344    di.func_list.deinit(gpa);
 345    di.ranges.deinit(gpa);
 346    di.* = undefined;
 347}
 348
 349pub fn getSymbolName(di: *const Dwarf, address: u64) ?[]const u8 {
 350    // Iterate the function list backwards so that we see child DIEs before their parents. This is
 351    // important because `DW_TAG_inlined_subroutine` DIEs will have a range which is a sub-range of
 352    // their caller, and we want to return the callee's name, not the caller's.
 353    var i: usize = di.func_list.items.len;
 354    while (i > 0) {
 355        i -= 1;
 356        const func = &di.func_list.items[i];
 357        if (func.pc_range) |range| {
 358            if (address >= range.start and address < range.end) {
 359                return func.name;
 360            }
 361        }
 362    }
 363
 364    return null;
 365}
 366
 367pub const ScanError = error{
 368    InvalidDebugInfo,
 369    MissingDebugInfo,
 370    ReadFailed,
 371    EndOfStream,
 372    Overflow,
 373    StreamTooLong,
 374} || Allocator.Error;
 375
 376fn scanAllFunctions(di: *Dwarf, gpa: Allocator, endian: Endian) ScanError!void {
 377    var fr: Reader = .fixed(di.section(.debug_info).?);
 378    var this_unit_offset: u64 = 0;
 379
 380    while (this_unit_offset < fr.buffer.len) {
 381        fr.seek = @intCast(this_unit_offset);
 382
 383        const unit_header = try readUnitHeader(&fr, endian);
 384        if (unit_header.unit_length == 0) return;
 385        const next_offset = unit_header.header_length + unit_header.unit_length;
 386
 387        const version = try fr.takeInt(u16, endian);
 388        if (version < 2 or version > 5) return bad();
 389
 390        var address_size: u8 = undefined;
 391        var debug_abbrev_offset: u64 = undefined;
 392        if (version >= 5) {
 393            const unit_type = try fr.takeByte();
 394            if (unit_type != DW.UT.compile) return bad();
 395            address_size = try fr.takeByte();
 396            debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian);
 397        } else {
 398            debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian);
 399            address_size = try fr.takeByte();
 400        }
 401
 402        const abbrev_table = try di.getAbbrevTable(gpa, debug_abbrev_offset);
 403
 404        var max_attrs: usize = 0;
 405        var zig_padding_abbrev_code: u7 = 0;
 406        for (abbrev_table.abbrevs) |abbrev| {
 407            max_attrs = @max(max_attrs, abbrev.attrs.len);
 408            if (cast(u7, abbrev.code)) |code| {
 409                if (abbrev.tag_id == DW.TAG.ZIG_padding and
 410                    !abbrev.has_children and
 411                    abbrev.attrs.len == 0)
 412                {
 413                    zig_padding_abbrev_code = code;
 414                }
 415            }
 416        }
 417        const attrs_buf = try gpa.alloc(Die.Attr, max_attrs * 3);
 418        defer gpa.free(attrs_buf);
 419        var attrs_bufs: [3][]Die.Attr = undefined;
 420        for (&attrs_bufs, 0..) |*buf, index| buf.* = attrs_buf[index * max_attrs ..][0..max_attrs];
 421
 422        const next_unit_pos = this_unit_offset + next_offset;
 423
 424        var compile_unit: CompileUnit = .{
 425            .version = version,
 426            .format = unit_header.format,
 427            .addr_size_bytes = address_size,
 428            .die = undefined,
 429            .pc_range = null,
 430
 431            .str_offsets_base = 0,
 432            .addr_base = 0,
 433            .rnglists_base = 0,
 434            .loclists_base = 0,
 435            .frame_base = null,
 436            .src_loc_cache = null,
 437        };
 438
 439        while (true) {
 440            fr.seek = std.mem.indexOfNonePos(u8, fr.buffer, fr.seek, &.{
 441                zig_padding_abbrev_code, 0,
 442            }) orelse fr.buffer.len;
 443            if (fr.seek >= next_unit_pos) break;
 444            var die_obj = (try parseDie(
 445                &fr,
 446                attrs_bufs[0],
 447                abbrev_table,
 448                unit_header.format,
 449                endian,
 450                address_size,
 451            )) orelse continue;
 452
 453            switch (die_obj.tag_id) {
 454                DW.TAG.compile_unit => {
 455                    compile_unit.die = die_obj;
 456                    compile_unit.die.attrs = attrs_bufs[1][0..die_obj.attrs.len];
 457                    @memcpy(compile_unit.die.attrs, die_obj.attrs);
 458
 459                    compile_unit.str_offsets_base = if (die_obj.getAttr(AT.str_offsets_base)) |fv| try fv.getUInt(usize) else 0;
 460                    compile_unit.addr_base = if (die_obj.getAttr(AT.addr_base)) |fv| try fv.getUInt(usize) else 0;
 461                    compile_unit.rnglists_base = if (die_obj.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0;
 462                    compile_unit.loclists_base = if (die_obj.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0;
 463                    compile_unit.frame_base = die_obj.getAttr(AT.frame_base);
 464                },
 465                DW.TAG.subprogram, DW.TAG.inlined_subroutine, DW.TAG.subroutine, DW.TAG.entry_point => {
 466                    const fn_name = x: {
 467                        var this_die_obj = die_obj;
 468                        // Prevent endless loops
 469                        for (0..3) |_| {
 470                            if (this_die_obj.getAttr(AT.name)) |_| {
 471                                break :x try this_die_obj.getAttrString(di, endian, AT.name, di.section(.debug_str), &compile_unit);
 472                            } else if (this_die_obj.getAttr(AT.abstract_origin)) |_| {
 473                                const after_die_offset = fr.seek;
 474                                defer fr.seek = after_die_offset;
 475
 476                                // Follow the DIE it points to and repeat
 477                                const ref_offset = try this_die_obj.getAttrRef(AT.abstract_origin, this_unit_offset, next_offset);
 478                                fr.seek = @intCast(ref_offset);
 479                                this_die_obj = (try parseDie(
 480                                    &fr,
 481                                    attrs_bufs[2],
 482                                    abbrev_table, // wrong abbrev table for different cu
 483                                    unit_header.format,
 484                                    endian,
 485                                    address_size,
 486                                )) orelse return bad();
 487                            } else if (this_die_obj.getAttr(AT.specification)) |_| {
 488                                const after_die_offset = fr.seek;
 489                                defer fr.seek = after_die_offset;
 490
 491                                // Follow the DIE it points to and repeat
 492                                const ref_offset = try this_die_obj.getAttrRef(AT.specification, this_unit_offset, next_offset);
 493                                fr.seek = @intCast(ref_offset);
 494                                this_die_obj = (try parseDie(
 495                                    &fr,
 496                                    attrs_bufs[2],
 497                                    abbrev_table, // wrong abbrev table for different cu
 498                                    unit_header.format,
 499                                    endian,
 500                                    address_size,
 501                                )) orelse return bad();
 502                            } else {
 503                                break :x null;
 504                            }
 505                        }
 506
 507                        break :x null;
 508                    };
 509
 510                    var range_added = if (die_obj.getAttrAddr(di, endian, AT.low_pc, &compile_unit)) |low_pc| blk: {
 511                        if (die_obj.getAttr(AT.high_pc)) |high_pc_value| {
 512                            const pc_end = switch (high_pc_value.*) {
 513                                .addr => |value| value,
 514                                .udata => |offset| low_pc + offset,
 515                                else => return bad(),
 516                            };
 517
 518                            try di.func_list.append(gpa, .{
 519                                .name = fn_name,
 520                                .pc_range = .{
 521                                    .start = low_pc,
 522                                    .end = pc_end,
 523                                },
 524                            });
 525
 526                            break :blk true;
 527                        }
 528
 529                        break :blk false;
 530                    } else |err| blk: {
 531                        if (err != error.MissingDebugInfo) return err;
 532                        break :blk false;
 533                    };
 534
 535                    if (die_obj.getAttr(AT.ranges)) |ranges_value| blk: {
 536                        var iter = DebugRangeIterator.init(ranges_value, di, endian, &compile_unit) catch |err| {
 537                            if (err != error.MissingDebugInfo) return err;
 538                            break :blk;
 539                        };
 540
 541                        while (try iter.next()) |range| {
 542                            range_added = true;
 543                            try di.func_list.append(gpa, .{
 544                                .name = fn_name,
 545                                .pc_range = .{
 546                                    .start = range.start,
 547                                    .end = range.end,
 548                                },
 549                            });
 550                        }
 551                    }
 552
 553                    if (fn_name != null and !range_added) {
 554                        try di.func_list.append(gpa, .{
 555                            .name = fn_name,
 556                            .pc_range = null,
 557                        });
 558                    }
 559                },
 560                else => {},
 561            }
 562        }
 563
 564        this_unit_offset += next_offset;
 565    }
 566}
 567
 568fn scanAllCompileUnits(di: *Dwarf, gpa: Allocator, endian: Endian) ScanError!void {
 569    var fr: Reader = .fixed(di.section(.debug_info).?);
 570    var this_unit_offset: u64 = 0;
 571
 572    var attrs_buf = std.array_list.Managed(Die.Attr).init(gpa);
 573    defer attrs_buf.deinit();
 574
 575    while (this_unit_offset < fr.buffer.len) {
 576        fr.seek = @intCast(this_unit_offset);
 577
 578        const unit_header = try readUnitHeader(&fr, endian);
 579        if (unit_header.unit_length == 0) return;
 580        const next_offset = unit_header.header_length + unit_header.unit_length;
 581
 582        const version = try fr.takeInt(u16, endian);
 583        if (version < 2 or version > 5) return bad();
 584
 585        var address_size: u8 = undefined;
 586        var debug_abbrev_offset: u64 = undefined;
 587        if (version >= 5) {
 588            const unit_type = try fr.takeByte();
 589            if (unit_type != UT.compile) return bad();
 590            address_size = try fr.takeByte();
 591            debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian);
 592        } else {
 593            debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian);
 594            address_size = try fr.takeByte();
 595        }
 596
 597        const abbrev_table = try di.getAbbrevTable(gpa, debug_abbrev_offset);
 598
 599        var max_attrs: usize = 0;
 600        for (abbrev_table.abbrevs) |abbrev| {
 601            max_attrs = @max(max_attrs, abbrev.attrs.len);
 602        }
 603        try attrs_buf.resize(max_attrs);
 604
 605        var compile_unit_die = (try parseDie(
 606            &fr,
 607            attrs_buf.items,
 608            abbrev_table,
 609            unit_header.format,
 610            endian,
 611            address_size,
 612        )) orelse return bad();
 613
 614        if (compile_unit_die.tag_id != DW.TAG.compile_unit) return bad();
 615
 616        compile_unit_die.attrs = try gpa.dupe(Die.Attr, compile_unit_die.attrs);
 617
 618        var compile_unit: CompileUnit = .{
 619            .version = version,
 620            .format = unit_header.format,
 621            .addr_size_bytes = address_size,
 622            .pc_range = null,
 623            .die = compile_unit_die,
 624            .str_offsets_base = if (compile_unit_die.getAttr(AT.str_offsets_base)) |fv| try fv.getUInt(usize) else 0,
 625            .addr_base = if (compile_unit_die.getAttr(AT.addr_base)) |fv| try fv.getUInt(usize) else 0,
 626            .rnglists_base = if (compile_unit_die.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0,
 627            .loclists_base = if (compile_unit_die.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0,
 628            .frame_base = compile_unit_die.getAttr(AT.frame_base),
 629            .src_loc_cache = null,
 630        };
 631
 632        compile_unit.pc_range = x: {
 633            if (compile_unit_die.getAttrAddr(di, endian, AT.low_pc, &compile_unit)) |low_pc| {
 634                if (compile_unit_die.getAttr(AT.high_pc)) |high_pc_value| {
 635                    const pc_end = switch (high_pc_value.*) {
 636                        .addr => |value| value,
 637                        .udata => |offset| low_pc + offset,
 638                        else => return bad(),
 639                    };
 640                    break :x PcRange{
 641                        .start = low_pc,
 642                        .end = pc_end,
 643                    };
 644                } else {
 645                    break :x null;
 646                }
 647            } else |err| {
 648                if (err != error.MissingDebugInfo) return err;
 649                break :x null;
 650            }
 651        };
 652
 653        try di.compile_unit_list.append(gpa, compile_unit);
 654
 655        this_unit_offset += next_offset;
 656    }
 657}
 658
 659pub fn populateRanges(d: *Dwarf, gpa: Allocator, endian: Endian) ScanError!void {
 660    assert(d.ranges.items.len == 0);
 661
 662    for (d.compile_unit_list.items, 0..) |*cu, cu_index| {
 663        if (cu.pc_range) |range| {
 664            try d.ranges.append(gpa, .{
 665                .start = range.start,
 666                .end = range.end,
 667                .compile_unit_index = cu_index,
 668            });
 669            continue;
 670        }
 671        const ranges_value = cu.die.getAttr(AT.ranges) orelse continue;
 672        var iter = DebugRangeIterator.init(ranges_value, d, endian, cu) catch continue;
 673        while (try iter.next()) |range| {
 674            // Not sure why LLVM thinks it's OK to emit these...
 675            if (range.start == range.end) continue;
 676
 677            try d.ranges.append(gpa, .{
 678                .start = range.start,
 679                .end = range.end,
 680                .compile_unit_index = cu_index,
 681            });
 682        }
 683    }
 684
 685    std.mem.sortUnstable(Range, d.ranges.items, {}, struct {
 686        pub fn lessThan(ctx: void, a: Range, b: Range) bool {
 687            _ = ctx;
 688            return a.start < b.start;
 689        }
 690    }.lessThan);
 691}
 692
 693const DebugRangeIterator = struct {
 694    base_address: u64,
 695    section_type: Section.Id,
 696    di: *const Dwarf,
 697    endian: Endian,
 698    compile_unit: *const CompileUnit,
 699    fr: Reader,
 700
 701    pub fn init(ranges_value: *const FormValue, di: *const Dwarf, endian: Endian, compile_unit: *const CompileUnit) !@This() {
 702        const section_type = if (compile_unit.version >= 5) Section.Id.debug_rnglists else Section.Id.debug_ranges;
 703        const debug_ranges = di.section(section_type) orelse return error.MissingDebugInfo;
 704
 705        const ranges_offset = switch (ranges_value.*) {
 706            .sec_offset, .udata => |off| off,
 707            .rnglistx => |idx| off: {
 708                switch (compile_unit.format) {
 709                    .@"32" => {
 710                        const offset_loc = compile_unit.rnglists_base + 4 * idx;
 711                        if (offset_loc + 4 > debug_ranges.len) return bad();
 712                        const offset = mem.readInt(u32, debug_ranges[@intCast(offset_loc)..][0..4], endian);
 713                        break :off compile_unit.rnglists_base + offset;
 714                    },
 715                    .@"64" => {
 716                        const offset_loc = compile_unit.rnglists_base + 8 * idx;
 717                        if (offset_loc + 8 > debug_ranges.len) return bad();
 718                        const offset = mem.readInt(u64, debug_ranges[@intCast(offset_loc)..][0..8], endian);
 719                        break :off compile_unit.rnglists_base + offset;
 720                    },
 721                }
 722            },
 723            else => return bad(),
 724        };
 725
 726        // All the addresses in the list are relative to the value
 727        // specified by DW_AT.low_pc or to some other value encoded
 728        // in the list itself.
 729        // If no starting value is specified use zero.
 730        const base_address = compile_unit.die.getAttrAddr(di, endian, AT.low_pc, compile_unit) catch |err| switch (err) {
 731            error.MissingDebugInfo => 0,
 732            else => return err,
 733        };
 734
 735        var fr: Reader = .fixed(debug_ranges);
 736        fr.seek = cast(usize, ranges_offset) orelse return bad();
 737
 738        return .{
 739            .base_address = base_address,
 740            .section_type = section_type,
 741            .di = di,
 742            .endian = endian,
 743            .compile_unit = compile_unit,
 744            .fr = fr,
 745        };
 746    }
 747
 748    // Returns the next range in the list, or null if the end was reached.
 749    pub fn next(self: *@This()) !?PcRange {
 750        const endian = self.endian;
 751        const addr_size_bytes = self.compile_unit.addr_size_bytes;
 752        switch (self.section_type) {
 753            .debug_rnglists => {
 754                const kind = try self.fr.takeByte();
 755                switch (kind) {
 756                    RLE.end_of_list => return null,
 757                    RLE.base_addressx => {
 758                        const index = try self.fr.takeLeb128(u64);
 759                        self.base_address = try self.di.readDebugAddr(endian, self.compile_unit, index);
 760                        return try self.next();
 761                    },
 762                    RLE.startx_endx => {
 763                        const start_index = try self.fr.takeLeb128(u64);
 764                        const start_addr = try self.di.readDebugAddr(endian, self.compile_unit, start_index);
 765
 766                        const end_index = try self.fr.takeLeb128(u64);
 767                        const end_addr = try self.di.readDebugAddr(endian, self.compile_unit, end_index);
 768
 769                        return .{
 770                            .start = start_addr,
 771                            .end = end_addr,
 772                        };
 773                    },
 774                    RLE.startx_length => {
 775                        const start_index = try self.fr.takeLeb128(u64);
 776                        const start_addr = try self.di.readDebugAddr(endian, self.compile_unit, start_index);
 777
 778                        const len = try self.fr.takeLeb128(u64);
 779                        const end_addr = start_addr + len;
 780
 781                        return .{
 782                            .start = start_addr,
 783                            .end = end_addr,
 784                        };
 785                    },
 786                    RLE.offset_pair => {
 787                        const start_addr = try self.fr.takeLeb128(u64);
 788                        const end_addr = try self.fr.takeLeb128(u64);
 789
 790                        // This is the only kind that uses the base address
 791                        return .{
 792                            .start = self.base_address + start_addr,
 793                            .end = self.base_address + end_addr,
 794                        };
 795                    },
 796                    RLE.base_address => {
 797                        self.base_address = try readAddress(&self.fr, endian, addr_size_bytes);
 798                        return try self.next();
 799                    },
 800                    RLE.start_end => {
 801                        const start_addr = try readAddress(&self.fr, endian, addr_size_bytes);
 802                        const end_addr = try readAddress(&self.fr, endian, addr_size_bytes);
 803
 804                        return .{
 805                            .start = start_addr,
 806                            .end = end_addr,
 807                        };
 808                    },
 809                    RLE.start_length => {
 810                        const start_addr = try readAddress(&self.fr, endian, addr_size_bytes);
 811                        const len = try self.fr.takeLeb128(u64);
 812                        const end_addr = start_addr + len;
 813
 814                        return .{
 815                            .start = start_addr,
 816                            .end = end_addr,
 817                        };
 818                    },
 819                    else => return bad(),
 820                }
 821            },
 822            .debug_ranges => {
 823                const start_addr = try readAddress(&self.fr, endian, addr_size_bytes);
 824                const end_addr = try readAddress(&self.fr, endian, addr_size_bytes);
 825                if (start_addr == 0 and end_addr == 0) return null;
 826
 827                // The entry with start_addr = max_representable_address selects a new value for the base address
 828                const max_representable_address = ~@as(u64, 0) >> @intCast(64 - addr_size_bytes);
 829                if (start_addr == max_representable_address) {
 830                    self.base_address = end_addr;
 831                    return try self.next();
 832                }
 833
 834                return .{
 835                    .start = self.base_address + start_addr,
 836                    .end = self.base_address + end_addr,
 837                };
 838            },
 839            else => unreachable,
 840        }
 841    }
 842};
 843
 844/// TODO: change this to binary searching the sorted compile unit list
 845pub fn findCompileUnit(di: *const Dwarf, endian: Endian, target_address: u64) !*CompileUnit {
 846    for (di.compile_unit_list.items) |*compile_unit| {
 847        if (compile_unit.pc_range) |range| {
 848            if (target_address >= range.start and target_address < range.end) return compile_unit;
 849        }
 850
 851        const ranges_value = compile_unit.die.getAttr(AT.ranges) orelse continue;
 852        var iter = DebugRangeIterator.init(ranges_value, di, endian, compile_unit) catch continue;
 853        while (try iter.next()) |range| {
 854            if (target_address >= range.start and target_address < range.end) return compile_unit;
 855        }
 856    }
 857
 858    return missing();
 859}
 860
 861/// Gets an already existing AbbrevTable given the abbrev_offset, or if not found,
 862/// seeks in the stream and parses it.
 863fn getAbbrevTable(di: *Dwarf, gpa: Allocator, abbrev_offset: u64) !*const Abbrev.Table {
 864    for (di.abbrev_table_list.items) |*table| {
 865        if (table.offset == abbrev_offset) {
 866            return table;
 867        }
 868    }
 869    try di.abbrev_table_list.append(
 870        gpa,
 871        try di.parseAbbrevTable(gpa, abbrev_offset),
 872    );
 873    return &di.abbrev_table_list.items[di.abbrev_table_list.items.len - 1];
 874}
 875
 876fn parseAbbrevTable(di: *Dwarf, gpa: Allocator, offset: u64) !Abbrev.Table {
 877    var fr: Reader = .fixed(di.section(.debug_abbrev).?);
 878    fr.seek = cast(usize, offset) orelse return bad();
 879
 880    var abbrevs = std.array_list.Managed(Abbrev).init(gpa);
 881    defer {
 882        for (abbrevs.items) |*abbrev| {
 883            abbrev.deinit(gpa);
 884        }
 885        abbrevs.deinit();
 886    }
 887
 888    var attrs = std.array_list.Managed(Abbrev.Attr).init(gpa);
 889    defer attrs.deinit();
 890
 891    while (true) {
 892        const code = try fr.takeLeb128(u64);
 893        if (code == 0) break;
 894        const tag_id = try fr.takeLeb128(u64);
 895        const has_children = (try fr.takeByte()) == DW.CHILDREN.yes;
 896
 897        while (true) {
 898            const attr_id = try fr.takeLeb128(u64);
 899            const form_id = try fr.takeLeb128(u64);
 900            if (attr_id == 0 and form_id == 0) break;
 901            try attrs.append(.{
 902                .id = attr_id,
 903                .form_id = form_id,
 904                .payload = switch (form_id) {
 905                    FORM.implicit_const => try fr.takeLeb128(i64),
 906                    else => undefined,
 907                },
 908            });
 909        }
 910
 911        try abbrevs.append(.{
 912            .code = code,
 913            .tag_id = tag_id,
 914            .has_children = has_children,
 915            .attrs = try attrs.toOwnedSlice(),
 916        });
 917    }
 918
 919    return .{
 920        .offset = offset,
 921        .abbrevs = try abbrevs.toOwnedSlice(),
 922    };
 923}
 924
 925fn parseDie(
 926    fr: *Reader,
 927    attrs_buf: []Die.Attr,
 928    abbrev_table: *const Abbrev.Table,
 929    format: Format,
 930    endian: Endian,
 931    addr_size_bytes: u8,
 932) ScanError!?Die {
 933    const abbrev_code = try fr.takeLeb128(u64);
 934    if (abbrev_code == 0) return null;
 935    const table_entry = abbrev_table.get(abbrev_code) orelse return bad();
 936
 937    const attrs = attrs_buf[0..table_entry.attrs.len];
 938    for (attrs, table_entry.attrs) |*result_attr, attr| result_attr.* = .{
 939        .id = attr.id,
 940        .value = try parseFormValue(fr, attr.form_id, format, endian, addr_size_bytes, attr.payload),
 941    };
 942    return .{
 943        .tag_id = table_entry.tag_id,
 944        .has_children = table_entry.has_children,
 945        .attrs = attrs,
 946    };
 947}
 948
 949/// Ensures that addresses in the returned LineTable are monotonically increasing.
 950fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, endian: Endian, compile_unit: *const CompileUnit) !CompileUnit.SrcLocCache {
 951    const compile_unit_cwd = try compile_unit.die.getAttrString(d, endian, AT.comp_dir, d.section(.debug_line_str), compile_unit);
 952    const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list);
 953
 954    var fr: Reader = .fixed(d.section(.debug_line).?);
 955    fr.seek = @intCast(line_info_offset);
 956
 957    const unit_header = try readUnitHeader(&fr, endian);
 958    if (unit_header.unit_length == 0) return missing();
 959
 960    const next_offset = unit_header.header_length + unit_header.unit_length;
 961
 962    const version = try fr.takeInt(u16, endian);
 963    if (version < 2) return bad();
 964
 965    const addr_size_bytes: u8, const seg_size: u8 = if (version >= 5) .{
 966        try fr.takeByte(),
 967        try fr.takeByte(),
 968    } else .{
 969        compile_unit.addr_size_bytes,
 970        0,
 971    };
 972    if (seg_size != 0) return bad(); // unsupported
 973
 974    const prologue_length = try readFormatSizedInt(&fr, unit_header.format, endian);
 975    const prog_start_offset = fr.seek + prologue_length;
 976
 977    const minimum_instruction_length = try fr.takeByte();
 978    if (minimum_instruction_length == 0) return bad();
 979
 980    if (version >= 4) {
 981        const maximum_operations_per_instruction = try fr.takeByte();
 982        _ = maximum_operations_per_instruction;
 983    }
 984
 985    const default_is_stmt = (try fr.takeByte()) != 0;
 986    const line_base = try fr.takeByteSigned();
 987
 988    const line_range = try fr.takeByte();
 989    if (line_range == 0) return bad();
 990
 991    const opcode_base = try fr.takeByte();
 992
 993    const standard_opcode_lengths = try fr.take(opcode_base - 1);
 994
 995    var directories: ArrayList(FileEntry) = .empty;
 996    defer directories.deinit(gpa);
 997    var file_entries: ArrayList(FileEntry) = .empty;
 998    defer file_entries.deinit(gpa);
 999
1000    if (version < 5) {
1001        try directories.append(gpa, .{ .path = compile_unit_cwd });
1002
1003        while (true) {
1004            const dir = try fr.takeSentinel(0);
1005            if (dir.len == 0) break;
1006            try directories.append(gpa, .{ .path = dir });
1007        }
1008
1009        while (true) {
1010            const file_name = try fr.takeSentinel(0);
1011            if (file_name.len == 0) break;
1012            const dir_index = try fr.takeLeb128(u32);
1013            const mtime = try fr.takeLeb128(u64);
1014            const size = try fr.takeLeb128(u64);
1015            try file_entries.append(gpa, .{
1016                .path = file_name,
1017                .dir_index = dir_index,
1018                .mtime = mtime,
1019                .size = size,
1020            });
1021        }
1022    } else {
1023        const FileEntFmt = struct {
1024            content_type_code: u16,
1025            form_code: u16,
1026        };
1027        {
1028            var dir_ent_fmt_buf: [10]FileEntFmt = undefined;
1029            const directory_entry_format_count = try fr.takeByte();
1030            if (directory_entry_format_count > dir_ent_fmt_buf.len) return bad();
1031            for (dir_ent_fmt_buf[0..directory_entry_format_count]) |*ent_fmt| {
1032                ent_fmt.* = .{
1033                    .content_type_code = try fr.takeLeb128(u8),
1034                    .form_code = try fr.takeLeb128(u16),
1035                };
1036            }
1037
1038            const directories_count = try fr.takeLeb128(usize);
1039
1040            for (try directories.addManyAsSlice(gpa, directories_count)) |*e| {
1041                e.* = .{ .path = &.{} };
1042                for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| {
1043                    const form_value = try parseFormValue(&fr, ent_fmt.form_code, unit_header.format, endian, addr_size_bytes, null);
1044                    switch (ent_fmt.content_type_code) {
1045                        DW.LNCT.path => e.path = try form_value.getString(d.*),
1046                        DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32),
1047                        DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64),
1048                        DW.LNCT.size => e.size = try form_value.getUInt(u64),
1049                        DW.LNCT.MD5 => e.md5 = switch (form_value) {
1050                            .data16 => |data16| data16.*,
1051                            else => return bad(),
1052                        },
1053                        else => continue,
1054                    }
1055                }
1056            }
1057        }
1058
1059        var file_ent_fmt_buf: [10]FileEntFmt = undefined;
1060        const file_name_entry_format_count = try fr.takeByte();
1061        if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad();
1062        for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| {
1063            ent_fmt.* = .{
1064                .content_type_code = try fr.takeLeb128(u16),
1065                .form_code = try fr.takeLeb128(u16),
1066            };
1067        }
1068
1069        const file_names_count = try fr.takeLeb128(usize);
1070        try file_entries.ensureUnusedCapacity(gpa, file_names_count);
1071
1072        for (try file_entries.addManyAsSlice(gpa, file_names_count)) |*e| {
1073            e.* = .{ .path = &.{} };
1074            for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| {
1075                const form_value = try parseFormValue(&fr, ent_fmt.form_code, unit_header.format, endian, addr_size_bytes, null);
1076                switch (ent_fmt.content_type_code) {
1077                    DW.LNCT.path => e.path = try form_value.getString(d.*),
1078                    DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32),
1079                    DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64),
1080                    DW.LNCT.size => e.size = try form_value.getUInt(u64),
1081                    DW.LNCT.MD5 => e.md5 = switch (form_value) {
1082                        .data16 => |data16| data16.*,
1083                        else => return bad(),
1084                    },
1085                    else => continue,
1086                }
1087            }
1088        }
1089    }
1090
1091    var prog = LineNumberProgram.init(default_is_stmt, version);
1092    var line_table: CompileUnit.SrcLocCache.LineTable = .{};
1093    errdefer line_table.deinit(gpa);
1094
1095    fr.seek = @intCast(prog_start_offset);
1096
1097    const next_unit_pos = line_info_offset + next_offset;
1098
1099    while (fr.seek < next_unit_pos) {
1100        const opcode = try fr.takeByte();
1101
1102        if (opcode == DW.LNS.extended_op) {
1103            const op_size = try fr.takeLeb128(u64);
1104            if (op_size < 1) return bad();
1105            const sub_op = try fr.takeByte();
1106            switch (sub_op) {
1107                DW.LNE.end_sequence => {
1108                    // The row being added here is an "end" address, meaning
1109                    // that it does not map to the source location here -
1110                    // rather it marks the previous address as the last address
1111                    // that maps to this source location.
1112
1113                    // In this implementation we don't mark end of addresses.
1114                    // This is a performance optimization based on the fact
1115                    // that we don't need to know if an address is missing
1116                    // source location info; we are only interested in being
1117                    // able to look up source location info for addresses that
1118                    // are known to have debug info.
1119                    //if (debug_debug_mode) assert(!line_table.contains(prog.address));
1120                    //try line_table.put(gpa, prog.address, CompileUnit.SrcLocCache.LineEntry.invalid);
1121                    prog.reset();
1122                },
1123                DW.LNE.set_address => {
1124                    prog.address = try readAddress(&fr, endian, addr_size_bytes);
1125                },
1126                DW.LNE.define_file => {
1127                    const path = try fr.takeSentinel(0);
1128                    const dir_index = try fr.takeLeb128(u32);
1129                    const mtime = try fr.takeLeb128(u64);
1130                    const size = try fr.takeLeb128(u64);
1131                    try file_entries.append(gpa, .{
1132                        .path = path,
1133                        .dir_index = dir_index,
1134                        .mtime = mtime,
1135                        .size = size,
1136                    });
1137                },
1138                else => try fr.discardAll64(op_size - 1),
1139            }
1140        } else if (opcode >= opcode_base) {
1141            // special opcodes
1142            const adjusted_opcode = opcode - opcode_base;
1143            const inc_addr = minimum_instruction_length * (adjusted_opcode / line_range);
1144            const inc_line = @as(i32, line_base) + @as(i32, adjusted_opcode % line_range);
1145            prog.line += inc_line;
1146            prog.address += inc_addr;
1147            try prog.addRow(gpa, &line_table);
1148            prog.basic_block = false;
1149        } else {
1150            switch (opcode) {
1151                DW.LNS.copy => {
1152                    try prog.addRow(gpa, &line_table);
1153                    prog.basic_block = false;
1154                },
1155                DW.LNS.advance_pc => {
1156                    const arg = try fr.takeLeb128(u64);
1157                    prog.address += arg * minimum_instruction_length;
1158                },
1159                DW.LNS.advance_line => {
1160                    const arg = try fr.takeLeb128(i64);
1161                    prog.line += arg;
1162                },
1163                DW.LNS.set_file => {
1164                    const arg = try fr.takeLeb128(usize);
1165                    prog.file = arg;
1166                },
1167                DW.LNS.set_column => {
1168                    const arg = try fr.takeLeb128(u64);
1169                    prog.column = arg;
1170                },
1171                DW.LNS.negate_stmt => {
1172                    prog.is_stmt = !prog.is_stmt;
1173                },
1174                DW.LNS.set_basic_block => {
1175                    prog.basic_block = true;
1176                },
1177                DW.LNS.const_add_pc => {
1178                    const inc_addr = minimum_instruction_length * ((255 - opcode_base) / line_range);
1179                    prog.address += inc_addr;
1180                },
1181                DW.LNS.fixed_advance_pc => {
1182                    const arg = try fr.takeInt(u16, endian);
1183                    prog.address += arg;
1184                },
1185                DW.LNS.set_prologue_end => {},
1186                else => {
1187                    if (opcode - 1 >= standard_opcode_lengths.len) return bad();
1188                    try fr.discardAll(standard_opcode_lengths[opcode - 1]);
1189                },
1190            }
1191        }
1192    }
1193
1194    // Dwarf standard v5, 6.2.5 says
1195    // > Within a sequence, addresses and operation pointers may only increase.
1196    // However, this is empirically not the case in reality, so we sort here.
1197    line_table.sortUnstable(struct {
1198        keys: []const u64,
1199
1200        pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
1201            return ctx.keys[a_index] < ctx.keys[b_index];
1202        }
1203    }{ .keys = line_table.keys() });
1204
1205    return .{
1206        .line_table = line_table,
1207        .directories = try directories.toOwnedSlice(gpa),
1208        .files = try file_entries.toOwnedSlice(gpa),
1209        .version = version,
1210    };
1211}
1212
/// Makes sure `cu.src_loc_cache` is populated.
///
/// When the cache is still `null`, the line number program of `cu` is run
/// (allocating with `gpa`) and its result stored; otherwise nothing happens.
pub fn populateSrcLocCache(d: *Dwarf, gpa: Allocator, endian: Endian, cu: *CompileUnit) ScanError!void {
    if (cu.src_loc_cache == null) {
        cu.src_loc_cache = try d.runLineNumberProgram(gpa, endian, cu);
    }
}
1217
/// Looks up the source location recorded for `target_address` in the line
/// table of `compile_unit`, populating the unit's source location cache on
/// first use.
///
/// `file_name` of the result is the directory path joined with the file path
/// and is allocated with `gpa`. Returns the result of `bad()` when the line
/// entry refers to a file or directory index that does not exist.
pub fn getLineNumberInfo(
    d: *Dwarf,
    gpa: Allocator,
    endian: Endian,
    compile_unit: *CompileUnit,
    target_address: u64,
) !std.debug.SourceLocation {
    try d.populateSrcLocCache(gpa, endian, compile_unit);
    const slc = &compile_unit.src_loc_cache.?;
    const entry = try slc.findSource(target_address);

    // For versions before 5 the file number is one higher than the index into
    // `slc.files`. A file number of 0 would make the subtraction underflow,
    // so reject it explicitly instead of tripping an overflow check.
    const first_file_number: u32 = @intFromBool(slc.version < 5);
    if (entry.file < first_file_number) return bad();
    const file_index = entry.file - first_file_number;
    if (file_index >= slc.files.len) return bad();

    const file_entry = &slc.files[file_index];
    if (file_entry.dir_index >= slc.directories.len) return bad();
    const dir_name = slc.directories[file_entry.dir_index].path;
    const file_name = try std.fs.path.join(gpa, &.{ dir_name, file_entry.path });
    return .{
        .line = entry.line,
        .column = entry.column,
        .file_name = file_name,
    };
}
1240
/// Returns the zero-terminated string located at `offset` in the
/// `.debug_str` section, as resolved by `getStringGeneric`.
fn getString(di: Dwarf, offset: u64) ![:0]const u8 {
    const debug_str = di.section(.debug_str);
    return getStringGeneric(debug_str, offset);
}
1244
/// Returns the zero-terminated string located at `offset` in the
/// `.debug_line_str` section, as resolved by `getStringGeneric`.
fn getLineString(di: Dwarf, offset: u64) ![:0]const u8 {
    const debug_line_str = di.section(.debug_line_str);
    return getStringGeneric(debug_line_str, offset);
}
1248
/// Reads entry number `index` of the `.debug_addr` table that belongs to
/// `compile_unit`.
///
/// The entry size is taken from the table header located just before
/// `compile_unit.addr_base`. Returns the result of `bad()` when the section
/// is absent, the header version is not 5, the address size is not 1, 2, 4
/// or 8, or when the header or the requested entry lies outside the section.
fn readDebugAddr(di: Dwarf, endian: Endian, compile_unit: *const CompileUnit, index: u64) !u64 {
    const debug_addr = di.section(.debug_addr) orelse return bad();

    // addr_base points to the first item after the header, however we
    // need to read the header to know the size of each item. Empirically,
    // it may disagree with is_64 on the compile unit.
    // The header is 8 or 12 bytes depending on is_64.
    const addr_base = compile_unit.addr_base;
    // The upper bound keeps the header reads below inside the section.
    if (addr_base < 8 or addr_base > debug_addr.len) return bad();

    const version = mem.readInt(u16, debug_addr[addr_base - 4 ..][0..2], endian);
    if (version != 5) return bad();

    const addr_size = debug_addr[addr_base - 2];
    const seg_size = debug_addr[addr_base - 1];

    // All offset arithmetic is widened and checked: both sizes and `index`
    // come straight from the (possibly corrupt) debug info.
    const entry_size = @as(u64, addr_size) + seg_size;
    const table_offset = std.math.mul(u64, entry_size, index) catch return bad();
    const byte_offset = std.math.add(u64, addr_base, table_offset) catch return bad();
    const end_offset = std.math.add(u64, byte_offset, addr_size) catch return bad();
    if (end_offset > debug_addr.len) return bad();

    // Bounded by `debug_addr.len`, so this cast cannot fail.
    const start: usize = @intCast(byte_offset);
    return switch (addr_size) {
        1 => debug_addr[start],
        2 => mem.readInt(u16, debug_addr[start..][0..2], endian),
        4 => mem.readInt(u32, debug_addr[start..][0..4], endian),
        8 => mem.readInt(u64, debug_addr[start..][0..8], endian),
        else => bad(),
    };
}
1274
/// Decodes a single attribute value of form `form_id` from `r`.
///
/// `format` selects the width of section offsets, `addr_size_bytes` the
/// width of `FORM.addr` values, and `implicit_const` supplies the value for
/// `FORM.implicit_const` (the result of `bad()` is returned when it is
/// `null`). Block, expression and string payloads are returned as slices
/// obtained from `r`. Unrecognized forms yield the result of `bad()`.
fn parseFormValue(
    r: *Reader,
    form_id: u64,
    format: Format,
    endian: Endian,
    addr_size_bytes: u8,
    implicit_const: ?i64,
) ScanError!FormValue {
    // `FORM.indirect` stores the real form as a ULEB128 in front of the
    // value. Resolve it with a loop rather than recursion so that a long run
    // of indirect markers in corrupt input cannot exhaust the stack; the loop
    // ends at the latest when the reader runs out of data.
    var actual_form_id = form_id;
    while (actual_form_id == FORM.indirect) {
        actual_form_id = try r.takeLeb128(u64);
    }

    return switch (actual_form_id) {
        // DWARF5.pdf page 213: the size of this value is encoded in the
        // compilation unit header as address size.
        FORM.addr => .{ .addr = try readAddress(r, endian, addr_size_bytes) },
        FORM.addrx1 => .{ .addrx = try r.takeByte() },
        FORM.addrx2 => .{ .addrx = try r.takeInt(u16, endian) },
        FORM.addrx3 => .{ .addrx = try r.takeInt(u24, endian) },
        FORM.addrx4 => .{ .addrx = try r.takeInt(u32, endian) },
        FORM.addrx => .{ .addrx = try r.takeLeb128(u64) },

        // Length-prefixed blocks; only the width of the length differs.
        FORM.block1 => .{ .block = try r.take(try r.takeByte()) },
        FORM.block2 => .{ .block = try r.take(try r.takeInt(u16, endian)) },
        FORM.block4 => .{ .block = try r.take(try r.takeInt(u32, endian)) },
        FORM.block => .{ .block = try r.take(try r.takeLeb128(usize)) },

        FORM.data1 => .{ .udata = try r.takeByte() },
        FORM.data2 => .{ .udata = try r.takeInt(u16, endian) },
        FORM.data4 => .{ .udata = try r.takeInt(u32, endian) },
        FORM.data8 => .{ .udata = try r.takeInt(u64, endian) },
        FORM.data16 => .{ .data16 = try r.takeArray(16) },
        FORM.udata => .{ .udata = try r.takeLeb128(u64) },
        FORM.sdata => .{ .sdata = try r.takeLeb128(i64) },
        FORM.exprloc => .{ .exprloc = try r.take(try r.takeLeb128(usize)) },
        FORM.flag => .{ .flag = (try r.takeByte()) != 0 },
        // Presence of the attribute is the value; nothing is read.
        FORM.flag_present => .{ .flag = true },
        FORM.sec_offset => .{ .sec_offset = try readFormatSizedInt(r, format, endian) },

        FORM.ref1 => .{ .ref = try r.takeByte() },
        FORM.ref2 => .{ .ref = try r.takeInt(u16, endian) },
        FORM.ref4 => .{ .ref = try r.takeInt(u32, endian) },
        FORM.ref8 => .{ .ref = try r.takeInt(u64, endian) },
        FORM.ref_udata => .{ .ref = try r.takeLeb128(u64) },

        FORM.ref_addr => .{ .ref_addr = try readFormatSizedInt(r, format, endian) },
        FORM.ref_sig8 => .{ .ref = try r.takeInt(u64, endian) },

        FORM.string => .{ .string = try r.takeSentinel(0) },
        FORM.strp => .{ .strp = try readFormatSizedInt(r, format, endian) },
        FORM.strx1 => .{ .strx = try r.takeByte() },
        FORM.strx2 => .{ .strx = try r.takeInt(u16, endian) },
        FORM.strx3 => .{ .strx = try r.takeInt(u24, endian) },
        FORM.strx4 => .{ .strx = try r.takeInt(u32, endian) },
        FORM.strx => .{ .strx = try r.takeLeb128(usize) },
        FORM.line_strp => .{ .line_strp = try readFormatSizedInt(r, format, endian) },
        FORM.implicit_const => .{ .sdata = implicit_const orelse return bad() },
        FORM.loclistx => .{ .loclistx = try r.takeLeb128(u64) },
        FORM.rnglistx => .{ .rnglistx = try r.takeLeb128(u64) },
        else => {
            //debug.print("unrecognized form id: {x}\n", .{form_id});
            return bad();
        },
    };
}
1337
/// One entry of a line program's directory table or file name table.
///
/// Every field except `path` defaults to zero.
const FileEntry = struct {
    /// Directory or file path as stored in the debug info.
    path: []const u8,
    /// Index into the directory table this entry is relative to.
    dir_index: u32 = 0,
    /// Modification time as recorded in the line program header.
    mtime: u64 = 0,
    /// File size as recorded in the line program header.
    size: u64 = 0,
    /// MD5 digest as recorded in the line program header.
    md5: [16]u8 = @splat(0),
};
1345
/// Register state of the line number state machine while a line number
/// program is being executed.
const LineNumberProgram = struct {
    address: u64,
    file: usize,
    line: i64,
    column: u64,
    version: u16,
    is_stmt: bool,
    basic_block: bool,

    /// Value `is_stmt` is set back to by `reset`.
    default_is_stmt: bool,

    /// Creates the initial state: address 0, file 1, line 1, column 0, with
    /// `is_stmt` also remembered as the default used by `reset`.
    pub fn init(is_stmt: bool, version: u16) LineNumberProgram {
        return .{
            .address = 0,
            .file = 1,
            .line = 1,
            .column = 0,
            .version = version,
            .is_stmt = is_stmt,
            .basic_block = false,
            .default_is_stmt = is_stmt,
        };
    }

    /// Reset the state machine following the DWARF specification.
    /// `version` and `default_is_stmt` are carried over unchanged.
    pub fn reset(self: *LineNumberProgram) void {
        self.* = init(self.default_is_stmt, self.version);
    }

    /// Records the current registers as a row of `table`, keyed by the
    /// current address. Rows with line 0 are dropped. Line and column values
    /// that do not fit in `u32` are stored as `maxInt(u32)`; a file number
    /// that does not fit yields the result of `bad()`.
    pub fn addRow(prog: *LineNumberProgram, gpa: Allocator, table: *CompileUnit.SrcLocCache.LineTable) !void {
        //if (debug_debug_mode) @panic("garbage line data");
        if (prog.line == 0) return;

        if (debug_debug_mode) assert(!table.contains(prog.address));

        const row_file: u32 = cast(u32, prog.file) orelse return bad();
        const row_line: u32 = cast(u32, prog.line) orelse maxInt(u32);
        const row_column: u32 = cast(u32, prog.column) orelse maxInt(u32);
        try table.put(gpa, prog.address, .{
            .line = row_line,
            .column = row_column,
            .file = row_file,
        });
    }
};
1393
/// Decoded initial-length field of a DWARF unit, as produced by
/// `readUnitHeader`.
const UnitHeader = struct {
    /// DWARF format selected by the initial-length field.
    format: Format,
    /// Number of bytes occupied by the initial-length field itself
    /// (`readUnitHeader` produces 4 for the 32-bit format and 12 for the
    /// 64-bit format).
    header_length: u4,
    /// Length value stored in the initial-length field.
    unit_length: u64,
};
1399
/// Reads the initial-length field of a DWARF unit from `r`.
///
/// A first 32-bit word below `0xfffffff0` is the unit length in the 32-bit
/// format. The word `0xffffffff` announces the 64-bit format, in which case
/// the unit length is read from the following 64-bit integer. Every other
/// value yields the result of `bad()`.
pub fn readUnitHeader(r: *Reader, endian: Endian) ScanError!UnitHeader {
    const initial_word = try r.takeInt(u32, endian);

    if (initial_word < 0xfffffff0) {
        return .{
            .format = .@"32",
            .header_length = 4,
            .unit_length = initial_word,
        };
    }

    // 0xfffffff0 through 0xfffffffe are not accepted.
    if (initial_word != 0xffffffff) return bad();

    const extended_length = try r.takeInt(u64, endian);
    return .{
        .format = .@"64",
        .header_length = 12,
        .unit_length = extended_length,
    };
}
1415
/// Returns the DWARF register number for an x86_64 register number found in compact unwind info.
///
/// Returns `error.InvalidRegister` for numbers without a mapping (0 and 7).
pub fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u16 {
    // Indexed by the compact unwind register number; `null` marks numbers
    // that have no DWARF equivalent.
    const dwarf_reg_by_unwind_reg = [8]?u16{
        null,
        3, // RBX
        12, // R12
        13, // R13
        14, // R14
        15, // R15
        6, // RBP
        null,
    };
    const dwarf_reg = dwarf_reg_by_unwind_reg[unwind_reg_number] orelse return error.InvalidRegister;
    return dwarf_reg;
}
1428
/// Returns the DWARF register number used for the instruction pointer on
/// `arch`.
///
/// Returns `null` for CPU architectures without an instruction pointer register.
pub fn ipRegNum(arch: std.Target.Cpu.Arch) ?u16 {
    const reg_num: ?u16 = switch (arch) {
        .aarch64,
        .aarch64_be,
        => 32,

        .arc,
        .arceb,
        => 160,

        .arm,
        .armeb,
        .thumb,
        .thumbeb,
        => 15,

        .csky => 64,
        .hexagon => 76,
        .kvx => 64,
        .lanai => 2,

        .loongarch32,
        .loongarch64,
        => 64,

        .m68k => 26,

        .mips,
        .mipsel,
        .mips64,
        .mips64el,
        => 66,

        .or1k => 35,

        .powerpc,
        .powerpcle,
        .powerpc64,
        .powerpc64le,
        => 67,

        .riscv32,
        .riscv32be,
        .riscv64,
        .riscv64be,
        => 65,

        .s390x => 65,

        .sparc,
        .sparc64,
        => 32,

        .ve => 144,
        .x86 => 8,
        .x86_64 => 16,

        else => null,
    };
    return reg_num;
}
1453
/// Returns the DWARF register number used for the frame pointer on `arch`.
///
/// Architectures that are not listed are treated as unreachable, so callers
/// must only pass one of the architectures handled here.
pub fn fpRegNum(arch: std.Target.Cpu.Arch) u16 {
    const reg_num: u16 = switch (arch) {
        .aarch64,
        .aarch64_be,
        => 29,

        .arc,
        .arceb,
        => 27,

        .arm,
        .armeb,
        .thumb,
        .thumbeb,
        => 11,

        .csky => 14,
        .hexagon => 30,
        .kvx => 14,
        .lanai => 5,

        .loongarch32,
        .loongarch64,
        => 22,

        .m68k => 14,

        .mips,
        .mipsel,
        .mips64,
        .mips64el,
        => 30,

        .or1k => 2,

        .powerpc,
        .powerpcle,
        .powerpc64,
        .powerpc64le,
        => 1,

        .riscv32,
        .riscv32be,
        .riscv64,
        .riscv64be,
        => 8,

        .s390x => 11,

        .sparc,
        .sparc64,
        => 30,

        .ve => 9,
        .x86 => 5,
        .x86_64 => 6,

        else => unreachable,
    };
    return reg_num;
}
1477
/// Returns the DWARF register number used for the stack pointer on `arch`.
///
/// Architectures that are not listed are treated as unreachable, so callers
/// must only pass one of the architectures handled here.
pub fn spRegNum(arch: std.Target.Cpu.Arch) u16 {
    const reg_num: u16 = switch (arch) {
        .aarch64,
        .aarch64_be,
        => 31,

        .arc,
        .arceb,
        => 28,

        .arm,
        .armeb,
        .thumb,
        .thumbeb,
        => 13,

        .csky => 14,
        .hexagon => 29,
        .kvx => 12,
        .lanai => 4,

        .loongarch32,
        .loongarch64,
        => 3,

        .m68k => 15,

        .mips,
        .mipsel,
        .mips64,
        .mips64el,
        => 29,

        .or1k => 1,

        .powerpc,
        .powerpcle,
        .powerpc64,
        .powerpc64le,
        => 1,

        .riscv32,
        .riscv32be,
        .riscv64,
        .riscv64be,
        => 2,

        .s390x => 15,

        .sparc,
        .sparc64,
        => 14,

        .ve => 11,
        .x86 => 4,
        .x86_64 => 7,

        else => unreachable,
    };
    return reg_num;
}
1501
/// Tells whether unwinding for this target is supported by the Dwarf standard.
///
/// Only the CPU architecture of `target` is consulted.
///
/// See also `std.debug.SelfInfo.can_unwind` which tells whether the Zig standard
/// library has a working implementation of unwinding for the current target.
pub fn supportsUnwinding(target: *const std.Target) bool {
    switch (target.cpu.arch) {
        .amdgcn, .nvptx, .nvptx64, .spirv32, .spirv64 => return false,

        // Conservative guess. Feel free to update this logic with any targets
        // that are known to not support Dwarf unwinding.
        else => return true,
    }
}
1520
/// Signals malformed debug info: calls `invalidDebugInfoDetected` and then
/// returns `error.InvalidDebugInfo`, so call sites can write `return bad();`.
///
/// This function is to make it handy to comment out the return and make it
/// into a crash when working on this file.
pub fn bad() error{InvalidDebugInfo} {
    invalidDebugInfoDetected();
    return error.InvalidDebugInfo;
}
1527
/// Hook invoked whenever invalid debug info is detected.
///
/// Panics with "bad dwarf" when `debug_debug_mode` is enabled and does
/// nothing otherwise.
pub fn invalidDebugInfoDetected() void {
    if (debug_debug_mode) @panic("bad dwarf");
}
1531
/// Signals absent debug info by returning `error.MissingDebugInfo`, so call
/// sites can write `return missing();`.
///
/// Panics with "missing dwarf" instead when `debug_debug_mode` is enabled.
pub fn missing() error{MissingDebugInfo} {
    if (debug_debug_mode) @panic("missing dwarf");
    return error.MissingDebugInfo;
}
1536
/// Returns the zero-terminated string that starts at `offset` inside the
/// string section `opt_str`.
///
/// The result is a sub-slice of the section; nothing is copied. Returns the
/// result of `bad()` when the section is `null`, when `offset` lies beyond
/// the section, or when no zero byte follows `offset`.
fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 {
    const section_bytes = opt_str orelse return bad();
    if (offset > section_bytes.len) return bad();
    const start = cast(usize, offset) orelse return bad();

    // Valid strings always have a terminating zero byte
    const string_len = mem.indexOfScalar(u8, section_bytes[start..], 0) orelse return bad();
    const end = start + string_len;
    return section_bytes[start..end :0];
}
1545
/// Resolves `address` to a `std.debug.Symbol`.
///
/// Returns `.unknown` when no compile unit can be found for the address
/// because debug info is missing or invalid. Otherwise the symbol name, the
/// compile unit name and the source location are looked up independently;
/// a compile unit name or source location that is unavailable due to missing
/// or invalid debug info is reported as `null`. Any other error is returned
/// to the caller. `gpa` is forwarded to `getLineNumberInfo`.
pub fn getSymbol(di: *Dwarf, gpa: Allocator, endian: Endian, address: u64) !std.debug.Symbol {
    const cu = di.findCompileUnit(endian, address) catch |err| switch (err) {
        error.MissingDebugInfo, error.InvalidDebugInfo => return .unknown,
        else => return err,
    };

    const symbol_name = di.getSymbolName(address);

    const cu_name = cu.die.getAttrString(di, endian, std.dwarf.AT.name, di.section(.debug_str), cu) catch |err| switch (err) {
        error.MissingDebugInfo, error.InvalidDebugInfo => null,
    };

    const src_loc = di.getLineNumberInfo(gpa, endian, cu, address) catch |err| switch (err) {
        error.MissingDebugInfo, error.InvalidDebugInfo => null,
        else => return err,
    };

    return .{
        .name = symbol_name,
        .compile_unit_name = cu_name,
        .source_location = src_loc,
    };
}
1562
/// DWARF5 7.4: "In the 32-bit DWARF format, all values that represent lengths of DWARF sections and
/// offsets relative to the beginning of DWARF sections are represented using four bytes. In the
/// 64-bit DWARF format, all values that represent lengths of DWARF sections and offsets relative to
/// the beginning of DWARF sections are represented using eight bytes".
///
/// This function is for reading such values: four bytes are consumed from `r`
/// for the 32-bit format and eight bytes for the 64-bit format, and the value
/// is returned widened to `u64`.
fn readFormatSizedInt(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 {
    switch (format) {
        .@"32" => {
            const narrow = try r.takeInt(u32, endian);
            return narrow;
        },
        .@"64" => {
            const wide = try r.takeInt(u64, endian);
            return wide;
        },
    }
}
1575
/// Reads a target address that is `addr_size_bytes` wide from `r` and returns
/// it widened to `u64`.
///
/// Widths of 2, 4 and 8 bytes are supported; any other width yields the
/// result of `bad()`.
fn readAddress(r: *Reader, endian: Endian, addr_size_bytes: u8) !u64 {
    const address: u64 = switch (addr_size_bytes) {
        2 => try r.takeInt(u16, endian),
        4 => try r.takeInt(u32, endian),
        8 => try r.takeInt(u64, endian),
        else => return bad(),
    };
    return address;
}
1583}