master
//! Implements parsing, decoding, and caching of DWARF information.
//!
//! This API makes no assumptions about the relationship between the host and
//! the target being debugged. In other words, any DWARF information can be used
//! from any host via this API. Note, however, that the limits of 32-bit
//! addressing can cause very large 64-bit binaries to be impossible to open on
//! 32-bit hosts.
//!
//! For unopinionated types and bits, see `std.dwarf`.
10
11const std = @import("../std.zig");
12const Allocator = std.mem.Allocator;
13const mem = std.mem;
14const DW = std.dwarf;
15const AT = DW.AT;
16const FORM = DW.FORM;
17const Format = DW.Format;
18const RLE = DW.RLE;
19const UT = DW.UT;
20const assert = std.debug.assert;
21const cast = std.math.cast;
22const maxInt = std.math.maxInt;
23const ArrayList = std.ArrayList;
24const Endian = std.builtin.Endian;
25const Reader = std.Io.Reader;
26
27const Dwarf = @This();
28
29pub const expression = @import("Dwarf/expression.zig");
30pub const Unwind = @import("Dwarf/Unwind.zig");
31pub const SelfUnwinder = @import("Dwarf/SelfUnwinder.zig");
32
/// Useful to temporarily enable while working on this file.
const debug_debug_mode = false;

/// One optional slot per `Section.Id`, indexed by `@intFromEnum` of the id
/// (see `section`). Every slot starts out `null`.
sections: SectionArray = @splat(null),

/// Abbreviation tables parsed so far. `getAbbrevTable` appends a table the
/// first time its `.debug_abbrev` offset is requested.
abbrev_table_list: ArrayList(Abbrev.Table) = .empty,
/// Filled by `scanAllCompileUnits`, which `open` calls.
compile_unit_list: ArrayList(CompileUnit) = .empty,
/// Filled by `scanAllFunctions`, which `open` calls.
func_list: ArrayList(Func) = .empty,

/// Populated by `populateRanges`.
ranges: ArrayList(Range) = .empty,
47
/// An address range attributed to one compile unit. `populateRanges` appends
/// these to `ranges` and then sorts the list by `start`.
pub const Range = struct {
    start: u64,
    /// NOTE(review): presumably exclusive, matching the `address < end` tests
    /// applied to `PcRange` elsewhere in this file -- confirm against consumers.
    end: u64,
    /// Index into `compile_unit_list`.
    compile_unit_index: usize,
};
54
/// The bytes of one DWARF section, plus whether this `Dwarf` must free them.
pub const Section = struct {
    data: []const u8,
    /// If `data` is owned by this Dwarf. Owned data is freed by `Dwarf.deinit`.
    owned: bool,

    /// Identifies a DWARF section. The integer value of a tag is used as the
    /// index into `Dwarf.sections`.
    pub const Id = enum {
        debug_info,
        debug_abbrev,
        debug_str,
        debug_str_offsets,
        debug_line,
        debug_line_str,
        debug_ranges,
        debug_loclists,
        debug_rnglists,
        debug_addr,
        debug_names,
    };
};
74
/// One abbreviation declaration as read from `.debug_abbrev`: the code that
/// DIEs use to refer to it, the tag, the children flag, and the list of
/// attribute specifications.
pub const Abbrev = struct {
    code: u64,
    tag_id: u64,
    has_children: bool,
    attrs: []Attr,

    /// Releases `attrs` and leaves `abbrev` undefined.
    fn deinit(abbrev: *Abbrev, gpa: Allocator) void {
        gpa.free(abbrev.attrs);
        abbrev.* = undefined;
    }

    /// A single attribute specification: attribute id plus the form used to
    /// encode its value.
    const Attr = struct {
        id: u64,
        form_id: u64,
        /// Only valid if form_id is .implicit_const
        payload: i64,
    };

    /// All abbreviation declarations found at one `.debug_abbrev` offset.
    const Table = struct {
        /// Offset of this table within `.debug_abbrev`.
        offset: u64,
        abbrevs: []Abbrev,

        /// Releases every declaration, then the declaration slice itself, and
        /// leaves `table` undefined.
        fn deinit(table: *Table, gpa: Allocator) void {
            for (table.abbrevs) |*entry| entry.deinit(gpa);
            gpa.free(table.abbrevs);
            table.* = undefined;
        }

        /// Linear search for the declaration whose code equals `abbrev_code`.
        /// Returns null when no declaration matches.
        fn get(table: *const Table, abbrev_code: u64) ?*const Abbrev {
            for (table.abbrevs) |*candidate| {
                if (candidate.code == abbrev_code) return candidate;
            }
            return null;
        }
    };
};
113
/// Per-unit state gathered from a `.debug_info` unit header and its
/// top-level DIE.
pub const CompileUnit = struct {
    /// DWARF version read from the unit header.
    version: u16,
    /// 32-bit or 64-bit DWARF, as determined by the unit header.
    format: Format,
    /// Address size, in bytes, read from the unit header.
    addr_size_bytes: u8,
    /// The unit's top-level DIE.
    die: Die,
    /// Set from the unit DIE's `low_pc`/`high_pc` attributes when both can be
    /// read; null otherwise.
    pc_range: ?PcRange,

    /// Value of the corresponding `*_base` attribute on the unit DIE, or 0
    /// when the attribute is absent.
    str_offsets_base: usize,
    addr_base: usize,
    rnglists_base: usize,
    loclists_base: usize,
    /// The unit DIE's `frame_base` attribute, if any.
    frame_base: ?*const FormValue,

    /// Null until a source location cache has been stored here; released by
    /// `Dwarf.deinit` when present.
    src_loc_cache: ?SrcLocCache,

    pub const SrcLocCache = struct {
        line_table: LineTable,
        directories: []const FileEntry,
        files: []FileEntry,
        version: u16,

        /// Maps an address to the line entry recorded for it.
        pub const LineTable = std.AutoArrayHashMapUnmanaged(u64, LineEntry);

        pub const LineEntry = struct {
            line: u32,
            column: u32,
            /// Offset by 1 depending on whether Dwarf version is >= 5.
            file: u32,

            /// Sentinel entry, recognised solely by its `file` value; `line`
            /// and `column` are left undefined.
            pub const invalid: LineEntry = .{
                .line = undefined,
                .column = undefined,
                .file = std.math.maxInt(u32),
            };

            /// Whether `le` carries the sentinel `file` value of `invalid`.
            pub fn isInvalid(le: LineEntry) bool {
                return le.file == LineEntry.invalid.file;
            }
        };

        /// Takes the upper bound of `address` among the line table keys and
        /// returns the entry stored just before that position. Returns
        /// `missing()` when the bound is index 0, i.e. no key precedes it.
        pub fn findSource(slc: *const SrcLocCache, address: u64) !LineEntry {
            const compareAddress = struct {
                fn order(context: u64, item: u64) std.math.Order {
                    return std.math.order(context, item);
                }
            }.order;

            const addresses = slc.line_table.keys();
            const bound = std.sort.upperBound(u64, addresses, address, compareAddress);
            if (bound == 0) return missing();
            return slc.line_table.values()[bound - 1];
        }
    };
};
165
/// A decoded attribute value. The active tag records which class of form the
/// value was encoded with.
pub const FormValue = union(enum) {
    addr: u64,
    /// Index resolved through `readDebugAddr` (see `Die.getAttrAddr`).
    addrx: u64,
    block: []const u8,
    udata: u64,
    data16: *const [16]u8,
    sdata: i64,
    exprloc: []const u8,
    flag: bool,
    sec_offset: u64,
    /// Offset relative to the start of the containing unit (see `Die.getAttrRef`).
    ref: u64,
    /// Offset used as-is, without adding a unit offset (see `Die.getAttrRef`).
    ref_addr: u64,
    string: [:0]const u8,
    /// Offset handed to `Dwarf.getString`.
    strp: u64,
    /// Index into the unit's string offsets table in `.debug_str_offsets`.
    strx: u64,
    /// Offset handed to `Dwarf.getLineString`.
    line_strp: u64,
    loclistx: u64,
    /// Index into the unit's offsets table in the range list section.
    rnglistx: u64,

    /// Resolves a string-class value to its bytes. Inline strings are returned
    /// directly; `strp` and `line_strp` are looked up through `di`. Any other
    /// variant yields `bad()`.
    fn getString(fv: FormValue, di: Dwarf) ![:0]const u8 {
        switch (fv) {
            .string => |inline_string| return inline_string,
            .strp => |str_offset| return di.getString(str_offset),
            .line_strp => |line_str_offset| return di.getLineString(line_str_offset),
            else => return bad(),
        }
    }

    /// Converts a `udata`, `sdata` or `sec_offset` value to `U`. Yields
    /// `bad()` when the value does not fit in `U` or when another variant is
    /// active.
    fn getUInt(fv: FormValue, comptime U: type) !U {
        switch (fv) {
            inline .udata,
            .sdata,
            .sec_offset,
            => |value| return cast(U, value) orelse bad(),
            else => return bad(),
        }
    }
};
204
/// A parsed debugging information entry: its tag, whether children follow,
/// and its decoded attributes.
pub const Die = struct {
    tag_id: u64,
    has_children: bool,
    attrs: []Attr,

    /// One decoded attribute: the attribute id and its value.
    const Attr = struct {
        id: u64,
        value: FormValue,
    };

    /// Frees `attrs` and leaves `self` undefined. Only valid when `attrs` was
    /// allocated with `gpa`.
    fn deinit(self: *Die, gpa: Allocator) void {
        gpa.free(self.attrs);
        self.* = undefined;
    }

    /// Returns a pointer to the value of the first attribute whose id equals
    /// `id`, or null. The pointer aliases `attrs`.
    fn getAttr(self: *const Die, id: u64) ?*const FormValue {
        for (self.attrs) |*attr| {
            if (attr.id == id) return &attr.value;
        }
        return null;
    }

    /// Reads attribute `id` as an address. `addr` values are returned
    /// directly; `addrx` indices are resolved through `di.readDebugAddr`.
    /// Returns `error.MissingDebugInfo` when the attribute is absent and
    /// `bad()` for any other form.
    fn getAttrAddr(
        self: *const Die,
        di: *const Dwarf,
        endian: Endian,
        id: u64,
        compile_unit: *const CompileUnit,
    ) error{ InvalidDebugInfo, MissingDebugInfo }!u64 {
        const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
        return switch (form_value.*) {
            .addr => |value| value,
            .addrx => |index| di.readDebugAddr(endian, compile_unit, index),
            else => bad(),
        };
    }

    /// Reads attribute `id` as an unsigned 64-bit value via `FormValue.getUInt`.
    fn getAttrSecOffset(self: *const Die, id: u64) !u64 {
        const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
        return form_value.getUInt(u64);
    }

    /// Reads attribute `id` as an unsigned constant. Only `udata` values are
    /// accepted; every other form yields `bad()`.
    fn getAttrUnsignedLe(self: *const Die, id: u64) !u64 {
        const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
        return switch (form_value.*) {
            // `FormValue` has no `Const` variant; unsigned constants are
            // decoded as `udata`.
            .udata => |value| value,
            else => bad(),
        };
    }

    /// Reads attribute `id` as a reference to another DIE. A `ref` value must
    /// be smaller than `unit_len` and is rebased onto `unit_offset`; a
    /// `ref_addr` value is returned unchanged.
    fn getAttrRef(self: *const Die, id: u64, unit_offset: u64, unit_len: u64) !u64 {
        const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
        return switch (form_value.*) {
            .ref => |offset| if (offset < unit_len) unit_offset + offset else bad(),
            .ref_addr => |addr| addr,
            else => bad(),
        };
    }

    /// Reads attribute `id` as a string.
    ///
    /// `strx` values are resolved by reading an entry of the unit's string
    /// offsets table in `.debug_str_offsets` and then looking that offset up
    /// in `opt_str` through `getStringGeneric`. Returns
    /// `error.MissingDebugInfo` when the attribute is absent and `bad()` for
    /// non-string forms or out-of-bounds table entries.
    pub fn getAttrString(
        self: *const Die,
        di: *Dwarf,
        endian: Endian,
        id: u64,
        opt_str: ?[]const u8,
        compile_unit: *const CompileUnit,
    ) error{ InvalidDebugInfo, MissingDebugInfo }![]const u8 {
        const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
        switch (form_value.*) {
            .string => |value| return value,
            .strp => |offset| return di.getString(offset),
            .strx => |index| {
                const debug_str_offsets = di.section(.debug_str_offsets) orelse return bad();
                if (compile_unit.str_offsets_base == 0) return bad();

                const entry_size: u64 = switch (compile_unit.format) {
                    .@"32" => 4,
                    .@"64" => 8,
                };
                // `index` comes straight from the debug info, so every step
                // of the offset computation is overflow-checked.
                const relative = std.math.mul(u64, entry_size, index) catch return bad();
                const byte_offset = std.math.add(u64, compile_unit.str_offsets_base, relative) catch return bad();
                const entry_end = std.math.add(u64, byte_offset, entry_size) catch return bad();
                if (entry_end > debug_str_offsets.len) return bad();

                // In bounds, hence `byte_offset` fits in `usize`.
                const entry = debug_str_offsets[@intCast(byte_offset)..];
                const offset: u64 = switch (compile_unit.format) {
                    .@"32" => mem.readInt(u32, entry[0..4], endian),
                    .@"64" => mem.readInt(u64, entry[0..8], endian),
                };
                return getStringGeneric(opt_str, offset);
            },
            .line_strp => |offset| return di.getLineString(offset),
            else => return bad(),
        }
    }
};
299
/// Array length needed to index directly by the integer value of `Section.Id`.
const num_sections = std.enums.directEnumArrayLen(Section.Id, 0);
/// One optional `Section` per `Section.Id`.
pub const SectionArray = [num_sections]?Section;

/// Errors returned by `open`; identical to `ScanError`.
pub const OpenError = ScanError;
304
/// Initialize DWARF info. The caller has the responsibility to initialize most
/// of the `Dwarf` fields before calling. In particular the `.debug_info`
/// section must already be present in `sections`, because both scans unwrap
/// it unconditionally.
///
/// Scans all functions first, then all compile units.
pub fn open(d: *Dwarf, gpa: Allocator, endian: Endian) OpenError!void {
    try d.scanAllFunctions(gpa, endian);
    try d.scanAllCompileUnits(gpa, endian);
}
312
/// A range of program counter values. Lookups in this file treat it as
/// half-open: an address matches when `start <= address and address < end`.
const PcRange = struct {
    start: u64,
    end: u64,
};
317
/// One entry of `func_list`, recorded by `scanAllFunctions`.
const Func = struct {
    /// Null when the function has a name but no address range was found.
    pc_range: ?PcRange,
    /// Null when no name could be resolved for the DIE.
    name: ?[]const u8,
};
322
/// Returns the bytes of `dwarf_section`, or null when that section's slot in
/// `sections` is empty.
pub fn section(di: Dwarf, dwarf_section: Section.Id) ?[]const u8 {
    const slot = di.sections[@intFromEnum(dwarf_section)] orelse return null;
    return slot.data;
}
326
/// Releases everything this `Dwarf` owns: section data flagged as owned, all
/// abbreviation tables, every compile unit's DIE attributes and source
/// location cache, the function list and the range list. `di` is left
/// undefined afterwards.
pub fn deinit(di: *Dwarf, gpa: Allocator) void {
    for (di.sections) |maybe_section| {
        const s = maybe_section orelse continue;
        if (s.owned) gpa.free(s.data);
    }

    for (di.abbrev_table_list.items) |*table| table.deinit(gpa);
    di.abbrev_table_list.deinit(gpa);

    for (di.compile_unit_list.items) |*unit| {
        if (unit.src_loc_cache) |*cache| {
            cache.line_table.deinit(gpa);
            gpa.free(cache.directories);
            gpa.free(cache.files);
        }
        unit.die.deinit(gpa);
    }
    di.compile_unit_list.deinit(gpa);

    di.func_list.deinit(gpa);
    di.ranges.deinit(gpa);
    di.* = undefined;
}
348
/// Returns the name recorded for the last entry in `func_list` whose range
/// contains `address`, or null when no entry matches. The returned name may
/// itself be null if the matching entry has no name.
pub fn getSymbolName(di: *const Dwarf, address: u64) ?[]const u8 {
    // Walk the function list back to front so that child DIEs are seen before
    // their parents. A `DW_TAG_inlined_subroutine` DIE covers a sub-range of
    // its caller, and the callee's name is the one we want to report.
    const funcs = di.func_list.items;
    var remaining = funcs.len;
    while (remaining > 0) : (remaining -= 1) {
        const candidate = &funcs[remaining - 1];
        const range = candidate.pc_range orelse continue;
        if (range.start <= address and address < range.end) return candidate.name;
    }
    return null;
}
366
/// Errors produced while scanning `.debug_info`: malformed or missing debug
/// information, failures surfaced by the reader, and allocation failure.
pub const ScanError = error{
    InvalidDebugInfo,
    MissingDebugInfo,
    ReadFailed,
    EndOfStream,
    Overflow,
    StreamTooLong,
} || Allocator.Error;
375
/// Walks every unit in `.debug_info` and appends one `Func` to `func_list`
/// for each address range of each function-like DIE.
///
/// For every unit the header is decoded, the abbreviation table is obtained
/// through `getAbbrevTable`, and the unit's DIEs are parsed in order. DIEs
/// tagged `subprogram`, `inlined_subroutine`, `subroutine` or `entry_point`
/// contribute entries. A DIE that has a name but no usable range is still
/// recorded, with a null `pc_range`.
///
/// The `.debug_info` section must be present; it is unwrapped with `.?`.
/// Returns `bad()` for versions outside 2..5, for DWARF 5 units whose type is
/// not `UT.compile`, and for malformed attribute data.
fn scanAllFunctions(di: *Dwarf, gpa: Allocator, endian: Endian) ScanError!void {
    var fr: Reader = .fixed(di.section(.debug_info).?);
    var this_unit_offset: u64 = 0;

    while (this_unit_offset < fr.buffer.len) {
        fr.seek = @intCast(this_unit_offset);

        const unit_header = try readUnitHeader(&fr, endian);
        if (unit_header.unit_length == 0) return;
        const next_offset = unit_header.header_length + unit_header.unit_length;

        const version = try fr.takeInt(u16, endian);
        if (version < 2 or version > 5) return bad();

        // DWARF 5 swapped the order of the address size and the abbreviation
        // offset and inserted a unit type byte in front of them.
        var address_size: u8 = undefined;
        var debug_abbrev_offset: u64 = undefined;
        if (version >= 5) {
            const unit_type = try fr.takeByte();
            if (unit_type != UT.compile) return bad();
            address_size = try fr.takeByte();
            debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian);
        } else {
            debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian);
            address_size = try fr.takeByte();
        }

        const abbrev_table = try di.getAbbrevTable(gpa, debug_abbrev_offset);

        var max_attrs: usize = 0;
        var zig_padding_abbrev_code: u7 = 0;
        for (abbrev_table.abbrevs) |abbrev| {
            max_attrs = @max(max_attrs, abbrev.attrs.len);
            if (cast(u7, abbrev.code)) |code| {
                if (abbrev.tag_id == DW.TAG.ZIG_padding and
                    !abbrev.has_children and
                    abbrev.attrs.len == 0)
                {
                    zig_padding_abbrev_code = code;
                }
            }
        }

        // Three scratch buffers, each large enough for any DIE of this unit:
        // [0] holds the DIE currently being parsed, [1] keeps the compile unit
        // DIE's attributes alive for the rest of the unit, and [2] is used
        // while following `abstract_origin` / `specification` references.
        const attrs_buf = try gpa.alloc(Die.Attr, max_attrs * 3);
        defer gpa.free(attrs_buf);
        var attrs_bufs: [3][]Die.Attr = undefined;
        for (&attrs_bufs, 0..) |*buf, index| buf.* = attrs_buf[index * max_attrs ..][0..max_attrs];

        const next_unit_pos = this_unit_offset + next_offset;

        var compile_unit: CompileUnit = .{
            .version = version,
            .format = unit_header.format,
            .addr_size_bytes = address_size,
            .die = undefined,
            .pc_range = null,

            .str_offsets_base = 0,
            .addr_base = 0,
            .rnglists_base = 0,
            .loclists_base = 0,
            .frame_base = null,
            .src_loc_cache = null,
        };

        while (true) {
            // Skip over bytes that are either zero or the single-byte Zig
            // padding abbreviation code.
            fr.seek = std.mem.indexOfNonePos(u8, fr.buffer, fr.seek, &.{
                zig_padding_abbrev_code, 0,
            }) orelse fr.buffer.len;
            if (fr.seek >= next_unit_pos) break;
            var die_obj = (try parseDie(
                &fr,
                attrs_bufs[0],
                abbrev_table,
                unit_header.format,
                endian,
                address_size,
            )) orelse continue;

            switch (die_obj.tag_id) {
                DW.TAG.compile_unit => {
                    compile_unit.die = die_obj;
                    compile_unit.die.attrs = attrs_bufs[1][0..die_obj.attrs.len];
                    @memcpy(compile_unit.die.attrs, die_obj.attrs);

                    compile_unit.str_offsets_base = if (die_obj.getAttr(AT.str_offsets_base)) |fv| try fv.getUInt(usize) else 0;
                    compile_unit.addr_base = if (die_obj.getAttr(AT.addr_base)) |fv| try fv.getUInt(usize) else 0;
                    compile_unit.rnglists_base = if (die_obj.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0;
                    compile_unit.loclists_base = if (die_obj.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0;
                    // Take the pointer from the retained copy: `die_obj`'s
                    // attributes live in scratch buffer 0, which the next
                    // `parseDie` call overwrites.
                    compile_unit.frame_base = compile_unit.die.getAttr(AT.frame_base);
                },
                DW.TAG.subprogram, DW.TAG.inlined_subroutine, DW.TAG.subroutine, DW.TAG.entry_point => {
                    const fn_name = x: {
                        var this_die_obj = die_obj;
                        // Prevent endless loops
                        for (0..3) |_| {
                            if (this_die_obj.getAttr(AT.name)) |_| {
                                break :x try this_die_obj.getAttrString(di, endian, AT.name, di.section(.debug_str), &compile_unit);
                            } else if (this_die_obj.getAttr(AT.abstract_origin)) |_| {
                                const after_die_offset = fr.seek;
                                defer fr.seek = after_die_offset;

                                // Follow the DIE it points to and repeat
                                const ref_offset = try this_die_obj.getAttrRef(AT.abstract_origin, this_unit_offset, next_offset);
                                fr.seek = @intCast(ref_offset);
                                this_die_obj = (try parseDie(
                                    &fr,
                                    attrs_bufs[2],
                                    abbrev_table, // wrong abbrev table for different cu
                                    unit_header.format,
                                    endian,
                                    address_size,
                                )) orelse return bad();
                            } else if (this_die_obj.getAttr(AT.specification)) |_| {
                                const after_die_offset = fr.seek;
                                defer fr.seek = after_die_offset;

                                // Follow the DIE it points to and repeat
                                const ref_offset = try this_die_obj.getAttrRef(AT.specification, this_unit_offset, next_offset);
                                fr.seek = @intCast(ref_offset);
                                this_die_obj = (try parseDie(
                                    &fr,
                                    attrs_bufs[2],
                                    abbrev_table, // wrong abbrev table for different cu
                                    unit_header.format,
                                    endian,
                                    address_size,
                                )) orelse return bad();
                            } else {
                                break :x null;
                            }
                        }

                        break :x null;
                    };

                    var range_added = if (die_obj.getAttrAddr(di, endian, AT.low_pc, &compile_unit)) |low_pc| blk: {
                        if (die_obj.getAttr(AT.high_pc)) |high_pc_value| {
                            const pc_end = switch (high_pc_value.*) {
                                .addr => |value| value,
                                // `high_pc` given as an offset from `low_pc`;
                                // the sum is checked because both come from
                                // the debug info.
                                .udata => |offset| std.math.add(u64, low_pc, offset) catch return bad(),
                                else => return bad(),
                            };

                            try di.func_list.append(gpa, .{
                                .name = fn_name,
                                .pc_range = .{
                                    .start = low_pc,
                                    .end = pc_end,
                                },
                            });

                            break :blk true;
                        }

                        break :blk false;
                    } else |err| blk: {
                        if (err != error.MissingDebugInfo) return err;
                        break :blk false;
                    };

                    if (die_obj.getAttr(AT.ranges)) |ranges_value| blk: {
                        var iter = DebugRangeIterator.init(ranges_value, di, endian, &compile_unit) catch |err| {
                            if (err != error.MissingDebugInfo) return err;
                            break :blk;
                        };

                        while (try iter.next()) |range| {
                            range_added = true;
                            try di.func_list.append(gpa, .{
                                .name = fn_name,
                                .pc_range = .{
                                    .start = range.start,
                                    .end = range.end,
                                },
                            });
                        }
                    }

                    // Keep named functions even when no address range was found.
                    if (fn_name != null and !range_added) {
                        try di.func_list.append(gpa, .{
                            .name = fn_name,
                            .pc_range = null,
                        });
                    }
                },
                else => {},
            }
        }

        this_unit_offset += next_offset;
    }
}
567
/// Walks every unit in `.debug_info`, parses only its first DIE (which must
/// be tagged `compile_unit`), and appends a `CompileUnit` to
/// `compile_unit_list`.
///
/// Each appended unit owns a heap copy of its DIE's attributes, which
/// `Dwarf.deinit` releases. `pc_range` is filled in when the DIE has a
/// readable `low_pc` together with a `high_pc`.
///
/// The `.debug_info` section must be present; it is unwrapped with `.?`.
/// Returns `bad()` for versions outside 2..5, for DWARF 5 units whose type is
/// not `UT.compile`, and for malformed attribute data.
fn scanAllCompileUnits(di: *Dwarf, gpa: Allocator, endian: Endian) ScanError!void {
    var fr: Reader = .fixed(di.section(.debug_info).?);
    var this_unit_offset: u64 = 0;

    // Scratch space for decoding; resized per unit to its largest abbreviation.
    var attrs_buf = std.array_list.Managed(Die.Attr).init(gpa);
    defer attrs_buf.deinit();

    while (this_unit_offset < fr.buffer.len) {
        fr.seek = @intCast(this_unit_offset);

        const unit_header = try readUnitHeader(&fr, endian);
        if (unit_header.unit_length == 0) return;
        const next_offset = unit_header.header_length + unit_header.unit_length;

        const version = try fr.takeInt(u16, endian);
        if (version < 2 or version > 5) return bad();

        // DWARF 5 swapped the order of the address size and the abbreviation
        // offset and inserted a unit type byte in front of them.
        var address_size: u8 = undefined;
        var debug_abbrev_offset: u64 = undefined;
        if (version >= 5) {
            const unit_type = try fr.takeByte();
            if (unit_type != UT.compile) return bad();
            address_size = try fr.takeByte();
            debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian);
        } else {
            debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian);
            address_size = try fr.takeByte();
        }

        const abbrev_table = try di.getAbbrevTable(gpa, debug_abbrev_offset);

        var max_attrs: usize = 0;
        for (abbrev_table.abbrevs) |abbrev| {
            max_attrs = @max(max_attrs, abbrev.attrs.len);
        }
        try attrs_buf.resize(max_attrs);

        var compile_unit_die = (try parseDie(
            &fr,
            attrs_buf.items,
            abbrev_table,
            unit_header.format,
            endian,
            address_size,
        )) orelse return bad();

        if (compile_unit_die.tag_id != DW.TAG.compile_unit) return bad();

        // The DIE outlives the scratch buffer, so give it its own copy. Until
        // the unit has been appended to `compile_unit_list`, nothing else
        // would free that copy if a later step fails.
        const owned_attrs = try gpa.dupe(Die.Attr, compile_unit_die.attrs);
        errdefer gpa.free(owned_attrs);
        compile_unit_die.attrs = owned_attrs;

        var compile_unit: CompileUnit = .{
            .version = version,
            .format = unit_header.format,
            .addr_size_bytes = address_size,
            .pc_range = null,
            .die = compile_unit_die,
            .str_offsets_base = if (compile_unit_die.getAttr(AT.str_offsets_base)) |fv| try fv.getUInt(usize) else 0,
            .addr_base = if (compile_unit_die.getAttr(AT.addr_base)) |fv| try fv.getUInt(usize) else 0,
            .rnglists_base = if (compile_unit_die.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0,
            .loclists_base = if (compile_unit_die.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0,
            .frame_base = compile_unit_die.getAttr(AT.frame_base),
            .src_loc_cache = null,
        };

        compile_unit.pc_range = x: {
            if (compile_unit_die.getAttrAddr(di, endian, AT.low_pc, &compile_unit)) |low_pc| {
                if (compile_unit_die.getAttr(AT.high_pc)) |high_pc_value| {
                    const pc_end = switch (high_pc_value.*) {
                        .addr => |value| value,
                        // `high_pc` given as an offset from `low_pc`; the sum
                        // is checked because both come from the debug info.
                        .udata => |offset| std.math.add(u64, low_pc, offset) catch return bad(),
                        else => return bad(),
                    };
                    break :x PcRange{
                        .start = low_pc,
                        .end = pc_end,
                    };
                } else {
                    break :x null;
                }
            } else |err| {
                if (err != error.MissingDebugInfo) return err;
                break :x null;
            }
        };

        try di.compile_unit_list.append(gpa, compile_unit);

        this_unit_offset += next_offset;
    }
}
658
/// Fills `ranges` with the address ranges of every compile unit and sorts the
/// result by start address. Must be called while `ranges` is still empty.
///
/// A unit with a `pc_range` contributes exactly that range. Otherwise the
/// unit's `ranges` attribute, if any, is iterated; units whose range iterator
/// cannot be created are skipped.
pub fn populateRanges(d: *Dwarf, gpa: Allocator, endian: Endian) ScanError!void {
    assert(d.ranges.items.len == 0);

    for (d.compile_unit_list.items, 0..) |*unit, unit_index| {
        if (unit.pc_range) |whole| {
            try d.ranges.append(gpa, .{
                .start = whole.start,
                .end = whole.end,
                .compile_unit_index = unit_index,
            });
        } else if (unit.die.getAttr(AT.ranges)) |ranges_value| {
            var iter = DebugRangeIterator.init(ranges_value, d, endian, unit) catch continue;
            while (try iter.next()) |piece| {
                // Not sure why LLVM thinks it's OK to emit these...
                if (piece.start == piece.end) continue;

                try d.ranges.append(gpa, .{
                    .start = piece.start,
                    .end = piece.end,
                    .compile_unit_index = unit_index,
                });
            }
        }
    }

    const byStart = struct {
        fn lessThan(_: void, a: Range, b: Range) bool {
            return a.start < b.start;
        }
    }.lessThan;
    std.mem.sortUnstable(Range, d.ranges.items, {}, byStart);
}
692
/// Iterates the address ranges of one range list. Units of DWARF version 5
/// and later are read from `.debug_rnglists`; earlier versions are read from
/// `.debug_ranges`.
const DebugRangeIterator = struct {
    /// Added to base-relative entries; replaced when a base address selection
    /// entry is encountered.
    base_address: u64,
    /// The section `fr` reads from; selects the entry encoding used by `next`.
    section_type: Section.Id,
    di: *const Dwarf,
    endian: Endian,
    compile_unit: *const CompileUnit,
    /// Reader over the whole range section, positioned at the next entry.
    fr: Reader,

    /// Creates an iterator for the range list that `ranges_value` refers to.
    ///
    /// `sec_offset` and `udata` values are used as the section offset
    /// directly; `rnglistx` values are resolved through the unit's offsets
    /// table at `rnglists_base`. Returns `error.MissingDebugInfo` when the
    /// required section is absent and `bad()` for other forms or
    /// out-of-bounds table entries.
    pub fn init(ranges_value: *const FormValue, di: *const Dwarf, endian: Endian, compile_unit: *const CompileUnit) !@This() {
        const section_type = if (compile_unit.version >= 5) Section.Id.debug_rnglists else Section.Id.debug_ranges;
        const debug_ranges = di.section(section_type) orelse return error.MissingDebugInfo;

        const ranges_offset = switch (ranges_value.*) {
            .sec_offset, .udata => |off| off,
            .rnglistx => |idx| off: {
                switch (compile_unit.format) {
                    .@"32" => {
                        const offset_loc = compile_unit.rnglists_base + 4 * idx;
                        if (offset_loc + 4 > debug_ranges.len) return bad();
                        const offset = mem.readInt(u32, debug_ranges[@intCast(offset_loc)..][0..4], endian);
                        break :off compile_unit.rnglists_base + offset;
                    },
                    .@"64" => {
                        const offset_loc = compile_unit.rnglists_base + 8 * idx;
                        if (offset_loc + 8 > debug_ranges.len) return bad();
                        const offset = mem.readInt(u64, debug_ranges[@intCast(offset_loc)..][0..8], endian);
                        break :off compile_unit.rnglists_base + offset;
                    },
                }
            },
            else => return bad(),
        };

        // All the addresses in the list are relative to the value
        // specified by DW_AT.low_pc or to some other value encoded
        // in the list itself.
        // If no starting value is specified use zero.
        const base_address = compile_unit.die.getAttrAddr(di, endian, AT.low_pc, compile_unit) catch |err| switch (err) {
            error.MissingDebugInfo => 0,
            else => return err,
        };

        var fr: Reader = .fixed(debug_ranges);
        fr.seek = cast(usize, ranges_offset) orelse return bad();

        return .{
            .base_address = base_address,
            .section_type = section_type,
            .di = di,
            .endian = endian,
            .compile_unit = compile_unit,
            .fr = fr,
        };
    }

    /// Returns the next range in the list, or null if the end was reached.
    /// Entries that only change the base address are consumed transparently.
    pub fn next(self: *@This()) !?PcRange {
        const endian = self.endian;
        const addr_size_bytes = self.compile_unit.addr_size_bytes;
        switch (self.section_type) {
            .debug_rnglists => {
                const kind = try self.fr.takeByte();
                switch (kind) {
                    RLE.end_of_list => return null,
                    RLE.base_addressx => {
                        const index = try self.fr.takeLeb128(u64);
                        self.base_address = try self.di.readDebugAddr(endian, self.compile_unit, index);
                        return try self.next();
                    },
                    RLE.startx_endx => {
                        const start_index = try self.fr.takeLeb128(u64);
                        const start_addr = try self.di.readDebugAddr(endian, self.compile_unit, start_index);

                        const end_index = try self.fr.takeLeb128(u64);
                        const end_addr = try self.di.readDebugAddr(endian, self.compile_unit, end_index);

                        return .{
                            .start = start_addr,
                            .end = end_addr,
                        };
                    },
                    RLE.startx_length => {
                        const start_index = try self.fr.takeLeb128(u64);
                        const start_addr = try self.di.readDebugAddr(endian, self.compile_unit, start_index);

                        const len = try self.fr.takeLeb128(u64);
                        const end_addr = start_addr + len;

                        return .{
                            .start = start_addr,
                            .end = end_addr,
                        };
                    },
                    RLE.offset_pair => {
                        const start_addr = try self.fr.takeLeb128(u64);
                        const end_addr = try self.fr.takeLeb128(u64);

                        // This is the only kind that uses the base address
                        return .{
                            .start = self.base_address + start_addr,
                            .end = self.base_address + end_addr,
                        };
                    },
                    RLE.base_address => {
                        self.base_address = try readAddress(&self.fr, endian, addr_size_bytes);
                        return try self.next();
                    },
                    RLE.start_end => {
                        const start_addr = try readAddress(&self.fr, endian, addr_size_bytes);
                        const end_addr = try readAddress(&self.fr, endian, addr_size_bytes);

                        return .{
                            .start = start_addr,
                            .end = end_addr,
                        };
                    },
                    RLE.start_length => {
                        const start_addr = try readAddress(&self.fr, endian, addr_size_bytes);
                        const len = try self.fr.takeLeb128(u64);
                        const end_addr = start_addr + len;

                        return .{
                            .start = start_addr,
                            .end = end_addr,
                        };
                    },
                    else => return bad(),
                }
            },
            .debug_ranges => {
                const start_addr = try readAddress(&self.fr, endian, addr_size_bytes);
                const end_addr = try readAddress(&self.fr, endian, addr_size_bytes);
                // A pair of zeroes terminates the list.
                if (start_addr == 0 and end_addr == 0) return null;

                // The entry with start_addr = max_representable_address selects a new value for the base address.
                // That value has all bits set within the target's address
                // width, so the shift is counted in bits, not bytes.
                if (addr_size_bytes == 0 or addr_size_bytes > 8) return bad();
                const unused_bits: u6 = @intCast(64 - 8 * @as(u16, addr_size_bytes));
                const max_representable_address = ~@as(u64, 0) >> unused_bits;
                if (start_addr == max_representable_address) {
                    self.base_address = end_addr;
                    return try self.next();
                }

                return .{
                    .start = self.base_address + start_addr,
                    .end = self.base_address + end_addr,
                };
            },
            else => unreachable,
        }
    }
};
843
/// Returns the first compile unit whose `pc_range`, or any range of its
/// `ranges` attribute, contains `target_address`. Returns `missing()` when no
/// unit matches. Units whose range iterator cannot be created are skipped.
///
/// TODO: change this to binary searching the sorted compile unit list
pub fn findCompileUnit(di: *const Dwarf, endian: Endian, target_address: u64) !*CompileUnit {
    for (di.compile_unit_list.items) |*unit| {
        if (unit.pc_range) |whole| {
            if (whole.start <= target_address and target_address < whole.end) return unit;
        }

        if (unit.die.getAttr(AT.ranges)) |ranges_value| {
            var iter = DebugRangeIterator.init(ranges_value, di, endian, unit) catch continue;
            while (try iter.next()) |piece| {
                if (piece.start <= target_address and target_address < piece.end) return unit;
            }
        }
    }

    return missing();
}
860
/// Gets an already existing AbbrevTable given the abbrev_offset, or if not found,
/// parses it from `.debug_abbrev` and caches it in `abbrev_table_list`.
///
/// The returned pointer refers to an element of `abbrev_table_list` and is
/// therefore only valid until the next table is appended to that list.
fn getAbbrevTable(di: *Dwarf, gpa: Allocator, abbrev_offset: u64) !*const Abbrev.Table {
    for (di.abbrev_table_list.items) |*table| {
        if (table.offset == abbrev_offset) {
            return table;
        }
    }

    var new_table = try di.parseAbbrevTable(gpa, abbrev_offset);
    // Until the list has taken ownership, a failed append must not leak the
    // freshly parsed table.
    errdefer new_table.deinit(gpa);
    try di.abbrev_table_list.append(gpa, new_table);
    return &di.abbrev_table_list.items[di.abbrev_table_list.items.len - 1];
}
875
/// Parses the abbreviation table that starts at `offset` within
/// `.debug_abbrev`. Declarations are read until one with code 0 is found.
///
/// The returned table owns its `abbrevs` slice and each declaration's `attrs`
/// slice, all allocated with `gpa`; release them with `Abbrev.Table.deinit`.
/// The `.debug_abbrev` section must be present; it is unwrapped with `.?`.
fn parseAbbrevTable(di: *Dwarf, gpa: Allocator, offset: u64) !Abbrev.Table {
    var fr: Reader = .fixed(di.section(.debug_abbrev).?);
    fr.seek = cast(usize, offset) orelse return bad();

    var abbrevs = std.array_list.Managed(Abbrev).init(gpa);
    // After a successful `toOwnedSlice` the list is empty and this cleanup is
    // a no-op; on error it frees every declaration collected so far.
    defer {
        for (abbrevs.items) |*abbrev| {
            abbrev.deinit(gpa);
        }
        abbrevs.deinit();
    }

    var attrs = std.array_list.Managed(Abbrev.Attr).init(gpa);
    defer attrs.deinit();

    while (true) {
        const code = try fr.takeLeb128(u64);
        if (code == 0) break;
        const tag_id = try fr.takeLeb128(u64);
        const has_children = (try fr.takeByte()) == DW.CHILDREN.yes;

        // Attribute specifications run until a (0, 0) pair.
        while (true) {
            const attr_id = try fr.takeLeb128(u64);
            const form_id = try fr.takeLeb128(u64);
            if (attr_id == 0 and form_id == 0) break;
            try attrs.append(.{
                .id = attr_id,
                .form_id = form_id,
                .payload = switch (form_id) {
                    FORM.implicit_const => try fr.takeLeb128(i64),
                    else => undefined,
                },
            });
        }

        // Reserve the slot first so that nothing can fail between taking
        // ownership of the attribute slice and storing it in `abbrevs`;
        // otherwise the slice would leak on allocation failure.
        try abbrevs.ensureUnusedCapacity(1);
        abbrevs.appendAssumeCapacity(.{
            .code = code,
            .tag_id = tag_id,
            .has_children = has_children,
            .attrs = try attrs.toOwnedSlice(),
        });
    }

    return .{
        .offset = offset,
        .abbrevs = try abbrevs.toOwnedSlice(),
    };
}
924
/// Parses one DIE at the reader's current position.
///
/// Reads the abbreviation code, looks it up in `abbrev_table`, and decodes
/// one value per attribute specification with `parseFormValue`. Returns null
/// for abbreviation code 0 and `bad()` when the code is not in the table.
///
/// The returned DIE's `attrs` is a prefix of `attrs_buf`, so it is only valid
/// until that buffer is reused. `attrs_buf` must be at least as long as the
/// attribute list of the matched abbreviation.
fn parseDie(
    fr: *Reader,
    attrs_buf: []Die.Attr,
    abbrev_table: *const Abbrev.Table,
    format: Format,
    endian: Endian,
    addr_size_bytes: u8,
) ScanError!?Die {
    const abbrev_code = try fr.takeLeb128(u64);
    if (abbrev_code == 0) return null;
    const decl = abbrev_table.get(abbrev_code) orelse return bad();

    const parsed = attrs_buf[0..decl.attrs.len];
    for (decl.attrs, parsed) |spec, *slot| {
        const value = try parseFormValue(fr, spec.form_id, format, endian, addr_size_bytes, spec.payload);
        slot.* = .{ .id = spec.id, .value = value };
    }

    return .{
        .tag_id = decl.tag_id,
        .has_children = decl.has_children,
        .attrs = parsed,
    };
}
948
949/// Ensures that addresses in the returned LineTable are monotonically increasing.
950fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, endian: Endian, compile_unit: *const CompileUnit) !CompileUnit.SrcLocCache {
951 const compile_unit_cwd = try compile_unit.die.getAttrString(d, endian, AT.comp_dir, d.section(.debug_line_str), compile_unit);
952 const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list);
953
954 var fr: Reader = .fixed(d.section(.debug_line).?);
955 fr.seek = @intCast(line_info_offset);
956
957 const unit_header = try readUnitHeader(&fr, endian);
958 if (unit_header.unit_length == 0) return missing();
959
960 const next_offset = unit_header.header_length + unit_header.unit_length;
961
962 const version = try fr.takeInt(u16, endian);
963 if (version < 2) return bad();
964
965 const addr_size_bytes: u8, const seg_size: u8 = if (version >= 5) .{
966 try fr.takeByte(),
967 try fr.takeByte(),
968 } else .{
969 compile_unit.addr_size_bytes,
970 0,
971 };
972 if (seg_size != 0) return bad(); // unsupported
973
974 const prologue_length = try readFormatSizedInt(&fr, unit_header.format, endian);
975 const prog_start_offset = fr.seek + prologue_length;
976
977 const minimum_instruction_length = try fr.takeByte();
978 if (minimum_instruction_length == 0) return bad();
979
980 if (version >= 4) {
981 const maximum_operations_per_instruction = try fr.takeByte();
982 _ = maximum_operations_per_instruction;
983 }
984
985 const default_is_stmt = (try fr.takeByte()) != 0;
986 const line_base = try fr.takeByteSigned();
987
988 const line_range = try fr.takeByte();
989 if (line_range == 0) return bad();
990
991 const opcode_base = try fr.takeByte();
992
993 const standard_opcode_lengths = try fr.take(opcode_base - 1);
994
995 var directories: ArrayList(FileEntry) = .empty;
996 defer directories.deinit(gpa);
997 var file_entries: ArrayList(FileEntry) = .empty;
998 defer file_entries.deinit(gpa);
999
1000 if (version < 5) {
1001 try directories.append(gpa, .{ .path = compile_unit_cwd });
1002
1003 while (true) {
1004 const dir = try fr.takeSentinel(0);
1005 if (dir.len == 0) break;
1006 try directories.append(gpa, .{ .path = dir });
1007 }
1008
1009 while (true) {
1010 const file_name = try fr.takeSentinel(0);
1011 if (file_name.len == 0) break;
1012 const dir_index = try fr.takeLeb128(u32);
1013 const mtime = try fr.takeLeb128(u64);
1014 const size = try fr.takeLeb128(u64);
1015 try file_entries.append(gpa, .{
1016 .path = file_name,
1017 .dir_index = dir_index,
1018 .mtime = mtime,
1019 .size = size,
1020 });
1021 }
1022 } else {
1023 const FileEntFmt = struct {
1024 content_type_code: u16,
1025 form_code: u16,
1026 };
1027 {
1028 var dir_ent_fmt_buf: [10]FileEntFmt = undefined;
1029 const directory_entry_format_count = try fr.takeByte();
1030 if (directory_entry_format_count > dir_ent_fmt_buf.len) return bad();
1031 for (dir_ent_fmt_buf[0..directory_entry_format_count]) |*ent_fmt| {
1032 ent_fmt.* = .{
1033 .content_type_code = try fr.takeLeb128(u8),
1034 .form_code = try fr.takeLeb128(u16),
1035 };
1036 }
1037
1038 const directories_count = try fr.takeLeb128(usize);
1039
1040 for (try directories.addManyAsSlice(gpa, directories_count)) |*e| {
1041 e.* = .{ .path = &.{} };
1042 for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| {
1043 const form_value = try parseFormValue(&fr, ent_fmt.form_code, unit_header.format, endian, addr_size_bytes, null);
1044 switch (ent_fmt.content_type_code) {
1045 DW.LNCT.path => e.path = try form_value.getString(d.*),
1046 DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32),
1047 DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64),
1048 DW.LNCT.size => e.size = try form_value.getUInt(u64),
1049 DW.LNCT.MD5 => e.md5 = switch (form_value) {
1050 .data16 => |data16| data16.*,
1051 else => return bad(),
1052 },
1053 else => continue,
1054 }
1055 }
1056 }
1057 }
1058
1059 var file_ent_fmt_buf: [10]FileEntFmt = undefined;
1060 const file_name_entry_format_count = try fr.takeByte();
1061 if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad();
1062 for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| {
1063 ent_fmt.* = .{
1064 .content_type_code = try fr.takeLeb128(u16),
1065 .form_code = try fr.takeLeb128(u16),
1066 };
1067 }
1068
1069 const file_names_count = try fr.takeLeb128(usize);
1070 try file_entries.ensureUnusedCapacity(gpa, file_names_count);
1071
1072 for (try file_entries.addManyAsSlice(gpa, file_names_count)) |*e| {
1073 e.* = .{ .path = &.{} };
1074 for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| {
1075 const form_value = try parseFormValue(&fr, ent_fmt.form_code, unit_header.format, endian, addr_size_bytes, null);
1076 switch (ent_fmt.content_type_code) {
1077 DW.LNCT.path => e.path = try form_value.getString(d.*),
1078 DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32),
1079 DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64),
1080 DW.LNCT.size => e.size = try form_value.getUInt(u64),
1081 DW.LNCT.MD5 => e.md5 = switch (form_value) {
1082 .data16 => |data16| data16.*,
1083 else => return bad(),
1084 },
1085 else => continue,
1086 }
1087 }
1088 }
1089 }
1090
1091 var prog = LineNumberProgram.init(default_is_stmt, version);
1092 var line_table: CompileUnit.SrcLocCache.LineTable = .{};
1093 errdefer line_table.deinit(gpa);
1094
1095 fr.seek = @intCast(prog_start_offset);
1096
1097 const next_unit_pos = line_info_offset + next_offset;
1098
1099 while (fr.seek < next_unit_pos) {
1100 const opcode = try fr.takeByte();
1101
1102 if (opcode == DW.LNS.extended_op) {
1103 const op_size = try fr.takeLeb128(u64);
1104 if (op_size < 1) return bad();
1105 const sub_op = try fr.takeByte();
1106 switch (sub_op) {
1107 DW.LNE.end_sequence => {
1108 // The row being added here is an "end" address, meaning
1109 // that it does not map to the source location here -
1110 // rather it marks the previous address as the last address
1111 // that maps to this source location.
1112
1113 // In this implementation we don't mark end of addresses.
1114 // This is a performance optimization based on the fact
1115 // that we don't need to know if an address is missing
1116 // source location info; we are only interested in being
1117 // able to look up source location info for addresses that
1118 // are known to have debug info.
1119 //if (debug_debug_mode) assert(!line_table.contains(prog.address));
1120 //try line_table.put(gpa, prog.address, CompileUnit.SrcLocCache.LineEntry.invalid);
1121 prog.reset();
1122 },
1123 DW.LNE.set_address => {
1124 prog.address = try readAddress(&fr, endian, addr_size_bytes);
1125 },
1126 DW.LNE.define_file => {
1127 const path = try fr.takeSentinel(0);
1128 const dir_index = try fr.takeLeb128(u32);
1129 const mtime = try fr.takeLeb128(u64);
1130 const size = try fr.takeLeb128(u64);
1131 try file_entries.append(gpa, .{
1132 .path = path,
1133 .dir_index = dir_index,
1134 .mtime = mtime,
1135 .size = size,
1136 });
1137 },
1138 else => try fr.discardAll64(op_size - 1),
1139 }
1140 } else if (opcode >= opcode_base) {
1141 // special opcodes
1142 const adjusted_opcode = opcode - opcode_base;
1143 const inc_addr = minimum_instruction_length * (adjusted_opcode / line_range);
1144 const inc_line = @as(i32, line_base) + @as(i32, adjusted_opcode % line_range);
1145 prog.line += inc_line;
1146 prog.address += inc_addr;
1147 try prog.addRow(gpa, &line_table);
1148 prog.basic_block = false;
1149 } else {
1150 switch (opcode) {
1151 DW.LNS.copy => {
1152 try prog.addRow(gpa, &line_table);
1153 prog.basic_block = false;
1154 },
1155 DW.LNS.advance_pc => {
1156 const arg = try fr.takeLeb128(u64);
1157 prog.address += arg * minimum_instruction_length;
1158 },
1159 DW.LNS.advance_line => {
1160 const arg = try fr.takeLeb128(i64);
1161 prog.line += arg;
1162 },
1163 DW.LNS.set_file => {
1164 const arg = try fr.takeLeb128(usize);
1165 prog.file = arg;
1166 },
1167 DW.LNS.set_column => {
1168 const arg = try fr.takeLeb128(u64);
1169 prog.column = arg;
1170 },
1171 DW.LNS.negate_stmt => {
1172 prog.is_stmt = !prog.is_stmt;
1173 },
1174 DW.LNS.set_basic_block => {
1175 prog.basic_block = true;
1176 },
1177 DW.LNS.const_add_pc => {
1178 const inc_addr = minimum_instruction_length * ((255 - opcode_base) / line_range);
1179 prog.address += inc_addr;
1180 },
1181 DW.LNS.fixed_advance_pc => {
1182 const arg = try fr.takeInt(u16, endian);
1183 prog.address += arg;
1184 },
1185 DW.LNS.set_prologue_end => {},
1186 else => {
1187 if (opcode - 1 >= standard_opcode_lengths.len) return bad();
1188 try fr.discardAll(standard_opcode_lengths[opcode - 1]);
1189 },
1190 }
1191 }
1192 }
1193
1194 // Dwarf standard v5, 6.2.5 says
1195 // > Within a sequence, addresses and operation pointers may only increase.
1196 // However, this is empirically not the case in reality, so we sort here.
1197 line_table.sortUnstable(struct {
1198 keys: []const u64,
1199
1200 pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
1201 return ctx.keys[a_index] < ctx.keys[b_index];
1202 }
1203 }{ .keys = line_table.keys() });
1204
1205 return .{
1206 .line_table = line_table,
1207 .directories = try directories.toOwnedSlice(gpa),
1208 .files = try file_entries.toOwnedSlice(gpa),
1209 .version = version,
1210 };
1211}
1212
/// Makes sure `cu.src_loc_cache` is populated.
///
/// The line number program of `cu` is run only when no cache exists yet; a
/// unit that already has a cache is left untouched.
pub fn populateSrcLocCache(d: *Dwarf, gpa: Allocator, endian: Endian, cu: *CompileUnit) ScanError!void {
    if (cu.src_loc_cache == null) {
        cu.src_loc_cache = try d.runLineNumberProgram(gpa, endian, cu);
    }
}
1217
/// Resolves `target_address` to a source location using the line table of
/// `compile_unit`, populating the unit's source location cache on first use.
///
/// The returned `file_name` is allocated with `gpa` by joining the directory
/// path and the file path; the caller owns that memory.
///
/// Returns `error.InvalidDebugInfo` when the matching row refers to a file or
/// directory index that has no entry in the line program header.
pub fn getLineNumberInfo(
    d: *Dwarf,
    gpa: Allocator,
    endian: Endian,
    compile_unit: *CompileUnit,
    target_address: u64,
) !std.debug.SourceLocation {
    try d.populateSrcLocCache(gpa, endian, compile_unit);
    const slc = &compile_unit.src_loc_cache.?;
    const entry = try slc.findSource(target_address);

    // For versions before 5 the file number is shifted down by one to index
    // `slc.files`. A row naming file 0 in such a unit has no corresponding
    // entry, so reject it instead of letting the subtraction underflow.
    const one_based = slc.version < 5;
    if (one_based and entry.file == 0) return bad();
    const file_index = entry.file - @intFromBool(one_based);
    if (file_index >= slc.files.len) return bad();

    const file_entry = &slc.files[file_index];
    if (file_entry.dir_index >= slc.directories.len) return bad();
    const dir_name = slc.directories[file_entry.dir_index].path;

    const file_name = try std.fs.path.join(gpa, &.{ dir_name, file_entry.path });
    return .{
        .line = entry.line,
        .column = entry.column,
        .file_name = file_name,
    };
}
1240
/// Looks up the null-terminated string starting at `offset` in `.debug_str`.
fn getString(di: Dwarf, offset: u64) ![:0]const u8 {
    const debug_str = di.section(.debug_str);
    return getStringGeneric(debug_str, offset);
}
1244
/// Looks up the null-terminated string starting at `offset` in
/// `.debug_line_str`.
fn getLineString(di: Dwarf, offset: u64) ![:0]const u8 {
    const debug_line_str = di.section(.debug_line_str);
    return getStringGeneric(debug_line_str, offset);
}
1248
/// Reads entry `index` of the `.debug_addr` table that `compile_unit` refers
/// to through its `addr_base`.
///
/// Returns `error.InvalidDebugInfo` when the section is absent, when
/// `addr_base` does not leave room for a table header inside the section,
/// when the table version is not 5, when the address size is not 1, 2, 4 or
/// 8, or when the requested entry does not lie within the section.
fn readDebugAddr(di: Dwarf, endian: Endian, compile_unit: *const CompileUnit, index: u64) !u64 {
    const debug_addr = di.section(.debug_addr) orelse return bad();

    // addr_base points to the first item after the header, however we
    // need to read the header to know the size of each item. Empirically,
    // it may disagree with is_64 on the compile unit.
    // The header is 8 or 12 bytes depending on is_64.
    const addr_base = compile_unit.addr_base;
    if (addr_base < 8) return bad();
    // The header fields read below sit directly in front of `addr_base`, so
    // it must not point past the end of the section either.
    if (addr_base > debug_addr.len) return bad();
    const base: usize = @intCast(addr_base);

    const version = mem.readInt(u16, debug_addr[base - 4 ..][0..2], endian);
    if (version != 5) return bad();

    const addr_size = debug_addr[base - 2];
    const seg_size = debug_addr[base - 1];

    // Both sizes are bytes read from the section, so widen before adding and
    // check every later step for overflow rather than trusting the input.
    const entry_size = @as(u64, addr_size) + seg_size;
    const table_offset = std.math.mul(u64, entry_size, index) catch return bad();
    const byte_offset = std.math.add(u64, base, table_offset) catch return bad();
    const end_offset = std.math.add(u64, byte_offset, addr_size) catch return bad();
    if (end_offset > debug_addr.len) return bad();

    // In range: `end_offset` was just shown to be within the section length.
    const start: usize = @intCast(byte_offset);
    return switch (addr_size) {
        1 => debug_addr[start],
        2 => mem.readInt(u16, debug_addr[start..][0..2], endian),
        4 => mem.readInt(u32, debug_addr[start..][0..4], endian),
        8 => mem.readInt(u64, debug_addr[start..][0..8], endian),
        else => bad(),
    };
}
1274
/// Decodes a single attribute value of form `form_id` from `r`.
///
/// - `format` selects the width (4 or 8 bytes) of section-offset sized forms.
/// - `endian` is the byte order of fixed-width integers.
/// - `addr_size_bytes` is the width of `FORM.addr` values.
/// - `implicit_const` supplies the value for `FORM.implicit_const`; when it is
///   `null` that form is rejected.
///
/// Returns `error.InvalidDebugInfo` for an unrecognized form, or whatever
/// error the reader reports when the input ends early.
fn parseFormValue(
    r: *Reader,
    form_id: u64,
    format: Format,
    endian: Endian,
    addr_size_bytes: u8,
    implicit_const: ?i64,
) ScanError!FormValue {
    // An indirect form stores the actual form code inline as a ULEB128 in
    // front of the value. Resolve it with a loop rather than by recursing, so
    // that a long run of indirect codes in the input cannot grow the stack.
    var resolved_form = form_id;
    while (resolved_form == FORM.indirect) {
        resolved_form = try r.takeLeb128(u64);
    }

    return switch (resolved_form) {
        // DWARF5.pdf page 213: the size of this value is encoded in the
        // compilation unit header as address size.
        FORM.addr => .{ .addr = try readAddress(r, endian, addr_size_bytes) },
        FORM.addrx1 => .{ .addrx = try r.takeByte() },
        FORM.addrx2 => .{ .addrx = try r.takeInt(u16, endian) },
        FORM.addrx3 => .{ .addrx = try r.takeInt(u24, endian) },
        FORM.addrx4 => .{ .addrx = try r.takeInt(u32, endian) },
        FORM.addrx => .{ .addrx = try r.takeLeb128(u64) },

        // Blocks: a length prefix followed by that many raw bytes.
        FORM.block1 => .{ .block = try r.take(try r.takeByte()) },
        FORM.block2 => .{ .block = try r.take(try r.takeInt(u16, endian)) },
        FORM.block4 => .{ .block = try r.take(try r.takeInt(u32, endian)) },
        FORM.block => .{ .block = try r.take(try r.takeLeb128(usize)) },

        FORM.data1 => .{ .udata = try r.takeByte() },
        FORM.data2 => .{ .udata = try r.takeInt(u16, endian) },
        FORM.data4 => .{ .udata = try r.takeInt(u32, endian) },
        FORM.data8 => .{ .udata = try r.takeInt(u64, endian) },
        FORM.data16 => .{ .data16 = try r.takeArray(16) },
        FORM.udata => .{ .udata = try r.takeLeb128(u64) },
        FORM.sdata => .{ .sdata = try r.takeLeb128(i64) },
        FORM.exprloc => .{ .exprloc = try r.take(try r.takeLeb128(usize)) },
        FORM.flag => .{ .flag = (try r.takeByte()) != 0 },
        // Presence of the attribute is the value; nothing is read.
        FORM.flag_present => .{ .flag = true },
        FORM.sec_offset => .{ .sec_offset = try readFormatSizedInt(r, format, endian) },

        FORM.ref1 => .{ .ref = try r.takeByte() },
        FORM.ref2 => .{ .ref = try r.takeInt(u16, endian) },
        FORM.ref4 => .{ .ref = try r.takeInt(u32, endian) },
        FORM.ref8 => .{ .ref = try r.takeInt(u64, endian) },
        FORM.ref_udata => .{ .ref = try r.takeLeb128(u64) },

        FORM.ref_addr => .{ .ref_addr = try readFormatSizedInt(r, format, endian) },
        FORM.ref_sig8 => .{ .ref = try r.takeInt(u64, endian) },

        FORM.string => .{ .string = try r.takeSentinel(0) },
        FORM.strp => .{ .strp = try readFormatSizedInt(r, format, endian) },
        FORM.strx1 => .{ .strx = try r.takeByte() },
        FORM.strx2 => .{ .strx = try r.takeInt(u16, endian) },
        FORM.strx3 => .{ .strx = try r.takeInt(u24, endian) },
        FORM.strx4 => .{ .strx = try r.takeInt(u32, endian) },
        FORM.strx => .{ .strx = try r.takeLeb128(usize) },
        FORM.line_strp => .{ .line_strp = try readFormatSizedInt(r, format, endian) },
        // The value comes from the caller, not from the reader.
        FORM.implicit_const => .{ .sdata = implicit_const orelse return bad() },
        FORM.loclistx => .{ .loclistx = try r.takeLeb128(u64) },
        FORM.rnglistx => .{ .rnglistx = try r.takeLeb128(u64) },
        else => {
            //debug.print("unrecognized form id: {x}\n", .{resolved_form});
            return bad();
        },
    };
}
1337
/// One directory or file record collected from a line number program; the
/// same type is used for the directory list and the file list.
const FileEntry = struct {
    /// Path bytes of the directory or file.
    path: []const u8,
    /// Index into the directory list; 0 unless set explicitly.
    dir_index: u32 = 0,
    /// Modification time as recorded in the debug info; 0 unless set
    /// explicitly.
    mtime: u64 = 0,
    /// Size as recorded in the debug info; 0 unless set explicitly.
    size: u64 = 0,
    /// MD5 digest; all zero bytes unless set explicitly.
    md5: [16]u8 = @splat(0),
};
1345
/// Registers of the state machine that executes a line number program.
const LineNumberProgram = struct {
    address: u64,
    file: usize,
    line: i64,
    column: u64,
    version: u16,
    is_stmt: bool,
    basic_block: bool,

    /// Value that `reset` restores `is_stmt` to.
    default_is_stmt: bool,

    /// Creates a state machine in its initial state. `is_stmt` becomes both
    /// the current and the default value of the `is_stmt` register.
    pub fn init(is_stmt: bool, version: u16) LineNumberProgram {
        var prog: LineNumberProgram = undefined;
        // These two survive `reset`; every other field is assigned by it.
        prog.version = version;
        prog.default_is_stmt = is_stmt;
        prog.reset();
        return prog;
    }

    /// Reset the state machine following the DWARF specification.
    /// `version` and `default_is_stmt` are left as they are.
    pub fn reset(self: *LineNumberProgram) void {
        self.address = 0;
        self.file = 1;
        self.line = 1;
        self.column = 0;
        self.basic_block = false;
        self.is_stmt = self.default_is_stmt;
    }

    /// Records the current registers in `table`, keyed by `address`.
    ///
    /// Rows whose line is 0 are dropped. Line and column values that do not
    /// fit in a `u32` are stored as `maxInt(u32)`; a file number that does
    /// not fit is reported as invalid debug info.
    pub fn addRow(prog: *LineNumberProgram, gpa: Allocator, table: *CompileUnit.SrcLocCache.LineTable) !void {
        if (prog.line == 0) {
            //if (debug_debug_mode) @panic("garbage line data");
            return;
        }
        if (debug_debug_mode) assert(!table.contains(prog.address));

        const row_line = cast(u32, prog.line) orelse maxInt(u32);
        const row_column = cast(u32, prog.column) orelse maxInt(u32);
        const row_file = cast(u32, prog.file) orelse return bad();
        try table.put(gpa, prog.address, .{
            .line = row_line,
            .column = row_column,
            .file = row_file,
        });
    }
};
1393
/// Decoded initial-length field found at the start of a unit.
const UnitHeader = struct {
    /// Whether the unit uses 32-bit or 64-bit offsets.
    format: Format,
    /// Number of bytes the initial-length field occupied in the input.
    header_length: u4,
    /// Length value carried by the initial-length field.
    unit_length: u64,
};
1399
/// Reads the initial-length field of a unit from `r`.
///
/// A first 32-bit word below `0xfffffff0` is the length of a 32-bit format
/// unit. The word `0xffffffff` announces the 64-bit format and is followed by
/// a 64-bit length. Every other value yields `error.InvalidDebugInfo`.
pub fn readUnitHeader(r: *Reader, endian: Endian) ScanError!UnitHeader {
    const initial_length = try r.takeInt(u32, endian);

    if (initial_length < 0xfffffff0) return .{
        .format = .@"32",
        .header_length = 4,
        .unit_length = initial_length,
    };

    // Of the remaining values only the 64-bit escape code is accepted.
    if (initial_length != 0xffffffff) return bad();

    return .{
        .format = .@"64",
        .header_length = 12,
        .unit_length = try r.takeInt(u64, endian),
    };
}
1415
/// Returns the DWARF register number for an x86_64 register number found in compact unwind info.
///
/// Only the values 1 through 6 have a mapping; 0 and 7 yield
/// `error.InvalidRegister`.
pub fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u16 {
    // Indexed by the compact unwind register number; `null` marks values
    // without a mapping.
    const dwarf_reg_numbers = [8]?u16{
        null,
        3, // RBX
        12, // R12
        13, // R13
        14, // R14
        15, // R15
        6, // RBP
        null,
    };
    const dwarf_reg = dwarf_reg_numbers[unwind_reg_number] orelse return error.InvalidRegister;
    return dwarf_reg;
}
1428
/// Returns the DWARF register number of the instruction pointer for `arch`.
///
/// Returns `null` for CPU architectures without an instruction pointer register.
pub fn ipRegNum(arch: std.Target.Cpu.Arch) ?u16 {
    const reg_num: ?u16 = switch (arch) {
        .aarch64,
        .aarch64_be,
        => 32,
        .arc,
        .arceb,
        => 160,
        .arm,
        .armeb,
        .thumb,
        .thumbeb,
        => 15,
        .csky => 64,
        .hexagon => 76,
        .kvx => 64,
        .lanai => 2,
        .loongarch32,
        .loongarch64,
        => 64,
        .m68k => 26,
        .mips,
        .mipsel,
        .mips64,
        .mips64el,
        => 66,
        .or1k => 35,
        .powerpc,
        .powerpcle,
        .powerpc64,
        .powerpc64le,
        => 67,
        .riscv32,
        .riscv32be,
        .riscv64,
        .riscv64be,
        => 65,
        .s390x => 65,
        .sparc,
        .sparc64,
        => 32,
        .ve => 144,
        .x86 => 8,
        .x86_64 => 16,
        else => null,
    };
    return reg_num;
}
1453
/// Returns the DWARF register number of the frame pointer for `arch`.
///
/// Calling this with an architecture that is not listed reaches
/// `unreachable`.
pub fn fpRegNum(arch: std.Target.Cpu.Arch) u16 {
    const reg_num: u16 = switch (arch) {
        .aarch64,
        .aarch64_be,
        => 29,
        .arc,
        .arceb,
        => 27,
        .arm,
        .armeb,
        .thumb,
        .thumbeb,
        => 11,
        .csky => 14,
        .hexagon => 30,
        .kvx => 14,
        .lanai => 5,
        .loongarch32,
        .loongarch64,
        => 22,
        .m68k => 14,
        .mips,
        .mipsel,
        .mips64,
        .mips64el,
        => 30,
        .or1k => 2,
        .powerpc,
        .powerpcle,
        .powerpc64,
        .powerpc64le,
        => 1,
        .riscv32,
        .riscv32be,
        .riscv64,
        .riscv64be,
        => 8,
        .s390x => 11,
        .sparc,
        .sparc64,
        => 30,
        .ve => 9,
        .x86 => 5,
        .x86_64 => 6,
        else => unreachable,
    };
    return reg_num;
}
1477
/// Returns the DWARF register number of the stack pointer for `arch`.
///
/// Calling this with an architecture that is not listed reaches
/// `unreachable`.
pub fn spRegNum(arch: std.Target.Cpu.Arch) u16 {
    const reg_num: u16 = switch (arch) {
        .aarch64,
        .aarch64_be,
        => 31,
        .arc,
        .arceb,
        => 28,
        .arm,
        .armeb,
        .thumb,
        .thumbeb,
        => 13,
        .csky => 14,
        .hexagon => 29,
        .kvx => 12,
        .lanai => 4,
        .loongarch32,
        .loongarch64,
        => 3,
        .m68k => 15,
        .mips,
        .mipsel,
        .mips64,
        .mips64el,
        => 29,
        .or1k => 1,
        .powerpc,
        .powerpcle,
        .powerpc64,
        .powerpc64le,
        => 1,
        .riscv32,
        .riscv32be,
        .riscv64,
        .riscv64be,
        => 2,
        .s390x => 15,
        .sparc,
        .sparc64,
        => 14,
        .ve => 11,
        .x86 => 4,
        .x86_64 => 7,
        else => unreachable,
    };
    return reg_num;
}
1501
/// Tells whether unwinding for this target is supported by the Dwarf standard.
///
/// Only the CPU architecture of `target` is consulted.
///
/// See also `std.debug.SelfInfo.can_unwind` which tells whether the Zig standard
/// library has a working implementation of unwinding for the current target.
pub fn supportsUnwinding(target: *const std.Target) bool {
    switch (target.cpu.arch) {
        .amdgcn, .nvptx, .nvptx64, .spirv32, .spirv64 => return false,

        // Conservative guess. Feel free to update this logic with any targets
        // that are known to not support Dwarf unwinding.
        else => return true,
    }
}
1520
/// Produces `error.InvalidDebugInfo` after notifying
/// `invalidDebugInfoDetected`.
///
/// Routing every invalid-debug-info report through this one function makes
/// it handy to turn such reports into a crash when working on this file.
pub fn bad() error{InvalidDebugInfo} {
    const err = error.InvalidDebugInfo;
    invalidDebugInfoDetected();
    return err;
}
1527
/// Hook invoked when invalid debug info is encountered. Panics when
/// `debug_debug_mode` is enabled and does nothing otherwise.
pub fn invalidDebugInfoDetected() void {
    switch (debug_debug_mode) {
        true => @panic("bad dwarf"),
        false => {},
    }
}
1531
/// Produces `error.MissingDebugInfo`, or panics instead when
/// `debug_debug_mode` is enabled.
pub fn missing() error{MissingDebugInfo} {
    return if (debug_debug_mode) @panic("missing dwarf") else error.MissingDebugInfo;
}
1536
/// Returns the null-terminated string that starts at `offset` inside the
/// section bytes `opt_str`.
///
/// Yields `error.InvalidDebugInfo` when the section is absent, when `offset`
/// lies beyond the section, or when no zero byte follows `offset`.
fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 {
    const section_bytes = opt_str orelse return bad();
    const start = cast(usize, offset) orelse return bad();
    if (start > section_bytes.len) return bad();

    // Valid strings always have a terminating zero byte
    const tail = section_bytes[start..];
    const len = std.mem.indexOfScalar(u8, tail, 0) orelse return bad();
    return tail[0..len :0];
}
1545
/// Gathers symbol information for `address`: the symbol name, the name of
/// the containing compile unit, and the source location.
///
/// Returns `.unknown` when no compile unit can be found for `address` because
/// debug info is missing or invalid. The same two conditions in the later
/// lookups only leave the affected field `null`. Any other error is
/// propagated to the caller.
pub fn getSymbol(di: *Dwarf, gpa: Allocator, endian: Endian, address: u64) !std.debug.Symbol {
    const compile_unit = di.findCompileUnit(endian, address) catch |err| switch (err) {
        error.MissingDebugInfo, error.InvalidDebugInfo => return .unknown,
        else => |other| return other,
    };

    const symbol_name = di.getSymbolName(address);

    const debug_str = di.section(.debug_str);
    const unit_name = compile_unit.die.getAttrString(di, endian, std.dwarf.AT.name, debug_str, compile_unit) catch |err| switch (err) {
        error.MissingDebugInfo, error.InvalidDebugInfo => null,
    };

    // Resolved last: this is the only step that allocates with `gpa`.
    const location = di.getLineNumberInfo(gpa, endian, compile_unit, address) catch |err| switch (err) {
        error.MissingDebugInfo, error.InvalidDebugInfo => null,
        else => |other| return other,
    };

    return .{
        .name = symbol_name,
        .compile_unit_name = unit_name,
        .source_location = location,
    };
}
1562
/// DWARF5 7.4: "In the 32-bit DWARF format, all values that represent lengths of DWARF sections and
/// offsets relative to the beginning of DWARF sections are represented using four bytes. In the
/// 64-bit DWARF format, all values that represent lengths of DWARF sections and offsets relative to
/// the beginning of DWARF sections are represented using eight bytes".
///
/// This function is for reading such values: it takes four bytes from `r` for
/// the 32-bit format and eight bytes for the 64-bit format, and returns the
/// value widened to `u64`.
fn readFormatSizedInt(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 {
    switch (format) {
        .@"32" => {
            const narrow = try r.takeInt(u32, endian);
            return narrow;
        },
        .@"64" => {
            const wide = try r.takeInt(u64, endian);
            return wide;
        },
    }
}
1575
/// Reads an address that is `addr_size_bytes` wide from `r` and widens it to
/// `u64`. Widths other than 2, 4 and 8 yield `error.InvalidDebugInfo`.
fn readAddress(r: *Reader, endian: Endian, addr_size_bytes: u8) !u64 {
    switch (addr_size_bytes) {
        2 => return try r.takeInt(u16, endian),
        4 => return try r.takeInt(u32, endian),
        8 => return try r.takeInt(u64, endian),
        else => return bad(),
    }
}