Commit 46e724ab28

mlugg <mlugg@mlugg.co.uk>
2023-05-26 08:14:54
std.dwarf: handle DWARF 5 compile unit DW_AT_ranges correctly
The encoding of compile unit DW_AT_ranges data changed quite significantly between DWARF 4 and 5. Some systems are shipping DWARF 5 libraries (Void Linux on musl libc seems to use it for crt1, etc.), which meant that when printing stack traces, a random compile unit might be incorrectly identified as containing an address, resulting in incorrect location information. I was consistently experiencing this issue with compiler stack traces, and this change fixed it.
1 parent 1697d44
Changed files (1)
lib
lib/std/dwarf.zig
@@ -299,30 +299,7 @@ const Die = struct {
         const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
         return switch (form_value.*) {
             FormValue.Address => |value| value,
-            FormValue.AddrOffset => |index| {
-                const debug_addr = di.debug_addr orelse return badDwarf();
-                // addr_base points to the first item after the header, however we
-                // need to read the header to know the size of each item. Empirically,
-                // it may disagree with is_64 on the compile unit.
-                // The header is 8 or 12 bytes depending on is_64.
-                if (compile_unit.addr_base < 8) return badDwarf();
-
-                const version = mem.readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian);
-                if (version != 5) return badDwarf();
-
-                const addr_size = debug_addr[compile_unit.addr_base - 2];
-                const seg_size = debug_addr[compile_unit.addr_base - 1];
-
-                const byte_offset = compile_unit.addr_base + (addr_size + seg_size) * index;
-                if (byte_offset + addr_size > debug_addr.len) return badDwarf();
-                switch (addr_size) {
-                    1 => return debug_addr[byte_offset],
-                    2 => return mem.readInt(u16, debug_addr[byte_offset..][0..2], di.endian),
-                    4 => return mem.readInt(u32, debug_addr[byte_offset..][0..4], di.endian),
-                    8 => return mem.readInt(u64, debug_addr[byte_offset..][0..8], di.endian),
-                    else => return badDwarf(),
-                }
-            },
+            FormValue.AddrOffset => |index| di.readDebugAddr(compile_unit, index),
             else => error.InvalidDebugInfo,
         };
     }
@@ -952,41 +929,119 @@ pub const DwarfInfo = struct {
             if (compile_unit.pc_range) |range| {
                 if (target_address >= range.start and target_address < range.end) return compile_unit;
             }
-            if (di.debug_ranges) |debug_ranges| {
-                if (compile_unit.die.getAttrSecOffset(AT.ranges)) |ranges_offset| {
-                    var stream = io.fixedBufferStream(debug_ranges);
-                    const in = &stream.reader();
-                    const seekable = &stream.seekableStream();
-
-                    // All the addresses in the list are relative to the value
-                    // specified by DW_AT.low_pc or to some other value encoded
-                    // in the list itself.
-                    // If no starting value is specified use zero.
-                    var base_address = compile_unit.die.getAttrAddr(di, AT.low_pc, compile_unit.*) catch |err| switch (err) {
-                        error.MissingDebugInfo => @as(u64, 0), // TODO https://github.com/ziglang/zig/issues/11135
-                        else => return err,
-                    };
 
-                    try seekable.seekTo(ranges_offset);
+            const opt_debug_ranges = if (compile_unit.version >= 5) di.debug_rnglists else di.debug_ranges;
+            const debug_ranges = opt_debug_ranges orelse continue;
+
+            const ranges_val = compile_unit.die.getAttr(AT.ranges) orelse continue;
+            const ranges_offset = switch (ranges_val.*) {
+                .SecOffset => |off| off,
+                .RangeListOffset => |idx| off: {
+                    if (compile_unit.is_64) {
+                        const offset_loc = @intCast(usize, compile_unit.rnglists_base + 8 * idx);
+                        if (offset_loc + 8 > debug_ranges.len) return badDwarf();
+                        const offset = mem.readInt(u64, debug_ranges[offset_loc..][0..8], di.endian);
+                        break :off compile_unit.rnglists_base + offset;
+                    } else {
+                        const offset_loc = @intCast(usize, compile_unit.rnglists_base + 4 * idx);
+                        if (offset_loc + 4 > debug_ranges.len) return badDwarf();
+                        const offset = mem.readInt(u32, debug_ranges[offset_loc..][0..4], di.endian);
+                        break :off compile_unit.rnglists_base + offset;
+                    }
+                },
+                else => return badDwarf(),
+            };
 
-                    while (true) {
-                        const begin_addr = try in.readInt(usize, di.endian);
-                        const end_addr = try in.readInt(usize, di.endian);
-                        if (begin_addr == 0 and end_addr == 0) {
-                            break;
-                        }
-                        // This entry selects a new value for the base address
-                        if (begin_addr == math.maxInt(usize)) {
-                            base_address = end_addr;
-                            continue;
-                        }
-                        if (target_address >= base_address + begin_addr and target_address < base_address + end_addr) {
-                            return compile_unit;
-                        }
+            var stream = io.fixedBufferStream(debug_ranges);
+            const in = &stream.reader();
+            const seekable = &stream.seekableStream();
+
+            // All the addresses in the list are relative to the value
+            // specified by DW_AT.low_pc or to some other value encoded
+            // in the list itself.
+            // If no starting value is specified use zero.
+            var base_address = compile_unit.die.getAttrAddr(di, AT.low_pc, compile_unit.*) catch |err| switch (err) {
+                error.MissingDebugInfo => @as(u64, 0), // TODO https://github.com/ziglang/zig/issues/11135
+                else => return err,
+            };
+
+            try seekable.seekTo(ranges_offset);
+
+            if (compile_unit.version >= 5) {
+                while (true) {
+                    const kind = try in.readByte();
+                    switch (kind) {
+                        RLE.end_of_list => break,
+                        RLE.base_addressx => {
+                            const index = try leb.readULEB128(usize, in);
+                            base_address = try di.readDebugAddr(compile_unit.*, index);
+                        },
+                        RLE.startx_endx => {
+                            const start_index = try leb.readULEB128(usize, in);
+                            const start_addr = try di.readDebugAddr(compile_unit.*, start_index);
+
+                            const end_index = try leb.readULEB128(usize, in);
+                            const end_addr = try di.readDebugAddr(compile_unit.*, end_index);
+
+                            if (target_address >= start_addr and target_address < end_addr) {
+                                return compile_unit;
+                            }
+                        },
+                        RLE.startx_length => {
+                            const start_index = try leb.readULEB128(usize, in);
+                            const start_addr = try di.readDebugAddr(compile_unit.*, start_index);
+
+                            const len = try leb.readULEB128(usize, in);
+                            const end_addr = start_addr + len;
+
+                            if (target_address >= start_addr and target_address < end_addr) {
+                                return compile_unit;
+                            }
+                        },
+                        RLE.offset_pair => {
+                            const start_addr = try leb.readULEB128(usize, in);
+                            const end_addr = try leb.readULEB128(usize, in);
+                            // This is the only kind that uses the base address
+                            if (target_address >= base_address + start_addr and target_address < base_address + end_addr) {
+                                return compile_unit;
+                            }
+                        },
+                        RLE.base_address => {
+                            base_address = try in.readInt(usize, di.endian);
+                        },
+                        RLE.start_end => {
+                            const start_addr = try in.readInt(usize, di.endian);
+                            const end_addr = try in.readInt(usize, di.endian);
+                            if (target_address >= start_addr and target_address < end_addr) {
+                                return compile_unit;
+                            }
+                        },
+                        RLE.start_length => {
+                            const start_addr = try in.readInt(usize, di.endian);
+                            const len = try leb.readULEB128(usize, in);
+                            const end_addr = start_addr + len;
+                            if (target_address >= start_addr and target_address < end_addr) {
+                                return compile_unit;
+                            }
+                        },
+                        else => return badDwarf(),
+                    }
+                }
+            } else {
+                while (true) {
+                    const begin_addr = try in.readInt(usize, di.endian);
+                    const end_addr = try in.readInt(usize, di.endian);
+                    if (begin_addr == 0 and end_addr == 0) {
+                        break;
+                    }
+                    // This entry selects a new value for the base address
+                    if (begin_addr == math.maxInt(usize)) {
+                        base_address = end_addr;
+                        continue;
+                    }
+                    if (target_address >= base_address + begin_addr and target_address < base_address + end_addr) {
+                        return compile_unit;
                     }
-                } else |err| {
-                    if (err != error.MissingDebugInfo) return err;
-                    continue;
                 }
             }
         }
@@ -1366,6 +1421,32 @@ pub const DwarfInfo = struct {
     fn getLineString(di: DwarfInfo, offset: u64) ![]const u8 {
         return getStringGeneric(di.debug_line_str, offset);
     }
+
+    fn readDebugAddr(di: DwarfInfo, compile_unit: CompileUnit, index: u64) !u64 {
+        const debug_addr = di.debug_addr orelse return badDwarf();
+
+        // addr_base points just past the table header, but the header must still
+        // be read for the entry size; empirically it may disagree with is_64 on
+        // the compile unit. The header is 8 or 12 bytes depending on is_64.
+        // NOTE(review): addr_base is not checked against debug_addr.len below.
+        if (compile_unit.addr_base < 8) return badDwarf();
+
+        const version = mem.readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian);
+        if (version != 5) return badDwarf(); // only a version 5 header is accepted
+
+        const addr_size = debug_addr[compile_unit.addr_base - 2];
+        const seg_size = debug_addr[compile_unit.addr_base - 1];
+
+        const byte_offset = @intCast(usize, compile_unit.addr_base + (addr_size + seg_size) * index);
+        if (byte_offset + addr_size > debug_addr.len) return badDwarf(); // entry must fit inside the section
+        return switch (addr_size) {
+            1 => debug_addr[byte_offset],
+            2 => mem.readInt(u16, debug_addr[byte_offset..][0..2], di.endian),
+            4 => mem.readInt(u32, debug_addr[byte_offset..][0..4], di.endian),
+            8 => mem.readInt(u64, debug_addr[byte_offset..][0..8], di.endian),
+            else => badDwarf(),
+        };
+    }
 };
 
 /// Initialize DWARF info. The caller has the responsibility to initialize most