Commit f04f9705cc

kcbanner <kcbanner@gmail.com>
2023-07-03 00:13:19
dwarf: add support for DWARF5 DW_AT_ranges in subprograms, add DebugRangeIterator
Some DWARF5 subprograms have non-contiguous instruction ranges. An example of such a function is `puts` in Ubuntu's libc. This change fixes name lookups for functions that use DW_AT_range in their DIE.
1 parent 395ab47
Changed files (2)
lib
lib/std/dwarf/abi.zig
@@ -245,7 +245,8 @@ pub fn regBytes(ucontext_ptr: anytype, reg_number: u8, reg_ctx: ?RegisterContext
 }
 
 /// Returns the ABI-defined default value this register has in the unwinding table
-/// before running any of the CIE instructions.
+/// before running any of the CIE instructions. The DWARF spec defines these values
+//  to be undefined, but allows ABI authors to override that default.
 pub fn getRegDefaultValue(reg_number: u8, out: []u8) void {
     // TODO: Implement any ABI-specific rules for the default value for registers
     _ = reg_number;
lib/std/dwarf.zig
@@ -717,7 +717,6 @@ pub const DwarfInfo = struct {
     }
 
     pub fn getSymbolName(di: *DwarfInfo, address: u64) ?[]const u8 {
-        // TODO: Can this be binary searched?
         for (di.func_list.items) |*func| {
             if (func.pc_range) |range| {
                 if (address >= range.start and address < range.end) {
@@ -835,37 +834,56 @@ pub const DwarfInfo = struct {
                             break :x null;
                         };
 
-                        const pc_range = x: {
-                            if (die_obj.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| {
-                                if (die_obj.getAttr(AT.high_pc)) |high_pc_value| {
-                                    const pc_end = switch (high_pc_value.*) {
-                                        FormValue.Address => |value| value,
-                                        FormValue.Const => |value| b: {
-                                            const offset = try value.asUnsignedLe();
-                                            break :b (low_pc + offset);
-                                        },
-                                        else => return badDwarf(),
-                                    };
-                                    break :x PcRange{
+                        var found_range = if (die_obj.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| blk: {
+                            if (die_obj.getAttr(AT.high_pc)) |high_pc_value| {
+                                const pc_end = switch (high_pc_value.*) {
+                                    FormValue.Address => |value| value,
+                                    FormValue.Const => |value| b: {
+                                        const offset = try value.asUnsignedLe();
+                                        break :b (low_pc + offset);
+                                    },
+                                    else => return badDwarf(),
+                                };
+
+                                try di.func_list.append(allocator, Func{
+                                    .name = fn_name,
+                                    .pc_range = .{
                                         .start = low_pc,
                                         .end = pc_end,
-                                    };
-                                } else {
-                                    break :x null;
-                                }
-                            } else |err| {
-                                if (err != error.MissingDebugInfo) return err;
-                                break :x null;
+                                    },
+                                });
                             }
+
+                            break :blk true;
+                        } else |err| blk: {
+                            if (err != error.MissingDebugInfo) return err;
+                            break :blk false;
                         };
 
-                        // TODO: Debug issue where `puts` in Ubuntu's libc was not found
-                        //if (fn_name != null and pc_range != null) debug.print("func_list: {s} -> 0x{x}-0x{x}\n", .{fn_name.?, pc_range.?.start, pc_range.?.end});
+                        if (die_obj.getAttr(AT.ranges)) |ranges_value| blk: {
+                            var iter = DebugRangeIterator.init(ranges_value, di, &compile_unit) catch |err| {
+                                if (err != error.MissingDebugInfo) return err;
+                                break :blk;
+                            };
+
+                            while (try iter.next()) |range| {
+                                found_range = true;
+                                try di.func_list.append(allocator, Func{
+                                    .name = fn_name,
+                                    .pc_range = .{
+                                        .start = range.start_addr,
+                                        .end = range.end_addr,
+                                    },
+                                });
+                            }
+                        }
 
-                        try di.func_list.append(allocator, Func{
-                            .name = fn_name,
-                            .pc_range = pc_range,
-                        });
+                        if (!found_range) {
+                            try di.func_list.append(allocator, Func{
+                                .name = fn_name,
+                                .pc_range = null,
+                            });
+                        }
                     },
                     else => {},
                 }
@@ -966,17 +984,18 @@ pub const DwarfInfo = struct {
         }
     }
 
-    pub fn findCompileUnit(di: *DwarfInfo, target_address: u64) !*const CompileUnit {
-        for (di.compile_unit_list.items) |*compile_unit| {
-            if (compile_unit.pc_range) |range| {
-                if (target_address >= range.start and target_address < range.end) return compile_unit;
-            }
+    const DebugRangeIterator = struct {
+        base_address: u64,
+        section_type: DwarfSection,
+        di: *DwarfInfo,
+        compile_unit: *const CompileUnit,
+        stream: io.FixedBufferStream([]const u8),
 
-            const opt_debug_ranges = if (compile_unit.version >= 5) di.section(.debug_rnglists) else di.section(.debug_ranges);
-            const debug_ranges = opt_debug_ranges orelse continue;
+        pub fn init(ranges_value: *const FormValue, di: *DwarfInfo, compile_unit: *const CompileUnit) !@This() {
+            const section_type = if (compile_unit.version >= 5) DwarfSection.debug_rnglists else DwarfSection.debug_ranges;
+            const debug_ranges = di.section(section_type) orelse return error.MissingDebugInfo;
 
-            const ranges_val = compile_unit.die.getAttr(AT.ranges) orelse continue;
-            const ranges_offset = switch (ranges_val.*) {
+            const ranges_offset = switch (ranges_value.*) {
                 .SecOffset => |off| off,
                 .Const => |c| try c.asUnsignedLe(),
                 .RangeListOffset => |idx| off: {
@@ -996,8 +1015,7 @@ pub const DwarfInfo = struct {
             };
 
             var stream = io.fixedBufferStream(debug_ranges);
-            const in = &stream.reader();
-            const seekable = &stream.seekableStream();
+            try stream.seekTo(ranges_offset);
 
             // All the addresses in the list are relative to the value
             // specified by DW_AT.low_pc or to some other value encoded
@@ -1008,86 +1026,122 @@ pub const DwarfInfo = struct {
                 else => return err,
             };
 
-            try seekable.seekTo(ranges_offset);
+            return .{
+                .base_address = base_address,
+                .section_type = section_type,
+                .di = di,
+                .compile_unit = compile_unit,
+                .stream = stream,
+            };
+        }
 
-            if (compile_unit.version >= 5) {
-                while (true) {
+        // Returns the next range in the list, or null if the end was reached.
+        pub fn next(self: *@This()) !?struct{ start_addr: u64, end_addr: u64 } {
+            const in = self.stream.reader();
+            switch (self.section_type) {
+                .debug_rnglists => {
                     const kind = try in.readByte();
                     switch (kind) {
-                        RLE.end_of_list => break,
+                        RLE.end_of_list => return null,
                         RLE.base_addressx => {
                             const index = try leb.readULEB128(usize, in);
-                            base_address = try di.readDebugAddr(compile_unit.*, index);
+                            self.base_address = try self.di.readDebugAddr(self.compile_unit.*, index);
+                            return try self.next();
                         },
                         RLE.startx_endx => {
                             const start_index = try leb.readULEB128(usize, in);
-                            const start_addr = try di.readDebugAddr(compile_unit.*, start_index);
+                            const start_addr = try self.di.readDebugAddr(self.compile_unit.*, start_index);
 
                             const end_index = try leb.readULEB128(usize, in);
-                            const end_addr = try di.readDebugAddr(compile_unit.*, end_index);
+                            const end_addr = try self.di.readDebugAddr(self.compile_unit.*, end_index);
 
-                            if (target_address >= start_addr and target_address < end_addr) {
-                                return compile_unit;
-                            }
+                            return .{
+                                .start_addr = start_addr,
+                                .end_addr = end_addr,
+                            };
                         },
                         RLE.startx_length => {
                             const start_index = try leb.readULEB128(usize, in);
-                            const start_addr = try di.readDebugAddr(compile_unit.*, start_index);
+                            const start_addr = try self.di.readDebugAddr(self.compile_unit.*, start_index);
 
                             const len = try leb.readULEB128(usize, in);
                             const end_addr = start_addr + len;
 
-                            if (target_address >= start_addr and target_address < end_addr) {
-                                return compile_unit;
-                            }
+                            return .{
+                                .start_addr = start_addr,
+                                .end_addr = end_addr,
+                            };
                         },
                         RLE.offset_pair => {
                             const start_addr = try leb.readULEB128(usize, in);
                             const end_addr = try leb.readULEB128(usize, in);
+
                             // This is the only kind that uses the base address
-                            if (target_address >= base_address + start_addr and target_address < base_address + end_addr) {
-                                return compile_unit;
-                            }
+                            return .{
+                                .start_addr = self.base_address + start_addr,
+                                .end_addr = self.base_address + end_addr,
+                            };
                         },
                         RLE.base_address => {
-                            base_address = try in.readInt(usize, di.endian);
+                            self.base_address = try in.readInt(usize, self.di.endian);
+                            return try self.next();
                         },
                         RLE.start_end => {
-                            const start_addr = try in.readInt(usize, di.endian);
-                            const end_addr = try in.readInt(usize, di.endian);
-                            if (target_address >= start_addr and target_address < end_addr) {
-                                return compile_unit;
-                            }
+                            const start_addr = try in.readInt(usize, self.di.endian);
+                            const end_addr = try in.readInt(usize, self.di.endian);
+
+                            return .{
+                                .start_addr = start_addr,
+                                .end_addr = end_addr,
+                            };
                         },
                         RLE.start_length => {
-                            const start_addr = try in.readInt(usize, di.endian);
+                            const start_addr = try in.readInt(usize, self.di.endian);
                             const len = try leb.readULEB128(usize, in);
                             const end_addr = start_addr + len;
-                            if (target_address >= start_addr and target_address < end_addr) {
-                                return compile_unit;
-                            }
+
+                            return .{
+                                .start_addr = start_addr,
+                                .end_addr = end_addr,
+                            };
                         },
                         else => return badDwarf(),
                     }
-                }
-            } else {
-                while (true) {
-                    const begin_addr = try in.readInt(usize, di.endian);
-                    const end_addr = try in.readInt(usize, di.endian);
-                    if (begin_addr == 0 and end_addr == 0) {
-                        break;
-                    }
+                },
+                .debug_ranges => {
+                    const start_addr = try in.readInt(usize, self.di.endian);
+                    const end_addr = try in.readInt(usize, self.di.endian);
+                    if (start_addr == 0 and end_addr == 0) return null;
+
                     // This entry selects a new value for the base address
-                    if (begin_addr == math.maxInt(usize)) {
-                        base_address = end_addr;
-                        continue;
-                    }
-                    if (target_address >= base_address + begin_addr and target_address < base_address + end_addr) {
-                        return compile_unit;
+                    if (start_addr == math.maxInt(usize)) {
+                        self.base_address = end_addr;
+                        return try self.next();
                     }
-                }
+
+                    return .{
+                        .start_addr = self.base_address + start_addr,
+                        .end_addr = self.base_address + end_addr,
+                    };
+                },
+                else => unreachable,
             }
         }
+    };
+
+    pub fn findCompileUnit(di: *DwarfInfo, target_address: u64) !*const CompileUnit {
+        for (di.compile_unit_list.items) |*compile_unit| {
+            if (compile_unit.pc_range) |range| {
+                if (target_address >= range.start and target_address < range.end) return compile_unit;
+            }
+
+            const ranges_value = compile_unit.die.getAttr(AT.ranges) orelse continue;
+            var iter = DebugRangeIterator.init(ranges_value, di, compile_unit) catch continue;
+            while (try iter.next()) |range| {
+                if (target_address >= range.start_addr and target_address < range.end_addr) return compile_unit;
+            }
+        }
+
         return missingDwarf();
     }
 
@@ -1566,7 +1620,6 @@ pub const DwarfInfo = struct {
                     }
                 }
 
-                // TODO: Avoiding sorting if has_eh_frame_hdr exists
                 std.mem.sort(FrameDescriptionEntry, di.fde_list.items, {}, struct {
                     fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool {
                         _ = ctx;