Commit de47acd732

Andrew Kelley <andrew@ziglang.org>
2024-08-03 02:45:31
code coverage dumping tool basic implementation
* std.debug.Dwarf: add `sortCompileUnits` along with a field to track the state for the purpose of assertions and correct API usage. This makes batch lookups faster. - in the future, findCompileUnit should be enhanced to rely on sorted compile units as well. * implement `std.debug.Dwarf.resolveSourceLocations` as well as `std.debug.Info.resolveSourceLocations`. It's still pretty slow, since it calls getLineNumberInfo for each array element, repeating a lot of work unnecessarily. * integrate these APIs with `std.Progress` to understand what is taking so long. The output I'm seeing from this tool shows a lot of missing source locations. In particular, the main area of interest is missing for my tokenizer fuzzing example.
1 parent 2e12b45
Changed files (4)
lib/std/debug/Dwarf.zig
@@ -39,6 +39,7 @@ pub const call_frame = @import("Dwarf/call_frame.zig");
 endian: std.builtin.Endian,
 sections: SectionArray = null_section_array,
 is_macho: bool,
+compile_units_sorted: bool,
 
 // Filled later by the initializer
 abbrev_table_list: std.ArrayListUnmanaged(Abbrev.Table) = .{},
@@ -728,9 +729,9 @@ pub const OpenError = ScanError;
 /// Initialize DWARF info. The caller has the responsibility to initialize most
 /// of the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the
 /// main binary file (not the secondary debug info file).
-pub fn open(di: *Dwarf, gpa: Allocator) OpenError!void {
-    try di.scanAllFunctions(gpa);
-    try di.scanAllCompileUnits(gpa);
+pub fn open(d: *Dwarf, gpa: Allocator) OpenError!void {
+    try d.scanAllFunctions(gpa);
+    try d.scanAllCompileUnits(gpa);
 }
 
 const PcRange = struct {
@@ -1061,6 +1062,39 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void {
     }
 }
 
+/// Populate missing PC ranges in compilation units (from their `AT.ranges` attribute), and then sort them by start address.
+/// Asserts the units are not already sorted. Does not guarantee pc_range to be non-null because there could be missing debug info; such units are ordered last.
+pub fn sortCompileUnits(d: *Dwarf) ScanError!void {
+    assert(!d.compile_units_sorted); // calling this twice is treated as API misuse
+
+    for (d.compile_unit_list.items) |*cu| {
+        if (cu.pc_range != null) continue; // already has a range; nothing to populate
+        const ranges_value = cu.die.getAttr(AT.ranges) orelse continue; // no ranges attribute: pc_range stays null
+        var iter = DebugRangeIterator.init(ranges_value, d, cu) catch continue; // best effort: an init failure skips this unit instead of failing the whole call
+        var start: u64 = maxInt(u64); // running minimum of range start addresses
+        var end: u64 = 0; // running maximum of range end addresses
+        while (try iter.next()) |range| { // unlike init, an iteration error is propagated to the caller
+            start = @min(start, range.start_addr);
+            end = @max(end, range.end_addr);
+        }
+        if (end != 0) cu.pc_range = .{ // end is still 0 when no range raised it (e.g. the iterator yielded nothing)
+            .start = start,
+            .end = end,
+        };
+    }
+
+    std.mem.sortUnstable(CompileUnit, d.compile_unit_list.items, {}, struct {
+        fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool {
+            _ = ctx;
+            const a_range = a.pc_range orelse return false; // a unit without a range is never "less than" another
+            const b_range = b.pc_range orelse return true; // a unit with a range precedes one without
+            return a_range.start < b_range.start;
+        }
+    }.lessThan);
+
+    d.compile_units_sorted = true; // record that the list is now in sorted order
+}
+
 const DebugRangeIterator = struct {
     base_address: u64,
     section_type: Section.Id,
@@ -1208,6 +1242,7 @@ const DebugRangeIterator = struct {
     }
 };
 
+/// TODO: change this to binary searching the sorted compile unit list
 pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*const CompileUnit {
     for (di.compile_unit_list.items) |*compile_unit| {
         if (compile_unit.pc_range) |range| {
@@ -2275,6 +2310,7 @@ pub const ElfModule = struct {
             .endian = endian,
             .sections = sections,
             .is_macho = false,
+            .compile_units_sorted = false,
         };
 
         try Dwarf.open(&di, gpa);
@@ -2326,6 +2362,8 @@ pub const ElfModule = struct {
     }
 };
 
+pub const ResolveSourceLocationsError = Allocator.Error || DeprecatedFixedBufferReader.Error; // error set returned by `resolveSourceLocations`
+
 /// Given an array of virtual memory addresses, sorted ascending, outputs a
 /// corresponding array of source locations, by writing into the provided
 /// `output` slice.
@@ -2335,11 +2373,44 @@ pub fn resolveSourceLocations(
     sorted_pc_addrs: []const u64,
     /// Asserts its length equals length of `sorted_pc_addrs`.
     output: []std.debug.SourceLocation,
-) error{ MissingDebugInfo, InvalidDebugInfo }!void {
+    parent_prog_node: std.Progress.Node,
+) ResolveSourceLocationsError!void {
     assert(sorted_pc_addrs.len == output.len);
-    _ = d;
-    _ = gpa;
-    @panic("TODO");
+    assert(d.compile_units_sorted);
+
+    const prog_node = parent_prog_node.start("Resolve Source Locations", sorted_pc_addrs.len);
+    defer prog_node.end();
+
+    var cu_i: usize = 0;
+    var cu: *const CompileUnit = &d.compile_unit_list.items[0];
+    var range = cu.pc_range.?;
+    next_pc: for (sorted_pc_addrs, output) |pc, *out| {
+        defer prog_node.completeOne();
+        while (pc >= range.end) {
+            cu_i += 1;
+            if (cu_i >= d.compile_unit_list.items.len) {
+                out.* = std.debug.SourceLocation.invalid;
+                continue :next_pc;
+            }
+            cu = &d.compile_unit_list.items[cu_i];
+            range = cu.pc_range orelse {
+                out.* = std.debug.SourceLocation.invalid;
+                continue :next_pc;
+            };
+        }
+        if (pc < range.start) {
+            out.* = std.debug.SourceLocation.invalid;
+            continue :next_pc;
+        }
+        // TODO: instead of calling this function, break the function up into one that parses the
+        // information once and prepares a context that can be reused for the entire batch.
+        if (getLineNumberInfo(d, gpa, cu.*, pc)) |src_loc| {
+            out.* = src_loc;
+        } else |err| switch (err) {
+            error.MissingDebugInfo, error.InvalidDebugInfo => out.* = std.debug.SourceLocation.invalid,
+            else => |e| return e,
+        }
+    }
 }
 
 fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol {
lib/std/debug/Info.zig
@@ -20,9 +20,14 @@ address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule),
 
 pub const LoadError = Dwarf.ElfModule.LoadError;
 
-pub fn load(gpa: Allocator, path: Path) LoadError!Info {
+pub fn load(gpa: Allocator, path: Path, parent_prog_node: std.Progress.Node) LoadError!Info {
     var sections: Dwarf.SectionArray = Dwarf.null_section_array;
-    const elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null);
+    var prog_node = parent_prog_node.start("Loading Debug Info", 0);
+    defer prog_node.end();
+    var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null);
+    prog_node.end();
+    prog_node = parent_prog_node.start("Sort Compile Units", 0);
+    try elf_module.dwarf.sortCompileUnits();
     var info: Info = .{
         .address_map = .{},
     };
@@ -38,10 +43,7 @@ pub fn deinit(info: *Info, gpa: Allocator) void {
     info.* = undefined;
 }
 
-pub const ResolveSourceLocationsError = error{
-    MissingDebugInfo,
-    InvalidDebugInfo,
-} || Allocator.Error;
+pub const ResolveSourceLocationsError = Dwarf.ResolveSourceLocationsError;
 
 pub fn resolveSourceLocations(
     info: *Info,
@@ -49,9 +51,10 @@ pub fn resolveSourceLocations(
     sorted_pc_addrs: []const u64,
     /// Asserts its length equals length of `sorted_pc_addrs`.
     output: []std.debug.SourceLocation,
+    parent_prog_node: std.Progress.Node,
 ) ResolveSourceLocationsError!void {
     assert(sorted_pc_addrs.len == output.len);
     if (info.address_map.entries.len != 1) @panic("TODO");
     const elf_module = &info.address_map.values()[0];
-    return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output);
+    return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output, parent_prog_node);
 }
lib/std/debug.zig
@@ -27,6 +27,12 @@ pub const SourceLocation = struct {
     line: u64,
     column: u64,
     file_name: []const u8,
+
+    pub const invalid: SourceLocation = .{
+        .line = 0,
+        .column = 0,
+        .file_name = &.{},
+    };
 };
 
 pub const Symbol = struct {
tools/dump-cov.zig
@@ -28,7 +28,10 @@ pub fn main() !void {
         .sub_path = cov_file_name,
     };
 
-    var debug_info = std.debug.Info.load(gpa, exe_path) catch |err| {
+    const prog_node = std.Progress.start(.{});
+    defer prog_node.end();
+
+    var debug_info = std.debug.Info.load(gpa, exe_path, prog_node) catch |err| {
         fatal("failed to load debug info for {}: {s}", .{ exe_path, @errorName(err) });
     };
     defer debug_info.deinit(gpa);
@@ -51,7 +54,10 @@ pub fn main() !void {
     assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize)));
 
     const source_locations = try arena.alloc(std.debug.SourceLocation, pcs.len);
-    try debug_info.resolveSourceLocations(gpa, pcs, source_locations);
+    try debug_info.resolveSourceLocations(gpa, pcs, source_locations, prog_node);
+    defer for (source_locations) |sl| {
+        gpa.free(sl.file_name);
+    };
 
     for (pcs, source_locations) |pc, sl| {
         try stdout.print("{x}: {s}:{d}:{d}\n", .{