Commit 53aa9d75a9

Andrew Kelley <andrew@ziglang.org>
2024-08-04 02:42:08
std.debug.Info.resolveSourceLocations: O(N) implementation
1 parent c2ab461
Changed files (3)
lib
tools
lib/std/debug/Dwarf.zig
@@ -152,6 +152,7 @@ pub const CompileUnit = struct {
         pub const LineEntry = struct {
             line: u32,
             column: u32,
+            /// Offset by 1 depending on whether Dwarf version is >= 5.
             file: u32,
         };
 
@@ -809,7 +810,7 @@ pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 {
     return null;
 }
 
-const ScanError = error{
+pub const ScanError = error{
     InvalidDebugInfo,
     MissingDebugInfo,
 } || Allocator.Error || std.debug.FixedBufferReader.Error;
@@ -1113,7 +1114,7 @@ pub fn sortCompileUnits(d: *Dwarf) ScanError!void {
     }
 
     std.mem.sortUnstable(CompileUnit, d.compile_unit_list.items, {}, struct {
-        fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool {
+        pub fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool {
             _ = ctx;
             const a_range = a.pc_range orelse return false;
             const b_range = b.pc_range orelse return true;
@@ -1641,14 +1642,18 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) !
     };
 }
 
+pub fn populateSrcLocCache(d: *Dwarf, gpa: Allocator, cu: *CompileUnit) ScanError!void {
+    if (cu.src_loc_cache != null) return;
+    cu.src_loc_cache = try runLineNumberProgram(d, gpa, cu);
+}
+
 pub fn getLineNumberInfo(
     d: *Dwarf,
     gpa: Allocator,
     compile_unit: *CompileUnit,
     target_address: u64,
 ) !std.debug.SourceLocation {
-    if (compile_unit.src_loc_cache == null)
-        compile_unit.src_loc_cache = try runLineNumberProgram(d, gpa, compile_unit);
+    try populateSrcLocCache(d, gpa, compile_unit);
     const slc = &compile_unit.src_loc_cache.?;
     const entry = try slc.findSource(target_address);
     const file_index = entry.file - @intFromBool(slc.version < 5);
@@ -2343,52 +2348,6 @@ pub const ElfModule = struct {
     }
 };
 
-pub const ResolveSourceLocationsError = Allocator.Error || FixedBufferReader.Error;
-
-/// Given an array of virtual memory addresses, sorted ascending, outputs a
-/// corresponding array of source locations, by appending to the provided
-/// array list.
-pub fn resolveSourceLocations(
-    d: *Dwarf,
-    gpa: Allocator,
-    sorted_pc_addrs: []const u64,
-    /// Asserts its length equals length of `sorted_pc_addrs`.
-    output: []std.debug.SourceLocation,
-) ResolveSourceLocationsError!void {
-    assert(sorted_pc_addrs.len == output.len);
-    assert(d.compile_units_sorted);
-
-    var cu_i: usize = 0;
-    var cu: *CompileUnit = &d.compile_unit_list.items[0];
-    var range = cu.pc_range.?;
-    next_pc: for (sorted_pc_addrs, output) |pc, *out| {
-        while (pc >= range.end) {
-            cu_i += 1;
-            if (cu_i >= d.compile_unit_list.items.len) {
-                out.* = std.debug.SourceLocation.invalid;
-                continue :next_pc;
-            }
-            cu = &d.compile_unit_list.items[cu_i];
-            range = cu.pc_range orelse {
-                out.* = std.debug.SourceLocation.invalid;
-                continue :next_pc;
-            };
-        }
-        if (pc < range.start) {
-            out.* = std.debug.SourceLocation.invalid;
-            continue :next_pc;
-        }
-        // TODO: instead of calling this function, break the function up into one that parses the
-        // information once and prepares a context that can be reused for the entire batch.
-        if (getLineNumberInfo(d, gpa, cu, pc)) |src_loc| {
-            out.* = src_loc;
-        } else |err| switch (err) {
-            error.MissingDebugInfo, error.InvalidDebugInfo => out.* = std.debug.SourceLocation.invalid,
-            else => |e| return e,
-        }
-    }
-}
-
 fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol {
     if (di.findCompileUnit(address)) |compile_unit| {
         return .{
lib/std/debug/Info.zig
@@ -12,12 +12,66 @@ const Path = std.Build.Cache.Path;
 const Dwarf = std.debug.Dwarf;
 const page_size = std.mem.page_size;
 const assert = std.debug.assert;
+const Hash = std.hash.Wyhash;
 
 const Info = @This();
 
 /// Sorted by key, ascending.
 address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule),
 
+/// Provides a globally-scoped integer index for directories.
+///
+/// As opposed to, for example, a directory index that is compilation-unit
+/// scoped inside a single ELF module.
+///
+/// String memory references the memory-mapped debug information.
+///
+/// Protected by `mutex`.
+directories: std.StringArrayHashMapUnmanaged(void),
+/// Provides a globally-scoped integer index for files.
+///
+/// String memory references the memory-mapped debug information.
+///
+/// Protected by `mutex`.
+files: std.ArrayHashMapUnmanaged(File, void, File.MapContext, false),
+/// Protects `directories` and `files`.
+mutex: std.Thread.Mutex,
+
+pub const SourceLocation = struct {
+    file: File.Index,
+    line: u32,
+    column: u32,
+
+    pub const invalid: SourceLocation = .{
+        .file = .invalid,
+        .line = 0,
+        .column = 0,
+    };
+};
+
+pub const File = struct {
+    directory_index: u32,
+    basename: []const u8,
+
+    pub const Index = enum(u32) {
+        invalid = std.math.maxInt(u32),
+        _,
+    };
+
+    pub const MapContext = struct {
+        pub fn hash(ctx: MapContext, a: File) u32 {
+            _ = ctx;
+            return @truncate(Hash.hash(a.directory_index, a.basename));
+        }
+
+        pub fn eql(ctx: MapContext, a: File, b: File, b_index: usize) bool {
+            _ = ctx;
+            _ = b_index;
+            return a.directory_index == b.directory_index and std.mem.eql(u8, a.basename, b.basename);
+        }
+    };
+};
+
 pub const LoadError = Dwarf.ElfModule.LoadError;
 
 pub fn load(gpa: Allocator, path: Path) LoadError!Info {
@@ -26,12 +80,17 @@ pub fn load(gpa: Allocator, path: Path) LoadError!Info {
     try elf_module.dwarf.sortCompileUnits();
     var info: Info = .{
         .address_map = .{},
+        .directories = .{},
+        .files = .{},
+        .mutex = .{},
     };
     try info.address_map.put(gpa, elf_module.base_address, elf_module);
     return info;
 }
 
 pub fn deinit(info: *Info, gpa: Allocator) void {
+    info.directories.deinit(gpa);
+    info.files.deinit(gpa);
     for (info.address_map.values()) |*elf_module| {
         elf_module.dwarf.deinit(gpa);
     }
@@ -39,17 +98,98 @@ pub fn deinit(info: *Info, gpa: Allocator) void {
     info.* = undefined;
 }
 
-pub const ResolveSourceLocationsError = Dwarf.ResolveSourceLocationsError;
+pub fn fileAt(info: *Info, index: File.Index) *File {
+    return &info.files.keys()[@intFromEnum(index)];
+}
+
+pub const ResolveSourceLocationsError = Dwarf.ScanError;
 
+/// Given an array of virtual memory addresses, sorted ascending, outputs a
+/// corresponding array of source locations.
 pub fn resolveSourceLocations(
     info: *Info,
     gpa: Allocator,
     sorted_pc_addrs: []const u64,
     /// Asserts its length equals length of `sorted_pc_addrs`.
-    output: []std.debug.SourceLocation,
+    output: []SourceLocation,
 ) ResolveSourceLocationsError!void {
     assert(sorted_pc_addrs.len == output.len);
     if (info.address_map.entries.len != 1) @panic("TODO");
     const elf_module = &info.address_map.values()[0];
-    return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output);
+    return resolveSourceLocationsDwarf(info, gpa, sorted_pc_addrs, output, &elf_module.dwarf);
+}
+
+pub fn resolveSourceLocationsDwarf(
+    info: *Info,
+    gpa: Allocator,
+    sorted_pc_addrs: []const u64,
+    /// Asserts its length equals length of `sorted_pc_addrs`.
+    output: []SourceLocation,
+    d: *Dwarf,
+) ResolveSourceLocationsError!void {
+    assert(sorted_pc_addrs.len == output.len);
+    assert(d.compile_units_sorted);
+
+    var cu_i: usize = 0;
+    var line_table_i: usize = 0;
+    var cu: *Dwarf.CompileUnit = &d.compile_unit_list.items[0];
+    var range = cu.pc_range.?;
+    // Protects directories and files tables from other threads.
+    info.mutex.lock();
+    defer info.mutex.unlock();
+    next_pc: for (sorted_pc_addrs, output) |pc, *out| {
+        while (pc >= range.end) {
+            cu_i += 1;
+            if (cu_i >= d.compile_unit_list.items.len) {
+                out.* = SourceLocation.invalid;
+                continue :next_pc;
+            }
+            cu = &d.compile_unit_list.items[cu_i];
+            line_table_i = 0;
+            range = cu.pc_range orelse {
+                out.* = SourceLocation.invalid;
+                continue :next_pc;
+            };
+        }
+        if (pc < range.start) {
+            out.* = SourceLocation.invalid;
+            continue :next_pc;
+        }
+        if (line_table_i == 0) {
+            line_table_i = 1;
+            info.mutex.unlock();
+            defer info.mutex.lock();
+            d.populateSrcLocCache(gpa, cu) catch |err| switch (err) {
+                error.MissingDebugInfo, error.InvalidDebugInfo => {
+                    out.* = SourceLocation.invalid;
+                    cu_i += 1;
+                    if (cu_i < d.compile_unit_list.items.len) {
+                        cu = &d.compile_unit_list.items[cu_i];
+                        line_table_i = 0;
+                        if (cu.pc_range) |r| range = r;
+                    }
+                    continue :next_pc;
+                },
+                else => |e| return e,
+            };
+        }
+        const slc = &cu.src_loc_cache.?;
+        const table_addrs = slc.line_table.keys();
+        while (line_table_i < table_addrs.len and table_addrs[line_table_i] < pc) line_table_i += 1;
+
+        const entry = slc.line_table.values()[line_table_i - 1];
+        const corrected_file_index = entry.file - @intFromBool(slc.version < 5);
+        const file_entry = slc.files[corrected_file_index];
+        const dir_path = slc.directories[file_entry.dir_index].path;
+        const dir_gop = try info.directories.getOrPut(gpa, dir_path);
+        const file_gop = try info.files.getOrPut(gpa, .{
+            .directory_index = @intCast(dir_gop.index),
+            .basename = file_entry.path,
+        });
+        out.* = .{
+            .file = @enumFromInt(file_gop.index),
+            .line = entry.line,
+            .column = entry.column,
+        };
+    }
 }
tools/dump-cov.zig
@@ -50,15 +50,14 @@ pub fn main() !void {
     }
     assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize)));
 
-    const source_locations = try arena.alloc(std.debug.SourceLocation, pcs.len);
+    const source_locations = try arena.alloc(std.debug.Info.SourceLocation, pcs.len);
     try debug_info.resolveSourceLocations(gpa, pcs, source_locations);
-    defer for (source_locations) |sl| {
-        gpa.free(sl.file_name);
-    };
 
     for (pcs, source_locations) |pc, sl| {
-        try stdout.print("{x}: {s}:{d}:{d}\n", .{
-            pc, sl.file_name, sl.line, sl.column,
+        const file = debug_info.fileAt(sl.file);
+        const dir_name = debug_info.directories.keys()[file.directory_index];
+        try stdout.print("{x}: {s}/{s}:{d}:{d}\n", .{
+            pc, dir_name, file.basename, sl.line, sl.column,
         });
     }