Commit 398672eb30

Jakub Konka <kubkon@jakubkonka.com>
2021-07-13 18:42:17
zld: add temp basic handling of debugging stabs
1 parent e17f12d
Changed files (2)
src
src/link/MachO/Object.zig
@@ -27,7 +27,6 @@ header: ?macho.mach_header_64 = null,
 file: ?fs.File = null,
 file_offset: ?u32 = null,
 name: ?[]const u8 = null,
-mtime: ?u64 = null,
 
 load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
 
@@ -51,9 +50,17 @@ symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
 strtab: std.ArrayListUnmanaged(u8) = .{},
 data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
 
+// Debug info
+debug_info: ?DebugInfo = null,
+tu_name: ?[]const u8 = null,
+tu_comp_dir: ?[]const u8 = null,
+mtime: ?u64 = null,
+
 symbols: std.ArrayListUnmanaged(*Symbol) = .{},
 sections_as_symbols: std.AutoHashMapUnmanaged(u8, *Symbol) = .{},
 
+text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{},
+
 const DebugInfo = struct {
     inner: dwarf.DwarfInfo,
     debug_info: []u8,
@@ -160,6 +167,19 @@ pub fn deinit(self: *Object) void {
     self.strtab.deinit(self.allocator);
     self.symbols.deinit(self.allocator);
     self.sections_as_symbols.deinit(self.allocator);
+    self.text_blocks.deinit(self.allocator);
+
+    if (self.debug_info) |*db| {
+        db.deinit(self.allocator);
+    }
+
+    if (self.tu_name) |n| {
+        self.allocator.free(n);
+    }
+
+    if (self.tu_comp_dir) |n| {
+        self.allocator.free(n);
+    }
 
     if (self.name) |n| {
         self.allocator.free(n);
@@ -203,6 +223,7 @@ pub fn parse(self: *Object) !void {
     try self.readLoadCommands(reader);
     try self.parseSymtab();
     try self.parseDataInCode();
+    try self.parseDebugInfo();
 }
 
 pub fn readLoadCommands(self: *Object, reader: anytype) !void {
@@ -431,11 +452,27 @@ const TextBlockParser = struct {
         else
             max_align;
 
+        const stab: ?TextBlock.Stab = if (self.object.debug_info) |di| blk: {
+            // TODO there has to be a better to handle this.
+            for (di.inner.func_list.items) |func| {
+                if (func.pc_range) |range| {
+                    if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) {
+                        break :blk TextBlock.Stab{
+                            .function = range.end - range.start,
+                        };
+                    }
+                }
+            }
+            if (self.zld.globals.contains(senior_sym.name)) break :blk .global;
+            break :blk .static;
+        } else null;
+
         const block = try self.allocator.create(TextBlock);
         errdefer self.allocator.destroy(block);
 
         block.* = TextBlock.init(self.allocator);
         block.local_sym_index = senior_nlist.index;
+        block.stab = stab;
         block.code = try self.allocator.dupe(u8, code);
         block.size = size;
         block.alignment = actual_align;
@@ -531,9 +568,11 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void {
 
         // Is there any padding between symbols within the section?
         const is_splittable = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
+        // TODO is it perhaps worth skip parsing subsections in Debug mode and not worry about
+        // duplicates at all? Need some benchmarks!
         // const is_splittable = false;
 
-        const has_dices: bool = blk: {
+        zld.has_dices = blk: {
             if (self.text_section_index) |index| {
                 if (index != id) break :blk false;
                 if (self.data_in_code_entries.items.len == 0) break :blk false;
@@ -541,7 +580,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void {
             }
             break :blk false;
         };
-        zld.has_dices = has_dices;
+        zld.has_stabs = zld.has_stabs or self.debug_info != null;
 
         next: {
             if (is_splittable) blocks: {
@@ -625,6 +664,8 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void {
                     } else {
                         try zld.blocks.putNoClobber(zld.allocator, match, block);
                     }
+
+                    try self.text_blocks.append(self.allocator, block);
                 }
 
                 var parser = TextBlockParser{
@@ -681,6 +722,8 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void {
                     } else {
                         try zld.blocks.putNoClobber(zld.allocator, match, block);
                     }
+
+                    try self.text_blocks.append(self.allocator, block);
                 }
 
                 break :next;
@@ -758,9 +801,25 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void {
                     reg.segment_id = match.seg;
                     reg.section_id = match.sect;
 
+                    const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: {
+                        // TODO there has to be a better to handle this.
+                        for (di.inner.func_list.items) |func| {
+                            if (func.pc_range) |range| {
+                                if (reg.address >= range.start and reg.address < range.end) {
+                                    break :blk TextBlock.Stab{
+                                        .function = range.end - range.start,
+                                    };
+                                }
+                            }
+                        }
+                        if (zld.globals.contains(sym.name)) break :blk .global;
+                        break :blk .static;
+                    } else null;
+
                     contained.appendAssumeCapacity(.{
                         .local_sym_index = reg.local_sym_index,
                         .offset = nlist_with_index.nlist.n_value - sect.addr,
+                        .stab = stab,
                     });
                 }
 
@@ -785,6 +844,8 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void {
             } else {
                 try zld.blocks.putNoClobber(zld.allocator, match, block);
             }
+
+            try self.text_blocks.append(self.allocator, block);
         }
     }
 }
@@ -861,13 +922,12 @@ fn parseSymtab(self: *Object) !void {
 }
 
 pub fn parseDebugInfo(self: *Object) !void {
+    log.debug("parsing debug info in '{s}'", .{self.name.?});
+
     var debug_info = blk: {
         var di = try DebugInfo.parseFromObject(self.allocator, self);
         break :blk di orelse return;
     };
-    defer debug_info.deinit(self.allocator);
-
-    log.debug("parsing debug info in '{s}'", .{self.name.?});
 
     // We assume there is only one CU.
     const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) {
@@ -881,6 +941,10 @@ pub fn parseDebugInfo(self: *Object) !void {
     const name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_name);
     const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_comp_dir);
 
+    self.debug_info = debug_info;
+    self.tu_name = try self.allocator.dupe(u8, name);
+    self.tu_comp_dir = try self.allocator.dupe(u8, comp_dir);
+
     if (self.mtime == null) {
         self.mtime = mtime: {
             const file = self.file orelse break :mtime 0;
@@ -888,67 +952,6 @@ pub fn parseDebugInfo(self: *Object) !void {
             break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000));
         };
     }
-
-    try self.stabs.ensureUnusedCapacity(self.allocator, self.symbols.items.len + 4);
-
-    // Current dir
-    self.stabs.appendAssumeCapacity(try Symbol.Stab.new(self.allocator, comp_dir, .{
-        .kind = .so,
-        .file = self,
-    }));
-
-    // Artifact name
-    self.stabs.appendAssumeCapacity(try Symbol.Stab.new(self.allocator, name, .{
-        .kind = .so,
-        .file = self,
-    }));
-
-    // Path to object file with debug info
-    self.stabs.appendAssumeCapacity(try Symbol.Stab.new(self.allocator, self.name.?, .{
-        .kind = .oso,
-        .file = self,
-    }));
-
-    for (self.symbols.items) |sym| {
-        if (sym.cast(Symbol.Regular)) |reg| {
-            const size: u64 = blk: for (debug_info.inner.func_list.items) |func| {
-                if (func.pc_range) |range| {
-                    if (reg.address >= range.start and reg.address < range.end) {
-                        break :blk range.end - range.start;
-                    }
-                }
-            } else 0;
-
-            const stab = try Symbol.Stab.new(self.allocator, sym.name, .{
-                .kind = kind: {
-                    if (size > 0) break :kind .function;
-                    switch (reg.linkage) {
-                        .translation_unit => break :kind .static,
-                        else => break :kind .global,
-                    }
-                },
-                .size = size,
-                .symbol = sym,
-                .file = self,
-            });
-            self.stabs.appendAssumeCapacity(stab);
-        } else if (sym.cast(Symbol.Tentative)) |_| {
-            const stab = try Symbol.Stab.new(self.allocator, sym.name, .{
-                .kind = .global,
-                .size = 0,
-                .symbol = sym,
-                .file = self,
-            });
-            self.stabs.appendAssumeCapacity(stab);
-        }
-    }
-
-    // Closing delimiter.
-    const delim_stab = try Symbol.Stab.new(self.allocator, "", .{
-        .kind = .so,
-        .file = self,
-    });
-    self.stabs.appendAssumeCapacity(delim_stab);
 }
 
 pub fn parseDataInCode(self: *Object) !void {
src/link/MachO/Zld.zig
@@ -115,6 +115,7 @@ stub_helper_stubs_start_off: ?u64 = null,
 blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{},
 
 has_dices: bool = false,
+has_stabs: bool = false,
 
 pub const Output = struct {
     tag: enum { exe, dylib },
@@ -125,6 +126,7 @@ pub const Output = struct {
 pub const TextBlock = struct {
     allocator: *Allocator,
     local_sym_index: u32,
+    stab: ?Stab = null,
     aliases: std.ArrayList(u32),
     references: std.AutoArrayHashMap(u32, void),
     contained: ?[]SymbolAtOffset = null,
@@ -140,6 +142,76 @@ pub const TextBlock = struct {
     pub const SymbolAtOffset = struct {
         local_sym_index: u32,
         offset: u64,
+        stab: ?Stab = null,
+    };
+
+    pub const Stab = union(enum) {
+        function: u64,
+        static,
+        global,
+
+        pub fn asNlists(stab: Stab, local_sym_index: u32, zld: *Zld) ![]macho.nlist_64 {
+            var nlists = std.ArrayList(macho.nlist_64).init(zld.allocator);
+            defer nlists.deinit();
+
+            const sym = zld.locals.items[local_sym_index];
+            const reg = sym.payload.regular;
+
+            switch (stab) {
+                .function => |size| {
+                    try nlists.ensureUnusedCapacity(4);
+                    const section_id = reg.sectionId(zld);
+                    nlists.appendAssumeCapacity(.{
+                        .n_strx = 0,
+                        .n_type = macho.N_BNSYM,
+                        .n_sect = section_id,
+                        .n_desc = 0,
+                        .n_value = reg.address,
+                    });
+                    nlists.appendAssumeCapacity(.{
+                        .n_strx = try zld.strtab.getOrPut(sym.name),
+                        .n_type = macho.N_FUN,
+                        .n_sect = section_id,
+                        .n_desc = 0,
+                        .n_value = reg.address,
+                    });
+                    nlists.appendAssumeCapacity(.{
+                        .n_strx = 0,
+                        .n_type = macho.N_FUN,
+                        .n_sect = 0,
+                        .n_desc = 0,
+                        .n_value = size,
+                    });
+                    nlists.appendAssumeCapacity(.{
+                        .n_strx = 0,
+                        .n_type = macho.N_ENSYM,
+                        .n_sect = section_id,
+                        .n_desc = 0,
+                        .n_value = size,
+                    });
+                },
+                .global => {
+                    try nlists.append(.{
+                        .n_strx = try zld.strtab.getOrPut(sym.name),
+                        .n_type = macho.N_GSYM,
+                        .n_sect = 0,
+                        .n_desc = 0,
+                        .n_value = 0,
+                    });
+                },
+                .static => {
+                    try nlists.append(.{
+                        .n_strx = try zld.strtab.getOrPut(sym.name),
+                        .n_type = macho.N_STSYM,
+                        .n_sect = reg.sectionId(zld),
+                        .n_desc = 0,
+                        .n_value = reg.address,
+                    });
+                },
+            }
+
+            return nlists.toOwnedSlice();
+        }
     };
 
     pub fn init(allocator: *Allocator) TextBlock {
@@ -178,6 +250,9 @@ pub const TextBlock = struct {
     pub fn print_this(self: *const TextBlock, zld: *Zld) void {
         log.warn("TextBlock", .{});
         log.warn("  {}: {}", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] });
+        if (self.stab) |stab| {
+            log.warn("  stab: {}", .{stab});
+        }
         if (self.aliases.items.len > 0) {
             log.warn("  aliases:", .{});
             for (self.aliases.items) |index| {
@@ -193,10 +268,18 @@ pub const TextBlock = struct {
         if (self.contained) |contained| {
             log.warn("  contained symbols:", .{});
             for (contained) |sym_at_off| {
-                log.warn("    {}: {}\n", .{
-                    sym_at_off.offset,
-                    zld.locals.items[sym_at_off.local_sym_index],
-                });
+                if (sym_at_off.stab) |stab| {
+                    log.warn("    {}: {}, stab: {}\n", .{
+                        sym_at_off.offset,
+                        zld.locals.items[sym_at_off.local_sym_index],
+                        stab,
+                    });
+                } else {
+                    log.warn("    {}: {}\n", .{
+                        sym_at_off.offset,
+                        zld.locals.items[sym_at_off.local_sym_index],
+                    });
+                }
             }
         }
         log.warn("  code.len = {}", .{self.code.len});
@@ -2487,8 +2570,10 @@ fn writeSymbolTable(self: *Zld) !void {
     for (self.locals.items) |symbol, i| {
         if (i == 0) continue; // skip null symbol
         if (symbol.isTemp()) continue; // TODO when merging codepaths, this should go into freelist
+
         const reg = symbol.payload.regular;
         const nlist = try symbol.asNlist(self, &self.strtab);
+
         if (reg.linkage == .translation_unit) {
             try locals.append(nlist);
         } else {
@@ -2496,6 +2581,61 @@ fn writeSymbolTable(self: *Zld) !void {
         }
     }
 
+    if (self.has_stabs) {
+        for (self.objects.items) |object| {
+            if (object.debug_info == null) continue;
+
+            // Open scope
+            try locals.ensureUnusedCapacity(4);
+            locals.appendAssumeCapacity(.{
+                .n_strx = try self.strtab.getOrPut(object.tu_comp_dir.?),
+                .n_type = macho.N_SO,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = 0,
+            });
+            locals.appendAssumeCapacity(.{
+                .n_strx = try self.strtab.getOrPut(object.tu_name.?),
+                .n_type = macho.N_SO,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = 0,
+            });
+            locals.appendAssumeCapacity(.{
+                .n_strx = try self.strtab.getOrPut(object.name.?),
+                .n_type = macho.N_OSO,
+                .n_sect = 0,
+                .n_desc = 1,
+                .n_value = object.mtime orelse 0,
+            });
+
+            for (object.text_blocks.items) |block| {
+                if (block.stab) |stab| {
+                    const nlists = try stab.asNlists(block.local_sym_index, self);
+                    defer self.allocator.free(nlists);
+                    try locals.appendSlice(nlists);
+                } else {
+                    const contained = block.contained orelse continue;
+                    for (contained) |sym_at_off| {
+                        const stab = sym_at_off.stab orelse continue;
+                        const nlists = try stab.asNlists(sym_at_off.local_sym_index, self);
+                        defer self.allocator.free(nlists);
+                        try locals.appendSlice(nlists);
+                    }
+                }
+            }
+
+            // Close scope
+            locals.appendAssumeCapacity(.{
+                .n_strx = 0,
+                .n_type = macho.N_SO,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = 0,
+            });
+        }
+    }
+
     var undefs = std.ArrayList(macho.nlist_64).init(self.allocator);
     defer undefs.deinit();
     var undef_dir = std.StringHashMap(u32).init(self.allocator);