Commit 9a80ac0429

Jakub Konka <kubkon@jakubkonka.com>
2023-10-04 15:14:45
elf: add garbage collection of sections
1 parent 2ee2213
Changed files (4)
src
link
test
link
src/link/Elf/gc.zig
@@ -0,0 +1,161 @@
+pub fn gcAtoms(elf_file: *Elf) !void {
+    var roots = std.ArrayList(*Atom).init(elf_file.base.allocator);
+    defer roots.deinit();
+    try collectRoots(&roots, elf_file);
+    mark(roots, elf_file);
+    prune(elf_file);
+}
+
+fn collectRoots(roots: *std.ArrayList(*Atom), elf_file: *Elf) !void {
+    if (elf_file.entry_index) |index| {
+        const global = elf_file.symbol(index);
+        try markSymbol(global, roots, elf_file);
+    }
+
+    for (elf_file.objects.items) |index| {
+        for (elf_file.file(index).?.object.globals()) |global_index| {
+            const global = elf_file.symbol(global_index);
+            if (global.file(elf_file)) |file| {
+                if (file.index() == index and global.flags.@"export")
+                    try markSymbol(global, roots, elf_file);
+            }
+        }
+    }
+
+    for (elf_file.objects.items) |index| {
+        const object = elf_file.file(index).?.object;
+
+        for (object.atoms.items) |atom_index| {
+            const atom = elf_file.atom(atom_index) orelse continue;
+            if (!atom.flags.alive) continue;
+
+            const shdr = atom.inputShdr(elf_file);
+            const name = atom.name(elf_file);
+            const is_gc_root = blk: {
+                if (shdr.sh_flags & elf.SHF_GNU_RETAIN != 0) break :blk true;
+                if (shdr.sh_type == elf.SHT_NOTE) break :blk true;
+                if (shdr.sh_type == elf.SHT_PREINIT_ARRAY) break :blk true;
+                if (shdr.sh_type == elf.SHT_INIT_ARRAY) break :blk true;
+                if (shdr.sh_type == elf.SHT_FINI_ARRAY) break :blk true;
+                if (mem.startsWith(u8, name, ".ctors")) break :blk true;
+                if (mem.startsWith(u8, name, ".dtors")) break :blk true;
+                if (mem.startsWith(u8, name, ".init")) break :blk true;
+                if (mem.startsWith(u8, name, ".fini")) break :blk true;
+                if (Elf.isCIdentifier(name)) break :blk true;
+                break :blk false;
+            };
+            if (is_gc_root and markAtom(atom)) try roots.append(atom);
+            if (shdr.sh_flags & elf.SHF_ALLOC == 0) atom.flags.visited = true;
+        }
+
+        // Mark every atom referenced by CIE as alive.
+        for (object.cies.items) |cie| {
+            for (cie.relocs(elf_file)) |rel| {
+                const sym = elf_file.symbol(object.symbols.items[rel.r_sym()]);
+                try markSymbol(sym, roots, elf_file);
+            }
+        }
+    }
+}
+
+fn markSymbol(sym: *Symbol, roots: *std.ArrayList(*Atom), elf_file: *Elf) !void {
+    const atom = sym.atom(elf_file) orelse return;
+    if (markAtom(atom)) try roots.append(atom);
+}
+
+fn markAtom(atom: *Atom) bool {
+    const already_visited = atom.flags.visited;
+    atom.flags.visited = true;
+    return atom.flags.alive and !already_visited;
+}
+
+fn markLive(atom: *Atom, elf_file: *Elf) void {
+    if (@import("build_options").enable_logging) track_live_level.incr();
+
+    assert(atom.flags.visited);
+    const object = atom.file(elf_file).?.object;
+
+    for (atom.fdes(elf_file)) |fde| {
+        for (fde.relocs(elf_file)[1..]) |rel| {
+            const target_sym = elf_file.symbol(object.symbols.items[rel.r_sym()]);
+            const target_atom = target_sym.atom(elf_file) orelse continue;
+            target_atom.flags.alive = true;
+            gc_track_live_log.debug("{}marking live atom({d})", .{ track_live_level, target_atom.atom_index });
+            if (markAtom(target_atom)) markLive(target_atom, elf_file);
+        }
+    }
+
+    for (atom.relocs(elf_file)) |rel| {
+        const target_sym = elf_file.symbol(object.symbols.items[rel.r_sym()]);
+        const target_atom = target_sym.atom(elf_file) orelse continue;
+        target_atom.flags.alive = true;
+        gc_track_live_log.debug("{}marking live atom({d})", .{ track_live_level, target_atom.atom_index });
+        if (markAtom(target_atom)) markLive(target_atom, elf_file);
+    }
+}
+
+fn mark(roots: std.ArrayList(*Atom), elf_file: *Elf) void {
+    for (roots.items) |root| {
+        gc_track_live_log.debug("root atom({d})", .{root.atom_index});
+        markLive(root, elf_file);
+    }
+}
+
+fn prune(elf_file: *Elf) void {
+    for (elf_file.objects.items) |index| {
+        for (elf_file.file(index).?.object.atoms.items) |atom_index| {
+            const atom = elf_file.atom(atom_index) orelse continue;
+            if (atom.flags.alive and !atom.flags.visited) {
+                atom.flags.alive = false;
+                atom.markFdesDead(elf_file);
+            }
+        }
+    }
+}
+
+pub fn dumpPrunedAtoms(elf_file: *Elf) !void {
+    const stderr = std.io.getStdErr().writer();
+    for (elf_file.objects.items) |index| {
+        for (elf_file.file(index).?.object.atoms.items) |atom_index| {
+            const atom = elf_file.atom(atom_index) orelse continue;
+            if (!atom.flags.alive)
+                // TODO should we simply print to stderr?
+                try stderr.print("link: removing unused section '{s}' in file '{}'\n", .{
+                    atom.name(elf_file),
+                    atom.file(elf_file).?.fmtPath(),
+                });
+        }
+    }
+}
+
+const Level = struct {
+    value: usize = 0,
+
+    fn incr(self: *@This()) void {
+        self.value += 1;
+    }
+
+    pub fn format(
+        self: *const @This(),
+        comptime unused_fmt_string: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) !void {
+        _ = unused_fmt_string;
+        _ = options;
+        try writer.writeByteNTimes(' ', self.value);
+    }
+};
+
+var track_live_level: Level = .{};
+
+const std = @import("std");
+const assert = std.debug.assert;
+const elf = std.elf;
+const gc_track_live_log = std.log.scoped(.gc_track_live);
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const Atom = @import("Atom.zig");
+const Elf = @import("../Elf.zig");
+const Symbol = @import("Symbol.zig");
src/link/Elf.zig
@@ -53,7 +53,7 @@ phdr_load_tls_data_index: ?u16 = null,
 /// The index into the program headers of a PT_LOAD program header with TLS zerofill data.
 phdr_load_tls_zerofill_index: ?u16 = null,
 
-entry_addr: ?u64 = null,
+entry_index: ?Symbol.Index = null,
 page_size: u32,
 default_sym_version: elf.Elf64_Versym,
 
@@ -665,7 +665,6 @@ pub fn populateMissingMetadata(self: *Elf) !void {
             .alignment = self.page_size,
             .flags = elf.PF_X | elf.PF_R | elf.PF_W,
         });
-        self.entry_addr = null;
     }
 
     if (self.phdr_got_index == null) {
@@ -1283,9 +1282,29 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
     // Any qualifing unresolved symbol will be upgraded to an absolute, weak
     // symbol for potential resolution at load-time.
     self.resolveSymbols();
-    try self.addLinkerDefinedSymbols();
     self.markEhFrameAtomsDead();
     self.markImportsExports();
+
+    // Look for entry address in objects if not set by the incremental compiler.
+    if (self.entry_index == null) {
+        const entry: ?[]const u8 = entry: {
+            if (self.base.options.entry) |entry| break :entry entry;
+            if (!self.isDynLib()) break :entry "_start";
+            break :entry null;
+        };
+        self.entry_index = if (entry) |name| self.globalByName(name) else null;
+    }
+
+    const gc_sections = self.base.options.gc_sections orelse false;
+    if (gc_sections) {
+        try gc.gcAtoms(self);
+
+        // if (self.base.options.print_gc_sections) {
+        //     try gc.dumpPrunedAtoms(self);
+        // }
+    }
+
+    try self.addLinkerDefinedSymbols();
     self.claimUnresolved();
 
     // Scan and create missing synthetic entries such as GOT indirection.
@@ -1310,19 +1329,6 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
         state_log.debug("{}", .{self.dumpState()});
     }
 
-    // Look for entry address in objects if not set by the incremental compiler.
-    if (self.entry_addr == null) {
-        const entry: ?[]const u8 = entry: {
-            if (self.base.options.entry) |entry| break :entry entry;
-            if (!self.isDynLib()) break :entry "_start";
-            break :entry null;
-        };
-        self.entry_addr = if (entry) |name| entry_addr: {
-            const global_index = self.globalByName(name) orelse break :entry_addr null;
-            break :entry_addr self.symbol(global_index).value;
-        } else null;
-    }
-
     // Beyond this point, everything has been allocated a virtual address and we can resolve
     // the relocations, and commit objects to file.
     if (self.zig_module_index) |index| {
@@ -1538,7 +1544,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
     try self.writeAtoms();
     try self.writeSyntheticSections();
 
-    if (self.entry_addr == null and self.base.options.effectiveOutputMode() == .Exe) {
+    if (self.entry_index == null and self.base.options.effectiveOutputMode() == .Exe) {
         log.debug("flushing. no_entry_point_found = true", .{});
         self.error_flags.no_entry_point_found = true;
     } else {
@@ -2580,7 +2586,7 @@ fn writeHeader(self: *Elf) !void {
     mem.writeInt(u32, hdr_buf[index..][0..4], 1, endian);
     index += 4;
 
-    const e_entry = if (elf_type == .REL) 0 else self.entry_addr.?;
+    const e_entry = if (self.entry_index) |entry_index| self.symbol(entry_index).value else 0;
 
     // TODO
     const phdr_table_offset = if (self.phdr_table_index) |ind|
@@ -3199,13 +3205,7 @@ pub fn updateDeclExports(
         }
         const stb_bits: u8 = switch (exp.opts.linkage) {
             .Internal => elf.STB_LOCAL,
-            .Strong => blk: {
-                const entry_name = self.base.options.entry orelse "_start";
-                if (mem.eql(u8, exp_name, entry_name)) {
-                    self.entry_addr = decl_sym.value;
-                }
-                break :blk elf.STB_GLOBAL;
-            },
+            .Strong => elf.STB_GLOBAL,
             .Weak => elf.STB_WEAK,
             .LinkOnce => {
                 try mod.failed_exports.ensureUnusedCapacity(mod.gpa, 1);
@@ -4693,6 +4693,16 @@ fn calcNumIRelativeRelocs(self: *Elf) usize {
     return count;
 }
 
+pub fn isCIdentifier(name: []const u8) bool {
+    if (name.len == 0) return false;
+    const first_c = name[0];
+    if (!std.ascii.isAlphabetic(first_c) and first_c != '_') return false;
+    for (name[1..]) |c| {
+        if (!std.ascii.isAlphanumeric(c) and c != '_') return false;
+    }
+    return true;
+}
+
 pub fn atom(self: *Elf, atom_index: Atom.Index) ?*Atom {
     if (atom_index == 0) return null;
     assert(atom_index < self.atoms.items.len);
@@ -5202,6 +5212,7 @@ const mem = std.mem;
 
 const codegen = @import("../codegen.zig");
 const eh_frame = @import("Elf/eh_frame.zig");
+const gc = @import("Elf/gc.zig");
 const glibc = @import("../glibc.zig");
 const link = @import("../link.zig");
 const lldMain = @import("../main.zig").lldMain;
test/link/elf.zig
@@ -60,6 +60,7 @@ fn testGcSections(b: *Build, opts: Options) *Step {
         \\}
     );
     obj.link_function_sections = true;
+    obj.link_data_sections = true;
     obj.is_linking_libc = true;
     obj.is_linking_libcpp = true;
 
CMakeLists.txt
@@ -595,6 +595,7 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/src/link/Elf/ZigModule.zig"
     "${CMAKE_SOURCE_DIR}/src/link/Elf/eh_frame.zig"
     "${CMAKE_SOURCE_DIR}/src/link/Elf/file.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/Elf/gc.zig"
     "${CMAKE_SOURCE_DIR}/src/link/Elf/synthetic_sections.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Archive.zig"