Commit 8bd01eb7a9

Jakub Konka <kubkon@jakubkonka.com>
2024-02-12 23:59:19
elf: refactor archive specific object parsing logic
1 parent 616a8f9
Changed files (3)
src/link/Elf/Object.zig
@@ -55,12 +55,26 @@ pub fn deinit(self: *Object, allocator: Allocator) void {
 
 pub fn parse(self: *Object, elf_file: *Elf) !void {
     const gpa = elf_file.base.comp.gpa;
-    const offset = if (self.archive) |ar| ar.offset else 0;
     const handle = elf_file.fileHandle(self.file_handle);
+
+    try self.parseCommon(gpa, handle, elf_file);
+    try self.initAtoms(gpa, handle, elf_file);
+    try self.initSymtab(gpa, elf_file);
+
+    for (self.shdrs.items, 0..) |shdr, i| {
+        const atom = elf_file.atom(self.atoms.items[i]) orelse continue;
+        if (!atom.flags.alive) continue;
+        if (shdr.sh_type == elf.SHT_X86_64_UNWIND or mem.eql(u8, atom.name(elf_file), ".eh_frame"))
+            try self.parseEhFrame(gpa, handle, @as(u32, @intCast(i)), elf_file);
+    }
+}
+
+fn parseCommon(self: *Object, allocator: Allocator, handle: std.fs.File, elf_file: *Elf) !void {
+    const offset = if (self.archive) |ar| ar.offset else 0;
     const file_size = (try handle.stat()).size;
 
-    const header_buffer = try Elf.preadAllAlloc(gpa, handle, offset, @sizeOf(elf.Elf64_Ehdr));
-    defer gpa.free(header_buffer);
+    const header_buffer = try Elf.preadAllAlloc(allocator, handle, offset, @sizeOf(elf.Elf64_Ehdr));
+    defer allocator.free(header_buffer);
     self.header = @as(*align(1) const elf.Elf64_Ehdr, @ptrCast(header_buffer)).*;
 
     const target = elf_file.base.comp.root_mod.resolved_target.result;
@@ -87,10 +101,10 @@ pub fn parse(self: *Object, elf_file: *Elf) !void {
         return error.MalformedObject;
     }
 
-    const shdrs_buffer = try Elf.preadAllAlloc(gpa, handle, offset + shoff, shsize);
-    defer gpa.free(shdrs_buffer);
+    const shdrs_buffer = try Elf.preadAllAlloc(allocator, handle, offset + shoff, shsize);
+    defer allocator.free(shdrs_buffer);
     const shdrs = @as([*]align(1) const elf.Elf64_Shdr, @ptrCast(shdrs_buffer.ptr))[0..shnum];
-    try self.shdrs.appendUnalignedSlice(gpa, shdrs);
+    try self.shdrs.appendUnalignedSlice(allocator, shdrs);
 
     for (self.shdrs.items) |shdr| {
         if (shdr.sh_type != elf.SHT_NOBITS) {
@@ -101,15 +115,15 @@ pub fn parse(self: *Object, elf_file: *Elf) !void {
         }
     }
 
-    const shstrtab = try self.preadShdrContentsAlloc(gpa, handle, self.header.?.e_shstrndx);
-    defer gpa.free(shstrtab);
+    const shstrtab = try self.preadShdrContentsAlloc(allocator, handle, self.header.?.e_shstrndx);
+    defer allocator.free(shstrtab);
     for (self.shdrs.items) |shdr| {
         if (shdr.sh_name >= shstrtab.len) {
             try elf_file.reportParseError2(self.index, "corrupt section name offset", .{});
             return error.MalformedObject;
         }
     }
-    try self.strtab.appendSlice(gpa, shstrtab);
+    try self.strtab.appendSlice(allocator, shstrtab);
 
     const symtab_index = for (self.shdrs.items, 0..) |shdr, i| switch (shdr.sh_type) {
         elf.SHT_SYMTAB => break @as(u16, @intCast(i)),
@@ -120,8 +134,8 @@ pub fn parse(self: *Object, elf_file: *Elf) !void {
         const shdr = self.shdrs.items[index];
         self.first_global = shdr.sh_info;
 
-        const raw_symtab = try self.preadShdrContentsAlloc(gpa, handle, index);
-        defer gpa.free(raw_symtab);
+        const raw_symtab = try self.preadShdrContentsAlloc(allocator, handle, index);
+        defer allocator.free(raw_symtab);
         const nsyms = math.divExact(usize, raw_symtab.len, @sizeOf(elf.Elf64_Sym)) catch {
             try elf_file.reportParseError2(self.index, "symbol table not evenly divisible", .{});
             return error.MalformedObject;
@@ -129,11 +143,11 @@ pub fn parse(self: *Object, elf_file: *Elf) !void {
         const symtab = @as([*]align(1) const elf.Elf64_Sym, @ptrCast(raw_symtab.ptr))[0..nsyms];
 
         const strtab_bias = @as(u32, @intCast(self.strtab.items.len));
-        const strtab = try self.preadShdrContentsAlloc(gpa, handle, shdr.sh_link);
-        defer gpa.free(strtab);
-        try self.strtab.appendSlice(gpa, strtab);
+        const strtab = try self.preadShdrContentsAlloc(allocator, handle, shdr.sh_link);
+        defer allocator.free(strtab);
+        try self.strtab.appendSlice(allocator, strtab);
 
-        try self.symtab.ensureUnusedCapacity(gpa, symtab.len);
+        try self.symtab.ensureUnusedCapacity(allocator, symtab.len);
         for (symtab) |sym| {
             const out_sym = self.symtab.addOneAssumeCapacity();
             out_sym.* = sym;
@@ -145,21 +159,6 @@ pub fn parse(self: *Object, elf_file: *Elf) !void {
     }
 }
 
-pub fn init(self: *Object, elf_file: *Elf) !void {
-    const gpa = elf_file.base.comp.gpa;
-    const handle = elf_file.fileHandle(self.file_handle);
-
-    try self.initAtoms(gpa, handle, elf_file);
-    try self.initSymtab(gpa, elf_file);
-
-    for (self.shdrs.items, 0..) |shdr, i| {
-        const atom = elf_file.atom(self.atoms.items[i]) orelse continue;
-        if (!atom.flags.alive) continue;
-        if (shdr.sh_type == elf.SHT_X86_64_UNWIND or mem.eql(u8, atom.name(elf_file), ".eh_frame"))
-            try self.parseEhFrame(gpa, handle, @as(u32, @intCast(i)), elf_file);
-    }
-}
-
 fn initAtoms(self: *Object, allocator: Allocator, handle: std.fs.File, elf_file: *Elf) !void {
     const shdrs = self.shdrs.items;
     try self.atoms.resize(allocator, shdrs.len);
@@ -782,6 +781,12 @@ pub fn addAtomsToRelaSections(self: Object, elf_file: *Elf) !void {
     }
 }
 
+pub fn parseAr(self: *Object, elf_file: *Elf) !void {
+    const gpa = elf_file.base.comp.gpa;
+    const handle = elf_file.fileHandle(self.file_handle);
+    try self.parseCommon(gpa, handle, elf_file);
+}
+
 pub fn updateArSymtab(self: Object, ar_symtab: *Archive.ArSymtab, elf_file: *Elf) !void {
     const comp = elf_file.base.comp;
     const gpa = comp.gpa;
src/link/Elf/relocatable.zig
@@ -7,18 +7,20 @@ pub fn flushStaticLib(elf_file: *Elf, comp: *Compilation, module_obj_path: ?[]co
     try positionals.ensureUnusedCapacity(comp.objects.len);
     positionals.appendSliceAssumeCapacity(comp.objects);
 
-    // This is a set of object files emitted by clang in a single `build-exe` invocation.
-    // For instance, the implicit `a.o` as compiled by `zig build-exe a.c` will end up
-    // in this set.
     for (comp.c_object_table.keys()) |key| {
         try positionals.append(.{ .path = key.status.success.object_path });
     }
 
     if (module_obj_path) |path| try positionals.append(.{ .path = path });
 
+    if (comp.include_compiler_rt) {
+        try positionals.append(.{ .path = comp.compiler_rt_obj.?.full_object_path });
+    }
+
     for (positionals.items) |obj| {
-        elf_file.parsePositional(obj.path, obj.must_link) catch |err| switch (err) {
+        parsePositional(elf_file, obj.path) catch |err| switch (err) {
             error.MalformedObject, error.MalformedArchive, error.InvalidCpuArch => continue, // already reported
+            error.UnknownFileType => try elf_file.reportParseError(obj.path, "unknown file type for an object file", .{}),
             else => |e| try elf_file.reportParseError(
                 obj.path,
                 "unexpected error: parsing input file failed with error {s}",
@@ -172,13 +174,6 @@ pub fn flushObject(elf_file: *Elf, comp: *Compilation, module_obj_path: ?[]const
 
     if (comp.link_errors.items.len > 0) return error.FlushFailure;
 
-    // Init all objects
-    for (elf_file.objects.items) |index| {
-        try elf_file.file(index).?.object.init(elf_file);
-    }
-
-    if (comp.link_errors.items.len > 0) return error.FlushFailure;
-
     // Now, we are ready to resolve the symbols across all input files.
     // We will first resolve the files in the ZigObject, next in the parsed
     // input Object files.
@@ -214,6 +209,55 @@ pub fn flushObject(elf_file: *Elf, comp: *Compilation, module_obj_path: ?[]const
     if (comp.link_errors.items.len > 0) return error.FlushFailure;
 }
 
+fn parsePositional(elf_file: *Elf, path: []const u8) Elf.ParseError!void {
+    if (try Object.isObject(path)) {
+        try parseObject(elf_file, path);
+    } else if (try Archive.isArchive(path)) {
+        try parseArchive(elf_file, path);
+    } else return error.UnknownFileType;
+    // TODO: should we check for LD script?
+    // Actually, should we even unpack an archive?
+}
+
+fn parseObject(elf_file: *Elf, path: []const u8) Elf.ParseError!void {
+    const gpa = elf_file.base.comp.gpa;
+    const handle = try std.fs.cwd().openFile(path, .{});
+    const fh = try elf_file.addFileHandle(handle);
+
+    const index = @as(File.Index, @intCast(try elf_file.files.addOne(gpa)));
+    elf_file.files.set(index, .{ .object = .{
+        .path = try gpa.dupe(u8, path),
+        .file_handle = fh,
+        .index = index,
+    } });
+    try elf_file.objects.append(gpa, index);
+
+    const object = elf_file.file(index).?.object;
+    try object.parseAr(elf_file);
+}
+
+fn parseArchive(elf_file: *Elf, path: []const u8) Elf.ParseError!void {
+    const gpa = elf_file.base.comp.gpa;
+    const handle = try std.fs.cwd().openFile(path, .{});
+    const fh = try elf_file.addFileHandle(handle);
+
+    var archive = Archive{};
+    defer archive.deinit(gpa);
+    try archive.parse(elf_file, path, fh);
+
+    const objects = try archive.objects.toOwnedSlice(gpa);
+    defer gpa.free(objects);
+
+    for (objects) |extracted| {
+        const index = @as(File.Index, @intCast(try elf_file.files.addOne(gpa)));
+        elf_file.files.set(index, .{ .object = extracted });
+        const object = &elf_file.files.items(.data)[index].object;
+        object.index = index;
+        try object.parseAr(elf_file);
+        try elf_file.objects.append(gpa, index);
+    }
+}
+
 fn claimUnresolved(elf_file: *Elf) void {
     if (elf_file.zigObjectPtr()) |zig_object| {
         zig_object.claimUnresolvedObject(elf_file);
@@ -518,3 +562,4 @@ const Archive = @import("Archive.zig");
 const Compilation = @import("../../Compilation.zig");
 const Elf = @import("../Elf.zig");
 const File = @import("file.zig").File;
+const Object = @import("Object.zig");
src/link/Elf.zig
@@ -1075,6 +1075,10 @@ pub fn flushModule(self: *Elf, arena: Allocator, prog_node: *std.Progress.Node)
     // --verbose-link
     if (comp.verbose_link) try self.dumpArgv(comp);
 
+    if (self.zigObjectPtr()) |zig_object| try zig_object.flushModule(self);
+    if (self.base.isStaticLib()) return relocatable.flushStaticLib(self, comp, module_obj_path);
+    if (self.base.isObject()) return relocatable.flushObject(self, comp, module_obj_path);
+
     const csu = try CsuObjects.init(arena, comp);
     const compiler_rt_path: ?[]const u8 = blk: {
         if (comp.compiler_rt_lib) |x| break :blk x.full_object_path;
@@ -1082,10 +1086,6 @@ pub fn flushModule(self: *Elf, arena: Allocator, prog_node: *std.Progress.Node)
         break :blk null;
     };
 
-    if (self.zigObjectPtr()) |zig_object| try zig_object.flushModule(self);
-    if (self.base.isStaticLib()) return relocatable.flushStaticLib(self, comp, module_obj_path);
-    if (self.base.isObject()) return relocatable.flushObject(self, comp, module_obj_path);
-
     // Here we will parse input positional and library files (if referenced).
     // This will roughly match in any linker backend we support.
     var positionals = std.ArrayList(Compilation.LinkObject).init(arena);
@@ -1249,13 +1249,6 @@ pub fn flushModule(self: *Elf, arena: Allocator, prog_node: *std.Progress.Node)
 
     if (comp.link_errors.items.len > 0) return error.FlushFailure;
 
-    // Init all objects
-    for (self.objects.items) |index| {
-        try self.file(index).?.object.init(self);
-    }
-
-    if (comp.link_errors.items.len > 0) return error.FlushFailure;
-
     // Dedup shared objects
     {
         var seen_dsos = std.StringHashMap(void).init(gpa);
@@ -1651,7 +1644,7 @@ fn dumpArgv(self: *Elf, comp: *Compilation) !void {
     Compilation.dump_argv(argv.items);
 }
 
-const ParseError = error{
+pub const ParseError = error{
     MalformedObject,
     MalformedArchive,
     InvalidCpuArch,
@@ -1662,6 +1655,7 @@ const ParseError = error{
     FileSystem,
     NotSupported,
     InvalidCharacter,
+    UnknownFileType,
 } || LdScript.Error || std.os.AccessError || std.os.SeekError || std.fs.File.OpenError || std.fs.File.ReadError;
 
 pub fn parsePositional(self: *Elf, path: []const u8, must_link: bool) ParseError!void {