Commit 5b3c4691e6

Jakub Konka <kubkon@jakubkonka.com>
2021-07-04 12:56:14
zld: put relocs in a TextBlock
1 parent 453c16d
Changed files (3)
src/link/MachO/commands.zig
@@ -425,6 +425,44 @@ fn makeStaticString(bytes: []const u8) [16]u8 {
     return buf;
 }
 
+/// Returns the bytes of the fixed 16-byte `name` field that come before its
+/// first NUL byte, or all 16 bytes when the field contains no NUL.
+/// The result is a view into `name`; nothing is copied.
+fn parseName(name: *const [16]u8) []const u8 {
+    if (mem.indexOfScalar(u8, name, 0)) |nul_pos| {
+        return name[0..nul_pos];
+    }
+    return name[0..];
+}
+
+/// Returns the segment name held in `sect.segname`, cut at its first NUL byte
+/// (or the full 16 bytes when there is none).
+/// NOTE(review): `sect` is taken by value and the result is a slice into its
+/// `segname` field, so the slice is only valid for as long as the storage
+/// backing the argument is — confirm callers do not keep it past that.
+pub fn segmentName(sect: macho.section_64) []const u8 {
+    const raw_name = &sect.segname;
+    return parseName(raw_name);
+}
+
+/// Returns the section name held in `sect.sectname`, cut at its first NUL byte
+/// (or the full 16 bytes when there is none).
+/// NOTE(review): `sect` is taken by value and the result is a slice into its
+/// `sectname` field, so the slice is only valid for as long as the storage
+/// backing the argument is — confirm callers do not keep it past that.
+pub fn sectionName(sect: macho.section_64) []const u8 {
+    const raw_name = &sect.sectname;
+    return parseName(raw_name);
+}
+
+/// Returns the section type: the low 8 bits of `sect.flags`.
+pub fn sectionType(sect: macho.section_64) u8 {
+    const type_bits = sect.flags & 0xff;
+    return @truncate(u8, type_bits);
+}
+
+/// Returns the section attributes: `sect.flags` with its low 8 bits (the
+/// section type) cleared.
+pub fn sectionAttrs(sect: macho.section_64) u32 {
+    const type_mask: u32 = 0xff;
+    return sect.flags & ~type_mask;
+}
+
+/// Reports whether the section's attributes include `S_ATTR_PURE_INSTRUCTIONS`
+/// or `S_ATTR_SOME_INSTRUCTIONS` (either one is enough).
+pub fn sectionIsCode(sect: macho.section_64) bool {
+    // Testing against the union of both bits is the same as testing each one
+    // separately and or-ing the results.
+    const code_attrs = macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS;
+    return (sectionAttrs(sect) & code_attrs) != 0;
+}
+
+/// Reports whether the section's attributes include `S_ATTR_DEBUG`.
+pub fn sectionIsDebug(sect: macho.section_64) bool {
+    const attrs = sectionAttrs(sect);
+    return (attrs & macho.S_ATTR_DEBUG) != 0;
+}
+
+/// Reports whether the section's attributes include `S_ATTR_NO_DEAD_STRIP`.
+pub fn sectionIsDontDeadStrip(sect: macho.section_64) bool {
+    const attrs = sectionAttrs(sect);
+    return (attrs & macho.S_ATTR_NO_DEAD_STRIP) != 0;
+}
+
+/// Reports whether the section's attributes include `S_ATTR_LIVE_SUPPORT`.
+pub fn sectionIsDontDeadStripIfReferencesLive(sect: macho.section_64) bool {
+    const attrs = sectionAttrs(sect);
+    return (attrs & macho.S_ATTR_LIVE_SUPPORT) != 0;
+}
+
 fn testRead(allocator: *Allocator, buffer: []const u8, expected: anytype) !void {
     var stream = io.fixedBufferStream(buffer);
     var given = try LoadCommand.read(allocator, stream.reader());
src/link/MachO/Object.zig
@@ -9,13 +9,13 @@ const log = std.log.scoped(.object);
 const macho = std.macho;
 const mem = std.mem;
 const reloc = @import("reloc.zig");
-const parseName = @import("Zld.zig").parseName;
 
 const Allocator = mem.Allocator;
 const Arch = std.Target.Cpu.Arch;
 const Relocation = reloc.Relocation;
 const Symbol = @import("Symbol.zig");
-const TextBlock = @import("Zld.zig").TextBlock;
+const TextBlock = Zld.TextBlock;
+const Zld = @import("Zld.zig");
 
 usingnamespace @import("commands.zig");
 
@@ -74,43 +74,6 @@ pub const Section = struct {
             allocator.free(relocs);
         }
     }
-
-    pub fn segname(self: Section) []const u8 {
-        return parseName(&self.inner.segname);
-    }
-
-    pub fn sectname(self: Section) []const u8 {
-        return parseName(&self.inner.sectname);
-    }
-
-    pub fn flags(self: Section) u32 {
-        return self.inner.flags;
-    }
-
-    pub fn sectionType(self: Section) u8 {
-        return @truncate(u8, self.flags() & 0xff);
-    }
-
-    pub fn sectionAttrs(self: Section) u32 {
-        return self.flags() & 0xffffff00;
-    }
-
-    pub fn isCode(self: Section) bool {
-        const attr = self.sectionAttrs();
-        return attr & macho.S_ATTR_PURE_INSTRUCTIONS != 0 or attr & macho.S_ATTR_SOME_INSTRUCTIONS != 0;
-    }
-
-    pub fn isDebug(self: Section) bool {
-        return self.sectionAttrs() & macho.S_ATTR_DEBUG != 0;
-    }
-
-    pub fn dontDeadStrip(self: Section) bool {
-        return self.sectionAttrs() & macho.S_ATTR_NO_DEAD_STRIP != 0;
-    }
-
-    pub fn dontDeadStripIfReferencesLive(self: Section) bool {
-        return self.sectionAttrs() & macho.S_ATTR_LIVE_SUPPORT != 0;
-    }
 };
 
 const DebugInfo = struct {
@@ -272,7 +235,6 @@ pub fn parse(self: *Object) !void {
     try self.parseSymtab();
     try self.parseDataInCode();
     try self.parseInitializers();
-    try self.parseDummy();
 }
 
 pub fn readLoadCommands(self: *Object, reader: anytype) !void {
@@ -288,8 +250,8 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void {
                 var seg = cmd.Segment;
                 for (seg.sections.items) |*sect, j| {
                     const index = @intCast(u16, j);
-                    const segname = parseName(&sect.segname);
-                    const sectname = parseName(&sect.sectname);
+                    const segname = segmentName(sect.*);
+                    const sectname = sectionName(sect.*);
                     if (mem.eql(u8, segname, "__DWARF")) {
                         if (mem.eql(u8, sectname, "__debug_info")) {
                             self.dwarf_debug_info_index = index;
@@ -351,7 +313,7 @@ pub fn parseSections(self: *Object) !void {
     try self.sections.ensureCapacity(self.allocator, seg.sections.items.len);
 
     for (seg.sections.items) |sect| {
-        log.debug("parsing section '{s},{s}'", .{ parseName(&sect.segname), parseName(&sect.sectname) });
+        log.debug("parsing section '{s},{s}'", .{ segmentName(sect), sectionName(sect) });
         // Read sections' code
         var code = try self.allocator.alloc(u8, @intCast(usize, sect.size));
         _ = try self.file.?.preadAll(code, sect.offset);
@@ -381,47 +343,91 @@ pub fn parseSections(self: *Object) !void {
     }
 }
 
-fn cmpNlist(_: void, lhs: macho.nlist_64, rhs: macho.nlist_64) bool {
-    return lhs.n_value < rhs.n_value;
-}
+pub fn parseTextBlocks(self: *Object, zld: *Zld) !void {
+    const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
 
-fn filterSymsInSection(symbols: []macho.nlist_64, sect_id: u8) []macho.nlist_64 {
-    var start: usize = 0;
-    var end: usize = symbols.len;
+    log.warn("analysing {s}", .{self.name.?});
 
-    while (true) {
-        var change = false;
-        if (symbols[start].n_sect != sect_id) {
-            start += 1;
-            change = true;
+    const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
+
+    const SymWithIndex = struct {
+        nlist: macho.nlist_64,
+        index: u32,
+
+        pub fn cmp(_: void, lhs: @This(), rhs: @This()) bool {
+            return lhs.nlist.n_value < rhs.nlist.n_value;
         }
-        if (symbols[end - 1].n_sect != sect_id) {
-            end -= 1;
-            change = true;
+
+        /// Narrows `symbols` to the run whose `nlist.n_sect` equals `sect_id`
+        /// by trimming non-matching entries from both ends; entries between
+        /// the first and last match are not inspected.
+        /// Returns an empty slice when `symbols` is empty or nothing matches.
+        fn filterSymsInSection(symbols: []@This(), sect_id: u8) []@This() {
+            var start: usize = 0;
+            var end: usize = symbols.len;
+
+            // Trim each end on its own and bound every read by the other
+            // cursor. Advancing both cursors in one combined loop indexes
+            // `symbols[0]` on an empty slice and lets `start` step past `end`
+            // when nothing matches, which then reads out of bounds.
+            while (start < end and symbols[start].nlist.n_sect != sect_id) start += 1;
+            while (end > start and symbols[end - 1].nlist.n_sect != sect_id) end -= 1;
+
+            return symbols[start..end];
+        }
 
-        if (start == end) break;
-        if (!change) break;
-    }
+        fn filterRelocs(relocs: []macho.relocation_info, start: u64, end: u64) []macho.relocation_info {
+            if (relocs.len == 0) return relocs;
 
-    return symbols[start..end];
-}
+            var start_id: usize = 0;
+            var end_id: usize = relocs.len;
 
-pub fn parseDummy(self: *Object) !void {
-    const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
+            while (true) {
+                var change = false;
+                if (relocs[start_id].r_address > end) {
+                    start_id += 1;
+                    change = true;
+                }
+                if (relocs[end_id - 1].r_address < start) {
+                    end_id -= 1;
+                    change = true;
+                }
 
-    log.warn("analysing {s}", .{self.name.?});
+                if (start_id == end_id) break;
+                if (!change) break;
+            }
 
-    const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
+            return relocs[start_id..end_id];
+        }
+    };
+
+    const nlists = self.symtab.items[dysymtab.ilocalsym..dysymtab.iundefsym];
 
-    var sorted_syms = std.ArrayList(macho.nlist_64).init(self.allocator);
+    var sorted_syms = std.ArrayList(SymWithIndex).init(self.allocator);
     defer sorted_syms.deinit();
-    try sorted_syms.appendSlice(self.symtab.items[dysymtab.ilocalsym..dysymtab.iundefsym]);
+    try sorted_syms.ensureTotalCapacity(nlists.len);
 
-    std.sort.sort(macho.nlist_64, sorted_syms.items, {}, cmpNlist);
+    for (nlists) |nlist, index| {
+        sorted_syms.appendAssumeCapacity(.{
+            .nlist = nlist,
+            .index = @intCast(u32, index + dysymtab.ilocalsym),
+        });
+    }
+
+    std.sort.sort(SymWithIndex, sorted_syms.items, {}, SymWithIndex.cmp);
 
     for (seg.sections.items) |sect, sect_id| {
-        log.warn("section {s},{s}", .{ parseName(&sect.segname), parseName(&sect.sectname) });
+        log.warn("section {s},{s}", .{ segmentName(sect), sectionName(sect) });
+
+        const match = (try zld.getMatchingSection(sect)) orelse {
+            log.warn("unhandled section", .{});
+            continue;
+        };
+
         // Read code
         var code = try self.allocator.alloc(u8, @intCast(usize, sect.size));
         defer self.allocator.free(code);
@@ -431,16 +437,25 @@ pub fn parseDummy(self: *Object) !void {
         const raw_relocs = try self.allocator.alloc(u8, @sizeOf(macho.relocation_info) * sect.nreloc);
         defer self.allocator.free(raw_relocs);
         _ = try self.file.?.preadAll(raw_relocs, sect.reloff);
+        const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs);
 
-        const relocs = try reloc.parse(
-            self.allocator,
-            self.arch.?,
-            code,
-            mem.bytesAsSlice(macho.relocation_info, raw_relocs),
-        );
+        const alignment = sect.@"align";
 
         if (self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) {
-            const syms = filterSymsInSection(sorted_syms.items, @intCast(u8, sect_id + 1));
+            const syms = SymWithIndex.filterSymsInSection(sorted_syms.items, @intCast(u8, sect_id + 1));
+
+            if (syms.len == 0) {
+                // One large text block referenced by section offsets only
+                log.warn("TextBlock", .{});
+                log.warn("  | referenced by section offsets", .{});
+                log.warn("  | start_addr = {}", .{sect.addr});
+                log.warn("  | end_addr = {}", .{sect.size});
+                log.warn("  | size = {}", .{sect.size});
+                log.warn("  | alignment = 0x{x}", .{alignment});
+                log.warn("  | segment_id = {}", .{match.seg});
+                log.warn("  | section_id = {}", .{match.sect});
+                log.warn("  | relocs: {any}", .{relocs});
+            }
 
             var indices = std.ArrayList(u32).init(self.allocator);
             defer indices.deinit();
@@ -450,32 +465,35 @@ pub fn parseDummy(self: *Object) !void {
                 const curr = syms[i];
                 try indices.append(i);
 
-                const next: ?macho.nlist_64 = if (i + 1 < syms.len)
+                const next: ?SymWithIndex = if (i + 1 < syms.len)
                     syms[i + 1]
                 else
                     null;
 
                 if (next) |n| {
-                    if (curr.n_value == n.n_value) {
+                    if (curr.nlist.n_value == n.nlist.n_value) {
                         continue;
                     }
                 }
 
-                const start_addr = curr.n_value - sect.addr;
-                const end_addr = if (next) |n| n.n_value - sect.addr else sect.size;
-                const alignment = sect.@"align";
+                const start_addr = curr.nlist.n_value - sect.addr;
+                const end_addr = if (next) |n| n.nlist.n_value - sect.addr else sect.size;
 
                 const tb_code = code[start_addr..end_addr];
                 const size = tb_code.len;
 
                 log.warn("TextBlock", .{});
                 for (indices.items) |id| {
-                    log.warn("  | symbol {s}", .{self.getString(syms[id].n_strx)});
+                    const sym = self.symbols.items[syms[id].index];
+                    log.warn("  | symbol = {s}", .{sym.name});
                 }
-                log.warn("  | start_addr = 0x{x}", .{start_addr});
-                log.warn("  | end_addr = 0x{x}", .{end_addr});
+                log.warn("  | start_addr = {}", .{start_addr});
+                log.warn("  | end_addr = {}", .{end_addr});
                 log.warn("  | size = {}", .{size});
                 log.warn("  | alignment = 0x{x}", .{alignment});
+                log.warn("  | segment_id = {}", .{match.seg});
+                log.warn("  | section_id = {}", .{match.sect});
+                log.warn("  | relocs: {any}", .{SymWithIndex.filterRelocs(relocs, start_addr, end_addr)});
 
                 indices.clearRetainingCapacity();
             }
src/link/MachO/Zld.zig
@@ -234,6 +234,7 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg
     try self.parseInputFiles(files, args.syslibroot);
     try self.parseLibs(args.libs, args.syslibroot);
     try self.resolveSymbols();
+    try self.parseTextBlocks();
     try self.resolveStubsAndGotEntries();
     try self.updateMetadata();
     try self.sortSections();
@@ -322,10 +323,10 @@ fn mapAndUpdateSections(
 
     log.debug("{s}: '{s},{s}' mapped to '{s},{s}' from 0x{x} to 0x{x}", .{
         object.name.?,
-        parseName(&source_sect.inner.segname),
-        parseName(&source_sect.inner.sectname),
-        parseName(&target_sect.segname),
-        parseName(&target_sect.sectname),
+        segmentName(source_sect.inner),
+        sectionName(source_sect.inner),
+        segmentName(target_sect.*),
+        sectionName(target_sect.*),
         offset,
         offset + size,
     });
@@ -343,12 +344,12 @@ fn updateMetadata(self: *Zld) !void {
     for (self.objects.items) |object| {
         // Find ideal section alignment and update section mappings
         for (object.sections.items) |sect, sect_id| {
-            const match = (try self.getMatchingSection(sect)) orelse {
+            const match = (try self.getMatchingSection(sect.inner)) orelse {
                 log.debug("{s}: unhandled section type 0x{x} for '{s},{s}'", .{
                     object.name.?,
-                    sect.flags(),
-                    sect.segname(),
-                    sect.sectname(),
+                    sect.inner.flags,
+                    segmentName(sect.inner),
+                    sectionName(sect.inner),
                 });
                 continue;
             };
@@ -441,15 +442,15 @@ const MatchingSection = struct {
     sect: u16,
 };
 
-fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
+pub fn getMatchingSection(self: *Zld, sect: macho.section_64) !?MatchingSection {
     const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
     const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
     const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
-    const segname = sect.segname();
-    const sectname = sect.sectname();
+    const segname = segmentName(sect);
+    const sectname = sectionName(sect);
 
     const res: ?MatchingSection = blk: {
-        switch (sect.sectionType()) {
+        switch (sectionType(sect)) {
             macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => {
                 if (self.text_const_section_index == null) {
                     self.text_const_section_index = @intCast(u16, text_seg.sections.items.len);
@@ -649,7 +650,7 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
                 };
             },
             macho.S_REGULAR => {
-                if (sect.isCode()) {
+                if (sectionIsCode(sect)) {
                     if (self.text_section_index == null) {
                         self.text_section_index = @intCast(u16, text_seg.sections.items.len);
                         try text_seg.addSection(self.allocator, "__text", .{
@@ -662,11 +663,11 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
                         .sect = self.text_section_index.?,
                     };
                 }
-                if (sect.isDebug()) {
+                if (sectionIsDebug(sect)) {
                     // TODO debug attributes
                     if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) {
                         log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{
-                            sect.flags(), segname, sectname,
+                            sect.flags, segname, sectname,
                         });
                     }
                     break :blk null;
@@ -829,7 +830,7 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
 
                 if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) {
                     log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{
-                        sect.flags(), segname, sectname,
+                        sect.flags, segname, sectname,
                     });
                 }
 
@@ -956,8 +957,8 @@ fn sortSections(self: *Zld) !void {
 
             log.debug("remapping in {s}: '{s},{s}': {} => {}", .{
                 object.name.?,
-                parseName(&sect.inner.segname),
-                parseName(&sect.inner.sectname),
+                segmentName(sect.inner),
+                sectionName(sect.inner),
                 target_map.section_id,
                 new_index,
             });
@@ -1086,8 +1087,8 @@ fn allocateSymbol(self: *Zld, symbol: *Symbol) !void {
     const source_sect = &object.sections.items[reg.section];
     const target_map = source_sect.target_map orelse {
         log.debug("section '{s},{s}' not mapped for symbol '{s}'", .{
-            parseName(&source_sect.inner.segname),
-            parseName(&source_sect.inner.sectname),
+            segmentName(source_sect.inner),
+            sectionName(source_sect.inner),
             symbol.name,
         });
         return;
@@ -1464,7 +1465,7 @@ fn writeStubInStubHelper(self: *Zld, index: u32) !void {
 fn resolveSymbolsInObject(self: *Zld, object: *Object) !void {
     log.debug("resolving symbols in '{s}'", .{object.name});
 
-    for (object.symtab.items) |sym| {
+    for (object.symtab.items) |sym, sym_id| {
         const sym_name = object.getString(sym.n_strx);
 
         if (Symbol.isStab(sym)) {
@@ -1497,6 +1498,7 @@ fn resolveSymbolsInObject(self: *Zld, object: *Object) !void {
                     .file = object,
                 },
             };
+            const index = @intCast(u32, self.locals.items.len);
             try self.locals.append(self.allocator, symbol);
             try object.symbols.append(self.allocator, symbol);
             continue;
@@ -1665,6 +1667,12 @@ fn resolveSymbols(self: *Zld) !void {
     if (has_undefined) return error.UndefinedSymbolReference;
 }
 
+/// Calls `parseTextBlocks` on every entry of `self.objects`, in order, passing
+/// this linker instance along. Stops at, and returns, the first error.
+fn parseTextBlocks(self: *Zld) !void {
+    const objects = self.objects.items;
+    var i: usize = 0;
+    while (i < objects.len) : (i += 1) {
+        try objects[i].parseTextBlocks(self);
+    }
+}
+
 fn resolveStubsAndGotEntries(self: *Zld) !void {
     for (self.objects.items) |object| {
         log.debug("resolving stubs and got entries from {s}", .{object.name});
@@ -1718,11 +1726,11 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
         log.debug("relocating object {s}", .{object.name});
 
         for (object.sections.items) |sect| {
-            if (sect.inner.flags == macho.S_MOD_INIT_FUNC_POINTERS or
-                sect.inner.flags == macho.S_MOD_TERM_FUNC_POINTERS) continue;
+            if (sectionType(sect.inner) == macho.S_MOD_INIT_FUNC_POINTERS or
+                sectionType(sect.inner) == macho.S_MOD_TERM_FUNC_POINTERS) continue;
 
-            const segname = parseName(&sect.inner.segname);
-            const sectname = parseName(&sect.inner.sectname);
+            const segname = segmentName(sect.inner);
+            const sectname = sectionName(sect.inner);
 
             log.debug("relocating section '{s},{s}'", .{ segname, sectname });
 
@@ -1759,7 +1767,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
                                 args.source_target_sect_addr = source_sect.inner.addr;
                             }
 
-                            const flags = @truncate(u8, target_sect.flags & 0xff);
+                            const sect_type = sectionType(target_sect);
                             const should_rebase = rebase: {
                                 if (!unsigned.is_64bit) break :rebase false;
 
@@ -1780,8 +1788,8 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
                                 };
 
                                 if (!is_right_segment) break :rebase false;
-                                if (flags != macho.S_LITERAL_POINTERS and
-                                    flags != macho.S_REGULAR)
+                                if (sect_type != macho.S_LITERAL_POINTERS and
+                                    sect_type != macho.S_REGULAR)
                                 {
                                     break :rebase false;
                                 }
@@ -1804,7 +1812,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
 
                             // TLV is handled via a separate offset mechanism.
                             // Calculate the offset to the initializer.
-                            if (flags == macho.S_THREAD_LOCAL_VARIABLES) tlv: {
+                            if (sect_type == macho.S_THREAD_LOCAL_VARIABLES) tlv: {
                                 // TODO we don't want to save offset to tlv_bootstrap
                                 if (mem.eql(u8, object.symbols.items[rel.target.symbol].name, "__tlv_bootstrap")) break :tlv;
 
@@ -1858,13 +1866,13 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
                 target_sect_off + sect.code.len,
             });
 
-            if (target_sect.flags == macho.S_ZEROFILL or
-                target_sect.flags == macho.S_THREAD_LOCAL_ZEROFILL or
-                target_sect.flags == macho.S_THREAD_LOCAL_VARIABLES)
+            if (sectionType(target_sect) == macho.S_ZEROFILL or
+                sectionType(target_sect) == macho.S_THREAD_LOCAL_ZEROFILL or
+                sectionType(target_sect) == macho.S_THREAD_LOCAL_VARIABLES)
             {
                 log.debug("zeroing out '{s},{s}' from 0x{x} to 0x{x}", .{
-                    parseName(&target_sect.segname),
-                    parseName(&target_sect.sectname),
+                    segmentName(target_sect),
+                    sectionName(target_sect),
                     target_sect_off,
                     target_sect_off + sect.code.len,
                 });
@@ -1926,8 +1934,8 @@ fn relocTargetAddr(self: *Zld, object: *const Object, target: reloc.Relocation.T
                 log.debug("    | section offset", .{});
                 const source_sect = object.sections.items[sect_id];
                 log.debug("    | section '{s},{s}'", .{
-                    parseName(&source_sect.inner.segname),
-                    parseName(&source_sect.inner.sectname),
+                    segmentName(source_sect.inner),
+                    sectionName(source_sect.inner),
                 });
                 const target_map = source_sect.target_map orelse unreachable;
                 const target_seg = self.load_commands.items[target_map.segment_id].Segment;
@@ -2999,8 +3007,3 @@ fn writeHeader(self: *Zld) !void {
 
     try self.file.?.pwriteAll(mem.asBytes(&header), 0);
 }
-
-pub fn parseName(name: *const [16]u8) []const u8 {
-    const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len;
-    return name[0..len];
-}