Commit 5b3c4691e6
Changed files (3)
src
link
src/link/MachO/commands.zig
@@ -425,6 +425,44 @@ fn makeStaticString(bytes: []const u8) [16]u8 {
return buf;
}
+fn parseName(name: *const [16]u8) []const u8 {
+ const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len;
+ return name[0..len];
+}
+
+pub fn segmentName(sect: macho.section_64) []const u8 {
+ return parseName(&sect.segname);
+}
+
+pub fn sectionName(sect: macho.section_64) []const u8 {
+ return parseName(&sect.sectname);
+}
+
+pub fn sectionType(sect: macho.section_64) u8 {
+ return @truncate(u8, sect.flags & 0xff);
+}
+
+pub fn sectionAttrs(sect: macho.section_64) u32 {
+ return sect.flags & 0xffffff00;
+}
+
+pub fn sectionIsCode(sect: macho.section_64) bool {
+ const attr = sectionAttrs(sect);
+ return attr & macho.S_ATTR_PURE_INSTRUCTIONS != 0 or attr & macho.S_ATTR_SOME_INSTRUCTIONS != 0;
+}
+
+pub fn sectionIsDebug(sect: macho.section_64) bool {
+ return sectionAttrs(sect) & macho.S_ATTR_DEBUG != 0;
+}
+
+pub fn sectionIsDontDeadStrip(sect: macho.section_64) bool {
+ return sectionAttrs(sect) & macho.S_ATTR_NO_DEAD_STRIP != 0;
+}
+
+pub fn sectionIsDontDeadStripIfReferencesLive(sect: macho.section_64) bool {
+ return sectionAttrs(sect) & macho.S_ATTR_LIVE_SUPPORT != 0;
+}
+
fn testRead(allocator: *Allocator, buffer: []const u8, expected: anytype) !void {
var stream = io.fixedBufferStream(buffer);
var given = try LoadCommand.read(allocator, stream.reader());
src/link/MachO/Object.zig
@@ -9,13 +9,13 @@ const log = std.log.scoped(.object);
const macho = std.macho;
const mem = std.mem;
const reloc = @import("reloc.zig");
-const parseName = @import("Zld.zig").parseName;
const Allocator = mem.Allocator;
const Arch = std.Target.Cpu.Arch;
const Relocation = reloc.Relocation;
const Symbol = @import("Symbol.zig");
-const TextBlock = @import("Zld.zig").TextBlock;
+const TextBlock = Zld.TextBlock;
+const Zld = @import("Zld.zig");
usingnamespace @import("commands.zig");
@@ -74,43 +74,6 @@ pub const Section = struct {
allocator.free(relocs);
}
}
-
- pub fn segname(self: Section) []const u8 {
- return parseName(&self.inner.segname);
- }
-
- pub fn sectname(self: Section) []const u8 {
- return parseName(&self.inner.sectname);
- }
-
- pub fn flags(self: Section) u32 {
- return self.inner.flags;
- }
-
- pub fn sectionType(self: Section) u8 {
- return @truncate(u8, self.flags() & 0xff);
- }
-
- pub fn sectionAttrs(self: Section) u32 {
- return self.flags() & 0xffffff00;
- }
-
- pub fn isCode(self: Section) bool {
- const attr = self.sectionAttrs();
- return attr & macho.S_ATTR_PURE_INSTRUCTIONS != 0 or attr & macho.S_ATTR_SOME_INSTRUCTIONS != 0;
- }
-
- pub fn isDebug(self: Section) bool {
- return self.sectionAttrs() & macho.S_ATTR_DEBUG != 0;
- }
-
- pub fn dontDeadStrip(self: Section) bool {
- return self.sectionAttrs() & macho.S_ATTR_NO_DEAD_STRIP != 0;
- }
-
- pub fn dontDeadStripIfReferencesLive(self: Section) bool {
- return self.sectionAttrs() & macho.S_ATTR_LIVE_SUPPORT != 0;
- }
};
const DebugInfo = struct {
@@ -272,7 +235,6 @@ pub fn parse(self: *Object) !void {
try self.parseSymtab();
try self.parseDataInCode();
try self.parseInitializers();
- try self.parseDummy();
}
pub fn readLoadCommands(self: *Object, reader: anytype) !void {
@@ -288,8 +250,8 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void {
var seg = cmd.Segment;
for (seg.sections.items) |*sect, j| {
const index = @intCast(u16, j);
- const segname = parseName(&sect.segname);
- const sectname = parseName(&sect.sectname);
+ const segname = segmentName(sect.*);
+ const sectname = sectionName(sect.*);
if (mem.eql(u8, segname, "__DWARF")) {
if (mem.eql(u8, sectname, "__debug_info")) {
self.dwarf_debug_info_index = index;
@@ -351,7 +313,7 @@ pub fn parseSections(self: *Object) !void {
try self.sections.ensureCapacity(self.allocator, seg.sections.items.len);
for (seg.sections.items) |sect| {
- log.debug("parsing section '{s},{s}'", .{ parseName(&sect.segname), parseName(&sect.sectname) });
+ log.debug("parsing section '{s},{s}'", .{ segmentName(sect), sectionName(sect) });
// Read sections' code
var code = try self.allocator.alloc(u8, @intCast(usize, sect.size));
_ = try self.file.?.preadAll(code, sect.offset);
@@ -381,47 +343,91 @@ pub fn parseSections(self: *Object) !void {
}
}
-fn cmpNlist(_: void, lhs: macho.nlist_64, rhs: macho.nlist_64) bool {
- return lhs.n_value < rhs.n_value;
-}
+pub fn parseTextBlocks(self: *Object, zld: *Zld) !void {
+ const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
-fn filterSymsInSection(symbols: []macho.nlist_64, sect_id: u8) []macho.nlist_64 {
- var start: usize = 0;
- var end: usize = symbols.len;
+ log.warn("analysing {s}", .{self.name.?});
- while (true) {
- var change = false;
- if (symbols[start].n_sect != sect_id) {
- start += 1;
- change = true;
+ const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
+
+ const SymWithIndex = struct {
+ nlist: macho.nlist_64,
+ index: u32,
+
+ pub fn cmp(_: void, lhs: @This(), rhs: @This()) bool {
+ return lhs.nlist.n_value < rhs.nlist.n_value;
}
- if (symbols[end - 1].n_sect != sect_id) {
- end -= 1;
- change = true;
+
+ fn filterSymsInSection(symbols: []@This(), sect_id: u8) []@This() {
+ var start: usize = 0;
+ var end: usize = symbols.len;
+
+ while (true) {
+ var change = false;
+ if (symbols[start].nlist.n_sect != sect_id) {
+ start += 1;
+ change = true;
+ }
+ if (symbols[end - 1].nlist.n_sect != sect_id) {
+ end -= 1;
+ change = true;
+ }
+
+ if (start == end) break;
+ if (!change) break;
+ }
+
+ return symbols[start..end];
}
- if (start == end) break;
- if (!change) break;
- }
+ fn filterRelocs(relocs: []macho.relocation_info, start: u64, end: u64) []macho.relocation_info {
+ if (relocs.len == 0) return relocs;
- return symbols[start..end];
-}
+ var start_id: usize = 0;
+ var end_id: usize = relocs.len;
-pub fn parseDummy(self: *Object) !void {
- const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
+ while (true) {
+ var change = false;
+ if (relocs[start_id].r_address > end) {
+ start_id += 1;
+ change = true;
+ }
+ if (relocs[end_id - 1].r_address < start) {
+ end_id -= 1;
+ change = true;
+ }
- log.warn("analysing {s}", .{self.name.?});
+ if (start_id == end_id) break;
+ if (!change) break;
+ }
- const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
+ return relocs[start_id..end_id];
+ }
+ };
+
+ const nlists = self.symtab.items[dysymtab.ilocalsym..dysymtab.iundefsym];
- var sorted_syms = std.ArrayList(macho.nlist_64).init(self.allocator);
+ var sorted_syms = std.ArrayList(SymWithIndex).init(self.allocator);
defer sorted_syms.deinit();
- try sorted_syms.appendSlice(self.symtab.items[dysymtab.ilocalsym..dysymtab.iundefsym]);
+ try sorted_syms.ensureTotalCapacity(nlists.len);
- std.sort.sort(macho.nlist_64, sorted_syms.items, {}, cmpNlist);
+ for (nlists) |nlist, index| {
+ sorted_syms.appendAssumeCapacity(.{
+ .nlist = nlist,
+ .index = @intCast(u32, index + dysymtab.ilocalsym),
+ });
+ }
+
+ std.sort.sort(SymWithIndex, sorted_syms.items, {}, SymWithIndex.cmp);
for (seg.sections.items) |sect, sect_id| {
- log.warn("section {s},{s}", .{ parseName(&sect.segname), parseName(&sect.sectname) });
+ log.warn("section {s},{s}", .{ segmentName(sect), sectionName(sect) });
+
+ const match = (try zld.getMatchingSection(sect)) orelse {
+ log.warn("unhandled section", .{});
+ continue;
+ };
+
// Read code
var code = try self.allocator.alloc(u8, @intCast(usize, sect.size));
defer self.allocator.free(code);
@@ -431,16 +437,25 @@ pub fn parseDummy(self: *Object) !void {
const raw_relocs = try self.allocator.alloc(u8, @sizeOf(macho.relocation_info) * sect.nreloc);
defer self.allocator.free(raw_relocs);
_ = try self.file.?.preadAll(raw_relocs, sect.reloff);
+ const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs);
- const relocs = try reloc.parse(
- self.allocator,
- self.arch.?,
- code,
- mem.bytesAsSlice(macho.relocation_info, raw_relocs),
- );
+ const alignment = sect.@"align";
if (self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) {
- const syms = filterSymsInSection(sorted_syms.items, @intCast(u8, sect_id + 1));
+ const syms = SymWithIndex.filterSymsInSection(sorted_syms.items, @intCast(u8, sect_id + 1));
+
+ if (syms.len == 0) {
+ // One large text block referenced by section offsets only
+ log.warn("TextBlock", .{});
+ log.warn(" | referenced by section offsets", .{});
+ log.warn(" | start_addr = {}", .{sect.addr});
+ log.warn(" | end_addr = {}", .{sect.size});
+ log.warn(" | size = {}", .{sect.size});
+ log.warn(" | alignment = 0x{x}", .{alignment});
+ log.warn(" | segment_id = {}", .{match.seg});
+ log.warn(" | section_id = {}", .{match.sect});
+ log.warn(" | relocs: {any}", .{relocs});
+ }
var indices = std.ArrayList(u32).init(self.allocator);
defer indices.deinit();
@@ -450,32 +465,35 @@ pub fn parseDummy(self: *Object) !void {
const curr = syms[i];
try indices.append(i);
- const next: ?macho.nlist_64 = if (i + 1 < syms.len)
+ const next: ?SymWithIndex = if (i + 1 < syms.len)
syms[i + 1]
else
null;
if (next) |n| {
- if (curr.n_value == n.n_value) {
+ if (curr.nlist.n_value == n.nlist.n_value) {
continue;
}
}
- const start_addr = curr.n_value - sect.addr;
- const end_addr = if (next) |n| n.n_value - sect.addr else sect.size;
- const alignment = sect.@"align";
+ const start_addr = curr.nlist.n_value - sect.addr;
+ const end_addr = if (next) |n| n.nlist.n_value - sect.addr else sect.size;
const tb_code = code[start_addr..end_addr];
const size = tb_code.len;
log.warn("TextBlock", .{});
for (indices.items) |id| {
- log.warn(" | symbol {s}", .{self.getString(syms[id].n_strx)});
+ const sym = self.symbols.items[syms[id].index];
+ log.warn(" | symbol = {s}", .{sym.name});
}
- log.warn(" | start_addr = 0x{x}", .{start_addr});
- log.warn(" | end_addr = 0x{x}", .{end_addr});
+ log.warn(" | start_addr = {}", .{start_addr});
+ log.warn(" | end_addr = {}", .{end_addr});
log.warn(" | size = {}", .{size});
log.warn(" | alignment = 0x{x}", .{alignment});
+ log.warn(" | segment_id = {}", .{match.seg});
+ log.warn(" | section_id = {}", .{match.sect});
+ log.warn(" | relocs: {any}", .{SymWithIndex.filterRelocs(relocs, start_addr, end_addr)});
indices.clearRetainingCapacity();
}
src/link/MachO/Zld.zig
@@ -234,6 +234,7 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg
try self.parseInputFiles(files, args.syslibroot);
try self.parseLibs(args.libs, args.syslibroot);
try self.resolveSymbols();
+ try self.parseTextBlocks();
try self.resolveStubsAndGotEntries();
try self.updateMetadata();
try self.sortSections();
@@ -322,10 +323,10 @@ fn mapAndUpdateSections(
log.debug("{s}: '{s},{s}' mapped to '{s},{s}' from 0x{x} to 0x{x}", .{
object.name.?,
- parseName(&source_sect.inner.segname),
- parseName(&source_sect.inner.sectname),
- parseName(&target_sect.segname),
- parseName(&target_sect.sectname),
+ segmentName(source_sect.inner),
+ sectionName(source_sect.inner),
+ segmentName(target_sect.*),
+ sectionName(target_sect.*),
offset,
offset + size,
});
@@ -343,12 +344,12 @@ fn updateMetadata(self: *Zld) !void {
for (self.objects.items) |object| {
// Find ideal section alignment and update section mappings
for (object.sections.items) |sect, sect_id| {
- const match = (try self.getMatchingSection(sect)) orelse {
+ const match = (try self.getMatchingSection(sect.inner)) orelse {
log.debug("{s}: unhandled section type 0x{x} for '{s},{s}'", .{
object.name.?,
- sect.flags(),
- sect.segname(),
- sect.sectname(),
+ sect.inner.flags,
+ segmentName(sect.inner),
+ sectionName(sect.inner),
});
continue;
};
@@ -441,15 +442,15 @@ const MatchingSection = struct {
sect: u16,
};
-fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
+pub fn getMatchingSection(self: *Zld, sect: macho.section_64) !?MatchingSection {
const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
- const segname = sect.segname();
- const sectname = sect.sectname();
+ const segname = segmentName(sect);
+ const sectname = sectionName(sect);
const res: ?MatchingSection = blk: {
- switch (sect.sectionType()) {
+ switch (sectionType(sect)) {
macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => {
if (self.text_const_section_index == null) {
self.text_const_section_index = @intCast(u16, text_seg.sections.items.len);
@@ -649,7 +650,7 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
};
},
macho.S_REGULAR => {
- if (sect.isCode()) {
+ if (sectionIsCode(sect)) {
if (self.text_section_index == null) {
self.text_section_index = @intCast(u16, text_seg.sections.items.len);
try text_seg.addSection(self.allocator, "__text", .{
@@ -662,11 +663,11 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
.sect = self.text_section_index.?,
};
}
- if (sect.isDebug()) {
+ if (sectionIsDebug(sect)) {
// TODO debug attributes
if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) {
log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{
- sect.flags(), segname, sectname,
+ sect.flags, segname, sectname,
});
}
break :blk null;
@@ -829,7 +830,7 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection {
if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) {
log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{
- sect.flags(), segname, sectname,
+ sect.flags, segname, sectname,
});
}
@@ -956,8 +957,8 @@ fn sortSections(self: *Zld) !void {
log.debug("remapping in {s}: '{s},{s}': {} => {}", .{
object.name.?,
- parseName(&sect.inner.segname),
- parseName(&sect.inner.sectname),
+ segmentName(sect.inner),
+ sectionName(sect.inner),
target_map.section_id,
new_index,
});
@@ -1086,8 +1087,8 @@ fn allocateSymbol(self: *Zld, symbol: *Symbol) !void {
const source_sect = &object.sections.items[reg.section];
const target_map = source_sect.target_map orelse {
log.debug("section '{s},{s}' not mapped for symbol '{s}'", .{
- parseName(&source_sect.inner.segname),
- parseName(&source_sect.inner.sectname),
+ segmentName(source_sect.inner),
+ sectionName(source_sect.inner),
symbol.name,
});
return;
@@ -1464,7 +1465,7 @@ fn writeStubInStubHelper(self: *Zld, index: u32) !void {
fn resolveSymbolsInObject(self: *Zld, object: *Object) !void {
log.debug("resolving symbols in '{s}'", .{object.name});
- for (object.symtab.items) |sym| {
+ for (object.symtab.items) |sym, sym_id| {
const sym_name = object.getString(sym.n_strx);
if (Symbol.isStab(sym)) {
@@ -1497,6 +1498,7 @@ fn resolveSymbolsInObject(self: *Zld, object: *Object) !void {
.file = object,
},
};
+ const index = @intCast(u32, self.locals.items.len);
try self.locals.append(self.allocator, symbol);
try object.symbols.append(self.allocator, symbol);
continue;
@@ -1665,6 +1667,12 @@ fn resolveSymbols(self: *Zld) !void {
if (has_undefined) return error.UndefinedSymbolReference;
}
+fn parseTextBlocks(self: *Zld) !void {
+ for (self.objects.items) |object| {
+ try object.parseTextBlocks(self);
+ }
+}
+
fn resolveStubsAndGotEntries(self: *Zld) !void {
for (self.objects.items) |object| {
log.debug("resolving stubs and got entries from {s}", .{object.name});
@@ -1718,11 +1726,11 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
log.debug("relocating object {s}", .{object.name});
for (object.sections.items) |sect| {
- if (sect.inner.flags == macho.S_MOD_INIT_FUNC_POINTERS or
- sect.inner.flags == macho.S_MOD_TERM_FUNC_POINTERS) continue;
+ if (sectionType(sect.inner) == macho.S_MOD_INIT_FUNC_POINTERS or
+ sectionType(sect.inner) == macho.S_MOD_TERM_FUNC_POINTERS) continue;
- const segname = parseName(&sect.inner.segname);
- const sectname = parseName(&sect.inner.sectname);
+ const segname = segmentName(sect.inner);
+ const sectname = sectionName(sect.inner);
log.debug("relocating section '{s},{s}'", .{ segname, sectname });
@@ -1759,7 +1767,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
args.source_target_sect_addr = source_sect.inner.addr;
}
- const flags = @truncate(u8, target_sect.flags & 0xff);
+ const sect_type = sectionType(target_sect);
const should_rebase = rebase: {
if (!unsigned.is_64bit) break :rebase false;
@@ -1780,8 +1788,8 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
};
if (!is_right_segment) break :rebase false;
- if (flags != macho.S_LITERAL_POINTERS and
- flags != macho.S_REGULAR)
+ if (sect_type != macho.S_LITERAL_POINTERS and
+ sect_type != macho.S_REGULAR)
{
break :rebase false;
}
@@ -1804,7 +1812,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
// TLV is handled via a separate offset mechanism.
// Calculate the offset to the initializer.
- if (flags == macho.S_THREAD_LOCAL_VARIABLES) tlv: {
+ if (sect_type == macho.S_THREAD_LOCAL_VARIABLES) tlv: {
// TODO we don't want to save offset to tlv_bootstrap
if (mem.eql(u8, object.symbols.items[rel.target.symbol].name, "__tlv_bootstrap")) break :tlv;
@@ -1858,13 +1866,13 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void {
target_sect_off + sect.code.len,
});
- if (target_sect.flags == macho.S_ZEROFILL or
- target_sect.flags == macho.S_THREAD_LOCAL_ZEROFILL or
- target_sect.flags == macho.S_THREAD_LOCAL_VARIABLES)
+ if (sectionType(target_sect) == macho.S_ZEROFILL or
+ sectionType(target_sect) == macho.S_THREAD_LOCAL_ZEROFILL or
+ sectionType(target_sect) == macho.S_THREAD_LOCAL_VARIABLES)
{
log.debug("zeroing out '{s},{s}' from 0x{x} to 0x{x}", .{
- parseName(&target_sect.segname),
- parseName(&target_sect.sectname),
+ segmentName(target_sect),
+ sectionName(target_sect),
target_sect_off,
target_sect_off + sect.code.len,
});
@@ -1926,8 +1934,8 @@ fn relocTargetAddr(self: *Zld, object: *const Object, target: reloc.Relocation.T
log.debug(" | section offset", .{});
const source_sect = object.sections.items[sect_id];
log.debug(" | section '{s},{s}'", .{
- parseName(&source_sect.inner.segname),
- parseName(&source_sect.inner.sectname),
+ segmentName(source_sect.inner),
+ sectionName(source_sect.inner),
});
const target_map = source_sect.target_map orelse unreachable;
const target_seg = self.load_commands.items[target_map.segment_id].Segment;
@@ -2999,8 +3007,3 @@ fn writeHeader(self: *Zld) !void {
try self.file.?.pwriteAll(mem.asBytes(&header), 0);
}
-
-pub fn parseName(name: *const [16]u8) []const u8 {
- const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len;
- return name[0..len];
-}