Commit b0105029ca
Changed files (3)
src
link
src/link/MachO/Archive.zig
@@ -9,16 +9,14 @@ const mem = std.mem;
const Allocator = mem.Allocator;
const Object = @import("Object.zig");
-const parseName = @import("Zld.zig").parseName;
usingnamespace @import("commands.zig");
allocator: *Allocator,
-file: fs.File,
-header: ar_hdr,
-name: []u8,
-
-objects: std.ArrayListUnmanaged(Object) = .{},
+arch: ?std.Target.Cpu.Arch = null,
+file: ?fs.File = null,
+header: ?ar_hdr = null,
+name: ?[]u8 = null,
/// Parsed table of contents.
/// Each symbol name points to a list of all definition
@@ -29,14 +27,14 @@ toc: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(u32)) = .{},
// `struct ar_hdr', and as many bytes of member file data as its `ar_size'
// member indicates, for each member file.
/// String that begins an archive file.
-const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";
+pub const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";
/// Size of that string.
-const SARMAG: u4 = 8;
+pub const SARMAG: u4 = 8;
/// String in ar_fmag at the end of each header.
-const ARFMAG: *const [2:0]u8 = "`\n";
+pub const ARFMAG: *const [2:0]u8 = "`\n";
-const ar_hdr = extern struct {
+pub const ar_hdr = extern struct {
/// Member file name, sometimes / terminated.
ar_name: [16]u8,
@@ -87,64 +85,91 @@ const ar_hdr = extern struct {
}
};
+/// Creates an empty, not-yet-parsed Archive that will allocate through
+/// `allocator`. Only the allocator is set here; every other field keeps
+/// its declared default.
+pub fn init(allocator: *Allocator) Archive {
+    return Archive{ .allocator = allocator };
+}
+
pub fn deinit(self: *Archive) void {
- self.allocator.free(self.name);
- for (self.objects.items) |*object| {
- object.deinit();
- }
- self.objects.deinit(self.allocator);
for (self.toc.items()) |*entry| {
self.allocator.free(entry.key);
entry.value.deinit(self.allocator);
}
self.toc.deinit(self.allocator);
+
+ if (self.name) |n| {
+ self.allocator.free(n);
+ }
}
-/// Caller owns the returned Archive instance and is responsible for calling
-/// `deinit` to free allocated memory.
-pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, ar_name: []const u8, file: fs.File) !Archive {
- var reader = file.reader();
- var magic = try readMagic(allocator, reader);
- defer allocator.free(magic);
-
- if (!mem.eql(u8, magic, ARMAG)) {
- // Reset file cursor.
- try file.seekTo(0);
- return error.NotArchive;
+pub fn closeFile(self: Archive) void {
+ if (self.file) |f| {
+ f.close();
}
+}
- const header = try reader.readStruct(ar_hdr);
+pub fn parse(self: *Archive) !void {
+ var reader = self.file.?.reader();
+ const magic = try reader.readBytesNoEof(SARMAG);
- if (!mem.eql(u8, &header.ar_fmag, ARFMAG))
+ if (!mem.eql(u8, &magic, ARMAG)) {
+ log.err("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic });
return error.MalformedArchive;
+ }
- var embedded_name = try getName(allocator, header, reader);
- log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, ar_name });
- defer allocator.free(embedded_name);
+ self.header = try reader.readStruct(ar_hdr);
- var name = try allocator.dupe(u8, ar_name);
- var self = Archive{
- .allocator = allocator,
- .file = file,
- .header = header,
- .name = name,
- };
+ if (!mem.eql(u8, &self.header.?.ar_fmag, ARFMAG)) {
+ log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, self.header.?.ar_fmag });
+ return error.MalformedArchive;
+ }
+
+ var embedded_name = try parseName(self.allocator, self.header.?, reader);
+ log.warn("parsing archive '{s}' at '{s}'", .{ embedded_name, self.name.? });
+ defer self.allocator.free(embedded_name);
try self.parseTableOfContents(reader);
- return self;
+ try reader.context.seekTo(0);
+}
+
+/// Returns the member name described by `header`.
+/// When the header carries the name inline it is duplicated as-is. When it
+/// carries a length instead, that many bytes are read from `reader` and the
+/// result is cut at the first NUL byte (or kept whole if there is none).
+/// Caller owns the returned slice and must free it with `allocator`.
+fn parseName(allocator: *Allocator, header: ar_hdr, reader: anytype) ![]u8 {
+    const name_or_length = try header.nameOrLength();
+    return switch (name_or_length) {
+        .Name => |inline_name| try allocator.dupe(u8, inline_name),
+        .Length => |len| blk: {
+            // Scratch buffer for the raw bytes that follow the header.
+            const raw = try allocator.alloc(u8, len);
+            defer allocator.free(raw);
+            try reader.readNoEof(raw);
+            const end = mem.indexOfScalar(u8, raw, @as(u8, 0)) orelse raw.len;
+            break :blk try allocator.dupe(u8, raw[0..end]);
+        },
+    };
}
fn parseTableOfContents(self: *Archive, reader: anytype) !void {
const symtab_size = try reader.readIntLittle(u32);
var symtab = try self.allocator.alloc(u8, symtab_size);
defer self.allocator.free(symtab);
- try reader.readNoEof(symtab);
+
+ reader.readNoEof(symtab) catch {
+ log.err("incomplete symbol table: expected symbol table of length 0x{x}", .{symtab_size});
+ return error.MalformedArchive;
+ };
const strtab_size = try reader.readIntLittle(u32);
var strtab = try self.allocator.alloc(u8, strtab_size);
defer self.allocator.free(strtab);
- try reader.readNoEof(strtab);
+
+ reader.readNoEof(strtab) catch {
+ log.err("incomplete symbol table: expected string table of length 0x{x}", .{strtab_size});
+ return error.MalformedArchive;
+ };
var symtab_stream = std.io.fixedBufferStream(symtab);
var symtab_reader = symtab_stream.reader();
@@ -169,85 +194,29 @@ fn parseTableOfContents(self: *Archive, reader: anytype) !void {
}
}
-fn readObject(self: *Archive, arch: std.Target.Cpu.Arch, ar_name: []const u8, reader: anytype) !void {
+/// Caller owns the Object instance.
+pub fn parseObject(self: Archive, offset: u32) !Object {
+ var reader = self.file.?.reader();
+ try reader.context.seekTo(offset);
+
const object_header = try reader.readStruct(ar_hdr);
- if (!mem.eql(u8, &object_header.ar_fmag, ARFMAG))
+ if (!mem.eql(u8, &object_header.ar_fmag, ARFMAG)) {
+ log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, object_header.ar_fmag });
return error.MalformedArchive;
-
- var object_name = try getName(self.allocator, object_header, reader);
- log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name });
-
- const offset = @intCast(u32, try reader.context.getPos());
- const header = try reader.readStruct(macho.mach_header_64);
-
- const this_arch: std.Target.Cpu.Arch = switch (header.cputype) {
- macho.CPU_TYPE_ARM64 => .aarch64,
- macho.CPU_TYPE_X86_64 => .x86_64,
- else => |value| {
- log.err("unsupported cpu architecture 0x{x}", .{value});
- return error.UnsupportedCpuArchitecture;
- },
- };
- if (this_arch != arch) {
- log.err("mismatched cpu architecture: found {s}, expected {s}", .{ this_arch, arch });
- return error.MismatchedCpuArchitecture;
}
- // TODO Implement std.fs.File.clone() or similar.
- var new_file = try fs.cwd().openFile(ar_name, .{});
- var object = Object{
- .allocator = self.allocator,
- .name = object_name,
- .ar_name = try mem.dupe(self.allocator, u8, ar_name),
- .file = new_file,
- .header = header,
- };
-
- try object.readLoadCommands(reader, .{ .offset = offset });
-
- if (object.symtab_cmd_index != null) {
- try object.readSymtab();
- try object.readStrtab();
- }
+ const object_name = try parseName(self.allocator, object_header, reader);
+ log.warn("extracting object '{s}' from archive '{s}'", .{ object_name, self.name.? });
- if (object.data_in_code_cmd_index != null) try object.readDataInCode();
-
- log.debug("\n\n", .{});
- log.debug("{s} defines symbols", .{object.name});
- for (object.symtab.items) |sym| {
- const symname = object.getString(sym.n_strx);
- log.debug("'{s}': {}", .{ symname, sym });
- }
-
- try self.objects.append(self.allocator, object);
-}
+ var object = Object.init(self.allocator);
+ object.arch = self.arch.?;
+ object.file = try fs.cwd().openFile(self.name.?, .{});
+ object.name = object_name;
+ object.file_offset = @intCast(u32, try reader.context.getPos());
+ try object.parse();
-fn readMagic(allocator: *Allocator, reader: anytype) ![]u8 {
- var magic = std.ArrayList(u8).init(allocator);
- try magic.ensureCapacity(SARMAG);
- var i: usize = 0;
- while (i < SARMAG) : (i += 1) {
- const next = try reader.readByte();
- magic.appendAssumeCapacity(next);
- }
- return magic.toOwnedSlice();
-}
+ try reader.context.seekTo(0);
-fn getName(allocator: *Allocator, header: ar_hdr, reader: anytype) ![]u8 {
- const name_or_length = try header.nameOrLength();
- var name: []u8 = undefined;
- switch (name_or_length) {
- .Name => |n| {
- name = try allocator.dupe(u8, n);
- },
- .Length => |len| {
- var n = try allocator.alloc(u8, len);
- defer allocator.free(n);
- try reader.readNoEof(n);
- const actual_len = mem.indexOfScalar(u8, n, @as(u8, 0)) orelse n.len;
- name = try allocator.dupe(u8, n[0..actual_len]);
- },
- }
- return name;
+ return object;
}
src/link/MachO/Object.zig
@@ -15,11 +15,11 @@ const parseName = @import("Zld.zig").parseName;
usingnamespace @import("commands.zig");
allocator: *Allocator,
-file: fs.File,
-name: []u8,
-ar_name: ?[]u8 = null,
-
-header: macho.mach_header_64,
+arch: ?std.Target.Cpu.Arch = null,
+header: ?macho.mach_header_64 = null,
+file: ?fs.File = null,
+file_offset: ?u32 = null,
+name: ?[]u8 = null,
load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
@@ -42,6 +42,12 @@ strtab: std.ArrayListUnmanaged(u8) = .{},
data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
+/// Creates an empty, not-yet-parsed Object that will allocate through
+/// `allocator`. Only the allocator is set here; every other field keeps
+/// its declared default.
+pub fn init(allocator: *Allocator) Object {
+    return Object{ .allocator = allocator };
+}
+
pub fn deinit(self: *Object) void {
for (self.load_commands.items) |*lc| {
lc.deinit(self.allocator);
@@ -50,25 +56,32 @@ pub fn deinit(self: *Object) void {
self.symtab.deinit(self.allocator);
self.strtab.deinit(self.allocator);
self.data_in_code_entries.deinit(self.allocator);
- self.allocator.free(self.name);
- if (self.ar_name) |v| {
- self.allocator.free(v);
+
+ if (self.name) |n| {
+ self.allocator.free(n);
}
}
-/// Caller owns the returned Object instance and is responsible for calling
-/// `deinit` to free allocated memory.
-pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, name: []const u8, file: fs.File) !Object {
- var reader = file.reader();
- const header = try reader.readStruct(macho.mach_header_64);
+/// Closes the file handle stored in `self.file`, if one has been assigned.
+/// Does nothing when `self.file` is null.
+pub fn closeFile(self: Object) void {
+    const handle = self.file orelse return;
+    handle.close();
+}
- if (header.filetype != macho.MH_OBJECT) {
- // Reset file cursor.
- try file.seekTo(0);
- return error.NotObject;
+pub fn parse(self: *Object) !void {
+ var reader = self.file.?.reader();
+ if (self.file_offset) |offset| {
+ try reader.context.seekTo(offset);
}
- const this_arch: std.Target.Cpu.Arch = switch (header.cputype) {
+ self.header = try reader.readStruct(macho.mach_header_64);
+
+ if (self.header.?.filetype != macho.MH_OBJECT) {
+ log.err("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_OBJECT, self.header.?.filetype });
+ return error.MalformedObject;
+ }
+
+ const this_arch: std.Target.Cpu.Arch = switch (self.header.?.cputype) {
macho.CPU_TYPE_ARM64 => .aarch64,
macho.CPU_TYPE_X86_64 => .x86_64,
else => |value| {
@@ -76,35 +89,22 @@ pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, name: []co
return error.UnsupportedCpuArchitecture;
},
};
- if (this_arch != arch) {
- log.err("mismatched cpu architecture: found {s}, expected {s}", .{ this_arch, arch });
+ if (this_arch != self.arch.?) {
+ log.err("mismatched cpu architecture: expected {s}, found {s}", .{ self.arch.?, this_arch });
return error.MismatchedCpuArchitecture;
}
- var self = Object{
- .allocator = allocator,
- .name = try allocator.dupe(u8, name),
- .file = file,
- .header = header,
- };
-
- try self.readLoadCommands(reader, .{});
+ try self.readLoadCommands(reader);
if (self.symtab_cmd_index != null) try self.parseSymtab();
if (self.data_in_code_cmd_index != null) try self.readDataInCode();
-
- return self;
}
-pub const ReadOffset = struct {
- offset: ?u32 = null,
-};
-
-pub fn readLoadCommands(self: *Object, reader: anytype, offset: ReadOffset) !void {
- const offset_mod = offset.offset orelse 0;
- try self.load_commands.ensureCapacity(self.allocator, self.header.ncmds);
+pub fn readLoadCommands(self: *Object, reader: anytype) !void {
+ const offset = self.file_offset orelse 0;
+ try self.load_commands.ensureCapacity(self.allocator, self.header.?.ncmds);
var i: u16 = 0;
- while (i < self.header.ncmds) : (i += 1) {
+ while (i < self.header.?.ncmds) : (i += 1) {
var cmd = try LoadCommand.read(self.allocator, reader);
switch (cmd.cmd()) {
macho.LC_SEGMENT_64 => {
@@ -132,17 +132,18 @@ pub fn readLoadCommands(self: *Object, reader: anytype, offset: ReadOffset) !voi
}
}
- sect.offset += offset_mod;
- if (sect.reloff > 0)
- sect.reloff += offset_mod;
+ sect.offset += offset;
+ if (sect.reloff > 0) {
+ sect.reloff += offset;
+ }
}
- seg.inner.fileoff += offset_mod;
+ seg.inner.fileoff += offset;
},
macho.LC_SYMTAB => {
self.symtab_cmd_index = i;
- cmd.Symtab.symoff += offset_mod;
- cmd.Symtab.stroff += offset_mod;
+ cmd.Symtab.symoff += offset;
+ cmd.Symtab.stroff += offset;
},
macho.LC_DYSYMTAB => {
self.dysymtab_cmd_index = i;
@@ -152,7 +153,7 @@ pub fn readLoadCommands(self: *Object, reader: anytype, offset: ReadOffset) !voi
},
macho.LC_DATA_IN_CODE => {
self.data_in_code_cmd_index = i;
- cmd.LinkeditData.dataoff += offset_mod;
+ cmd.LinkeditData.dataoff += offset;
},
else => {
log.debug("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
@@ -168,7 +169,7 @@ pub fn parseSymtab(self: *Object) !void {
var symtab = try self.allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms);
defer self.allocator.free(symtab);
- _ = try self.file.preadAll(symtab, symtab_cmd.symoff);
+ _ = try self.file.?.preadAll(symtab, symtab_cmd.symoff);
try self.symtab.ensureCapacity(self.allocator, symtab_cmd.nsyms);
var stream = std.io.fixedBufferStream(symtab);
@@ -187,7 +188,7 @@ pub fn parseSymtab(self: *Object) !void {
var strtab = try self.allocator.alloc(u8, symtab_cmd.strsize);
defer self.allocator.free(strtab);
- _ = try self.file.preadAll(strtab, symtab_cmd.stroff);
+ _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff);
try self.strtab.appendSlice(self.allocator, strtab);
}
@@ -200,7 +201,7 @@ pub fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
const sect = seg.sections.items[index];
var buffer = try allocator.alloc(u8, sect.size);
- _ = try self.file.preadAll(buffer, sect.offset);
+ _ = try self.file.?.preadAll(buffer, sect.offset);
return buffer;
}
@@ -211,7 +212,7 @@ pub fn readDataInCode(self: *Object) !void {
var buffer = try self.allocator.alloc(u8, data_in_code.datasize);
defer self.allocator.free(buffer);
- _ = try self.file.preadAll(buffer, data_in_code.dataoff);
+ _ = try self.file.?.preadAll(buffer, data_in_code.dataoff);
var stream = io.fixedBufferStream(buffer);
var reader = stream.reader();
src/link/MachO/Zld.zig
@@ -225,14 +225,14 @@ pub fn deinit(self: *Zld) void {
self.undefs.deinit(self.allocator);
}
-pub fn closeFiles(self: *Zld) void {
- for (self.objects.items) |*object| {
- object.file.close();
+pub fn closeFiles(self: Zld) void {
+ for (self.objects.items) |object| {
+ object.closeFile();
}
- for (self.archives.items) |*archive| {
- archive.file.close();
+ for (self.archives.items) |archive| {
+ archive.closeFile();
}
- if (self.file) |*f| f.close();
+ if (self.file) |f| f.close();
}
pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8) !void {
@@ -272,6 +272,7 @@ pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8) !void {
try self.populateMetadata();
try self.parseInputFiles(files);
+ try self.resolveSymbols();
self.printSymtab();
// try self.sortSections();
// try self.allocateTextSegment();
@@ -284,31 +285,76 @@ pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8) !void {
}
+/// Opens each path in `files` and classifies it as a Mach-O object or a
+/// static archive by peeking at its leading bytes, then parses the classified
+/// inputs into `self.objects` and `self.archives`. An input that is neither
+/// is closed and skipped with a warning.
fn parseInputFiles(self: *Zld, files: []const []const u8) !void {
+    const Input = struct {
+        kind: enum {
+            object,
+            archive,
+        },
+        file: fs.File,
+        name: []const u8,
+    };
+    var classified = std.ArrayList(Input).init(self.allocator);
+    defer classified.deinit();
+
+    // First, classify input files as either object or archive.
    for (files) |file_name| {
        const file = try fs.cwd().openFile(file_name, .{});
        try_object: {
-            var object = Object.initFromFile(self.allocator, self.arch.?, file_name, file) catch |err| switch (err) {
-                error.NotObject => break :try_object,
-                else => |e| return e,
-            };
-            const index = @intCast(u16, self.objects.items.len);
-            try self.objects.append(self.allocator, object);
-            try self.resolveSymbols(index);
+            const header = try file.reader().readStruct(macho.mach_header_64);
+            if (header.filetype != macho.MH_OBJECT) {
+                // Rewind so the archive probe below starts at byte 0.
+                try file.seekTo(0);
+                break :try_object;
+            }
+
+            try file.seekTo(0);
+            try classified.append(.{
+                .kind = .object,
+                .file = file,
+                .name = file_name,
+            });
            continue;
        }
        try_archive: {
-            var archive = Archive.initFromFile(self.allocator, self.arch.?, file_name, file) catch |err| switch (err) {
-                error.NotArchive => break :try_archive,
-                else => |e| return e,
-            };
-            try self.archives.append(self.allocator, archive);
+            const magic = try file.reader().readBytesNoEof(Archive.SARMAG);
+            if (!mem.eql(u8, &magic, Archive.ARMAG)) {
+                try file.seekTo(0);
+                break :try_archive;
+            }
+
+            try file.seekTo(0);
+            try classified.append(.{
+                .kind = .archive,
+                .file = file,
+                .name = file_name,
+            });
            continue;
        }
-        log.err("unexpected file type: expected object '.o' or archive '.a': {s}", .{file_name});
-        return error.UnexpectedInputFileType;
+        // Neither an object nor an archive: no Object/Archive will take
+        // ownership of this handle, so close it here instead of leaking it.
+        file.close();
+        log.warn("unexpected input file of unknown type '{s}'", .{file_name});
+    }
+
+    // Based on our classification, proceed with parsing.
+    for (classified.items) |input| {
+        switch (input.kind) {
+            .object => {
+                var object = Object.init(self.allocator);
+                object.arch = self.arch.?;
+                object.name = try self.allocator.dupe(u8, input.name);
+                object.file = input.file;
+                try object.parse();
+                try self.objects.append(self.allocator, object);
+            },
+            .archive => {
+                var archive = Archive.init(self.allocator);
+                archive.arch = self.arch.?;
+                archive.name = try self.allocator.dupe(u8, input.name);
+                archive.file = input.file;
+                try archive.parse();
+                try self.archives.append(self.allocator, archive);
+            },
+        }
    }
}
@@ -1153,7 +1199,7 @@ fn writeStubInStubHelper(self: *Zld, index: u32) !void {
try self.file.?.pwriteAll(code, stub_off);
}
-fn resolveSymbols(self: *Zld, object_id: u16) !void {
+fn resolveSymbolsInObject(self: *Zld, object_id: u16) !void {
const object = self.objects.items[object_id];
log.warn("resolving symbols in '{s}'", .{object.name});
@@ -1200,6 +1246,47 @@ fn resolveSymbols(self: *Zld, object_id: u16) !void {
}
}
+/// Resolves symbols for the whole link.
+/// First, every object already in `self.objects` is processed with
+/// `resolveSymbolsInObject`. Then each archive in `self.archives` is scanned
+/// in order: while its table of contents lists a name that is currently in
+/// `self.undefs`, the defining member is extracted with `parseObject`,
+/// appended to `self.objects`, and resolved in turn. With no archives the
+/// second phase is skipped entirely.
+fn resolveSymbols(self: *Zld) !void {
+    // First pass, resolve symbols in provided objects.
+    for (self.objects.items) |_, object_id| {
+        try self.resolveSymbolsInObject(@intCast(u16, object_id));
+    }
+
+    // Second pass, pull in archive members that define still-undefined names.
+    // The bounds check lives in the loop condition so that an empty archive
+    // list never indexes `self.archives.items`.
+    var next: usize = 0;
+    while (next < self.archives.items.len) {
+        const archive = &self.archives.items[next];
+        var hit = false;
+
+        for (self.undefs.items()) |entry| {
+            const sym_name = entry.key;
+
+            // Check if the entry exists in a static archive.
+            const offsets = archive.toc.get(sym_name) orelse {
+                // No hit.
+                continue;
+            };
+            assert(offsets.items.len > 0);
+
+            const object = try archive.parseObject(offsets.items[0]);
+            const object_id = @intCast(u16, self.objects.items.len);
+            try self.objects.append(self.allocator, object);
+            // NOTE(review): presumably this removes `sym_name` from
+            // `self.undefs`; if it does not, the same member would be
+            // extracted again on the next scan — confirm.
+            try self.resolveSymbolsInObject(object_id);
+
+            // Stop iterating `undefs` right after resolving a new object and
+            // rescan this archive from the top.
+            hit = true;
+            break;
+        }
+
+        // Only move on once nothing undefined is found in this archive.
+        if (!hit) next += 1;
+    }
+}
+
fn doRelocs(self: *Zld) !void {
for (self.objects.items) |object, object_id| {
log.debug("\n\n", .{});