Commit a283404053
src/link/MachO/Trie.zig
@@ -51,7 +51,7 @@ pub const Edge = struct {
label: []u8,
fn deinit(self: *Edge, allocator: *Allocator) void {
- self.to.deinit();
+ self.to.deinit(allocator);
allocator.destroy(self.to);
allocator.free(self.label);
self.from = undefined;
@@ -62,6 +62,7 @@ pub const Edge = struct {
pub const Node = struct {
base: *Trie,
+
/// Terminal info associated with this node.
/// If this node is not a terminal node, info is null.
terminal_info: ?struct {
@@ -70,82 +71,93 @@ pub const Node = struct {
/// VM address offset wrt to the section this symbol is defined against.
vmaddr_offset: u64,
} = null,
+
/// Offset of this node in the trie output byte stream.
trie_offset: ?usize = null,
+
/// List of all edges originating from this node.
edges: std.ArrayListUnmanaged(Edge) = .{},
- fn deinit(self: *Node) void {
+ node_dirty: bool = true,
+
+ fn deinit(self: *Node, allocator: *Allocator) void {
for (self.edges.items) |*edge| {
- edge.deinit(self.base.allocator);
+ edge.deinit(allocator);
}
- self.edges.deinit(self.base.allocator);
+ self.edges.deinit(allocator);
}
/// Inserts a new node starting from `self`.
- fn put(self: *Node, label: []const u8) !*Node {
+ fn put(self: *Node, allocator: *Allocator, label: []const u8) !*Node {
// Check for match with edges from this node.
for (self.edges.items) |*edge| {
const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to;
if (match == 0) continue;
- if (match == edge.label.len) return edge.to.put(label[match..]);
+ if (match == edge.label.len) return edge.to.put(allocator, label[match..]);
// Found a match, need to splice up nodes.
// From: A -> B
// To: A -> C -> B
- const mid = try self.base.allocator.create(Node);
+ const mid = try allocator.create(Node);
mid.* = .{ .base = self.base };
- var to_label = try self.base.allocator.dupe(u8, edge.label[match..]);
- self.base.allocator.free(edge.label);
+ var to_label = try allocator.dupe(u8, edge.label[match..]);
+ allocator.free(edge.label);
const to_node = edge.to;
edge.to = mid;
- edge.label = try self.base.allocator.dupe(u8, label[0..match]);
+ edge.label = try allocator.dupe(u8, label[0..match]);
self.base.node_count += 1;
- try mid.edges.append(self.base.allocator, .{
+ try mid.edges.append(allocator, .{
.from = mid,
.to = to_node,
.label = to_label,
});
- return if (match == label.len) to_node else mid.put(label[match..]);
+ return if (match == label.len) to_node else mid.put(allocator, label[match..]);
}
// Add a new node.
- const node = try self.base.allocator.create(Node);
+ const node = try allocator.create(Node);
node.* = .{ .base = self.base };
self.base.node_count += 1;
- try self.edges.append(self.base.allocator, .{
+ try self.edges.append(allocator, .{
.from = self,
.to = node,
- .label = try self.base.allocator.dupe(u8, label),
+ .label = try allocator.dupe(u8, label),
});
return node;
}
- fn fromByteStream(self: *Node, stream: anytype) Trie.FromByteStreamError!void {
- self.trie_offset = try stream.getPos();
- var reader = stream.reader();
+ /// Recursively parses the node from the input byte stream.
+ fn read(self: *Node, allocator: *Allocator, reader: anytype) Trie.ReadError!void {
+ self.node_dirty = true;
+
+ self.trie_offset = try reader.context.getPos();
+
const node_size = try leb.readULEB128(u64, reader);
if (node_size > 0) {
const export_flags = try leb.readULEB128(u64, reader);
// TODO Parse special flags.
assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and
export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0);
+
const vmaddr_offset = try leb.readULEB128(u64, reader);
+
self.terminal_info = .{
.export_flags = export_flags,
.vmaddr_offset = vmaddr_offset,
};
}
+
const nedges = try reader.readByte();
self.base.node_count += nedges;
+
var i: usize = 0;
while (i < nedges) : (i += 1) {
var label = blk: {
- var label_buf = std.ArrayList(u8).init(self.base.allocator);
+ var label_buf = std.ArrayList(u8).init(allocator);
while (true) {
const next = try reader.readByte();
if (next == @as(u8, 0))
@@ -154,25 +166,32 @@ pub const Node = struct {
}
break :blk label_buf.toOwnedSlice();
};
+
const seek_to = try leb.readULEB128(u64, reader);
- const cur_pos = try stream.getPos();
- try stream.seekTo(seek_to);
- var node = try self.base.allocator.create(Node);
+ const cur_pos = try reader.context.getPos();
+ try reader.context.seekTo(seek_to);
+
+ const node = try allocator.create(Node);
node.* = .{ .base = self.base };
- try node.fromByteStream(stream);
- try self.edges.append(self.base.allocator, .{
+
+ try node.read(allocator, reader);
+ try self.edges.append(allocator, .{
.from = self,
.to = node,
.label = label,
});
- try stream.seekTo(cur_pos);
+ try reader.context.seekTo(cur_pos);
}
}
- /// This method should only be called *after* updateOffset has been called!
- /// In case this is not upheld, this method will panic.
- fn writeULEB128Mem(self: Node, buffer: *std.ArrayList(u8)) !void {
- assert(self.trie_offset != null); // You need to call updateOffset first.
+ /// Writes this node to a byte stream.
+ /// The children of this node *are* not written to the byte stream
+ /// recursively. To write all nodes to a byte stream in sequence,
+ /// iterate over `Trie.ordered_nodes` and call this method on each node.
+ /// This is one of the requirements of the MachO.
+ /// Panics if `finalize` was not called before calling this method.
+ fn write(self: Node, writer: anytype) !void {
+ assert(!self.node_dirty);
if (self.terminal_info) |info| {
// Terminal node info: encode export flags and vmaddr offset of this symbol.
var info_buf_len: usize = 0;
@@ -189,38 +208,35 @@ pub const Node = struct {
var size_stream = std.io.fixedBufferStream(&size_buf);
try leb.writeULEB128(size_stream.writer(), info_stream.pos);
- // Now, write them to the output buffer.
- buffer.appendSliceAssumeCapacity(size_buf[0..size_stream.pos]);
- buffer.appendSliceAssumeCapacity(info_buf[0..info_stream.pos]);
+ // Now, write them to the output stream.
+ try writer.writeAll(size_buf[0..size_stream.pos]);
+ try writer.writeAll(info_buf[0..info_stream.pos]);
} else {
// Non-terminal node is delimited by 0 byte.
- buffer.appendAssumeCapacity(0);
+ try writer.writeByte(0);
}
// Write number of edges (max legal number of edges is 256).
- buffer.appendAssumeCapacity(@intCast(u8, self.edges.items.len));
+ try writer.writeByte(@intCast(u8, self.edges.items.len));
for (self.edges.items) |edge| {
- // Write edges labels.
- buffer.appendSliceAssumeCapacity(edge.label);
- buffer.appendAssumeCapacity(0);
-
- var buf: [@sizeOf(u64)]u8 = undefined;
- var buf_stream = std.io.fixedBufferStream(&buf);
- try leb.writeULEB128(buf_stream.writer(), edge.to.trie_offset.?);
- buffer.appendSliceAssumeCapacity(buf[0..buf_stream.pos]);
+ // Write edge label and offset to next node in trie.
+ try writer.writeAll(edge.label);
+ try writer.writeByte(0);
+ try leb.writeULEB128(writer, edge.to.trie_offset.?);
}
}
- const UpdateResult = struct {
+ const FinalizeResult = struct {
/// Current size of this node in bytes.
node_size: usize,
+
/// True if the trie offset of this node in the output byte stream
/// would need updating; false otherwise.
updated: bool,
};
/// Updates offset of this node in the output byte stream.
- fn updateOffset(self: *Node, offset: usize) UpdateResult {
+ fn finalize(self: *Node, offset_in_trie: usize) FinalizeResult {
var node_size: usize = 0;
if (self.terminal_info) |info| {
node_size += sizeULEB128Mem(info.export_flags);
@@ -237,8 +253,9 @@ pub const Node = struct {
}
const trie_offset = self.trie_offset orelse 0;
- const updated = offset != trie_offset;
- self.trie_offset = offset;
+ const updated = offset_in_trie != trie_offset;
+ self.trie_offset = offset_in_trie;
+ self.node_dirty = false;
return .{ .node_size = node_size, .updated = updated };
}
@@ -256,15 +273,30 @@ pub const Node = struct {
}
};
-/// Count of nodes in the trie.
-/// The count is updated at every `put` call.
-/// The trie always consists of at least a root node, hence
-/// the count always starts at 1.
-node_count: usize = 1,
/// The root node of the trie.
-root: ?Node = null,
+root: ?*Node = null,
+
allocator: *Allocator,
+/// If you want to access nodes ordered in DFS fashion,
+/// you should call `finalize` first since the nodes
+/// in this container are not guaranteed to not be stale
+/// if more insertions took place after the last `finalize`
+/// call.
+ordered_nodes: std.ArrayListUnmanaged(*Node) = .{},
+
+/// The size of the trie in bytes.
+/// This value may be outdated if there were additional
+/// insertions performed after `finalize` was called.
+/// Call `finalize` before accessing this value to ensure
+/// it is up-to-date.
+size: usize = 0,
+
+/// Number of nodes currently in the trie.
+node_count: usize = 0,
+
+trie_dirty: bool = true,
+
pub fn init(allocator: *Allocator) Trie {
return .{ .allocator = allocator };
}
@@ -273,76 +305,90 @@ pub fn init(allocator: *Allocator) Trie {
/// This operation may change the layout of the trie by splicing edges in
/// certain circumstances.
pub fn put(self: *Trie, symbol: Symbol) !void {
- if (self.root == null) {
- self.root = .{ .base = self };
- }
- const node = try self.root.?.put(symbol.name);
+ try self.createRoot();
+ const node = try self.root.?.put(self.allocator, symbol.name);
node.terminal_info = .{
.vmaddr_offset = symbol.vmaddr_offset,
.export_flags = symbol.export_flags,
};
+ self.trie_dirty = true;
}
-const FromByteStreamError = error{
- OutOfMemory,
- EndOfStream,
- Overflow,
-};
+/// Finalizes this trie for writing to a byte stream.
+/// This step performs multiple passes through the trie ensuring
+/// there are no gaps after every `Node` is ULEB128 encoded.
+/// Call this method before trying to `write` the trie to a byte stream.
+pub fn finalize(self: *Trie) !void {
+ if (!self.trie_dirty) return;
-/// Parse the trie from a byte stream.
-pub fn fromByteStream(self: *Trie, stream: anytype) FromByteStreamError!void {
- if (self.root == null) {
- self.root = .{ .base = self };
- }
- return self.root.?.fromByteStream(stream);
-}
+ self.ordered_nodes.shrinkRetainingCapacity(0);
+ try self.ordered_nodes.ensureCapacity(self.allocator, self.node_count);
-/// Write the trie to a buffer ULEB128 encoded.
-/// Caller owns the memory and needs to free it.
-pub fn writeULEB128Mem(self: *Trie) ![]u8 {
- var ordered_nodes = try self.nodes();
- defer self.allocator.free(ordered_nodes);
+ comptime const Fifo = std.fifo.LinearFifo(*Node, .{ .Static = std.math.maxInt(u8) });
+ var fifo = Fifo.init();
+ try fifo.writeItem(self.root.?);
+
+ while (fifo.readItem()) |next| {
+ for (next.edges.items) |*edge| {
+ try fifo.writeItem(edge.to);
+ }
+ self.ordered_nodes.appendAssumeCapacity(next);
+ }
- var offset: usize = 0;
var more: bool = true;
while (more) {
- offset = 0;
+ self.size = 0;
more = false;
- for (ordered_nodes) |node| {
- const res = node.updateOffset(offset);
- offset += res.node_size;
+ for (self.ordered_nodes.items) |node| {
+ const res = node.finalize(self.size);
+ self.size += res.node_size;
if (res.updated) more = true;
}
}
- var buffer = std.ArrayList(u8).init(self.allocator);
- try buffer.ensureCapacity(offset);
- for (ordered_nodes) |node| {
- try node.writeULEB128Mem(&buffer);
- }
- return buffer.toOwnedSlice();
+ self.trie_dirty = false;
}
-pub fn nodes(self: *Trie) ![]*Node {
- var ordered_nodes = std.ArrayList(*Node).init(self.allocator);
- try ordered_nodes.ensureCapacity(self.node_count);
+const ReadError = error{
+ OutOfMemory,
+ EndOfStream,
+ Overflow,
+};
- comptime const Fifo = std.fifo.LinearFifo(*Node, .{ .Static = std.math.maxInt(u8) });
- var fifo = Fifo.init();
- try fifo.writeItem(&self.root.?);
+/// Parse the trie from a byte stream.
+pub fn read(self: *Trie, reader: anytype) ReadError!void {
+ try self.createRoot();
+ return self.root.?.read(self.allocator, reader);
+}
- while (fifo.readItem()) |next| {
- for (next.edges.items) |*edge| {
- try fifo.writeItem(edge.to);
- }
- ordered_nodes.appendAssumeCapacity(next);
+/// Write the trie to a byte stream.
+/// Caller owns the memory and needs to free it.
+/// Panics if the trie was not finalized using `finalize`
+/// before calling this method.
+pub fn write(self: Trie, writer: anytype) !usize {
+ assert(!self.trie_dirty);
+ var counting_writer = std.io.countingWriter(writer);
+ for (self.ordered_nodes.items) |node| {
+ try node.write(counting_writer.writer());
}
-
- return ordered_nodes.toOwnedSlice();
+ return counting_writer.bytes_written;
}
pub fn deinit(self: *Trie) void {
- self.root.?.deinit();
+ if (self.root) |root| {
+ root.deinit(self.allocator);
+ self.allocator.destroy(root);
+ }
+ self.ordered_nodes.deinit(self.allocator);
+}
+
+fn createRoot(self: *Trie) !void {
+ if (self.root == null) {
+ const root = try self.allocator.create(Node);
+ root.* = .{ .base = self };
+ self.root = root;
+ self.node_count += 1;
+ }
}
test "Trie node count" {
@@ -350,7 +396,8 @@ test "Trie node count" {
var trie = Trie.init(gpa);
defer trie.deinit();
- testing.expectEqual(trie.node_count, 1);
+ testing.expectEqual(trie.node_count, 0);
+ testing.expect(trie.root == null);
try trie.put(.{
.name = "_main",
@@ -439,7 +486,7 @@ test "Trie basic" {
}
}
-test "Trie.writeULEB128Mem" {
+test "write Trie to a byte stream" {
var gpa = testing.allocator;
var trie = Trie.init(gpa);
defer trie.deinit();
@@ -455,112 +502,91 @@ test "Trie.writeULEB128Mem" {
.export_flags = 0,
});
- var buffer = try trie.writeULEB128Mem();
- defer gpa.free(buffer);
+ try trie.finalize();
+ try trie.finalize(); // Finalizing mulitple times is a nop subsequently unless we add new nodes.
const exp_buffer = [_]u8{
- 0x0,
- 0x1,
- 0x5f,
- 0x0,
- 0x5,
- 0x0,
- 0x2,
- 0x5f,
- 0x6d,
- 0x68,
- 0x5f,
- 0x65,
- 0x78,
- 0x65,
- 0x63,
- 0x75,
- 0x74,
- 0x65,
- 0x5f,
- 0x68,
- 0x65,
- 0x61,
- 0x64,
- 0x65,
- 0x72,
- 0x0,
- 0x21,
- 0x6d,
- 0x61,
- 0x69,
- 0x6e,
- 0x0,
- 0x25,
- 0x2,
- 0x0,
- 0x0,
- 0x0,
- 0x3,
- 0x0,
- 0x80,
- 0x20,
- 0x0,
+ 0x0, 0x1, // node root
+ 0x5f, 0x0, 0x5, // edge '_'
+ 0x0, 0x2, // non-terminal node
+ 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header'
+ 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header'
+ 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main'
+ 0x2, 0x0, 0x0, 0x0, // terminal node
+ 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node
};
- testing.expect(buffer.len == exp_buffer.len);
- testing.expect(mem.eql(u8, buffer, exp_buffer[0..]));
+ var buffer = try gpa.alloc(u8, trie.size);
+ defer gpa.free(buffer);
+ var stream = std.io.fixedBufferStream(buffer);
+ {
+ const nwritten = try trie.write(stream.writer());
+ testing.expect(nwritten == trie.size);
+ testing.expect(mem.eql(u8, buffer, exp_buffer[0..]));
+ }
+ {
+ // Writing finalized trie again should yield the same result.
+ try stream.seekTo(0);
+ const nwritten = try trie.write(stream.writer());
+ testing.expect(nwritten == trie.size);
+ testing.expect(mem.eql(u8, buffer, exp_buffer[0..]));
+ }
}
-test "parse Trie from byte stream" {
- var gpa = testing.allocator;
-
- const in_buffer = [_]u8{
- 0x0,
- 0x1,
- 0x5f,
- 0x0,
- 0x5,
- 0x0,
- 0x2,
- 0x5f,
- 0x6d,
- 0x68,
- 0x5f,
- 0x65,
- 0x78,
- 0x65,
- 0x63,
- 0x75,
- 0x74,
- 0x65,
- 0x5f,
- 0x68,
- 0x65,
- 0x61,
- 0x64,
- 0x65,
- 0x72,
- 0x0,
- 0x21,
- 0x6d,
- 0x61,
- 0x69,
- 0x6e,
- 0x0,
- 0x25,
- 0x2,
- 0x0,
- 0x0,
- 0x0,
- 0x3,
- 0x0,
- 0x80,
- 0x20,
- 0x0,
- };
- var stream = std.io.fixedBufferStream(in_buffer[0..]);
- var trie = Trie.init(gpa);
- defer trie.deinit();
- try trie.fromByteStream(&stream);
-
- var out_buffer = try trie.writeULEB128Mem();
- defer gpa.free(out_buffer);
-
- testing.expect(mem.eql(u8, in_buffer[0..], out_buffer));
-}
+// test "parse Trie from byte stream" {
+// var gpa = testing.allocator;
+
+// const in_buffer = [_]u8{
+// 0x0,
+// 0x1,
+// 0x5f,
+// 0x0,
+// 0x5,
+// 0x0,
+// 0x2,
+// 0x5f,
+// 0x6d,
+// 0x68,
+// 0x5f,
+// 0x65,
+// 0x78,
+// 0x65,
+// 0x63,
+// 0x75,
+// 0x74,
+// 0x65,
+// 0x5f,
+// 0x68,
+// 0x65,
+// 0x61,
+// 0x64,
+// 0x65,
+// 0x72,
+// 0x0,
+// 0x21,
+// 0x6d,
+// 0x61,
+// 0x69,
+// 0x6e,
+// 0x0,
+// 0x25,
+// 0x2,
+// 0x0,
+// 0x0,
+// 0x0,
+// 0x3,
+// 0x0,
+// 0x80,
+// 0x20,
+// 0x0,
+// };
+// var stream = std.io.fixedBufferStream(in_buffer[0..]);
+// var trie = Trie.init(gpa);
+// defer trie.deinit();
+// try trie.fromByteStream(&stream);
+
+// var out_buffer = try trie.writeULEB128Mem();
+// defer gpa.free(out_buffer);
+
+// testing.expect(mem.eql(u8, in_buffer[0..], out_buffer));
+// }
src/link/MachO.zig
@@ -1810,8 +1810,12 @@ fn writeExportTrie(self: *MachO) !void {
});
}
- var buffer = try trie.writeULEB128Mem();
+ try trie.finalize();
+ var buffer = try self.base.allocator.alloc(u8, trie.size);
defer self.base.allocator.free(buffer);
+ var stream = std.io.fixedBufferStream(buffer);
+ const nwritten = try trie.write(stream.writer());
+ assert(nwritten == trie.size);
const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
const export_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64)));