  1//! Represents export trie used in MachO executables and dynamic libraries.
  2//! The purpose of an export trie is to encode as compactly as possible all
  3//! export symbols for the loader `dyld`.
  4//! The export trie encodes offset and other information using ULEB128
  5//! encoding, and is part of the __LINKEDIT segment.
  6//!
  7//! Description from loader.h:
  8//!
  9//! The symbols exported by a dylib are encoded in a trie. This is a compact
 10//! representation that factors out common prefixes. It also reduces LINKEDIT pages
 11//! in RAM because it encodes all information (name, address, flags) in one small,
 12//! contiguous range. The export area is a stream of nodes. The first node sequentially
 13//! is the start node for the trie.
 14//!
 15//! Nodes for a symbol start with a uleb128 that is the length of the exported symbol
 16//! information for the string so far. If there is no exported symbol, the node starts
 17//! with a zero byte. If there is exported info, it follows the length.
 18//!
 19//! First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded
 20//! offset which is location of the content named by the symbol from the mach_header
 21//! for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags
 22//! is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string
 23//! is zero length, then the symbol is re-export from the specified dylib with the same name.
 24//! If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two
 25//! uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers.
 26//! The resolver is used by lazy pointers and must be called to get the actual address to use.
 27//!
 28//! After the optional exported symbol information is a byte of how many edges (0-255) that
 29//! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of
 30//! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to.
 31
/// The root node of the trie. Null until `init` is called.
root: ?Node.Index = null,
/// Finalized, ULEB128-encoded trie bytes; populated by `finalize`.
buffer: std.ArrayList(u8) = .empty,
/// Backing storage for all nodes, indexed by `Node.Index`.
nodes: std.MultiArrayList(Node) = .{},
/// Backing storage for all edges, indexed by `Edge.Index`.
edges: std.ArrayList(Edge) = .empty,
 37
 38/// Insert a symbol into the trie, updating the prefixes in the process.
 39/// This operation may change the layout of the trie by splicing edges in
 40/// certain circumstances.
 41fn put(self: *Trie, allocator: Allocator, symbol: ExportSymbol) !void {
 42    // const tracy = trace(@src());
 43    // defer tracy.end();
 44
 45    const node_index = try self.putNode(self.root.?, allocator, symbol.name);
 46    const slice = self.nodes.slice();
 47    slice.items(.is_terminal)[node_index] = true;
 48    slice.items(.vmaddr_offset)[node_index] = symbol.vmaddr_offset;
 49    slice.items(.export_flags)[node_index] = symbol.export_flags;
 50}
 51
/// Inserts a new node starting at `node_index`.
/// Returns the index of the node at which `label` terminates, creating
/// and/or splicing intermediate nodes so common prefixes are shared.
fn putNode(self: *Trie, node_index: Node.Index, allocator: Allocator, label: []const u8) !Node.Index {
    // Check for match with edges from this node.
    for (self.nodes.items(.edges)[node_index].items) |edge_index| {
        const edge = &self.edges.items[edge_index];
        // `indexOfDiff` returns null when both strings are identical, i.e.
        // `label` is already fully represented by this edge's target node.
        const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.node;
        if (match == 0) continue; // no shared prefix with this edge
        // Edge label is a strict prefix of `label`: descend with the remainder.
        if (match == edge.label.len) return self.putNode(edge.node, allocator, label[match..]);

        // Found a match, need to splice up nodes.
        // From: A -> B
        // To: A -> C -> B
        const mid_index = try self.addNode(allocator);
        const to_label = edge.label[match..];
        const to_node = edge.node;
        // Repoint the existing edge at the new mid node; it now carries only
        // the shared prefix.
        edge.node = mid_index;
        edge.label = label[0..match];

        // NOTE: `edge` must not be dereferenced past this point — `addEdge`
        // may grow `self.edges` and invalidate the pointer.
        const new_edge_index = try self.addEdge(allocator);
        const new_edge = &self.edges.items[new_edge_index];
        new_edge.node = to_node;
        new_edge.label = to_label;
        try self.nodes.items(.edges)[mid_index].append(allocator, new_edge_index);

        // If `label` was entirely consumed by the shared prefix, the mid node
        // terminates it; otherwise insert the remainder below the mid node.
        return if (match == label.len) mid_index else self.putNode(mid_index, allocator, label[match..]);
    }

    // Add a new node.
    // No edge shares a prefix with `label`: append a fresh node reached via
    // an edge carrying the entire label.
    const new_node_index = try self.addNode(allocator);
    const new_edge_index = try self.addEdge(allocator);
    const new_edge = &self.edges.items[new_edge_index];
    new_edge.node = new_node_index;
    new_edge.label = label;
    try self.nodes.items(.edges)[node_index].append(allocator, new_edge_index);

    return new_node_index;
}
 89
/// Builds the export trie from every live, exported symbol in `macho_file`,
/// finalizes it into `self.buffer`, and records the 8-byte-aligned encoded
/// size in `dyld_info_cmd.export_size`.
pub fn updateSize(self: *Trie, macho_file: *MachO) !void {
    const tracy = trace(@src());
    defer tracy.end();

    const gpa = macho_file.base.comp.gpa;

    try self.init(gpa);
    // Reserve twice the symbol count up front: prefix splicing can create
    // intermediate nodes/edges beyond one per symbol.
    try self.nodes.ensureUnusedCapacity(gpa, macho_file.resolver.values.items.len * 2);
    try self.edges.ensureUnusedCapacity(gpa, macho_file.resolver.values.items.len * 2);

    const seg = macho_file.getTextSegment();
    for (macho_file.resolver.values.items) |ref| {
        // Skip unresolved references, non-exported symbols, and symbols whose
        // atom has been garbage-collected.
        if (ref.getFile(macho_file) == null) continue;
        const sym = ref.getSymbol(macho_file).?;
        if (!sym.flags.@"export") continue;
        if (sym.getAtom(macho_file)) |atom| if (!atom.isAlive()) continue;
        // Translate the symbol kind into dyld export flags.
        var flags: u64 = if (sym.flags.abs)
            macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE
        else if (sym.flags.tlv)
            macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL
        else
            macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR;
        if (sym.flags.weak) {
            flags |= macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
            // NOTE(review): seq_cst stores — presumably these flags are read
            // from other threads; confirm against MachO's threading model.
            macho_file.weak_defines.store(true, .seq_cst);
            macho_file.binds_to_weak.store(true, .seq_cst);
        }
        try self.put(gpa, .{
            .name = sym.getName(macho_file),
            // Trie addresses are offsets from the start of the __TEXT segment.
            .vmaddr_offset = sym.getAddress(.{ .stubs = false }, macho_file) - seg.vmaddr,
            .export_flags = flags,
        });
    }

    try self.finalize(gpa);

    macho_file.dyld_info_cmd.export_size = mem.alignForward(u32, @intCast(self.buffer.items.len), @alignOf(u64));
}
128
/// Finalizes this trie for writing to a byte stream.
/// This step performs multiple passes through the trie ensuring
/// there are no gaps after every `Node` is ULEB128 encoded.
/// Call this method before trying to `write` the trie to a byte stream.
fn finalize(self: *Trie, allocator: Allocator) !void {
    const tracy = trace(@src());
    defer tracy.end();

    var ordered_nodes = std.array_list.Managed(Node.Index).init(allocator);
    defer ordered_nodes.deinit();
    // One slot per node; `is_terminal` is just a convenient column to get the
    // node count from the MultiArrayList.
    try ordered_nodes.ensureTotalCapacityPrecise(self.nodes.items(.is_terminal).len);

    {
        // Breadth-first traversal from the root, so parent nodes are laid out
        // before their children in the output stream.
        var fifo: std.ArrayList(Node.Index) = .empty;
        defer fifo.deinit(allocator);

        try fifo.append(allocator, self.root.?);

        var i: usize = 0;
        while (i < fifo.items.len) {
            const next_index = fifo.items[i];
            i += 1;
            const edges = &self.nodes.items(.edges)[next_index];
            for (edges.items) |edge_index| {
                const edge = self.edges.items[edge_index];
                try fifo.append(allocator, edge.node);
            }
            ordered_nodes.appendAssumeCapacity(next_index);
        }
    }

    // A node's encoded size depends on the ULEB128-encoded offsets of the
    // nodes its edges point to, which in turn depend on node sizes. Iterate
    // until a full pass updates no offset (fixed point).
    var more: bool = true;
    var size: u32 = 0;
    while (more) {
        size = 0;
        more = false;
        for (ordered_nodes.items) |node_index| {
            const res = try self.finalizeNode(node_index, size);
            size += res.node_size;
            if (res.updated) more = true;
        }
    }

    try self.buffer.ensureTotalCapacityPrecise(allocator, size);

    // Adopt `self.buffer` into an allocating writer for the encode, then hand
    // the (possibly reallocated) list back on scope exit.
    var allocating: std.Io.Writer.Allocating = .fromArrayList(allocator, &self.buffer);
    defer self.buffer = allocating.toArrayList();
    const writer = &allocating.writer;

    for (ordered_nodes.items) |node_index| {
        try self.writeNode(node_index, writer);
    }
}
182
/// Result of a single `finalizeNode` sizing pass.
const FinalizeNodeResult = struct {
    /// Current size of this node in bytes.
    node_size: u32,

    /// True if the trie offset of this node in the output byte stream
    /// would need updating; false otherwise. Any `true` forces the caller
    /// to run another fixed-point pass.
    updated: bool,
};
191
/// Updates offset of this node in the output byte stream.
/// Measures the node's current encoded size (using a discarding writer to
/// count ULEB128 bytes without materializing them) and records
/// `offset_in_trie` as the node's new trie offset.
fn finalizeNode(self: *Trie, node_index: Node.Index, offset_in_trie: u32) !FinalizeNodeResult {
    // Discarding writer: only the byte counts matter, not the bytes.
    var trash_buffer: [64]u8 = undefined;
    var stream: std.Io.Writer.Discarding = .init(&trash_buffer);
    const writer = &stream.writer;
    const slice = self.nodes.slice();

    var node_size: u32 = 0;
    if (slice.items(.is_terminal)[node_index]) {
        const export_flags = slice.items(.export_flags)[node_index];
        const vmaddr_offset = slice.items(.vmaddr_offset)[node_index];
        try writer.writeUleb128(export_flags);
        try writer.writeUleb128(vmaddr_offset);
        // The terminal info is prefixed by its own ULEB128-encoded length.
        // At this point `fullCount()` equals the length of the flags+offset
        // encoding, so ULEB128-encoding it accounts for that prefix's bytes.
        try writer.writeUleb128(stream.fullCount());
    } else {
        node_size += 1; // 0x0 for non-terminal nodes
    }
    node_size += 1; // 1 byte for edge count

    for (slice.items(.edges)[node_index].items) |edge_index| {
        const edge = &self.edges.items[edge_index];
        const next_node_offset = slice.items(.trie_offset)[edge.node];
        // Each edge contributes its NUL-terminated label plus the ULEB128
        // offset of the node it points to (counted via the writer below).
        node_size += @intCast(edge.label.len + 1);
        try writer.writeUleb128(next_node_offset);
    }

    const trie_offset = slice.items(.trie_offset)[node_index];
    // If the offset moved, the caller needs another fixed-point pass.
    const updated = offset_in_trie != trie_offset;
    slice.items(.trie_offset)[node_index] = offset_in_trie;
    // Add all ULEB128 bytes counted by the discarding writer.
    node_size += @intCast(stream.fullCount());

    return .{ .node_size = node_size, .updated = updated };
}
225
/// Allocates the root node. Must be called exactly once before inserting.
fn init(self: *Trie, allocator: Allocator) !void {
    // A trie may only be initialized once.
    assert(self.root == null);
    const root_index = try self.addNode(allocator);
    self.root = root_index;
}
230
/// Releases all memory owned by this trie.
pub fn deinit(self: *Trie, allocator: Allocator) void {
    // Each node owns a list of edge indices; free those first.
    const edge_lists = self.nodes.items(.edges);
    for (edge_lists) |*list| list.deinit(allocator);
    self.nodes.deinit(allocator);
    self.edges.deinit(allocator);
    self.buffer.deinit(allocator);
}
239
/// Writes the finalized trie bytes to `writer`. No-op if `finalize`
/// produced an empty buffer (or was never called).
pub fn write(self: Trie, writer: *std.Io.Writer) !void {
    const bytes = self.buffer.items;
    if (bytes.len > 0) try writer.writeAll(bytes);
}
244
/// Writes this node to a byte stream.
/// The children of this node *are* not written to the byte stream
/// recursively. To write all nodes to a byte stream in sequence,
/// iterate over `Trie.ordered_nodes` and call this method on each node.
/// This is one of the requirements of the MachO.
/// Panics if `finalize` was not called before calling this method.
fn writeNode(self: *Trie, node_index: Node.Index, writer: *std.Io.Writer) !void {
    const slice = self.nodes.slice();
    const edges = slice.items(.edges)[node_index];
    const is_terminal = slice.items(.is_terminal)[node_index];
    const export_flags = slice.items(.export_flags)[node_index];
    const vmaddr_offset = slice.items(.vmaddr_offset)[node_index];

    if (is_terminal) {
        // Terminal node info: encode export flags and vmaddr offset of this symbol.
        // Two u64 ULEB128 encodings fit comfortably in 2 * @sizeOf(u64) bytes.
        var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
        var info_stream: std.Io.Writer = .fixed(&info_buf);
        // TODO Implement for special flags.
        assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and
            export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0);
        try info_stream.writeUleb128(export_flags);
        try info_stream.writeUleb128(vmaddr_offset);

        // Encode the size of the terminal node info.
        var size_buf: [@sizeOf(u64)]u8 = undefined;
        var size_stream: std.Io.Writer = .fixed(&size_buf);
        try size_stream.writeUleb128(info_stream.end);

        // Now, write them to the output stream.
        // Order matters: size prefix first, then the terminal info itself.
        try writer.writeAll(size_buf[0..size_stream.end]);
        try writer.writeAll(info_buf[0..info_stream.end]);
    } else {
        // Non-terminal node is delimited by 0 byte.
        try writer.writeByte(0);
    }
    // Write number of edges (max legal number of edges is 256).
    try writer.writeByte(@as(u8, @intCast(edges.items.len)));

    for (edges.items) |edge_index| {
        const edge = self.edges.items[edge_index];
        // Write edge label and offset to next node in trie.
        // Offsets are the values computed by `finalizeNode` during `finalize`.
        try writer.writeAll(edge.label);
        try writer.writeByte(0);
        try writer.writeUleb128(slice.items(.trie_offset)[edge.node]);
    }
}
291
/// Appends a default-initialized node and returns its index.
fn addNode(self: *Trie, allocator: Allocator) !Node.Index {
    const new_index: Node.Index = @intCast(self.nodes.len);
    try self.nodes.append(allocator, .{});
    return new_index;
}
297
/// Appends a default-initialized edge and returns its index.
fn addEdge(self: *Trie, allocator: Allocator) !Edge.Index {
    const new_index: Edge.Index = @intCast(self.edges.items.len);
    try self.edges.append(allocator, .{});
    return new_index;
}
304
/// Export symbol that is to be placed in the trie.
/// Input to `put`; the name is not copied, so the caller must keep the
/// backing memory alive for the lifetime of the trie.
pub const ExportSymbol = struct {
    /// Name of the symbol.
    name: []const u8,

    /// Offset of this symbol's virtual memory address from the beginning
    /// of the __TEXT segment.
    vmaddr_offset: u64,

    /// Export flags of this exported symbol.
    export_flags: u64,
};
317
/// A node in the trie. Stored in a MultiArrayList; addressed by `Index`.
const Node = struct {
    /// True if this node terminates an exported symbol name, in which case
    /// `export_flags` and `vmaddr_offset` are meaningful.
    is_terminal: bool = false,

    /// Export flags associated with this exported symbol.
    export_flags: u64 = 0,

    /// VM address offset wrt to the section this symbol is defined against.
    vmaddr_offset: u64 = 0,

    /// Offset of this node in the trie output byte stream.
    /// Computed iteratively by `finalizeNode` during `finalize`.
    trie_offset: u32 = 0,

    /// List of all edges originating from this node.
    edges: std.ArrayList(Edge.Index) = .empty,

    const Index = u32;
};
335
/// Edge connecting nodes in the trie.
const Edge = struct {
    /// Target node in the trie.
    node: Node.Index = 0,

    /// Matching prefix. Not copied: points into the caller-owned symbol name.
    label: []const u8 = "",

    const Index = u32;
};
346
/// Test helper: compares two byte strings and, on mismatch, prints both as
/// hex with a caret under the first differing byte before failing.
fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void {
    assert(expected.len > 0);
    if (mem.eql(u8, expected, given)) return;
    const exp_hex = try std.fmt.allocPrint(testing.allocator, "{x}", .{expected});
    defer testing.allocator.free(exp_hex);
    const giv_hex = try std.fmt.allocPrint(testing.allocator, "{x}", .{given});
    defer testing.allocator.free(giv_hex);
    // The inputs differ, so their hex renderings must differ too.
    const diff_idx = mem.indexOfDiff(u8, exp_hex, giv_hex).?;
    // Pad past the "GIV: " prefix so the caret lands under the diff.
    const padding = try testing.allocator.alloc(u8, diff_idx + 5);
    defer testing.allocator.free(padding);
    @memset(padding, ' ');
    std.debug.print("\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n", .{ exp_hex, giv_hex, padding });
    return error.TestFailed;
}
361
test "write Trie to a byte stream" {
    const gpa = testing.allocator;
    var trie: Trie = .{};
    defer trie.deinit(gpa);
    try trie.init(gpa);

    // Both names share the '_' prefix, so the trie factors it into a single
    // edge off the root. Insertion order determines edge order, which the
    // expected byte table below depends on.
    try trie.put(gpa, .{
        .name = "__mh_execute_header",
        .vmaddr_offset = 0,
        .export_flags = 0,
    });
    try trie.put(gpa, .{
        .name = "_main",
        .vmaddr_offset = 0x1000,
        .export_flags = 0,
    });

    try trie.finalize(gpa);

    const exp_buffer = [_]u8{
        0x0, 0x1, // node root
        0x5f, 0x0, 0x5, // edge '_'
        0x0, 0x2, // non-terminal node
        0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header'
        0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header'
        0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main'
        0x2, 0x0, 0x0, 0x0, // terminal node
        0x3, 0x0, 0x80, 0x20, 0x0, // terminal node
    };
    try expectEqualHexStrings(&exp_buffer, trie.buffer.items);
}
393
test "ordering bug" {
    const gpa = testing.allocator;
    var trie: Trie = .{};
    defer trie.deinit(gpa);
    try trie.init(gpa);

    // Inserting the longer name first means adding "_a" afterwards must
    // splice the existing "_asStr" edge (shared prefix "_a"), exercising the
    // mid-node path in `putNode`.
    try trie.put(gpa, .{
        .name = "_asStr",
        .vmaddr_offset = 0x558,
        .export_flags = 0,
    });
    try trie.put(gpa, .{
        .name = "_a",
        .vmaddr_offset = 0x8008,
        .export_flags = 0,
    });

    try trie.finalize(gpa);

    const exp_buffer = [_]u8{
        0x00, 0x01, 0x5F, 0x61, 0x00, 0x06, 0x04, 0x00,
        0x88, 0x80, 0x02, 0x01, 0x73, 0x53, 0x74, 0x72,
        0x00, 0x12, 0x03, 0x00, 0xD8, 0x0A, 0x00,
    };
    try expectEqualHexStrings(&exp_buffer, trie.buffer.items);
}
420
421const assert = std.debug.assert;
422const log = std.log.scoped(.macho);
423const macho = std.macho;
424const mem = std.mem;
425const std = @import("std");
426const testing = std.testing;
427const trace = @import("../../../tracy.zig").trace;
428
429const Allocator = mem.Allocator;
430const MachO = @import("../../MachO.zig");
431const Trie = @This();