//! Represents the export trie used in MachO executables and dynamic libraries.
//! The purpose of an export trie is to encode as compactly as possible all
//! export symbols for the loader `dyld`.
//! The export trie encodes offset and other information using ULEB128
//! encoding, and is part of the __LINKEDIT segment.
//!
//! Description from loader.h:
//!
//! The symbols exported by a dylib are encoded in a trie. This is a compact
//! representation that factors out common prefixes. It also reduces LINKEDIT pages
//! in RAM because it encodes all information (name, address, flags) in one small,
//! contiguous range. The export area is a stream of nodes. The first node sequentially
//! is the start node for the trie.
//!
//! Nodes for a symbol start with a uleb128 that is the length of the exported symbol
//! information for the string so far. If there is no exported symbol, the node starts
//! with a zero byte. If there is exported info, it follows the length.
//!
//! First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded
//! offset which is the location of the content named by the symbol from the mach_header
//! for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags
//! is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string
//! is zero length, then the symbol is re-exported from the specified dylib with the same name.
//! If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two
//! uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers.
//! The resolver is used by lazy pointers and must be called to get the actual address to use.
//!
//! After the optional exported symbol information is a byte of how many edges (0-255) that
//! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of
//! the additional chars in the symbol, followed by a uleb128 offset for the node that edge points to.

/// The root node of the trie; null until `init` is called.
root: ?Node.Index = null,
/// Finalized ULEB128-encoded byte stream of the trie; populated by `finalize`.
buffer: std.ArrayList(u8) = .empty,
/// Backing storage for all nodes, indexed by `Node.Index`.
nodes: std.MultiArrayList(Node) = .{},
/// Backing storage for all edges, indexed by `Edge.Index`.
edges: std.ArrayList(Edge) = .empty,

/// Inserts `symbol` into the trie, updating shared prefixes in the process.
/// This operation may change the layout of the trie by splicing edges when
/// the new name only partially matches an existing edge label.
/// Asserts that `init` has been called (the root node exists).
fn put(self: *Trie, allocator: Allocator, symbol: ExportSymbol) !void {
    const terminal = try self.putNode(self.root.?, allocator, symbol.name);
    const nodes = self.nodes.slice();
    nodes.items(.is_terminal)[terminal] = true;
    nodes.items(.vmaddr_offset)[terminal] = symbol.vmaddr_offset;
    nodes.items(.export_flags)[terminal] = symbol.export_flags;
}

/// Inserts a new node starting at `node_index`.
/// Returns the index of the terminal node reached for `label`, creating
/// nodes and edges as required. When `label` shares only part of an edge's
/// prefix, that edge is spliced in two around a new middle node.
fn putNode(self: *Trie, node_index: Node.Index, allocator: Allocator, label: []const u8) !Node.Index {
    // Check for match with edges from this node.
    for (self.nodes.items(.edges)[node_index].items) |edge_index| {
        const edge = &self.edges.items[edge_index];
        // indexOfDiff returns null when the strings are equal, i.e. the
        // symbol already has a node; otherwise it is the common prefix length.
        const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.node;
        if (match == 0) continue;
        if (match == edge.label.len) return self.putNode(edge.node, allocator, label[match..]);

        // Found a match, need to splice up nodes.
        // From: A -> B
        // To: A -> C -> B
        // NOTE: `edge` points into `self.edges.items`, so every read/write
        // through it must happen before `addEdge` below, which may grow the
        // edges list and invalidate the pointer.
        const mid_index = try self.addNode(allocator);
        const to_label = edge.label[match..];
        const to_node = edge.node;
        edge.node = mid_index;
        edge.label = label[0..match];

        const new_edge_index = try self.addEdge(allocator);
        const new_edge = &self.edges.items[new_edge_index];
        new_edge.node = to_node;
        new_edge.label = to_label;
        try self.nodes.items(.edges)[mid_index].append(allocator, new_edge_index);

        // If `label` was fully consumed, the middle node is the terminal node;
        // otherwise keep descending with the remaining suffix.
        return if (match == label.len) mid_index else self.putNode(mid_index, allocator, label[match..]);
    }

    // Add a new node.
    const new_node_index = try self.addNode(allocator);
    const new_edge_index = try self.addEdge(allocator);
    const new_edge = &self.edges.items[new_edge_index];
    new_edge.node = new_node_index;
    new_edge.label = label;
    try self.nodes.items(.edges)[node_index].append(allocator, new_edge_index);

    return new_node_index;
}

/// Builds the export trie from all exported, live symbols known to
/// `macho_file`, finalizes it, and records the u64-aligned export blob size
/// in the dyld info load command.
pub fn updateSize(self: *Trie, macho_file: *MachO) !void {
    const tracy = trace(@src());
    defer tracy.end();

    const gpa = macho_file.base.comp.gpa;

    try self.init(gpa);
    // Pre-reserve capacity: each symbol may add a node plus a spliced middle
    // node (and matching edges) -- assumed upper bound; TODO confirm.
    try self.nodes.ensureUnusedCapacity(gpa, macho_file.resolver.values.items.len * 2);
    try self.edges.ensureUnusedCapacity(gpa, macho_file.resolver.values.items.len * 2);

    const seg = macho_file.getTextSegment();
    for (macho_file.resolver.values.items) |ref| {
        // Skip unresolved references, non-exported symbols, and symbols
        // whose atom has been dropped.
        if (ref.getFile(macho_file) == null) continue;
        const sym = ref.getSymbol(macho_file).?;
        if (!sym.flags.@"export") continue;
        if (sym.getAtom(macho_file)) |atom| if (!atom.isAlive()) continue;
        // Pick the export kind from the symbol's flags.
        var flags: u64 = if (sym.flags.abs)
            macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE
        else if (sym.flags.tlv)
            macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL
    else if (false) unreachable else
            macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR;
        if (sym.flags.weak) {
            flags |= macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
            macho_file.weak_defines.store(true, .seq_cst);
            macho_file.binds_to_weak.store(true, .seq_cst);
        }
        // Offsets are encoded relative to the __TEXT segment base address.
        try self.put(gpa, .{
            .name = sym.getName(macho_file),
            .vmaddr_offset = sym.getAddress(.{ .stubs = false }, macho_file) - seg.vmaddr,
            .export_flags = flags,
        });
    }

    try self.finalize(gpa);

    macho_file.dyld_info_cmd.export_size = mem.alignForward(u32, @intCast(self.buffer.items.len), @alignOf(u64));
}

/// Finalizes this trie for writing to a byte stream.
/// This step performs multiple passes through the trie ensuring
/// there are no gaps after every `Node` is ULEB128 encoded.
/// Call this method before trying to `write` the trie to a byte stream.
fn finalize(self: *Trie, allocator: Allocator) !void {
    const tracy = trace(@src());
    defer tracy.end();

    // Collect nodes in breadth-first order; the root is emitted first.
    var ordered_nodes = std.array_list.Managed(Node.Index).init(allocator);
    defer ordered_nodes.deinit();
    try ordered_nodes.ensureTotalCapacityPrecise(self.nodes.items(.is_terminal).len);

    {
        // FIFO queue implemented as an append-only list plus read cursor `i`;
        // each node is enqueued exactly once since the trie has no cycles.
        var fifo: std.ArrayList(Node.Index) = .empty;
        defer fifo.deinit(allocator);

        try fifo.append(allocator, self.root.?);

        var i: usize = 0;
        while (i < fifo.items.len) {
            const next_index = fifo.items[i];
            i += 1;
            const edges = &self.nodes.items(.edges)[next_index];
            for (edges.items) |edge_index| {
                const edge = self.edges.items[edge_index];
                try fifo.append(allocator, edge.node);
            }
            ordered_nodes.appendAssumeCapacity(next_index);
        }
    }

    // Iterate to a fixed point: a node's encoded size depends on the ULEB128
    // width of its children's trie offsets, and those offsets depend on the
    // sizes of preceding nodes. Repeat until no node's offset changes.
    var more: bool = true;
    var size: u32 = 0;
    while (more) {
        size = 0;
        more = false;
        for (ordered_nodes.items) |node_index| {
            const res = try self.finalizeNode(node_index, size);
            size += res.node_size;
            if (res.updated) more = true;
        }
    }

    try self.buffer.ensureTotalCapacityPrecise(allocator, size);

    // Encode every node into `self.buffer` in the same order that was used
    // for offset assignment above.
    var allocating: std.Io.Writer.Allocating = .fromArrayList(allocator, &self.buffer);
    defer self.buffer = allocating.toArrayList();
    const writer = &allocating.writer;

    for (ordered_nodes.items) |node_index| {
        try self.writeNode(node_index, writer);
    }
}

/// Result of a single sizing pass over one node; see `finalizeNode`.
const FinalizeNodeResult = struct {
    /// Current size of this node in bytes.
    node_size: u32,

    /// True if the trie offset of this node in the output byte stream
    /// would need updating; false otherwise.
    updated: bool,
};

/// Updates offset of this node in the output byte stream.
/// Measures the node's encoded size at the children's current trie offsets
/// by writing into a discarding (count-only) stream, then records
/// `offset_in_trie` as the node's new offset.
fn finalizeNode(self: *Trie, node_index: Node.Index, offset_in_trie: u32) !FinalizeNodeResult {
    var trash_buffer: [64]u8 = undefined;
    // Discarding writer: nothing is kept; only the byte count matters here.
    var stream: std.Io.Writer.Discarding = .init(&trash_buffer);
    const writer = &stream.writer;
    const slice = self.nodes.slice();

    var node_size: u32 = 0;
    if (slice.items(.is_terminal)[node_index]) {
        const export_flags = slice.items(.export_flags)[node_index];
        const vmaddr_offset = slice.items(.vmaddr_offset)[node_index];
        try writer.writeUleb128(export_flags);
        try writer.writeUleb128(vmaddr_offset);
        // The terminal info is preceded by its own ULEB128-encoded length;
        // writing fullCount() here accounts for that length prefix's bytes.
        try writer.writeUleb128(stream.fullCount());
    } else {
        node_size += 1; // 0x0 for non-terminal nodes
    }
    node_size += 1; // 1 byte for edge count

    for (slice.items(.edges)[node_index].items) |edge_index| {
        const edge = &self.edges.items[edge_index];
        const next_node_offset = slice.items(.trie_offset)[edge.node];
        node_size += @intCast(edge.label.len + 1); // label bytes + NUL terminator
        try writer.writeUleb128(next_node_offset);
    }

    const trie_offset = slice.items(.trie_offset)[node_index];
    // If the offset moved, `finalize` must run another sizing pass.
    const updated = offset_in_trie != trie_offset;
    slice.items(.trie_offset)[node_index] = offset_in_trie;
    node_size += @intCast(stream.fullCount());

    return .{ .node_size = node_size, .updated = updated };
}

/// Allocates the root node. Must be called exactly once, before any `put`.
fn init(self: *Trie, allocator: Allocator) !void {
    assert(self.root == null);
    const root_index = try self.addNode(allocator);
    self.root = root_index;
}

/// Releases all memory owned by the trie. The trie is unusable afterwards.
pub fn deinit(self: *Trie, allocator: Allocator) void {
    // Each node owns its list of outgoing edge indices; free those first.
    const edge_lists = self.nodes.items(.edges);
    for (edge_lists) |*list| list.deinit(allocator);
    self.nodes.deinit(allocator);
    self.edges.deinit(allocator);
    self.buffer.deinit(allocator);
}

/// Writes the finalized trie bytes to `writer`.
/// A no-op when `finalize` produced no output (or was never called).
pub fn write(self: Trie, writer: *std.Io.Writer) !void {
    const bytes = self.buffer.items;
    if (bytes.len > 0) try writer.writeAll(bytes);
}

/// Writes this node to a byte stream.
/// The children of this node are *not* written to the byte stream
/// recursively. To write all nodes to a byte stream in sequence, iterate
/// over the breadth-first node order computed in `finalize` and call this
/// method on each node.
/// This is one of the requirements of the MachO.
/// Panics if `finalize` was not called before calling this method.
fn writeNode(self: *Trie, node_index: Node.Index, writer: *std.Io.Writer) !void {
    const slice = self.nodes.slice();
    const edges = slice.items(.edges)[node_index];
    const is_terminal = slice.items(.is_terminal)[node_index];
    const export_flags = slice.items(.export_flags)[node_index];
    const vmaddr_offset = slice.items(.vmaddr_offset)[node_index];

    if (is_terminal) {
        // Terminal node info: encode export flags and vmaddr offset of this symbol.
        var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
        var info_stream: std.Io.Writer = .fixed(&info_buf);
        // TODO Implement for special flags.
        assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and
            export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0);
        try info_stream.writeUleb128(export_flags);
        try info_stream.writeUleb128(vmaddr_offset);

        // Encode the size of the terminal node info.
        var size_buf: [@sizeOf(u64)]u8 = undefined;
        var size_stream: std.Io.Writer = .fixed(&size_buf);
        try size_stream.writeUleb128(info_stream.end);

        // Now, write them to the output stream: length prefix first, then info.
        try writer.writeAll(size_buf[0..size_stream.end]);
        try writer.writeAll(info_buf[0..info_stream.end]);
    } else {
        // Non-terminal node is delimited by 0 byte.
        try writer.writeByte(0);
    }
    // Write number of edges as a single byte (max legal number of edges is 255).
    try writer.writeByte(@as(u8, @intCast(edges.items.len)));

    for (edges.items) |edge_index| {
        const edge = self.edges.items[edge_index];
        // Write edge label (NUL-terminated) and offset to next node in trie.
        try writer.writeAll(edge.label);
        try writer.writeByte(0);
        try writer.writeUleb128(slice.items(.trie_offset)[edge.node]);
    }
}

/// Appends a default-initialized node and returns its index.
fn addNode(self: *Trie, allocator: Allocator) !Node.Index {
    const new_index: Node.Index = @intCast(self.nodes.len);
    try self.nodes.append(allocator, .{});
    return new_index;
}

/// Appends a default-initialized edge and returns its index.
fn addEdge(self: *Trie, allocator: Allocator) !Edge.Index {
    const new_index: Edge.Index = @intCast(self.edges.items.len);
    try self.edges.append(allocator, .{});
    return new_index;
}

/// Export symbol that is to be placed in the trie.
/// NOTE: `name` is not copied -- edges store sub-slices of it (see
/// `putNode`), so the string must outlive the trie.
pub const ExportSymbol = struct {
    /// Name of the symbol.
    name: []const u8,

    /// Offset of this symbol's virtual memory address from the beginning
    /// of the __TEXT segment.
    vmaddr_offset: u64,

    /// Export flags of this exported symbol.
    export_flags: u64,
};

/// A node in the trie. A node is terminal if it ends an exported symbol name.
const Node = struct {
    /// True if this node terminates an exported symbol name.
    is_terminal: bool = false,

    /// Export flags associated with this exported symbol.
    export_flags: u64 = 0,

    /// VM address offset of the symbol relative to the __TEXT segment base
    /// (computed in `updateSize` by subtracting `seg.vmaddr`).
    vmaddr_offset: u64 = 0,

    /// Offset of this node in the trie output byte stream.
    /// Assigned iteratively by `finalizeNode`; meaningless before `finalize`.
    trie_offset: u32 = 0,

    /// List of all edges originating from this node.
    edges: std.ArrayList(Edge.Index) = .empty,

    const Index = u32;
};

/// Edge connecting nodes in the trie.
const Edge = struct {
    /// Target node in the trie.
    node: Node.Index = 0,

    /// Matching prefix. Borrowed from the symbol name passed to `put`;
    /// not owned by the trie.
    label: []const u8 = "",

    const Index = u32;
};

/// Test helper: compares two byte slices and, on mismatch, prints both as
/// hex with a caret under the first differing byte, then returns
/// `error.TestFailed`. Asserts `expected` is non-empty.
fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void {
    assert(expected.len > 0);
    if (mem.eql(u8, expected, given)) return;
    const exp_hex = try std.fmt.allocPrint(testing.allocator, "{x}", .{expected});
    defer testing.allocator.free(exp_hex);
    const giv_hex = try std.fmt.allocPrint(testing.allocator, "{x}", .{given});
    defer testing.allocator.free(giv_hex);
    const diff_at = mem.indexOfDiff(u8, exp_hex, giv_hex).?;
    // The "GIV: " prefix is 5 columns wide, hence the extra padding.
    const padding = try testing.allocator.alloc(u8, diff_at + 5);
    defer testing.allocator.free(padding);
    @memset(padding, ' ');
    std.debug.print("\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n", .{ exp_hex, giv_hex, padding });
    return error.TestFailed;
}

test "write Trie to a byte stream" {
    const gpa = testing.allocator;
    var trie: Trie = .{};
    defer trie.deinit(gpa);
    try trie.init(gpa);

    // Two symbols sharing the "_" prefix: the root gets a single "_" edge
    // to a non-terminal node with two child edges.
    try trie.put(gpa, .{
        .name = "__mh_execute_header",
        .vmaddr_offset = 0,
        .export_flags = 0,
    });
    try trie.put(gpa, .{
        .name = "_main",
        .vmaddr_offset = 0x1000,
        .export_flags = 0,
    });

    try trie.finalize(gpa);

    // Expected bytes match the on-disk format described in the header docs.
    const exp_buffer = [_]u8{
        0x0, 0x1, // node root
        0x5f, 0x0, 0x5, // edge '_'
        0x0, 0x2, // non-terminal node
        0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header'
        0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header'
        0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main'
        0x2, 0x0, 0x0, 0x0, // terminal node
        0x3, 0x0, 0x80, 0x20, 0x0, // terminal node
    };
    try expectEqualHexStrings(&exp_buffer, trie.buffer.items);
}

test "ordering bug" {
    const gpa = testing.allocator;
    var trie: Trie = .{};
    defer trie.deinit(gpa);
    try trie.init(gpa);

    // Regression test: insert the longer name first, then a name that is a
    // strict prefix of it ("_a" vs "_asStr"), which forces an edge splice
    // and exercises the breadth-first output ordering.
    try trie.put(gpa, .{
        .name = "_asStr",
        .vmaddr_offset = 0x558,
        .export_flags = 0,
    });
    try trie.put(gpa, .{
        .name = "_a",
        .vmaddr_offset = 0x8008,
        .export_flags = 0,
    });

    try trie.finalize(gpa);

    const exp_buffer = [_]u8{
        0x00, 0x01, 0x5F, 0x61, 0x00, 0x06, 0x04, 0x00,
        0x88, 0x80, 0x02, 0x01, 0x73, 0x53, 0x74, 0x72,
        0x00, 0x12, 0x03, 0x00, 0xD8, 0x0A, 0x00,
    };
    try expectEqualHexStrings(&exp_buffer, trie.buffer.items);
}

421const assert = std.debug.assert;
422const log = std.log.scoped(.macho);
423const macho = std.macho;
424const mem = std.mem;
425const std = @import("std");
426const testing = std.testing;
427const trace = @import("../../../tracy.zig").trace;
428
429const Allocator = mem.Allocator;
430const MachO = @import("../../MachO.zig");
431const Trie = @This();