master
1const std = @import("std");
2const assert = std.debug.assert;
3const elf = std.elf;
4const fs = std.fs;
5const macho = std.macho;
6const math = std.math;
7const mem = std.mem;
8const testing = std.testing;
9const Writer = std.Io.Writer;
10
11const CheckObject = @This();
12
13const Allocator = mem.Allocator;
14const Step = std.Build.Step;
15
16pub const base_id: Step.Id = .check_object;
17
18step: Step,
19source: std.Build.LazyPath,
20max_bytes: usize = 20 * 1024 * 1024,
21checks: std.array_list.Managed(Check),
22obj_format: std.Target.ObjectFormat,
23
/// Allocates a new `CheckObject` step from `owner.allocator` and registers the
/// steps that produce `source` as dependencies of the new step.
///
/// `source` is duplicated via `LazyPath.dupe`, the check list starts empty and
/// `max_bytes` keeps its field default. Panics with "OOM" if the allocation fails.
pub fn create(
    owner: *std.Build,
    source: std.Build.LazyPath,
    obj_format: std.Target.ObjectFormat,
) *CheckObject {
    const allocator = owner.allocator;
    const self = allocator.create(CheckObject) catch @panic("OOM");

    const step: Step = .init(.{
        .id = base_id,
        .name = "CheckObject",
        .owner = owner,
        .makeFn = make,
    });
    self.* = .{
        .step = step,
        .source = source.dupe(owner),
        .checks = std.array_list.Managed(Check).init(allocator),
        .obj_format = obj_format,
    };

    // Must run after `self.*` is populated: the dependency is attached to the
    // step stored inside the heap-allocated object.
    self.source.addStepDependencies(&self.step);
    return self;
}
45
/// A phrase to look for in the dumped output, optionally completed by a path
/// that is only known once the build graph is being executed.
const SearchPhrase = struct {
    /// The literal part of the phrase.
    string: []const u8,
    /// When set, the resolved path is appended to `string` after a single space.
    lazy_path: ?std.Build.LazyPath = null,

    /// Returns the final search text: `string` alone when there is no lazy path,
    /// otherwise "`string` `resolved path`" formatted through `b.fmt`.
    fn resolve(phrase: SearchPhrase, b: *std.Build, step: *Step) []const u8 {
        if (phrase.lazy_path) |path| {
            return b.fmt("{s} {s}", .{ phrase.string, path.getPath2(b, step) });
        }
        return phrase.string;
    }
};
55
/// A single matching step applied to the textual dump of the checked file.
///
/// There are five types of actions currently supported:
/// .exact - will do an exact match against the haystack
/// .contains - will check for existence within the haystack
/// .not_present - will check for non-existence within the haystack
/// .extract - will match the haystack token by token and extract hexadecimal
///     values into variables named by `{name}` placeholders
/// .compute_cmp - will evaluate a small arithmetic program over the extracted global
///     variables and integer literals and compare the result with `expected`.
///     Supported operators are `+`, `-`, `%` and `*`. Operands are written first and
///     operators last, for example `vmaddr entryoff +` adds the two extracted values.
///     Operands and operators are collected separately and applied left to right:
///     the first operator combines the first two operands and every following
///     operator combines the running result with the next operand.
const Action = struct {
    tag: enum { exact, contains, not_present, extract, compute_cmp },
    phrase: SearchPhrase,
    /// Only consulted by `.compute_cmp` actions.
    expected: ?ComputeCompareExpected = null,

    /// Returns true if every token of the `phrase` matched the corresponding token of
    /// the haystack and at least one variable was extracted.
    ///
    /// Tokens are separated by spaces. A phrase token of the form `{name}` matches a
    /// hexadecimal number and binds it to `name`; any other token must be equal to
    /// the haystack token. Haystack tokens beyond the end of the phrase are ignored.
    /// Variables are committed to `global_vars` only when the whole phrase matched;
    /// `putNoClobber` is used, so a name may be extracted only once.
    ///
    /// Returns an error if a placeholder is malformed (`{` without a trailing `}` or
    /// an empty name).
    fn extract(
        act: Action,
        b: *std.Build,
        step: *Step,
        haystack: []const u8,
        global_vars: anytype,
    ) !bool {
        assert(act.tag == .extract);
        const hay = mem.trim(u8, haystack, " ");
        const phrase = mem.trim(u8, act.phrase.resolve(b, step), " ");

        var candidate_vars: std.array_list.Managed(struct { name: []const u8, value: u64 }) = .init(b.allocator);
        defer candidate_vars.deinit();
        var hay_it = mem.tokenizeScalar(u8, hay, ' ');
        var needle_it = mem.tokenizeScalar(u8, phrase, ' ');

        while (needle_it.next()) |needle_tok| {
            // A haystack that ends before the phrase does cannot match; bailing out
            // here prevents committing only a prefix of the requested variables.
            const hay_tok = hay_it.next() orelse return false;
            if (mem.startsWith(u8, needle_tok, "{")) {
                const closing_brace = mem.indexOf(u8, needle_tok, "}") orelse return error.MissingClosingBrace;
                if (closing_brace != needle_tok.len - 1) return error.ClosingBraceNotLast;

                const name = needle_tok[1..closing_brace];
                if (name.len == 0) return error.MissingBraceValue;
                const value = std.fmt.parseInt(u64, hay_tok, 16) catch return false;
                try candidate_vars.append(.{
                    .name = name,
                    .value = value,
                });
            } else {
                if (!mem.eql(u8, hay_tok, needle_tok)) return false;
            }
        }

        if (candidate_vars.items.len == 0) return false;

        for (candidate_vars.items) |cv| try global_vars.putNoClobber(cv.name, cv.value);

        return true;
    }

    /// Returns true if the `phrase` is an exact match with the haystack.
    /// Leading and trailing spaces are ignored on both sides.
    fn exact(
        act: Action,
        b: *std.Build,
        step: *Step,
        haystack: []const u8,
    ) bool {
        assert(act.tag == .exact);
        const hay = mem.trim(u8, haystack, " ");
        const phrase = mem.trim(u8, act.phrase.resolve(b, step), " ");
        return mem.eql(u8, hay, phrase);
    }

    /// Returns true if the `phrase` exists within the haystack.
    /// Leading and trailing spaces are ignored on both sides.
    fn contains(
        act: Action,
        b: *std.Build,
        step: *Step,
        haystack: []const u8,
    ) bool {
        assert(act.tag == .contains);
        const hay = mem.trim(u8, haystack, " ");
        const phrase = mem.trim(u8, act.phrase.resolve(b, step), " ");
        return mem.indexOf(u8, hay, phrase) != null;
    }

    /// Returns true if the `phrase` does not exist within the haystack.
    fn notPresent(
        act: Action,
        b: *std.Build,
        step: *Step,
        haystack: []const u8,
    ) bool {
        assert(act.tag == .not_present);
        // Reuse `contains` on a copy of this action retagged as `.contains`.
        return !contains(.{
            .tag = .contains,
            .phrase = act.phrase,
            .expected = act.expected,
        }, b, step, haystack);
    }

    /// Will return true if the `phrase` is correctly parsed into a program and
    /// its reduced, computed value compares using `op` with the expected value, either
    /// a literal or another extracted variable.
    ///
    /// Operands are integer literals (base auto-detected by `parseInt` with base 0)
    /// or names looked up in `global_vars`. Fails the step when the program has no
    /// operand for an operator to consume, and returns `error.UnknownVariable` when a
    /// name was never extracted. Arithmetic is checked: overflow and modulo by zero
    /// are reported as errors instead of trapping.
    fn computeCmp(act: Action, b: *std.Build, step: *Step, global_vars: anytype) !bool {
        const gpa = step.owner.allocator;
        const phrase = act.phrase.resolve(b, step);
        const Op = enum { add, sub, mod, mul };
        var op_stack = std.array_list.Managed(Op).init(gpa);
        defer op_stack.deinit();
        var values = std.array_list.Managed(u64).init(gpa);
        defer values.deinit();

        var it = mem.tokenizeScalar(u8, phrase, ' ');
        while (it.next()) |next| {
            if (mem.eql(u8, next, "+")) {
                try op_stack.append(.add);
            } else if (mem.eql(u8, next, "-")) {
                try op_stack.append(.sub);
            } else if (mem.eql(u8, next, "%")) {
                try op_stack.append(.mod);
            } else if (mem.eql(u8, next, "*")) {
                try op_stack.append(.mul);
            } else {
                const val = std.fmt.parseInt(u64, next, 0) catch blk: {
                    break :blk global_vars.get(next) orelse {
                        try step.addError(
                            \\
                            \\========= variable was not extracted: ===========
                            \\{s}
                            \\=================================================
                        , .{next});
                        return error.UnknownVariable;
                    };
                };
                try values.append(val);
            }
        }

        // Every operator consumes one operand on top of the initial one, so a
        // program with no operands or with at least as many operators as operands
        // would index past the end of `values`.
        if (op_stack.items.len >= values.items.len) {
            return step.fail(
                "malformed compute-compare program '{s}': {d} operand(s) for {d} operator(s)",
                .{ phrase, values.items.len, op_stack.items.len },
            );
        }

        var reduced: u64 = values.items[0];
        for (op_stack.items, values.items[1..][0..op_stack.items.len]) |op, other| {
            reduced = switch (op) {
                .add => try math.add(u64, reduced, other),
                .sub => try math.sub(u64, reduced, other),
                .mod => try math.mod(u64, reduced, other),
                .mul => try math.mul(u64, reduced, other),
            };
        }

        const expected = act.expected.?;
        const exp_value = switch (expected.value) {
            .variable => |name| global_vars.get(name) orelse {
                try step.addError(
                    \\
                    \\========= variable was not extracted: ===========
                    \\{s}
                    \\=================================================
                , .{name});
                return error.UnknownVariable;
            },
            .literal => |x| x,
        };
        return math.compare(reduced, expected.op, exp_value);
    }
};
225
/// The right-hand side of a compute-compare check: a comparison operator plus
/// the value to compare against.
const ComputeCompareExpected = struct {
    op: math.CompareOperator,
    value: union(enum) {
        /// Name of a previously extracted variable.
        variable: []const u8,
        /// A fixed number.
        literal: u64,
    },

    /// Writes the operator's tag name, a space, and then either the variable
    /// name or the literal in lowercase hexadecimal (without a prefix).
    pub fn format(value: ComputeCompareExpected, w: *Writer) Writer.Error!void {
        try w.print("{t} ", .{value.op});
        return switch (value.value) {
            .variable => |name| w.writeAll(name),
            .literal => |x| w.print("{x}", .{x}),
        };
    }
};
241
/// One group of actions that is matched against a single dump of the file.
const Check = struct {
    kind: Kind,
    payload: Payload,
    /// Backing storage for payload strings (see `Payload.dump_section`).
    data: std.array_list.Managed(u8),
    /// Actions in the order they were added.
    actions: std.array_list.Managed(Action),

    /// Returns an empty check of the given kind with no payload.
    fn create(allocator: Allocator, kind: Kind) Check {
        const data: std.array_list.Managed(u8) = .init(allocator);
        const actions: std.array_list.Managed(Action) = .init(allocator);
        return .{
            .kind = kind,
            .payload = .{ .none = {} },
            .data = data,
            .actions = actions,
        };
    }

    /// Returns a `.dump_section` check. `name` is stored NUL-terminated in `data`
    /// and the payload records the offset at which it starts.
    fn dumpSection(allocator: Allocator, name: [:0]const u8) Check {
        var result = Check.create(allocator, .dump_section);
        const name_offset: u32 = @intCast(result.data.items.len);
        result.data.print("{s}\x00", .{name}) catch @panic("OOM");
        result.payload = .{ .dump_section = name_offset };
        return result;
    }

    /// Appends `action` to this check, panicking with "OOM" on allocation failure.
    fn addAction(check: *Check, action: Action) void {
        check.actions.append(action) catch @panic("OOM");
    }

    /// Appends an `.extract` action for `phrase`.
    fn extract(check: *Check, phrase: SearchPhrase) void {
        check.addAction(.{ .tag = .extract, .phrase = phrase });
    }

    /// Appends an `.exact` action for `phrase`.
    fn exact(check: *Check, phrase: SearchPhrase) void {
        check.addAction(.{ .tag = .exact, .phrase = phrase });
    }

    /// Appends a `.contains` action for `phrase`.
    fn contains(check: *Check, phrase: SearchPhrase) void {
        check.addAction(.{ .tag = .contains, .phrase = phrase });
    }

    /// Appends a `.not_present` action for `phrase`.
    fn notPresent(check: *Check, phrase: SearchPhrase) void {
        check.addAction(.{ .tag = .not_present, .phrase = phrase });
    }

    /// Appends a `.compute_cmp` action evaluating `phrase` against `expected`.
    fn computeCmp(check: *Check, phrase: SearchPhrase, expected: ComputeCompareExpected) void {
        check.addAction(.{
            .tag = .compute_cmp,
            .phrase = phrase,
            .expected = expected,
        });
    }

    const Kind = enum {
        headers,
        symtab,
        indirect_symtab,
        dynamic_symtab,
        archive_symtab,
        dynamic_section,
        dyld_rebase,
        dyld_bind,
        dyld_weak_bind,
        dyld_lazy_bind,
        exports,
        compute_compare,
        dump_section,
    };

    const Payload = union {
        none: void,
        /// Null-delimited string in the 'data' buffer.
        dump_section: u32,
    };
};
323
/// Starts a new, empty sequence of actions of the given `kind`; subsequent
/// `check*` calls add their actions to it.
fn checkStart(check_object: *CheckObject, kind: Check.Kind) void {
    const gpa = check_object.step.owner.allocator;
    check_object.checks.append(Check.create(gpa, kind)) catch @panic("OOM");
}
329
/// Adds an exact-match `phrase` to the most recently created Check.
/// A Check must already have been started.
pub fn checkExact(check_object: *CheckObject, phrase: []const u8) void {
    checkExactInner(check_object, phrase, null);
}
334
/// Same as `checkExact()`, except that `lazy_path` is resolved in `make()` and
/// appended to `phrase` to form the full search query.
pub fn checkExactPath(check_object: *CheckObject, phrase: []const u8, lazy_path: std.Build.LazyPath) void {
    checkExactInner(check_object, phrase, lazy_path);
}
340
/// Shared implementation of `checkExact()` and `checkExactPath()`: duplicates
/// `phrase` and appends an exact-match action to the latest Check.
/// Asserts that at least one Check exists.
fn checkExactInner(check_object: *CheckObject, phrase: []const u8, lazy_path: ?std.Build.LazyPath) void {
    const checks = check_object.checks.items;
    assert(checks.len > 0);
    const search: SearchPhrase = .{
        .string = check_object.step.owner.dupe(phrase),
        .lazy_path = lazy_path,
    };
    checks[checks.len - 1].exact(search);
}
346
/// Adds a substring-match `phrase` to the most recently created Check.
/// A Check must already have been started.
pub fn checkContains(check_object: *CheckObject, phrase: []const u8) void {
    checkContainsInner(check_object, phrase, null);
}
351
/// Same as `checkContains()`, except that `lazy_path` is resolved in `make()`
/// and appended to `phrase` to form the full search query.
pub fn checkContainsPath(
    check_object: *CheckObject,
    phrase: []const u8,
    lazy_path: std.Build.LazyPath,
) void {
    checkContainsInner(check_object, phrase, lazy_path);
}
361
/// Shared implementation of `checkContains()` and `checkContainsPath()`:
/// duplicates `phrase` and appends a substring-match action to the latest Check.
/// Asserts that at least one Check exists.
fn checkContainsInner(check_object: *CheckObject, phrase: []const u8, lazy_path: ?std.Build.LazyPath) void {
    const checks = check_object.checks.items;
    assert(checks.len > 0);
    const search: SearchPhrase = .{
        .string = check_object.step.owner.dupe(phrase),
        .lazy_path = lazy_path,
    };
    checks[checks.len - 1].contains(search);
}
367
/// Adds a `phrase` with `{name}` variable extractors to the most recently
/// created Check. A Check must already have been started.
pub fn checkExtract(check_object: *CheckObject, phrase: []const u8) void {
    checkExtractInner(check_object, phrase, null);
}
372
/// Same as `checkExtract()`, except that `lazy_path` is resolved in `make()`
/// and appended to `phrase` to form the full search query.
pub fn checkExtractLazyPath(check_object: *CheckObject, phrase: []const u8, lazy_path: std.Build.LazyPath) void {
    checkExtractInner(check_object, phrase, lazy_path);
}
378
/// Shared implementation of `checkExtract()` and `checkExtractLazyPath()`:
/// duplicates `phrase` and appends an extract action to the latest Check.
/// Asserts that at least one Check exists.
fn checkExtractInner(check_object: *CheckObject, phrase: []const u8, lazy_path: ?std.Build.LazyPath) void {
    const checks = check_object.checks.items;
    assert(checks.len > 0);
    const search: SearchPhrase = .{
        .string = check_object.step.owner.dupe(phrase),
        .lazy_path = lazy_path,
    };
    checks[checks.len - 1].extract(search);
}
384
/// Adds a `phrase` to the most recently created Check that must NOT appear in
/// the output. A Check must already have been started.
pub fn checkNotPresent(check_object: *CheckObject, phrase: []const u8) void {
    checkNotPresentInner(check_object, phrase, null);
}
390
/// Same as `checkNotPresent()`, except that `lazy_path` is resolved in `make()`
/// and appended to `phrase` to form the full search query.
pub fn checkNotPresentLazyPath(check_object: *CheckObject, phrase: []const u8, lazy_path: std.Build.LazyPath) void {
    checkNotPresentInner(check_object, phrase, lazy_path);
}
396
/// Shared implementation of `checkNotPresent()` and `checkNotPresentLazyPath()`:
/// duplicates `phrase` and appends a not-present action to the latest Check.
/// Asserts that at least one Check exists.
fn checkNotPresentInner(check_object: *CheckObject, phrase: []const u8, lazy_path: ?std.Build.LazyPath) void {
    const checks = check_object.checks.items;
    assert(checks.len > 0);
    const search: SearchPhrase = .{
        .string = check_object.step.owner.dupe(phrase),
        .lazy_path = lazy_path,
    };
    checks[checks.len - 1].notPresent(search);
}
402
/// Starts a new check over the dumped file headers (section, program headers, etc.).
pub fn checkInHeaders(check_object: *CheckObject) void {
    checkStart(check_object, .headers);
}
407
/// Starts a new check over the symbol table parsed and dumped from the object
/// file, beginning with an exact match on the format's symbol table label.
/// Panics for COFF and for any format other than MachO, ELF or Wasm.
pub fn checkInSymtab(check_object: *CheckObject) void {
    const heading: []const u8 = switch (check_object.obj_format) {
        .macho => MachODumper.symtab_label,
        .elf => ElfDumper.symtab_label,
        .wasm => WasmDumper.symtab_label,
        .coff => @panic("TODO symtab for coff"),
        else => @panic("TODO other file formats"),
    };
    checkStart(check_object, .symtab);
    checkExact(check_object, heading);
}
421
/// Starts a new check over the dyld rebase opcodes parsed and dumped from the
/// object file, beginning with an exact match on the section label.
/// This check is target-dependent and applicable to MachO only; any other
/// object format panics.
pub fn checkInDyldRebase(check_object: *CheckObject) void {
    const heading = if (check_object.obj_format == .macho)
        MachODumper.dyld_rebase_label
    else
        @panic("Unsupported target platform");
    checkStart(check_object, .dyld_rebase);
    checkExact(check_object, heading);
}
433
/// Starts a new check over the dyld bind opcodes parsed and dumped from the
/// object file, beginning with an exact match on the section label.
/// This check is target-dependent and applicable to MachO only; any other
/// object format panics.
pub fn checkInDyldBind(check_object: *CheckObject) void {
    const heading = if (check_object.obj_format == .macho)
        MachODumper.dyld_bind_label
    else
        @panic("Unsupported target platform");
    checkStart(check_object, .dyld_bind);
    checkExact(check_object, heading);
}
445
/// Starts a new check over the dyld weak bind opcodes parsed and dumped from the
/// object file, beginning with an exact match on the section label.
/// This check is target-dependent and applicable to MachO only; any other
/// object format panics.
pub fn checkInDyldWeakBind(check_object: *CheckObject) void {
    const heading = if (check_object.obj_format == .macho)
        MachODumper.dyld_weak_bind_label
    else
        @panic("Unsupported target platform");
    checkStart(check_object, .dyld_weak_bind);
    checkExact(check_object, heading);
}
457
/// Starts a new check over the dyld lazy bind opcodes parsed and dumped from the
/// object file, beginning with an exact match on the section label.
/// This check is target-dependent and applicable to MachO only; any other
/// object format panics.
pub fn checkInDyldLazyBind(check_object: *CheckObject) void {
    const heading = if (check_object.obj_format == .macho)
        MachODumper.dyld_lazy_bind_label
    else
        @panic("Unsupported target platform");
    checkStart(check_object, .dyld_lazy_bind);
    checkExact(check_object, heading);
}
469
/// Starts a new check over the exports info parsed and dumped from the object
/// file, beginning with an exact match on the section label.
/// This check is target-dependent and applicable to MachO only; any other
/// object format panics.
pub fn checkInExports(check_object: *CheckObject) void {
    const heading = if (check_object.obj_format == .macho)
        MachODumper.exports_label
    else
        @panic("Unsupported target platform");
    checkStart(check_object, .exports);
    checkExact(check_object, heading);
}
481
/// Starts a new check over the indirect symbol table parsed and dumped from the
/// object file, beginning with an exact match on the table label.
/// This check is target-dependent and applicable to MachO only; any other
/// object format panics.
pub fn checkInIndirectSymtab(check_object: *CheckObject) void {
    const heading = if (check_object.obj_format == .macho)
        MachODumper.indirect_symtab_label
    else
        @panic("Unsupported target platform");
    checkStart(check_object, .indirect_symtab);
    checkExact(check_object, heading);
}
493
/// Starts a new check over the dynamic symbol table parsed and dumped from the
/// object file, beginning with an exact match on the table label.
/// This check is target-dependent and applicable to ELF only; any other
/// object format panics.
pub fn checkInDynamicSymtab(check_object: *CheckObject) void {
    const heading = if (check_object.obj_format == .elf)
        ElfDumper.dynamic_symtab_label
    else
        @panic("Unsupported target platform");
    checkStart(check_object, .dynamic_symtab);
    checkExact(check_object, heading);
}
505
/// Starts a new check over the dynamic section parsed and dumped from the
/// object file, beginning with an exact match on the section label.
/// This check is target-dependent and applicable to ELF only; any other
/// object format panics.
pub fn checkInDynamicSection(check_object: *CheckObject) void {
    const heading = if (check_object.obj_format == .elf)
        ElfDumper.dynamic_section_label
    else
        @panic("Unsupported target platform");
    checkStart(check_object, .dynamic_section);
    checkExact(check_object, heading);
}
517
/// Starts a new check over the symbol table parsed and dumped from the archive
/// file, beginning with an exact match on the table label.
/// Only ELF is handled; any other object format panics.
pub fn checkInArchiveSymtab(check_object: *CheckObject) void {
    const heading = if (check_object.obj_format == .elf)
        ElfDumper.archive_symtab_label
    else
        @panic("TODO other file formats");
    checkStart(check_object, .archive_symtab);
    checkExact(check_object, heading);
}
528
/// Appends a new `.dump_section` check for the section called `name`.
/// Panics with "OOM" if the check list cannot grow.
pub fn dumpSection(check_object: *CheckObject, name: [:0]const u8) void {
    const gpa = check_object.step.owner.allocator;
    check_object.checks.append(Check.dumpSection(gpa, name)) catch @panic("OOM");
}
533
/// Adds a standalone check holding a single compute-compare action: `program`
/// is evaluated over the extracted variables and its result is compared with
/// `expected`. `program` is duplicated before being stored.
pub fn checkComputeCompare(
    check_object: *CheckObject,
    program: []const u8,
    expected: ComputeCompareExpected,
) void {
    const owner = check_object.step.owner;
    var standalone = Check.create(owner.allocator, .compute_compare);
    standalone.computeCmp(.{ .string = owner.dupe(program) }, expected);
    check_object.checks.append(standalone) catch @panic("OOM");
}
546
/// Step implementation: reads the source file, then runs every registered check
/// in order.
///
/// `.compute_compare` checks are evaluated directly against the variables
/// extracted so far. Every other check first dumps the file through the dumper
/// for `obj_format` and then matches its actions against the dump line by line.
/// All actions of one check share a single line iterator, so each action resumes
/// where the previous one stopped. The first failing action fails the step with
/// a message containing the phrase, the full dump and the file path.
fn make(step: *Step, make_options: Step.MakeOptions) !void {
    _ = make_options;
    const b = step.owner;
    const gpa = b.allocator;
    const check_object: *CheckObject = @fieldParentPtr("step", step);
    try step.singleUnchangingWatchInput(check_object.source);

    const src_path = check_object.source.getPath3(b, step);
    const contents = src_path.root_dir.handle.readFileAllocOptions(
        src_path.sub_path,
        gpa,
        .limited(check_object.max_bytes),
        .of(u64),
        null,
    ) catch |err| return step.fail("unable to read '{f}': {t}", .{
        std.fmt.alt(src_path, .formatEscapeChar), err,
    });

    // Variables captured by `.extract` actions; shared across all checks.
    var vars: std.StringHashMap(u64) = .init(gpa);
    for (check_object.checks.items) |chk| {
        if (chk.kind == .compute_compare) {
            assert(chk.actions.items.len == 1);
            const act = chk.actions.items[0];
            assert(act.tag == .compute_cmp);
            const res = act.computeCmp(b, step, vars) catch |err| switch (err) {
                error.UnknownVariable => return step.fail("Unknown variable", .{}),
                else => |e| return e,
            };
            if (!res) {
                return step.fail(
                    \\
                    \\========= comparison failed for action: ===========
                    \\{s} {f}
                    \\===================================================
                , .{ act.phrase.resolve(b, step), act.expected.? });
            }
            continue;
        }

        const output = switch (check_object.obj_format) {
            .macho => try MachODumper.parseAndDump(step, chk, contents),
            .elf => try ElfDumper.parseAndDump(step, chk, contents),
            .coff => return step.fail("TODO coff parser", .{}),
            .wasm => try WasmDumper.parseAndDump(step, chk, contents),
            // Reachable whenever a CheckObject was created for a format without a
            // dumper (e.g. via `checkInHeaders`), so report it instead of
            // declaring it impossible.
            else => return step.fail("unsupported object format '{t}'", .{check_object.obj_format}),
        };

        // Depending on whether we requested dumping section verbatim or not,
        // we either format message string with escaped codes, or not to aid debugging
        // the failed test.
        const fmtMessageString = struct {
            fn fmtMessageString(kind: Check.Kind, msg: []const u8) std.fmt.Alt(Ctx, formatMessageString) {
                return .{ .data = .{
                    .kind = kind,
                    .msg = msg,
                } };
            }

            const Ctx = struct {
                kind: Check.Kind,
                msg: []const u8,
            };

            fn formatMessageString(ctx: Ctx, w: *Writer) !void {
                switch (ctx.kind) {
                    .dump_section => try w.print("{f}", .{std.ascii.hexEscape(ctx.msg, .lower)}),
                    else => try w.writeAll(ctx.msg),
                }
            }
        }.fmtMessageString;

        var it = mem.tokenizeAny(u8, output, "\r\n");
        for (chk.actions.items) |act| {
            switch (act.tag) {
                .exact => {
                    while (it.next()) |line| {
                        if (act.exact(b, step, line)) break;
                    } else {
                        return step.fail(
                            \\
                            \\========= expected to find: ==========================
                            \\{f}
                            \\========= but parsed file does not contain it: =======
                            \\{f}
                            \\========= file path: =================================
                            \\{f}
                        , .{
                            fmtMessageString(chk.kind, act.phrase.resolve(b, step)),
                            fmtMessageString(chk.kind, output),
                            src_path,
                        });
                    }
                },

                .contains => {
                    while (it.next()) |line| {
                        if (act.contains(b, step, line)) break;
                    } else {
                        return step.fail(
                            \\
                            \\========= expected to find: ==========================
                            \\*{f}*
                            \\========= but parsed file does not contain it: =======
                            \\{f}
                            \\========= file path: =================================
                            \\{f}
                        , .{
                            fmtMessageString(chk.kind, act.phrase.resolve(b, step)),
                            fmtMessageString(chk.kind, output),
                            src_path,
                        });
                    }
                },

                .not_present => {
                    // Scans every remaining line; the first line containing the
                    // phrase fails the step.
                    while (it.next()) |line| {
                        if (act.notPresent(b, step, line)) continue;
                        return step.fail(
                            \\
                            \\========= expected not to find: ===================
                            \\{f}
                            \\========= but parsed file does contain it: ========
                            \\{f}
                            \\========= file path: ==============================
                            \\{f}
                        , .{
                            fmtMessageString(chk.kind, act.phrase.resolve(b, step)),
                            fmtMessageString(chk.kind, output),
                            src_path,
                        });
                    }
                },

                .extract => {
                    while (it.next()) |line| {
                        if (try act.extract(b, step, line, &vars)) break;
                    } else {
                        return step.fail(
                            \\
                            \\========= expected to find and extract: ==============
                            \\{f}
                            \\========= but parsed file does not contain it: =======
                            \\{f}
                            \\========= file path: ==============================
                            \\{f}
                        , .{
                            fmtMessageString(chk.kind, act.phrase.resolve(b, step)),
                            fmtMessageString(chk.kind, output),
                            src_path,
                        });
                    }
                },

                // Compute-compare actions only live in `.compute_compare` checks,
                // which are handled above.
                .compute_cmp => unreachable,
            }
        }
    }
}
705
706const MachODumper = struct {
707 const dyld_rebase_label = "dyld rebase data";
708 const dyld_bind_label = "dyld bind data";
709 const dyld_weak_bind_label = "dyld weak bind data";
710 const dyld_lazy_bind_label = "dyld lazy bind data";
711 const exports_label = "exports data";
712 const symtab_label = "symbol table";
713 const indirect_symtab_label = "indirect symbol table";
714
/// Dumps `bytes` as a single MachO object file according to `check` and
/// returns the resulting text.
fn parseAndDump(step: *Step, check: Check, bytes: []const u8) ![]const u8 {
    // TODO: handle archives and fat files
    const dumped = try parseAndDumpObject(step, check, bytes);
    return dumped;
}
719
720 const ObjectContext = struct {
721 gpa: Allocator,
722 data: []const u8,
723 header: macho.mach_header_64,
724 segments: std.ArrayList(macho.segment_command_64) = .empty,
725 sections: std.ArrayList(macho.section_64) = .empty,
726 symtab: std.ArrayList(macho.nlist_64) = .empty,
727 strtab: std.ArrayList(u8) = .empty,
728 indsymtab: std.ArrayList(u32) = .empty,
729 imports: std.ArrayList([]const u8) = .empty,
730
/// Walks all load commands and caches the pieces later dumps need: segments
/// and their sections, the symbol and string tables, the indirect symbol
/// table, and the path names of imported dylibs.
///
/// Returns `error.InvalidMachO` when a symbol table, string table or indirect
/// symbol table described by a load command does not fit inside `ctx.data`.
fn parse(ctx: *ObjectContext) !void {
    var it = try ctx.getLoadCommandIterator();
    while (try it.next()) |cmd| {
        switch (cmd.hdr.cmd) {
            .SEGMENT_64 => {
                const seg = cmd.cast(macho.segment_command_64).?;
                try ctx.segments.append(ctx.gpa, seg);
                try ctx.sections.ensureUnusedCapacity(ctx.gpa, seg.nsects);
                for (cmd.getSections()) |sect| {
                    ctx.sections.appendAssumeCapacity(sect);
                }
            },
            .SYMTAB => {
                const lc = cmd.cast(macho.symtab_command).?;
                // The symbol table is addressed through a raw many-pointer, which
                // carries no bounds checking, so validate the range explicitly.
                // Computed in u64 so that the u32 fields cannot overflow.
                const symtab_end = @as(u64, lc.symoff) + @as(u64, lc.nsyms) * @sizeOf(macho.nlist_64);
                const strtab_end = @as(u64, lc.stroff) + @as(u64, lc.strsize);
                if (symtab_end > ctx.data.len or strtab_end > ctx.data.len) return error.InvalidMachO;

                const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(ctx.data.ptr + lc.symoff))[0..lc.nsyms];
                const strtab = ctx.data[lc.stroff..][0..lc.strsize];
                try ctx.symtab.appendUnalignedSlice(ctx.gpa, symtab);
                try ctx.strtab.appendSlice(ctx.gpa, strtab);
            },
            .DYSYMTAB => {
                const lc = cmd.cast(macho.dysymtab_command).?;
                // Same reasoning as for the symbol table above.
                const indexes_end = @as(u64, lc.indirectsymoff) + @as(u64, lc.nindirectsyms) * @sizeOf(u32);
                if (indexes_end > ctx.data.len) return error.InvalidMachO;

                const indexes = @as([*]align(1) const u32, @ptrCast(ctx.data.ptr + lc.indirectsymoff))[0..lc.nindirectsyms];
                try ctx.indsymtab.appendUnalignedSlice(ctx.gpa, indexes);
            },
            .LOAD_DYLIB,
            .LOAD_WEAK_DYLIB,
            .REEXPORT_DYLIB,
            => {
                try ctx.imports.append(ctx.gpa, cmd.getDylibPathName());
            },
            else => {},
        }
    }
}
768
/// Returns the NUL-terminated string that starts at byte offset `off` of the
/// cached string table. Asserts that `off` lies inside the table.
fn getString(ctx: ObjectContext, off: u32) [:0]const u8 {
    const table = ctx.strtab.items;
    assert(off < table.len);
    const start: [*:0]const u8 = @ptrCast(table.ptr + off);
    return mem.sliceTo(start, 0);
}
773
/// Creates an iterator over the load commands, which start immediately after
/// the `mach_header_64` at the beginning of `ctx.data`.
fn getLoadCommandIterator(ctx: ObjectContext) !macho.LoadCommandIterator {
    const after_header = ctx.data[@sizeOf(macho.mach_header_64)..];
    return .init(&ctx.header, after_header);
}
777
/// Returns the first load command whose type equals `cmd`, or null when the
/// file contains none.
fn getLoadCommand(ctx: ObjectContext, cmd: macho.LC) !?macho.LoadCommandIterator.LoadCommand {
    var commands = try ctx.getLoadCommandIterator();
    while (try commands.next()) |candidate| {
        if (candidate.hdr.cmd != cmd) continue;
        return candidate;
    }
    return null;
}
785
/// Returns the first cached segment whose name equals `name`, or null.
fn getSegmentByName(ctx: ObjectContext, name: []const u8) ?macho.segment_command_64 {
    return for (ctx.segments.items) |candidate| {
        if (mem.eql(u8, candidate.segName(), name)) break candidate;
    } else null;
}
792
/// Returns the first cached section matching both the segment name `segname`
/// and the section name `sectname`, or null.
fn getSectionByName(ctx: ObjectContext, segname: []const u8, sectname: []const u8) ?macho.section_64 {
    return for (ctx.sections.items) |candidate| {
        const same_segment = mem.eql(u8, candidate.segName(), segname);
        if (same_segment and mem.eql(u8, candidate.sectName(), sectname)) break candidate;
    } else null;
}
799
/// Writes a textual summary of the Mach-O header to `writer`: CPU type, file
/// type, number and total size of load commands, and one name per set flag bit
/// on the final `flags` line. CPU and file types without a known name are
/// printed as "Unknown". The output always ends with a newline.
fn dumpHeader(hdr: macho.mach_header_64, writer: anytype) !void {
    const cpu_name = switch (hdr.cputype) {
        macho.CPU_TYPE_ARM64 => "ARM64",
        macho.CPU_TYPE_X86_64 => "X86_64",
        else => "Unknown",
    };
    const file_kind = switch (hdr.filetype) {
        macho.MH_OBJECT => "MH_OBJECT",
        macho.MH_EXECUTE => "MH_EXECUTE",
        macho.MH_FVMLIB => "MH_FVMLIB",
        macho.MH_CORE => "MH_CORE",
        macho.MH_PRELOAD => "MH_PRELOAD",
        macho.MH_DYLIB => "MH_DYLIB",
        macho.MH_DYLINKER => "MH_DYLINKER",
        macho.MH_BUNDLE => "MH_BUNDLE",
        macho.MH_DYLIB_STUB => "MH_DYLIB_STUB",
        macho.MH_DSYM => "MH_DSYM",
        macho.MH_KEXT_BUNDLE => "MH_KEXT_BUNDLE",
        else => "Unknown",
    };

    try writer.print(
        \\header
        \\cputype {s}
        \\filetype {s}
        \\ncmds {d}
        \\sizeofcmds {x}
        \\flags
    , .{ cpu_name, file_kind, hdr.ncmds, hdr.sizeofcmds });

    // Flag names in the order they are printed. Each entry carries its own
    // leading space so the names can be appended directly after "flags".
    const FlagName = struct { mask: u32, text: []const u8 };
    const flag_names = [_]FlagName{
        .{ .mask = macho.MH_NOUNDEFS, .text = " NOUNDEFS" },
        .{ .mask = macho.MH_INCRLINK, .text = " INCRLINK" },
        .{ .mask = macho.MH_DYLDLINK, .text = " DYLDLINK" },
        .{ .mask = macho.MH_BINDATLOAD, .text = " BINDATLOAD" },
        .{ .mask = macho.MH_PREBOUND, .text = " PREBOUND" },
        .{ .mask = macho.MH_SPLIT_SEGS, .text = " SPLIT_SEGS" },
        .{ .mask = macho.MH_LAZY_INIT, .text = " LAZY_INIT" },
        .{ .mask = macho.MH_TWOLEVEL, .text = " TWOLEVEL" },
        .{ .mask = macho.MH_FORCE_FLAT, .text = " FORCE_FLAT" },
        .{ .mask = macho.MH_NOMULTIDEFS, .text = " NOMULTIDEFS" },
        .{ .mask = macho.MH_NOFIXPREBINDING, .text = " NOFIXPREBINDING" },
        .{ .mask = macho.MH_PREBINDABLE, .text = " PREBINDABLE" },
        .{ .mask = macho.MH_ALLMODSBOUND, .text = " ALLMODSBOUND" },
        .{ .mask = macho.MH_SUBSECTIONS_VIA_SYMBOLS, .text = " SUBSECTIONS_VIA_SYMBOLS" },
        .{ .mask = macho.MH_CANONICAL, .text = " CANONICAL" },
        .{ .mask = macho.MH_WEAK_DEFINES, .text = " WEAK_DEFINES" },
        .{ .mask = macho.MH_BINDS_TO_WEAK, .text = " BINDS_TO_WEAK" },
        .{ .mask = macho.MH_ALLOW_STACK_EXECUTION, .text = " ALLOW_STACK_EXECUTION" },
        .{ .mask = macho.MH_ROOT_SAFE, .text = " ROOT_SAFE" },
        .{ .mask = macho.MH_SETUID_SAFE, .text = " SETUID_SAFE" },
        .{ .mask = macho.MH_NO_REEXPORTED_DYLIBS, .text = " NO_REEXPORTED_DYLIBS" },
        .{ .mask = macho.MH_PIE, .text = " PIE" },
        .{ .mask = macho.MH_DEAD_STRIPPABLE_DYLIB, .text = " DEAD_STRIPPABLE_DYLIB" },
        .{ .mask = macho.MH_HAS_TLV_DESCRIPTORS, .text = " HAS_TLV_DESCRIPTORS" },
        .{ .mask = macho.MH_NO_HEAP_EXECUTION, .text = " NO_HEAP_EXECUTION" },
        .{ .mask = macho.MH_APP_EXTENSION_SAFE, .text = " APP_EXTENSION_SAFE" },
        .{ .mask = macho.MH_NLIST_OUTOFSYNC_WITH_DYLDINFO, .text = " NLIST_OUTOFSYNC_WITH_DYLDINFO" },
    };
    for (flag_names) |flag| {
        if (hdr.flags & flag.mask != 0) try writer.writeAll(flag.text);
    }

    try writer.writeByte('\n');
}
867
        /// Writes a textual description of the load command `lc` to `writer`.
        ///
        /// Every command gets a three-line header (`LC <index>`, `cmd <tag name>`,
        /// `cmdsize <n>`). Commands that have a dedicated switch arm below get
        /// their fields appended, one `name value` pair per line; all other
        /// commands get the header only.
        ///
        /// The output never ends with a newline: each arm starts by writing the
        /// `'\n'` that terminates the previous line.
        fn dumpLoadCommand(lc: macho.LoadCommandIterator.LoadCommand, index: usize, writer: anytype) !void {
            // Common header, printed for every load command.
            try writer.print(
                \\LC {d}
                \\cmd {s}
                \\cmdsize {d}
            , .{ index, @tagName(lc.hdr.cmd), lc.hdr.cmdsize });

            // NOTE(review): every `lc.cast(T).?` below unwraps unconditionally;
            // presumably `cast` returns null when the command is too small for
            // `T` — confirm that malformed input cannot reach this point.
            switch (lc.hdr.cmd) {
                .SEGMENT_64 => {
                    const seg = lc.cast(macho.segment_command_64).?;
                    try writer.writeByte('\n');
                    try writer.print(
                        \\segname {s}
                        \\vmaddr {x}
                        \\vmsize {x}
                        \\fileoff {x}
                        \\filesz {x}
                    , .{
                        seg.segName(),
                        seg.vmaddr,
                        seg.vmsize,
                        seg.fileoff,
                        seg.filesize,
                    });

                    // Each section of the segment is listed right after the segment fields.
                    for (lc.getSections()) |sect| {
                        try writer.writeByte('\n');
                        try writer.print(
                            \\sectname {s}
                            \\addr {x}
                            \\size {x}
                            \\offset {x}
                            \\align {x}
                        , .{
                            sect.sectName(),
                            sect.addr,
                            sect.size,
                            sect.offset,
                            sect.@"align",
                        });
                    }
                },

                .ID_DYLIB,
                .LOAD_DYLIB,
                .LOAD_WEAK_DYLIB,
                .REEXPORT_DYLIB,
                => {
                    const dylib = lc.cast(macho.dylib_command).?;
                    try writer.writeByte('\n');
                    try writer.print(
                        \\name {s}
                        \\timestamp {d}
                        \\current version {x}
                        \\compatibility version {x}
                    , .{
                        lc.getDylibPathName(),
                        dylib.dylib.timestamp,
                        dylib.dylib.current_version,
                        dylib.dylib.compatibility_version,
                    });
                },

                .MAIN => {
                    const main = lc.cast(macho.entry_point_command).?;
                    try writer.writeByte('\n');
                    try writer.print(
                        \\entryoff {x}
                        \\stacksize {x}
                    , .{ main.entryoff, main.stacksize });
                },

                .RPATH => {
                    try writer.writeByte('\n');
                    try writer.print(
                        \\path {s}
                    , .{
                        lc.getRpathPathName(),
                    });
                },

                .UUID => {
                    const uuid = lc.cast(macho.uuid_command).?;
                    try writer.writeByte('\n');
                    try writer.print("uuid {x}", .{&uuid.uuid});
                },

                // These three commands share the `linkedit_data_command` layout.
                .DATA_IN_CODE,
                .FUNCTION_STARTS,
                .CODE_SIGNATURE,
                => {
                    const llc = lc.cast(macho.linkedit_data_command).?;
                    try writer.writeByte('\n');
                    try writer.print(
                        \\dataoff {x}
                        \\datasize {x}
                    , .{ llc.dataoff, llc.datasize });
                },

                .DYLD_INFO_ONLY => {
                    const dlc = lc.cast(macho.dyld_info_command).?;
                    try writer.writeByte('\n');
                    try writer.print(
                        \\rebaseoff {x}
                        \\rebasesize {x}
                        \\bindoff {x}
                        \\bindsize {x}
                        \\weakbindoff {x}
                        \\weakbindsize {x}
                        \\lazybindoff {x}
                        \\lazybindsize {x}
                        \\exportoff {x}
                        \\exportsize {x}
                    , .{
                        dlc.rebase_off,
                        dlc.rebase_size,
                        dlc.bind_off,
                        dlc.bind_size,
                        dlc.weak_bind_off,
                        dlc.weak_bind_size,
                        dlc.lazy_bind_off,
                        dlc.lazy_bind_size,
                        dlc.export_off,
                        dlc.export_size,
                    });
                },

                .SYMTAB => {
                    const slc = lc.cast(macho.symtab_command).?;
                    try writer.writeByte('\n');
                    try writer.print(
                        \\symoff {x}
                        \\nsyms {x}
                        \\stroff {x}
                        \\strsize {x}
                    , .{
                        slc.symoff,
                        slc.nsyms,
                        slc.stroff,
                        slc.strsize,
                    });
                },

                .DYSYMTAB => {
                    const dlc = lc.cast(macho.dysymtab_command).?;
                    try writer.writeByte('\n');
                    try writer.print(
                        \\ilocalsym {x}
                        \\nlocalsym {x}
                        \\iextdefsym {x}
                        \\nextdefsym {x}
                        \\iundefsym {x}
                        \\nundefsym {x}
                        \\indirectsymoff {x}
                        \\nindirectsyms {x}
                    , .{
                        dlc.ilocalsym,
                        dlc.nlocalsym,
                        dlc.iextdefsym,
                        dlc.nextdefsym,
                        dlc.iundefsym,
                        dlc.nundefsym,
                        dlc.indirectsymoff,
                        dlc.nindirectsyms,
                    });
                },

                .BUILD_VERSION => {
                    const blc = lc.cast(macho.build_version_command).?;
                    try writer.writeByte('\n');
                    // Versions are decoded as `X.Y.Z`: X is everything above bit 16,
                    // Y is bits 8..15 and Z is bits 0..7 of the packed value.
                    try writer.print(
                        \\platform {s}
                        \\minos {d}.{d}.{d}
                        \\sdk {d}.{d}.{d}
                        \\ntools {d}
                    , .{
                        @tagName(blc.platform),
                        blc.minos >> 16,
                        @as(u8, @truncate(blc.minos >> 8)),
                        @as(u8, @truncate(blc.minos)),
                        blc.sdk >> 16,
                        @as(u8, @truncate(blc.sdk >> 8)),
                        @as(u8, @truncate(blc.sdk)),
                        blc.ntools,
                    });
                    for (lc.getBuildVersionTools()) |tool| {
                        try writer.writeByte('\n');
                        // Known tools are printed by name, anything else by its raw integer value.
                        switch (tool.tool) {
                            .CLANG, .SWIFT, .LD, .LLD, .ZIG => try writer.print("tool {s}\n", .{@tagName(tool.tool)}),
                            else => |x| try writer.print("tool {d}\n", .{@intFromEnum(x)}),
                        }
                        try writer.print(
                            \\version {d}.{d}.{d}
                        , .{
                            tool.version >> 16,
                            @as(u8, @truncate(tool.version >> 8)),
                            @as(u8, @truncate(tool.version)),
                        });
                    }
                },

                .VERSION_MIN_MACOSX,
                .VERSION_MIN_IPHONEOS,
                .VERSION_MIN_WATCHOS,
                .VERSION_MIN_TVOS,
                => {
                    const vlc = lc.cast(macho.version_min_command).?;
                    try writer.writeByte('\n');
                    // Same `X.Y.Z` decoding as for BUILD_VERSION above.
                    try writer.print(
                        \\version {d}.{d}.{d}
                        \\sdk {d}.{d}.{d}
                    , .{
                        vlc.version >> 16,
                        @as(u8, @truncate(vlc.version >> 8)),
                        @as(u8, @truncate(vlc.version)),
                        vlc.sdk >> 16,
                        @as(u8, @truncate(vlc.sdk >> 8)),
                        @as(u8, @truncate(vlc.sdk)),
                    });
                },

                // Any other command: the header printed above is all we emit.
                else => {},
            }
        }
1093
        /// Writes `symtab_label` followed by one line per entry of `ctx.symtab`.
        ///
        /// Entries are classified in this order: stab entries, symbols defined in
        /// a section, tentative (common) symbols, then undefined symbols. A symbol
        /// that matches none of these branches produces no output.
        fn dumpSymtab(ctx: ObjectContext, writer: anytype) !void {
            try writer.writeAll(symtab_label ++ "\n");

            for (ctx.symtab.items) |sym| {
                const sym_name = ctx.getString(sym.n_strx);
                if (sym.n_type.bits.is_stab != 0) {
                    // Stab entry: value, optional "(segment,section)", stab kind, name.
                    const tt = switch (sym.n_type.stab) {
                        _ => "UNKNOWN STAB",
                        else => @tagName(sym.n_type.stab),
                    };
                    try writer.print("{x}", .{sym.n_value});
                    if (sym.n_sect > 0) {
                        // `n_sect` is 1-based; 0 means the stab is not tied to a section.
                        const sect = ctx.sections.items[sym.n_sect - 1];
                        try writer.print(" ({s},{s})", .{ sect.segName(), sect.sectName() });
                    }
                    try writer.print(" {s} (stab) {s}\n", .{ tt, sym_name });
                } else if (sym.n_type.bits.type == .sect) {
                    // Symbol defined in a section: address, "(segment,section)",
                    // attribute markers, name.
                    const sect = ctx.sections.items[sym.n_sect - 1];
                    try writer.print("{x} ({s},{s})", .{
                        sym.n_value,
                        sect.segName(),
                        sect.sectName(),
                    });
                    if (sym.n_desc.referenced_dynamically) try writer.writeAll(" [referenced dynamically]");
                    if (sym.n_desc.weak_def_or_ref_to_weak) try writer.writeAll(" weak");
                    if (sym.n_desc.weak_ref) try writer.writeAll(" weakref");
                    if (sym.n_type.bits.ext) {
                        if (sym.n_type.bits.pext) try writer.writeAll(" private");
                        try writer.writeAll(" external");
                    } else if (sym.n_type.bits.pext) try writer.writeAll(" (was private external)");
                    try writer.print(" {s}\n", .{sym_name});
                } else if (sym.tentative()) {
                    // The alignment exponent is taken from bits 8..11 of `n_desc`.
                    const alignment = (@as(u16, @bitCast(sym.n_desc)) >> 8) & 0x0F;
                    try writer.print(" 0x{x:0>16} (common) (alignment 2^{d})", .{ sym.n_value, alignment });
                    if (sym.n_type.bits.ext) try writer.writeAll(" external");
                    try writer.print(" {s}\n", .{sym_name});
                } else if (sym.n_type.bits.type == .undf) {
                    // The library ordinal is `n_desc` (read as a signed 16-bit
                    // value) floor-divided by `macho.N_SYMBOL_RESOLVER`.
                    const ordinal = @divFloor(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER);
                    const import_name = blk: {
                        // Non-positive ordinals must be one of the three special
                        // values handled here; anything else hits `unreachable`.
                        if (ordinal <= 0) {
                            if (ordinal == macho.BIND_SPECIAL_DYLIB_SELF)
                                break :blk "self import";
                            if (ordinal == macho.BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE)
                                break :blk "main executable";
                            if (ordinal == macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP)
                                break :blk "flat lookup";
                            unreachable;
                        }
                        // Positive ordinals are 1-based indices into `ctx.imports`.
                        const full_path = ctx.imports.items[@as(u16, @bitCast(ordinal)) - 1];
                        const basename = fs.path.basename(full_path);
                        assert(basename.len > 0);
                        // Report the basename up to (excluding) its last '.'.
                        const ext = mem.lastIndexOfScalar(u8, basename, '.') orelse basename.len;
                        break :blk basename[0..ext];
                    };
                    try writer.writeAll("(undefined)");
                    if (sym.n_desc.weak_ref) try writer.writeAll(" weakref");
                    if (sym.n_type.bits.ext) try writer.writeAll(" external");
                    try writer.print(" {s} (from {s})\n", .{
                        sym_name,
                        import_name,
                    });
                }
            }
        }
1158
1159 fn dumpIndirectSymtab(ctx: ObjectContext, writer: anytype) !void {
1160 try writer.writeAll(indirect_symtab_label ++ "\n");
1161
1162 var sects_buffer: [3]macho.section_64 = undefined;
1163 const sects = blk: {
1164 var count: usize = 0;
1165 if (ctx.getSectionByName("__TEXT", "__stubs")) |sect| {
1166 sects_buffer[count] = sect;
1167 count += 1;
1168 }
1169 if (ctx.getSectionByName("__DATA_CONST", "__got")) |sect| {
1170 sects_buffer[count] = sect;
1171 count += 1;
1172 }
1173 if (ctx.getSectionByName("__DATA", "__la_symbol_ptr")) |sect| {
1174 sects_buffer[count] = sect;
1175 count += 1;
1176 }
1177 break :blk sects_buffer[0..count];
1178 };
1179
1180 const sortFn = struct {
1181 fn sortFn(c: void, lhs: macho.section_64, rhs: macho.section_64) bool {
1182 _ = c;
1183 return lhs.reserved1 < rhs.reserved1;
1184 }
1185 }.sortFn;
1186 mem.sort(macho.section_64, sects, {}, sortFn);
1187
1188 var i: usize = 0;
1189 while (i < sects.len) : (i += 1) {
1190 const sect = sects[i];
1191 const start = sect.reserved1;
1192 const end = if (i + 1 >= sects.len) ctx.indsymtab.items.len else sects[i + 1].reserved1;
1193 const entry_size = blk: {
1194 if (mem.eql(u8, sect.sectName(), "__stubs")) break :blk sect.reserved2;
1195 break :blk @sizeOf(u64);
1196 };
1197
1198 try writer.print("{s},{s}\n", .{ sect.segName(), sect.sectName() });
1199 try writer.print("nentries {d}\n", .{end - start});
1200 for (ctx.indsymtab.items[start..end], 0..) |index, j| {
1201 const sym = ctx.symtab.items[index];
1202 const addr = sect.addr + entry_size * j;
1203 try writer.print("0x{x} {d} {s}\n", .{ addr, index, ctx.getString(sym.n_strx) });
1204 }
1205 }
1206 }
1207
1208 fn dumpRebaseInfo(ctx: ObjectContext, data: []const u8, writer: anytype) !void {
1209 var rebases = std.array_list.Managed(u64).init(ctx.gpa);
1210 defer rebases.deinit();
1211 try ctx.parseRebaseInfo(data, &rebases);
1212 mem.sort(u64, rebases.items, {}, std.sort.asc(u64));
1213 for (rebases.items) |addr| {
1214 try writer.print("0x{x}\n", .{addr});
1215 }
1216 }
1217
        /// Interprets the rebase opcode stream `data` and appends every rebased
        /// address (segment `vmaddr` + current offset) to `rebases`, in stream order.
        ///
        /// Parsing stops at `REBASE_OPCODE_DONE`, at an unrecognized opcode, or
        /// when the data runs out. A "do rebase" opcode that appears before any
        /// `SET_SEGMENT_AND_OFFSET_ULEB` trips the `seg_id.?` unwrap.
        fn parseRebaseInfo(ctx: ObjectContext, data: []const u8, rebases: *std.array_list.Managed(u64)) !void {
            var reader: std.Io.Reader = .fixed(data);

            // Interpreter state: current segment index and offset within it.
            var seg_id: ?u8 = null;
            var offset: u64 = 0;
            while (true) {
                // Running out of bytes ends parsing without an error.
                const byte = reader.takeByte() catch break;
                // Each byte is split into an opcode part and an immediate part.
                const opc = byte & macho.REBASE_OPCODE_MASK;
                const imm = byte & macho.REBASE_IMMEDIATE_MASK;
                switch (opc) {
                    macho.REBASE_OPCODE_DONE => break,
                    // The rebase type is not reported, so it is ignored.
                    macho.REBASE_OPCODE_SET_TYPE_IMM => {},
                    macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
                        seg_id = imm;
                        offset = try reader.takeLeb128(u64);
                    },
                    macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED => {
                        // The immediate counts 8-byte units.
                        offset += imm * @sizeOf(u64);
                    },
                    macho.REBASE_OPCODE_ADD_ADDR_ULEB => {
                        const addend = try reader.takeLeb128(u64);
                        offset += addend;
                    },
                    macho.REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB => {
                        // Record one rebase, then advance past it plus the addend.
                        const addend = try reader.takeLeb128(u64);
                        const seg = ctx.segments.items[seg_id.?];
                        const addr = seg.vmaddr + offset;
                        try rebases.append(addr);
                        offset += addend + @sizeOf(u64);
                    },
                    macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES,
                    macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES,
                    macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB,
                    => {
                        // All three variants record `ntimes` rebases, each followed
                        // by `skip` extra bytes; they differ only in where the
                        // repeat count and skip come from.
                        var ntimes: u64 = 1;
                        var skip: u64 = 0;
                        switch (opc) {
                            macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES => {
                                ntimes = imm;
                            },
                            macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES => {
                                ntimes = try reader.takeLeb128(u64);
                            },
                            macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB => {
                                ntimes = try reader.takeLeb128(u64);
                                skip = try reader.takeLeb128(u64);
                            },
                            else => unreachable,
                        }
                        const seg = ctx.segments.items[seg_id.?];
                        const base_addr = seg.vmaddr;
                        var count: usize = 0;
                        while (count < ntimes) : (count += 1) {
                            const addr = base_addr + offset;
                            try rebases.append(addr);
                            offset += skip + @sizeOf(u64);
                        }
                    },
                    // Unknown opcode: stop interpreting.
                    else => break,
                }
            }
        }
1280
1281 const Binding = struct {
1282 address: u64,
1283 addend: i64,
1284 ordinal: u16,
1285 tag: Tag,
1286 name: []const u8,
1287
1288 fn deinit(binding: *Binding, gpa: Allocator) void {
1289 gpa.free(binding.name);
1290 }
1291
1292 fn lessThan(ctx: void, lhs: Binding, rhs: Binding) bool {
1293 _ = ctx;
1294 return lhs.address < rhs.address;
1295 }
1296
1297 const Tag = enum {
1298 ord,
1299 self,
1300 exe,
1301 flat,
1302 };
1303 };
1304
1305 fn dumpBindInfo(ctx: ObjectContext, data: []const u8, writer: anytype) !void {
1306 var bindings = std.array_list.Managed(Binding).init(ctx.gpa);
1307 defer {
1308 for (bindings.items) |*b| {
1309 b.deinit(ctx.gpa);
1310 }
1311 bindings.deinit();
1312 }
1313 var data_reader: std.Io.Reader = .fixed(data);
1314 try ctx.parseBindInfo(&data_reader, &bindings);
1315 mem.sort(Binding, bindings.items, {}, Binding.lessThan);
1316 for (bindings.items) |binding| {
1317 try writer.print("0x{x} [addend: {d}]", .{ binding.address, binding.addend });
1318 try writer.writeAll(" (");
1319 switch (binding.tag) {
1320 .self => try writer.writeAll("self"),
1321 .exe => try writer.writeAll("main executable"),
1322 .flat => try writer.writeAll("flat lookup"),
1323 .ord => try writer.writeAll(std.fs.path.basename(ctx.imports.items[binding.ordinal - 1])),
1324 }
1325 try writer.print(") {s}\n", .{binding.name});
1326 }
1327 }
1328
        /// Interprets a bind opcode stream from `reader` and appends one `Binding`
        /// per bound location to `bindings`. Each appended binding owns a copy of
        /// its symbol name allocated with `ctx.gpa`.
        ///
        /// Parsing stops at an unrecognized opcode or when the reader runs out of
        /// bytes. Unlike the rebase stream, `BIND_OPCODE_DONE` does not end
        /// parsing here; it is skipped.
        fn parseBindInfo(ctx: ObjectContext, reader: *std.Io.Reader, bindings: *std.array_list.Managed(Binding)) !void {
            // Interpreter state, carried from one opcode to the next.
            var seg_id: ?u8 = null;
            var tag: Binding.Tag = .self;
            var ordinal: u16 = 0;
            var offset: u64 = 0;
            var addend: i64 = 0;

            // Current symbol name; duplicated for every emitted binding.
            var name_buf = std.array_list.Managed(u8).init(ctx.gpa);
            defer name_buf.deinit();

            while (true) {
                // Running out of bytes ends parsing without an error.
                const byte = reader.takeByte() catch break;
                // Each byte is split into an opcode part and an immediate part.
                const opc = byte & macho.BIND_OPCODE_MASK;
                const imm = byte & macho.BIND_IMMEDIATE_MASK;
                switch (opc) {
                    // DONE and the bind type are ignored.
                    macho.BIND_OPCODE_DONE,
                    macho.BIND_OPCODE_SET_TYPE_IMM,
                    => {},
                    macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
                        tag = .ord;
                        ordinal = imm;
                    },
                    macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => {
                        // Only these three immediates are accepted; any other
                        // value hits `unreachable`.
                        switch (imm) {
                            0 => tag = .self,
                            0xf => tag = .exe,
                            0xe => tag = .flat,
                            else => unreachable,
                        }
                    },
                    macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
                        seg_id = imm;
                        offset = try reader.takeLeb128(u64);
                    },
                    macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
                        // NOTE(review): `takeDelimiterInclusive` presumably keeps
                        // the terminating 0 byte, so it ends up in the stored
                        // name — confirm this is intended.
                        name_buf.clearRetainingCapacity();
                        try name_buf.appendSlice(try reader.takeDelimiterInclusive(0));
                    },
                    macho.BIND_OPCODE_SET_ADDEND_SLEB => {
                        addend = try reader.takeLeb128(i64);
                    },
                    macho.BIND_OPCODE_ADD_ADDR_ULEB => {
                        // The ULEB value is reinterpreted as signed, so the
                        // offset can move backwards.
                        const x = try reader.takeLeb128(u64);
                        offset = @intCast(@as(i64, @intCast(offset)) + @as(i64, @bitCast(x)));
                    },
                    macho.BIND_OPCODE_DO_BIND,
                    macho.BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB,
                    macho.BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED,
                    macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB,
                    => {
                        // All four variants emit `count` bindings; they differ
                        // only in how far the offset advances after each one.
                        var add_addr: u64 = 0;
                        var count: u64 = 1;
                        var skip: u64 = 0;

                        switch (opc) {
                            macho.BIND_OPCODE_DO_BIND => {},
                            macho.BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => {
                                add_addr = try reader.takeLeb128(u64);
                            },
                            macho.BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => {
                                // The immediate counts 8-byte units.
                                add_addr = imm * @sizeOf(u64);
                            },
                            macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => {
                                count = try reader.takeLeb128(u64);
                                skip = try reader.takeLeb128(u64);
                            },
                            else => unreachable,
                        }

                        // A bind before any SET_SEGMENT_AND_OFFSET trips this unwrap.
                        const seg = ctx.segments.items[seg_id.?];
                        var i: u64 = 0;
                        while (i < count) : (i += 1) {
                            const addr: u64 = @intCast(@as(i64, @intCast(seg.vmaddr + offset)));
                            try bindings.append(.{
                                .address = addr,
                                .addend = addend,
                                .tag = tag,
                                .ordinal = ordinal,
                                .name = try ctx.gpa.dupe(u8, name_buf.items),
                            });
                            offset += skip + @sizeOf(u64) + add_addr;
                        }
                    },
                    // Unknown opcode: stop interpreting.
                    else => break,
                }
            }
        }
1416
1417 fn dumpExportsTrie(ctx: ObjectContext, data: []const u8, writer: anytype) !void {
1418 const seg = ctx.getSegmentByName("__TEXT") orelse return;
1419
1420 var arena = std.heap.ArenaAllocator.init(ctx.gpa);
1421 defer arena.deinit();
1422
1423 var exports = std.array_list.Managed(Export).init(arena.allocator());
1424 var it: TrieIterator = .{ .stream = .fixed(data) };
1425 try parseTrieNode(arena.allocator(), &it, "", &exports);
1426
1427 mem.sort(Export, exports.items, {}, Export.lessThan);
1428
1429 for (exports.items) |exp| {
1430 switch (exp.tag) {
1431 .@"export" => {
1432 const info = exp.data.@"export";
1433 if (info.kind != .regular or info.weak) {
1434 try writer.writeByte('[');
1435 }
1436 switch (info.kind) {
1437 .regular => {},
1438 .absolute => try writer.writeAll("ABS, "),
1439 .tlv => try writer.writeAll("THREAD_LOCAL, "),
1440 }
1441 if (info.weak) try writer.writeAll("WEAK");
1442 if (info.kind != .regular or info.weak) {
1443 try writer.writeAll("] ");
1444 }
1445 try writer.print("{x} ", .{seg.vmaddr + info.vmoffset});
1446 },
1447 else => {},
1448 }
1449
1450 try writer.print("{s}\n", .{exp.name});
1451 }
1452 }
1453
1454 const TrieIterator = struct {
1455 stream: std.Io.Reader,
1456
1457 fn takeLeb128(it: *TrieIterator) !u64 {
1458 return it.stream.takeLeb128(u64);
1459 }
1460
1461 fn readString(it: *TrieIterator) ![:0]const u8 {
1462 return it.stream.takeSentinel(0);
1463 }
1464
1465 fn takeByte(it: *TrieIterator) !u8 {
1466 return it.stream.takeByte();
1467 }
1468 };
1469
1470 const Export = struct {
1471 name: []const u8,
1472 tag: enum { @"export", reexport, stub_resolver },
1473 data: union {
1474 @"export": struct {
1475 kind: enum { regular, absolute, tlv },
1476 weak: bool = false,
1477 vmoffset: u64,
1478 },
1479 reexport: u64,
1480 stub_resolver: struct {
1481 stub_offset: u64,
1482 resolver_offset: u64,
1483 },
1484 },
1485
1486 inline fn rankByTag(@"export": Export) u3 {
1487 return switch (@"export".tag) {
1488 .@"export" => 1,
1489 .reexport => 2,
1490 .stub_resolver => 3,
1491 };
1492 }
1493
1494 fn lessThan(ctx: void, lhs: Export, rhs: Export) bool {
1495 _ = ctx;
1496 if (lhs.rankByTag() == rhs.rankByTag()) {
1497 return switch (lhs.tag) {
1498 .@"export" => lhs.data.@"export".vmoffset < rhs.data.@"export".vmoffset,
1499 .reexport => lhs.data.reexport < rhs.data.reexport,
1500 .stub_resolver => lhs.data.stub_resolver.stub_offset < rhs.data.stub_resolver.stub_offset,
1501 };
1502 }
1503 return lhs.rankByTag() < rhs.rankByTag();
1504 }
1505 };
1506
        /// Recursively decodes the export-trie node at the iterator's current
        /// position and appends every terminal entry found in it, or in any node
        /// reachable from it, to `exports`.
        ///
        /// `prefix` is the symbol name accumulated from the edge labels on the
        /// path to this node. All names stored in `exports` are either `prefix`
        /// values or copies allocated from `arena`.
        fn parseTrieNode(
            arena: Allocator,
            it: *TrieIterator,
            prefix: []const u8,
            exports: *std.array_list.Managed(Export),
        ) !void {
            // A non-zero size means this node carries terminal (export) information.
            const size = try it.takeLeb128();
            if (size > 0) {
                const flags = try it.takeLeb128();
                // The re-export and stub/resolver arms match only when `flags`
                // equals that constant exactly; every other value is decoded as
                // a plain export.
                switch (flags) {
                    macho.EXPORT_SYMBOL_FLAGS_REEXPORT => {
                        const ord = try it.takeLeb128();
                        // Copy the name out of the stream before anything else
                        // moves the cursor.
                        const name = try arena.dupe(u8, try it.readString());
                        try exports.append(.{
                            // An empty name falls back to the accumulated prefix.
                            .name = if (name.len > 0) name else prefix,
                            .tag = .reexport,
                            .data = .{ .reexport = ord },
                        });
                    },
                    macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER => {
                        const stub_offset = try it.takeLeb128();
                        const resolver_offset = try it.takeLeb128();
                        try exports.append(.{
                            .name = prefix,
                            .tag = .stub_resolver,
                            .data = .{ .stub_resolver = .{
                                .stub_offset = stub_offset,
                                .resolver_offset = resolver_offset,
                            } },
                        });
                    },
                    else => {
                        const vmoff = try it.takeLeb128();
                        try exports.append(.{
                            .name = prefix,
                            .tag = .@"export",
                            .data = .{ .@"export" = .{
                                // A kind outside these three values hits `unreachable`.
                                .kind = switch (flags & macho.EXPORT_SYMBOL_FLAGS_KIND_MASK) {
                                    macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR => .regular,
                                    macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE => .absolute,
                                    macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL => .tlv,
                                    else => unreachable,
                                },
                                .weak = flags & macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION != 0,
                                .vmoffset = vmoff,
                            } },
                        });
                    },
                }
            }

            // Child edges: a one-byte count, then (label, child offset) pairs.
            const nedges = try it.takeByte();
            for (0..nedges) |_| {
                const label = try it.readString();
                const off = try it.takeLeb128();
                // Build the child's prefix before seeking; `label` refers to
                // stream data at the current position.
                const prefix_label = try std.fmt.allocPrint(arena, "{s}{s}", .{ prefix, label });
                // Jump to the child node, decode it, then resume with the next edge.
                const curr = it.stream.seek;
                it.stream.seek = off;
                try parseTrieNode(arena, it, prefix_label, exports);
                it.stream.seek = curr;
            }
        }
1569
1570 fn dumpSection(ctx: ObjectContext, sect: macho.section_64, writer: anytype) !void {
1571 const data = ctx.data[sect.offset..][0..sect.size];
1572 try writer.print("{s}", .{data});
1573 }
1574 };
1575
1576 fn parseAndDumpObject(step: *Step, check: Check, bytes: []const u8) ![]const u8 {
1577 const gpa = step.owner.allocator;
1578 const hdr = @as(*align(1) const macho.mach_header_64, @ptrCast(bytes.ptr)).*;
1579 if (hdr.magic != macho.MH_MAGIC_64) {
1580 return error.InvalidMagicNumber;
1581 }
1582
1583 var ctx = ObjectContext{ .gpa = gpa, .data = bytes, .header = hdr };
1584 try ctx.parse();
1585
1586 var output: std.Io.Writer.Allocating = .init(gpa);
1587 defer output.deinit();
1588 const writer = &output.writer;
1589
1590 switch (check.kind) {
1591 .headers => {
1592 try ObjectContext.dumpHeader(ctx.header, writer);
1593
1594 var it = try ctx.getLoadCommandIterator();
1595 var i: usize = 0;
1596 while (try it.next()) |cmd| {
1597 try ObjectContext.dumpLoadCommand(cmd, i, writer);
1598 try writer.writeByte('\n');
1599
1600 i += 1;
1601 }
1602 },
1603
1604 .symtab => if (ctx.symtab.items.len > 0) {
1605 try ctx.dumpSymtab(writer);
1606 } else return step.fail("no symbol table found", .{}),
1607
1608 .indirect_symtab => if (ctx.symtab.items.len > 0 and ctx.indsymtab.items.len > 0) {
1609 try ctx.dumpIndirectSymtab(writer);
1610 } else return step.fail("no indirect symbol table found", .{}),
1611
1612 .dyld_rebase,
1613 .dyld_bind,
1614 .dyld_weak_bind,
1615 .dyld_lazy_bind,
1616 => {
1617 const cmd = try ctx.getLoadCommand(.DYLD_INFO_ONLY) orelse
1618 return step.fail("no dyld info found", .{});
1619 const lc = cmd.cast(macho.dyld_info_command).?;
1620
1621 switch (check.kind) {
1622 .dyld_rebase => if (lc.rebase_size > 0) {
1623 const data = ctx.data[lc.rebase_off..][0..lc.rebase_size];
1624 try writer.writeAll(dyld_rebase_label ++ "\n");
1625 try ctx.dumpRebaseInfo(data, writer);
1626 } else return step.fail("no rebase data found", .{}),
1627
1628 .dyld_bind => if (lc.bind_size > 0) {
1629 const data = ctx.data[lc.bind_off..][0..lc.bind_size];
1630 try writer.writeAll(dyld_bind_label ++ "\n");
1631 try ctx.dumpBindInfo(data, writer);
1632 } else return step.fail("no bind data found", .{}),
1633
1634 .dyld_weak_bind => if (lc.weak_bind_size > 0) {
1635 const data = ctx.data[lc.weak_bind_off..][0..lc.weak_bind_size];
1636 try writer.writeAll(dyld_weak_bind_label ++ "\n");
1637 try ctx.dumpBindInfo(data, writer);
1638 } else return step.fail("no weak bind data found", .{}),
1639
1640 .dyld_lazy_bind => if (lc.lazy_bind_size > 0) {
1641 const data = ctx.data[lc.lazy_bind_off..][0..lc.lazy_bind_size];
1642 try writer.writeAll(dyld_lazy_bind_label ++ "\n");
1643 try ctx.dumpBindInfo(data, writer);
1644 } else return step.fail("no lazy bind data found", .{}),
1645
1646 else => unreachable,
1647 }
1648 },
1649
1650 .exports => blk: {
1651 if (try ctx.getLoadCommand(.DYLD_INFO_ONLY)) |cmd| {
1652 const lc = cmd.cast(macho.dyld_info_command).?;
1653 if (lc.export_size > 0) {
1654 const data = ctx.data[lc.export_off..][0..lc.export_size];
1655 try writer.writeAll(exports_label ++ "\n");
1656 try ctx.dumpExportsTrie(data, writer);
1657 break :blk;
1658 }
1659 }
1660 return step.fail("no exports data found", .{});
1661 },
1662
1663 .dump_section => {
1664 const name = mem.sliceTo(@as([*:0]const u8, @ptrCast(check.data.items.ptr + check.payload.dump_section)), 0);
1665 const sep_index = mem.indexOfScalar(u8, name, ',') orelse
1666 return step.fail("invalid section name: {s}", .{name});
1667 const segname = name[0..sep_index];
1668 const sectname = name[sep_index + 1 ..];
1669 const sect = ctx.getSectionByName(segname, sectname) orelse
1670 return step.fail("section '{s}' not found", .{name});
1671 try ctx.dumpSection(sect, writer);
1672 },
1673
1674 else => return step.fail("invalid check kind for MachO file format: {s}", .{@tagName(check.kind)}),
1675 }
1676
1677 return output.toOwnedSlice();
1678 }
1679};
1680
1681const ElfDumper = struct {
    // Heading lines for the ELF dumps. `archive_symtab_label` is the first line
    // written by `ArchiveContext.dumpSymtab`; the other labels presumably head
    // their corresponding dumps the same way (verify against the dump functions).
    const symtab_label = "symbol table";
    const dynamic_symtab_label = "dynamic symbol table";
    const dynamic_section_label = "dynamic section";
    const archive_symtab_label = "archive symbol table";
1686
1687 fn parseAndDump(step: *Step, check: Check, bytes: []const u8) ![]const u8 {
1688 return parseAndDumpArchive(step, check, bytes) catch |err| switch (err) {
1689 error.InvalidArchiveMagicNumber => try parseAndDumpObject(step, check, bytes),
1690 else => |e| return e,
1691 };
1692 }
1693
    /// Parses `bytes` as an `ar` archive and renders the part selected by
    /// `check.kind` as text: the archive symbol table for `.archive_symtab`,
    /// otherwise the per-member object dump.
    ///
    /// Returns `error.InvalidArchiveMagicNumber` when `bytes` does not start
    /// with `elf.ARMAG`, and `error.InvalidArchiveHeaderMagicNumber` when a
    /// member header has a bad `ar_fmag`. Fails the step when the requested
    /// data is absent.
    fn parseAndDumpArchive(step: *Step, check: Check, bytes: []const u8) ![]const u8 {
        const gpa = step.owner.allocator;
        var reader: std.Io.Reader = .fixed(bytes);

        const magic = try reader.takeArray(elf.ARMAG.len);
        if (!mem.eql(u8, magic, elf.ARMAG)) {
            return error.InvalidArchiveMagicNumber;
        }

        var ctx = ArchiveContext{
            .gpa = gpa,
            .data = bytes,
            .strtab = &[0]u8{},
        };
        defer {
            // Member names are duplicated below and owned by `ctx.objects`.
            for (ctx.objects.items) |*object| {
                gpa.free(object.name);
            }
            ctx.objects.deinit(gpa);
        }

        // Walk the member headers until the end of the data.
        while (true) {
            if (reader.seek >= ctx.data.len) break;
            // Member headers start on even offsets; skip the padding byte.
            if (!mem.isAligned(reader.seek, 2)) reader.seek += 1;

            const hdr = try reader.takeStruct(elf.ar_hdr, .little);

            if (!mem.eql(u8, &hdr.ar_fmag, elf.ARFMAG)) return error.InvalidArchiveHeaderMagicNumber;

            const size = try hdr.size();
            // Runs at the end of every iteration, including the `continue`
            // paths below: skip the member's payload. Until then `reader.seek`
            // points at the first payload byte.
            defer reader.seek += size;

            if (hdr.isSymtab()) {
                try ctx.parseSymtab(ctx.data[reader.seek..][0..size], .p32);
                continue;
            }
            if (hdr.isSymtab64()) {
                try ctx.parseSymtab(ctx.data[reader.seek..][0..size], .p64);
                continue;
            }
            if (hdr.isStrtab()) {
                ctx.strtab = ctx.data[reader.seek..][0..size];
                continue;
            }
            // These special members are not dumped.
            if (hdr.isSymdef() or hdr.isSymdefSorted()) continue;

            // Regular member: the name is stored either in the header itself or
            // as an offset into the archive string table.
            const name = if (hdr.name()) |name|
                try gpa.dupe(u8, name)
            else if (try hdr.nameOffset()) |off|
                try gpa.dupe(u8, ctx.getString(off))
            else
                unreachable;

            try ctx.objects.append(gpa, .{ .name = name, .off = reader.seek, .len = size });
        }

        var output: std.Io.Writer.Allocating = .init(gpa);
        defer output.deinit();
        const writer = &output.writer;

        switch (check.kind) {
            .archive_symtab => if (ctx.symtab.items.len > 0) {
                try ctx.dumpSymtab(writer);
            } else return step.fail("no archive symbol table found", .{}),

            // Every other check kind is applied to each member object in turn.
            else => if (ctx.objects.items.len > 0) {
                try ctx.dumpObjects(step, check, writer);
            } else return step.fail("empty archive", .{}),
        }

        return output.toOwnedSlice();
    }
1766
1767 const ArchiveContext = struct {
1768 gpa: Allocator,
1769 data: []const u8,
1770 symtab: std.ArrayList(ArSymtabEntry) = .empty,
1771 strtab: []const u8,
1772 objects: std.ArrayList(struct { name: []const u8, off: usize, len: usize }) = .empty,
1773
1774 fn parseSymtab(ctx: *ArchiveContext, raw: []const u8, ptr_width: enum { p32, p64 }) !void {
1775 var reader: std.Io.Reader = .fixed(raw);
1776 const num = switch (ptr_width) {
1777 .p32 => try reader.takeInt(u32, .big),
1778 .p64 => try reader.takeInt(u64, .big),
1779 };
1780 const ptr_size: usize = switch (ptr_width) {
1781 .p32 => @sizeOf(u32),
1782 .p64 => @sizeOf(u64),
1783 };
1784 const strtab_off = (num + 1) * ptr_size;
1785 const strtab_len = raw.len - strtab_off;
1786 const strtab = raw[strtab_off..][0..strtab_len];
1787
1788 try ctx.symtab.ensureTotalCapacityPrecise(ctx.gpa, num);
1789
1790 var stroff: usize = 0;
1791 for (0..num) |_| {
1792 const off = switch (ptr_width) {
1793 .p32 => try reader.takeInt(u32, .big),
1794 .p64 => try reader.takeInt(u64, .big),
1795 };
1796 const name = mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab.ptr + stroff)), 0);
1797 stroff += name.len + 1;
1798 ctx.symtab.appendAssumeCapacity(.{ .off = off, .name = name });
1799 }
1800 }
1801
1802 fn dumpSymtab(ctx: ArchiveContext, writer: anytype) !void {
1803 var files = std.AutoHashMap(usize, []const u8).init(ctx.gpa);
1804 defer files.deinit();
1805 try files.ensureUnusedCapacity(@intCast(ctx.objects.items.len));
1806
1807 for (ctx.objects.items) |object| {
1808 files.putAssumeCapacityNoClobber(object.off - @sizeOf(elf.ar_hdr), object.name);
1809 }
1810
1811 var symbols = std.AutoArrayHashMap(usize, std.array_list.Managed([]const u8)).init(ctx.gpa);
1812 defer {
1813 for (symbols.values()) |*value| {
1814 value.deinit();
1815 }
1816 symbols.deinit();
1817 }
1818
1819 for (ctx.symtab.items) |entry| {
1820 const gop = try symbols.getOrPut(@intCast(entry.off));
1821 if (!gop.found_existing) {
1822 gop.value_ptr.* = std.array_list.Managed([]const u8).init(ctx.gpa);
1823 }
1824 try gop.value_ptr.append(entry.name);
1825 }
1826
1827 try writer.print("{s}\n", .{archive_symtab_label});
1828 for (symbols.keys(), symbols.values()) |off, values| {
1829 try writer.print("in object {s}\n", .{files.get(off).?});
1830 for (values.items) |value| {
1831 try writer.print("{s}\n", .{value});
1832 }
1833 }
1834 }
1835
1836 fn dumpObjects(ctx: ArchiveContext, step: *Step, check: Check, writer: anytype) !void {
1837 for (ctx.objects.items) |object| {
1838 try writer.print("object {s}\n", .{object.name});
1839 const output = try parseAndDumpObject(step, check, ctx.data[object.off..][0..object.len]);
1840 defer ctx.gpa.free(output);
1841 try writer.print("{s}\n", .{output});
1842 }
1843 }
1844
1845 fn getString(ctx: ArchiveContext, off: u32) []const u8 {
1846 assert(off < ctx.strtab.len);
1847 const name = mem.sliceTo(@as([*:'\n']const u8, @ptrCast(ctx.strtab.ptr + off)), 0);
1848 return name[0 .. name.len - 1];
1849 }
1850
1851 const ArSymtabEntry = struct {
1852 name: [:0]const u8,
1853 off: u64,
1854 };
1855 };
1856
1857 fn parseAndDumpObject(step: *Step, check: Check, bytes: []const u8) ![]const u8 {
1858 const gpa = step.owner.allocator;
1859
1860 // `std.elf.Header` takes care of endianness issues for us.
1861 var reader: std.Io.Reader = .fixed(bytes);
1862 const hdr = try elf.Header.read(&reader);
1863
1864 var shdrs = try gpa.alloc(elf.Elf64_Shdr, hdr.shnum);
1865 defer gpa.free(shdrs);
1866 {
1867 var shdr_it = hdr.iterateSectionHeadersBuffer(bytes);
1868 var shdr_i: usize = 0;
1869 while (try shdr_it.next()) |shdr| : (shdr_i += 1) shdrs[shdr_i] = shdr;
1870 }
1871
1872 var phdrs = try gpa.alloc(elf.Elf64_Phdr, hdr.shnum);
1873 defer gpa.free(phdrs);
1874 {
1875 var phdr_it = hdr.iterateProgramHeadersBuffer(bytes);
1876 var phdr_i: usize = 0;
1877 while (try phdr_it.next()) |phdr| : (phdr_i += 1) phdrs[phdr_i] = phdr;
1878 }
1879
1880 var ctx = ObjectContext{
1881 .gpa = gpa,
1882 .data = bytes,
1883 .hdr = hdr,
1884 .shdrs = shdrs,
1885 .phdrs = phdrs,
1886 .shstrtab = undefined,
1887 };
1888 ctx.shstrtab = ctx.getSectionContents(ctx.hdr.shstrndx);
1889
1890 defer gpa.free(ctx.symtab.symbols);
1891 defer gpa.free(ctx.dysymtab.symbols);
1892 defer gpa.free(ctx.dyns);
1893
1894 for (ctx.shdrs, 0..) |shdr, i| switch (shdr.sh_type) {
1895 elf.SHT_SYMTAB, elf.SHT_DYNSYM => {
1896 const raw = ctx.getSectionContents(i);
1897 const nsyms = @divExact(raw.len, @sizeOf(elf.Elf64_Sym));
1898 const symbols = try gpa.alloc(elf.Elf64_Sym, nsyms);
1899
1900 var r: std.Io.Reader = .fixed(raw);
1901 for (0..nsyms) |si| symbols[si] = r.takeStruct(elf.Elf64_Sym, ctx.hdr.endian) catch unreachable;
1902
1903 const strings = ctx.getSectionContents(shdr.sh_link);
1904
1905 switch (shdr.sh_type) {
1906 elf.SHT_SYMTAB => {
1907 ctx.symtab = .{
1908 .symbols = symbols,
1909 .strings = strings,
1910 };
1911 },
1912 elf.SHT_DYNSYM => {
1913 ctx.dysymtab = .{
1914 .symbols = symbols,
1915 .strings = strings,
1916 };
1917 },
1918 else => unreachable,
1919 }
1920 },
1921 elf.SHT_DYNAMIC => {
1922 const raw = ctx.getSectionContents(i);
1923 const ndyns = @divExact(raw.len, @sizeOf(elf.Elf64_Dyn));
1924 const dyns = try gpa.alloc(elf.Elf64_Dyn, ndyns);
1925
1926 var r: std.Io.Reader = .fixed(raw);
1927 for (0..ndyns) |si| dyns[si] = r.takeStruct(elf.Elf64_Dyn, ctx.hdr.endian) catch unreachable;
1928
1929 ctx.dyns = dyns;
1930 ctx.dyns_strings = ctx.getSectionContents(shdr.sh_link);
1931 },
1932
1933 else => {},
1934 };
1935
1936 var output: std.Io.Writer.Allocating = .init(gpa);
1937 defer output.deinit();
1938 const writer = &output.writer;
1939
1940 switch (check.kind) {
1941 .headers => {
1942 try ctx.dumpHeader(writer);
1943 try ctx.dumpShdrs(writer);
1944 try ctx.dumpPhdrs(writer);
1945 },
1946
1947 .symtab => if (ctx.symtab.symbols.len > 0) {
1948 try ctx.dumpSymtab(.symtab, writer);
1949 } else return step.fail("no symbol table found", .{}),
1950
1951 .dynamic_symtab => if (ctx.dysymtab.symbols.len > 0) {
1952 try ctx.dumpSymtab(.dysymtab, writer);
1953 } else return step.fail("no dynamic symbol table found", .{}),
1954
1955 .dynamic_section => if (ctx.dyns.len > 0) {
1956 try ctx.dumpDynamicSection(writer);
1957 } else return step.fail("no dynamic section found", .{}),
1958
1959 .dump_section => {
1960 const name = mem.sliceTo(@as([*:0]const u8, @ptrCast(check.data.items.ptr + check.payload.dump_section)), 0);
1961 const shndx = ctx.getSectionByName(name) orelse return step.fail("no '{s}' section found", .{name});
1962 try ctx.dumpSection(shndx, writer);
1963 },
1964
1965 else => return step.fail("invalid check kind for ELF file format: {s}", .{@tagName(check.kind)}),
1966 }
1967
1968 return output.toOwnedSlice();
1969 }
1970
1971 const ObjectContext = struct {
1972 gpa: Allocator,
1973 data: []const u8,
1974 hdr: elf.Header,
1975 shdrs: []const elf.Elf64_Shdr,
1976 phdrs: []const elf.Elf64_Phdr,
1977 shstrtab: []const u8,
1978 symtab: Symtab = .{},
1979 dysymtab: Symtab = .{},
1980 dyns: []const elf.Elf64_Dyn = &.{},
1981 dyns_strings: []const u8 = &.{},
1982
/// Writes the "header" block: a label line, the ELF file type by tag name,
/// and the entry address in hexadecimal.
fn dumpHeader(ctx: ObjectContext, writer: anytype) !void {
    const header = ctx.hdr;
    try writer.print("header\ntype {s}\nentry {x}\n", .{ @tagName(header.type), header.entry });
}
1988
/// Writes one record per program header: index, type, addresses, sizes,
/// alignment and a line with the permission flags.
/// Writes nothing at all when there are no program headers.
fn dumpPhdrs(ctx: ObjectContext, writer: anytype) !void {
    // Flag bits in the order their labels are emitted.
    const flag_names = [_]struct { mask: u32, text: []const u8 }{
        .{ .mask = elf.PF_R, .text = "R" },
        .{ .mask = elf.PF_W, .text = "W" },
        .{ .mask = elf.PF_X, .text = "E" },
        .{ .mask = elf.PF_MASKOS, .text = "OS" },
        .{ .mask = elf.PF_MASKPROC, .text = "PROC" },
    };

    if (ctx.phdrs.len == 0) return;

    try writer.writeAll("program headers\n");

    for (ctx.phdrs, 0..) |phdr, phndx| {
        try writer.print("phdr {d}\ntype {f}\n", .{ phndx, fmtPhType(phdr.p_type) });
        try writer.print("vaddr {x}\npaddr {x}\noffset {x}\n", .{ phdr.p_vaddr, phdr.p_paddr, phdr.p_offset });
        try writer.print("memsz {x}\nfilesz {x}\nalign {x}\n", .{ phdr.p_memsz, phdr.p_filesz, phdr.p_align });

        // The separating space is only written when at least one bit is set.
        const flags = phdr.p_flags;
        try writer.writeAll(if (flags > 0) "flags " else "flags");
        for (flag_names) |flag| {
            if (flags & flag.mask != 0) try writer.writeAll(flag.text);
        }
        try writer.writeByte('\n');
    }
}
2027
/// Writes one record per section header: index, name, type, address,
/// file offset, size and alignment.
/// Writes nothing at all when there are no section headers.
fn dumpShdrs(ctx: ObjectContext, writer: anytype) !void {
    if (ctx.shdrs.len == 0) return;

    try writer.writeAll("section headers\n");

    for (ctx.shdrs, 0..) |shdr, shndx| {
        try writer.print("shdr {d}\n", .{shndx});
        const section_name = ctx.getSectionName(shndx);
        try writer.print("name {s}\ntype {f}\n", .{ section_name, fmtShType(shdr.sh_type) });
        try writer.print("addr {x}\noffset {x}\n", .{ shdr.sh_addr, shdr.sh_offset });
        try writer.print("size {x}\naddralign {x}\n", .{ shdr.sh_size, shdr.sh_addralign });
        // TODO dump formatted sh_flags
    }
}
2044
/// Writes the dynamic section: the section label, then one line per entry
/// holding the tag name followed by a tag-dependent rendering of the value
/// (a string from the dynamic string table, a hex or decimal number, or a
/// list of flag names).
fn dumpDynamicSection(ctx: ObjectContext, writer: anytype) !void {
    const FlagName = struct { mask: u64, text: []const u8 };

    // DT_FLAGS bits, in output order.
    const df_names = [_]FlagName{
        .{ .mask = elf.DF_ORIGIN, .text = " ORIGIN" },
        .{ .mask = elf.DF_SYMBOLIC, .text = " SYMBOLIC" },
        .{ .mask = elf.DF_TEXTREL, .text = " TEXTREL" },
        .{ .mask = elf.DF_BIND_NOW, .text = " BIND_NOW" },
        .{ .mask = elf.DF_STATIC_TLS, .text = " STATIC_TLS" },
    };

    // DT_FLAGS_1 bits, in output order.
    const df_1_names = [_]FlagName{
        .{ .mask = elf.DF_1_NOW, .text = " NOW" },
        .{ .mask = elf.DF_1_GLOBAL, .text = " GLOBAL" },
        .{ .mask = elf.DF_1_GROUP, .text = " GROUP" },
        .{ .mask = elf.DF_1_NODELETE, .text = " NODELETE" },
        .{ .mask = elf.DF_1_LOADFLTR, .text = " LOADFLTR" },
        .{ .mask = elf.DF_1_INITFIRST, .text = " INITFIRST" },
        .{ .mask = elf.DF_1_NOOPEN, .text = " NOOPEN" },
        .{ .mask = elf.DF_1_ORIGIN, .text = " ORIGIN" },
        .{ .mask = elf.DF_1_DIRECT, .text = " DIRECT" },
        .{ .mask = elf.DF_1_TRANS, .text = " TRANS" },
        .{ .mask = elf.DF_1_INTERPOSE, .text = " INTERPOSE" },
        .{ .mask = elf.DF_1_NODEFLIB, .text = " NODEFLIB" },
        .{ .mask = elf.DF_1_NODUMP, .text = " NODUMP" },
        .{ .mask = elf.DF_1_CONFALT, .text = " CONFALT" },
        .{ .mask = elf.DF_1_ENDFILTEE, .text = " ENDFILTEE" },
        .{ .mask = elf.DF_1_DISPRELDNE, .text = " DISPRELDNE" },
        .{ .mask = elf.DF_1_DISPRELPND, .text = " DISPRELPND" },
        .{ .mask = elf.DF_1_NODIRECT, .text = " NODIRECT" },
        .{ .mask = elf.DF_1_IGNMULDEF, .text = " IGNMULDEF" },
        .{ .mask = elf.DF_1_NOKSYMS, .text = " NOKSYMS" },
        .{ .mask = elf.DF_1_NOHDR, .text = " NOHDR" },
        .{ .mask = elf.DF_1_EDITED, .text = " EDITED" },
        .{ .mask = elf.DF_1_NORELOC, .text = " NORELOC" },
        .{ .mask = elf.DF_1_SYMINTPOSE, .text = " SYMINTPOSE" },
        .{ .mask = elf.DF_1_GLOBAUDIT, .text = " GLOBAUDIT" },
        .{ .mask = elf.DF_1_SINGLETON, .text = " SINGLETON" },
        .{ .mask = elf.DF_1_STUB, .text = " STUB" },
        .{ .mask = elf.DF_1_PIE, .text = " PIE" },
    };

    try writer.writeAll(ElfDumper.dynamic_section_label ++ "\n");

    for (ctx.dyns) |dyn| {
        const tag: u64 = @bitCast(dyn.d_tag);
        const val = dyn.d_val;

        const tag_name: []const u8 = switch (tag) {
            elf.DT_NEEDED => "NEEDED",
            elf.DT_SONAME => "SONAME",
            elf.DT_INIT_ARRAY => "INIT_ARRAY",
            elf.DT_INIT_ARRAYSZ => "INIT_ARRAYSZ",
            elf.DT_FINI_ARRAY => "FINI_ARRAY",
            elf.DT_FINI_ARRAYSZ => "FINI_ARRAYSZ",
            elf.DT_HASH => "HASH",
            elf.DT_GNU_HASH => "GNU_HASH",
            elf.DT_STRTAB => "STRTAB",
            elf.DT_SYMTAB => "SYMTAB",
            elf.DT_STRSZ => "STRSZ",
            elf.DT_SYMENT => "SYMENT",
            elf.DT_PLTGOT => "PLTGOT",
            elf.DT_PLTRELSZ => "PLTRELSZ",
            elf.DT_PLTREL => "PLTREL",
            elf.DT_JMPREL => "JMPREL",
            elf.DT_RELA => "RELA",
            elf.DT_RELASZ => "RELASZ",
            elf.DT_RELAENT => "RELAENT",
            elf.DT_VERDEF => "VERDEF",
            elf.DT_VERDEFNUM => "VERDEFNUM",
            elf.DT_FLAGS => "FLAGS",
            elf.DT_FLAGS_1 => "FLAGS_1",
            elf.DT_VERNEED => "VERNEED",
            elf.DT_VERNEEDNUM => "VERNEEDNUM",
            elf.DT_VERSYM => "VERSYM",
            elf.DT_RELACOUNT => "RELACOUNT",
            elf.DT_RPATH => "RPATH",
            elf.DT_RUNPATH => "RUNPATH",
            elf.DT_INIT => "INIT",
            elf.DT_FINI => "FINI",
            elf.DT_NULL => "NULL",
            else => "UNKNOWN",
        };
        try writer.writeAll(tag_name);

        switch (tag) {
            // The value is an offset into the dynamic string table.
            elf.DT_NEEDED,
            elf.DT_SONAME,
            elf.DT_RPATH,
            elf.DT_RUNPATH,
            => try writer.print(" {s}", .{getString(ctx.dyns_strings, @intCast(val))}),

            // Printed in hexadecimal.
            elf.DT_INIT_ARRAY,
            elf.DT_FINI_ARRAY,
            elf.DT_HASH,
            elf.DT_GNU_HASH,
            elf.DT_STRTAB,
            elf.DT_SYMTAB,
            elf.DT_PLTGOT,
            elf.DT_JMPREL,
            elf.DT_RELA,
            elf.DT_VERDEF,
            elf.DT_VERNEED,
            elf.DT_VERSYM,
            elf.DT_INIT,
            elf.DT_FINI,
            elf.DT_NULL,
            => try writer.print(" {x}", .{val}),

            // Printed in decimal.
            elf.DT_INIT_ARRAYSZ,
            elf.DT_FINI_ARRAYSZ,
            elf.DT_STRSZ,
            elf.DT_SYMENT,
            elf.DT_PLTRELSZ,
            elf.DT_RELASZ,
            elf.DT_RELAENT,
            elf.DT_RELACOUNT,
            => try writer.print(" {d}", .{val}),

            elf.DT_PLTREL => {
                const rel_kind: []const u8 = switch (val) {
                    elf.DT_REL => " REL",
                    elf.DT_RELA => " RELA",
                    else => " UNKNOWN",
                };
                try writer.writeAll(rel_kind);
            },

            // A zero value has no bit set, so nothing is appended for it.
            elf.DT_FLAGS => for (df_names) |flag| {
                if (val & flag.mask != 0) try writer.writeAll(flag.text);
            },

            elf.DT_FLAGS_1 => for (df_1_names) |flag| {
                if (val & flag.mask != 0) try writer.writeAll(flag.text);
            },

            else => try writer.print(" {x}", .{val}),
        }
        try writer.writeByte('\n');
    }
}
2176
/// Writes the symbol table selected by `type`: a label line, then one line
/// per symbol in the form `value size shndx type bind visibility name`.
///
/// The `*_LO*`/`*_HI*` range constants are treated as inclusive at both
/// ends, which is how the ELF specification defines those ranges.
/// Visibility bits that do not name an `elf.STV` value print as "UNKNOWN".
fn dumpSymtab(ctx: ObjectContext, comptime @"type": enum { symtab, dysymtab }, writer: anytype) !void {
    const symtab = switch (@"type") {
        .symtab => ctx.symtab,
        .dysymtab => ctx.dysymtab,
    };

    try writer.writeAll(switch (@"type") {
        .symtab => symtab_label,
        .dysymtab => dynamic_symtab_label,
    } ++ "\n");

    for (symtab.symbols, 0..) |sym, index| {
        try writer.print("{x} {x}", .{ sym.st_value, sym.st_size });

        // Section index column: reserved indices get a symbolic name,
        // ordinary ones are printed in hexadecimal.
        {
            if (elf.SHN_LORESERVE <= sym.st_shndx and sym.st_shndx <= elf.SHN_HIRESERVE) {
                if (elf.SHN_LOPROC <= sym.st_shndx and sym.st_shndx <= elf.SHN_HIPROC) {
                    try writer.print(" LO+{d}", .{sym.st_shndx - elf.SHN_LOPROC});
                } else {
                    const sym_ndx = switch (sym.st_shndx) {
                        elf.SHN_ABS => "ABS",
                        elf.SHN_COMMON => "COM",
                        elf.SHN_LIVEPATCH => "LIV",
                        else => "UNK",
                    };
                    try writer.print(" {s}", .{sym_ndx});
                }
            } else if (sym.st_shndx == elf.SHN_UNDEF) {
                try writer.writeAll(" UND");
            } else {
                try writer.print(" {x}", .{sym.st_shndx});
            }
        }

        // Symbol type column.
        blk: {
            const tt = sym.st_type();
            const sym_type = switch (tt) {
                elf.STT_NOTYPE => "NOTYPE",
                elf.STT_OBJECT => "OBJECT",
                elf.STT_FUNC => "FUNC",
                elf.STT_SECTION => "SECTION",
                elf.STT_FILE => "FILE",
                elf.STT_COMMON => "COMMON",
                elf.STT_TLS => "TLS",
                elf.STT_NUM => "NUM",
                elf.STT_GNU_IFUNC => "IFUNC",
                else => if (elf.STT_LOPROC <= tt and tt <= elf.STT_HIPROC) {
                    break :blk try writer.print(" LOPROC+{d}", .{tt - elf.STT_LOPROC});
                } else if (elf.STT_LOOS <= tt and tt <= elf.STT_HIOS) {
                    break :blk try writer.print(" LOOS+{d}", .{tt - elf.STT_LOOS});
                } else "UNK",
            };
            try writer.print(" {s}", .{sym_type});
        }

        // Symbol binding column.
        blk: {
            const bind = sym.st_bind();
            const sym_bind = switch (bind) {
                elf.STB_LOCAL => "LOCAL",
                elf.STB_GLOBAL => "GLOBAL",
                elf.STB_WEAK => "WEAK",
                elf.STB_NUM => "NUM",
                else => if (elf.STB_LOPROC <= bind and bind <= elf.STB_HIPROC) {
                    break :blk try writer.print(" LOPROC+{d}", .{bind - elf.STB_LOPROC});
                } else if (elf.STB_LOOS <= bind and bind <= elf.STB_HIOS) {
                    break :blk try writer.print(" LOOS+{d}", .{bind - elf.STB_LOOS});
                } else "UNKNOWN",
            };
            try writer.print(" {s}", .{sym_bind});
        }

        // Visibility lives in the low three bits of st_other. Converting
        // through `fromInt` avoids a safety panic on a value that has no
        // matching enum tag.
        const vis_bits: u3 = @truncate(sym.st_other);
        const vis_name = if (std.enums.fromInt(elf.STV, vis_bits)) |vis| @tagName(vis) else "UNKNOWN";
        try writer.print(" {s}", .{vis_name});

        // Section symbols are named after the section they refer to.
        const sym_name = switch (sym.st_type()) {
            elf.STT_SECTION => ctx.getSectionName(sym.st_shndx),
            else => symtab.getName(index).?,
        };
        try writer.print(" {s}\n", .{sym_name});
    }
}
2258
/// Writes the raw bytes of section `shndx` to `writer`, unmodified.
fn dumpSection(ctx: ObjectContext, shndx: usize, writer: anytype) !void {
    try writer.writeAll(ctx.getSectionContents(shndx));
}
2263
/// Looks up the name of section `shndx` in the section header string table.
inline fn getSectionName(ctx: ObjectContext, shndx: usize) []const u8 {
    return getString(ctx.shstrtab, ctx.shdrs[shndx].sh_name);
}
2268
/// Returns the slice of the file data covered by section `shndx`.
/// Asserts that the section's offset and extent lie inside the file data.
fn getSectionContents(ctx: ObjectContext, shndx: usize) []const u8 {
    const shdr = ctx.shdrs[shndx];
    const start = shdr.sh_offset;
    assert(start < ctx.data.len);
    const end = start + shdr.sh_size;
    assert(end <= ctx.data.len);
    return ctx.data[start..end];
}
2275
/// Returns the index of the first section whose name equals `name`,
/// or null when no section has that name.
fn getSectionByName(ctx: ObjectContext, name: []const u8) ?usize {
    var shndx: usize = 0;
    while (shndx < ctx.shdrs.len) : (shndx += 1) {
        const candidate = ctx.getSectionName(shndx);
        if (mem.eql(u8, candidate, name)) return shndx;
    }
    return null;
}
2281 };
2282
/// A symbol table together with the string table its names point into.
const Symtab = struct {
    symbols: []const elf.Elf64_Sym = &.{},
    strings: []const u8 = &.{},

    /// Returns the symbol at `index`, or null when `index` is out of range.
    fn get(st: Symtab, index: usize) ?elf.Elf64_Sym {
        return if (index < st.symbols.len) st.symbols[index] else null;
    }

    /// Returns the name of the symbol at `index`, or null when `index` is
    /// out of range.
    fn getName(st: Symtab, index: usize) ?[]const u8 {
        if (st.get(index)) |sym| {
            return getString(st.strings, sym.st_name);
        }
        return null;
    }
};
2297
/// Returns the NUL-terminated string that starts at byte offset `off` of
/// `strtab`, without the terminator.
///
/// The scan is bounded by the table: when no terminator follows `off`, the
/// remainder of the table is returned instead of reading past its end.
/// Asserts that `off` lies inside `strtab`.
fn getString(strtab: []const u8, off: u32) []const u8 {
    assert(off < strtab.len);
    return mem.sliceTo(strtab[off..], 0);
}
2302
/// Wraps a section type so that printing it with `{f}` goes through
/// `formatShType`.
fn fmtShType(sh_type: u32) std.fmt.Alt(u32, formatShType) {
    const Formatter = std.fmt.Alt(u32, formatShType);
    return Formatter{ .data = sh_type };
}
2306
/// Writes a symbolic name for the section type `sh_type`.
///
/// Known types print their name. Other values inside the OS, processor or
/// user ranges print as `LOOS+0x..`, `LOPROC+0x..` or `LOUSER+0x..` with the
/// offset from the start of the range; anything else prints "UNKNOWN".
/// The ranges are inclusive at both ends, so the `SHT_HI*` values
/// themselves belong to their range.
fn formatShType(sh_type: u32, writer: *Writer) Writer.Error!void {
    const name = switch (sh_type) {
        elf.SHT_NULL => "NULL",
        elf.SHT_PROGBITS => "PROGBITS",
        elf.SHT_SYMTAB => "SYMTAB",
        elf.SHT_STRTAB => "STRTAB",
        elf.SHT_RELA => "RELA",
        elf.SHT_HASH => "HASH",
        elf.SHT_DYNAMIC => "DYNAMIC",
        elf.SHT_NOTE => "NOTE",
        elf.SHT_NOBITS => "NOBITS",
        elf.SHT_REL => "REL",
        elf.SHT_SHLIB => "SHLIB",
        elf.SHT_DYNSYM => "DYNSYM",
        elf.SHT_INIT_ARRAY => "INIT_ARRAY",
        elf.SHT_FINI_ARRAY => "FINI_ARRAY",
        elf.SHT_PREINIT_ARRAY => "PREINIT_ARRAY",
        elf.SHT_GROUP => "GROUP",
        elf.SHT_SYMTAB_SHNDX => "SYMTAB_SHNDX",
        elf.SHT_X86_64_UNWIND => "X86_64_UNWIND",
        elf.SHT_LLVM_ADDRSIG => "LLVM_ADDRSIG",
        elf.SHT_GNU_HASH => "GNU_HASH",
        elf.SHT_GNU_VERDEF => "VERDEF",
        elf.SHT_GNU_VERNEED => "VERNEED",
        elf.SHT_GNU_VERSYM => "VERSYM",
        else => if (elf.SHT_LOOS <= sh_type and sh_type <= elf.SHT_HIOS) {
            return try writer.print("LOOS+0x{x}", .{sh_type - elf.SHT_LOOS});
        } else if (elf.SHT_LOPROC <= sh_type and sh_type <= elf.SHT_HIPROC) {
            return try writer.print("LOPROC+0x{x}", .{sh_type - elf.SHT_LOPROC});
        } else if (elf.SHT_LOUSER <= sh_type and sh_type <= elf.SHT_HIUSER) {
            return try writer.print("LOUSER+0x{x}", .{sh_type - elf.SHT_LOUSER});
        } else "UNKNOWN",
    };
    try writer.writeAll(name);
}
2342
/// Wraps a program header type so that printing it with `{f}` goes through
/// `formatPhType`.
fn fmtPhType(ph_type: u32) std.fmt.Alt(u32, formatPhType) {
    const Formatter = std.fmt.Alt(u32, formatPhType);
    return Formatter{ .data = ph_type };
}
2346
/// Writes a symbolic name for the program header type `ph_type`.
///
/// Known types print their name. Other values inside the OS or processor
/// ranges print as `LOOS+0x..` or `LOPROC+0x..` with the offset from the
/// start of the range; anything else prints "UNKNOWN".
/// The ranges are inclusive at both ends, so `PT_HIOS` and `PT_HIPROC`
/// themselves belong to their range.
fn formatPhType(ph_type: u32, writer: *Writer) Writer.Error!void {
    const p_type = switch (ph_type) {
        elf.PT_NULL => "NULL",
        elf.PT_LOAD => "LOAD",
        elf.PT_DYNAMIC => "DYNAMIC",
        elf.PT_INTERP => "INTERP",
        elf.PT_NOTE => "NOTE",
        elf.PT_SHLIB => "SHLIB",
        elf.PT_PHDR => "PHDR",
        elf.PT_TLS => "TLS",
        elf.PT_NUM => "NUM",
        elf.PT_GNU_EH_FRAME => "GNU_EH_FRAME",
        elf.PT_GNU_STACK => "GNU_STACK",
        elf.PT_GNU_RELRO => "GNU_RELRO",
        else => if (elf.PT_LOOS <= ph_type and ph_type <= elf.PT_HIOS) {
            return try writer.print("LOOS+0x{x}", .{ph_type - elf.PT_LOOS});
        } else if (elf.PT_LOPROC <= ph_type and ph_type <= elf.PT_HIPROC) {
            return try writer.print("LOPROC+0x{x}", .{ph_type - elf.PT_LOPROC});
        } else "UNKNOWN",
    };
    try writer.writeAll(p_type);
}
2369};
2370
2371const WasmDumper = struct {
2372 const symtab_label = "symbols";
2373
/// Validates the Wasm preamble (magic and version) of `bytes`, then dumps
/// the module as text according to `check`.
///
/// When parsing runs out of input, the text produced so far is returned
/// with a trailing "<UnexpectedEndOfStream>" marker instead of an error.
/// The returned slice is allocated with the step owner's allocator.
fn parseAndDump(step: *Step, check: Check, bytes: []const u8) ![]const u8 {
    const gpa = step.owner.allocator;
    var reader: std.Io.Reader = .fixed(bytes);

    const preamble = try reader.takeArray(8);
    const magic = preamble[0..4];
    const version = preamble[4..];
    if (!mem.eql(u8, magic, &std.wasm.magic)) return error.InvalidMagicByte;
    if (!mem.eql(u8, version, &std.wasm.version)) return error.UnsupportedWasmVersion;

    var output: std.Io.Writer.Allocating = .init(gpa);
    defer output.deinit();
    parseAndDumpInner(step, check, bytes, &reader, &output.writer) catch |err| {
        if (err != error.EndOfStream) return err;
        try output.writer.writeAll("\n<UnexpectedEndOfStream>");
    };
    return output.toOwnedSlice();
}
2394
/// Walks the sections of a Wasm module and dumps each one to `writer`.
///
/// `reader` reads from `bytes` and must already be positioned after the
/// module preamble. Only the `.headers` check kind is supported; any other
/// kind fails the step.
///
/// Returns `error.EndOfStream` when a section claims more bytes than the
/// file has left, rather than slicing past the end of `bytes`.
fn parseAndDumpInner(
    step: *Step,
    check: Check,
    bytes: []const u8,
    reader: *std.Io.Reader,
    writer: *std.Io.Writer,
) !void {
    switch (check.kind) {
        .headers => {
            while (reader.takeByte()) |current_byte| {
                const section = std.enums.fromInt(std.wasm.Section, current_byte) orelse {
                    return step.fail("Found invalid section id '{d}'", .{current_byte});
                };

                const section_length = try reader.takeLeb128(u32);
                // The length comes from the file; check it before slicing.
                if (section_length > bytes.len - reader.seek) return error.EndOfStream;
                try parseAndDumpSection(step, section, bytes[reader.seek..][0..section_length], writer);
                reader.seek += section_length;
            } else |_| {} // reached end of stream
        },

        else => return step.fail("invalid check kind for Wasm file format: {s}", .{@tagName(check.kind)}),
    }
}
2418
/// Dumps a single section whose payload is `data`.
///
/// Writes the section name and size, then:
/// - for the known vector sections, the entry count and every entry;
/// - for custom sections, the custom name, plus the contents of the
///   "name", "producers" and "target_features" sections;
/// - for `start` and `data_count`, the single value they hold.
/// Other sections are skipped after the name and size.
///
/// Returns `error.EndOfStream` when a custom section's name length exceeds
/// the bytes left in `data`.
fn parseAndDumpSection(
    step: *Step,
    section: std.wasm.Section,
    data: []const u8,
    writer: *std.Io.Writer,
) !void {
    var reader: std.Io.Reader = .fixed(data);

    try writer.print(
        \\Section {s}
        \\size {d}
    , .{ @tagName(section), data.len });

    switch (section) {
        .type,
        .import,
        .function,
        .table,
        .memory,
        .global,
        .@"export",
        .element,
        .code,
        .data,
        => {
            const entries = try reader.takeLeb128(u32);
            try writer.print("\nentries {d}\n", .{entries});
            try parseSection(step, section, data[reader.seek..], entries, writer);
        },
        .custom => {
            const name_length = try reader.takeLeb128(u32);
            // The length comes from the file; check it before slicing.
            if (name_length > data.len - reader.seek) return error.EndOfStream;
            const name = data[reader.seek..][0..name_length];
            reader.seek += name_length;
            try writer.print("\nname {s}\n", .{name});

            if (mem.eql(u8, name, "name")) {
                try parseDumpNames(step, &reader, writer, data);
            } else if (mem.eql(u8, name, "producers")) {
                try parseDumpProducers(&reader, writer, data);
            } else if (mem.eql(u8, name, "target_features")) {
                try parseDumpFeatures(&reader, writer, data);
            }
            // TODO: Implement parsing and dumping other custom sections (such as relocations)
        },
        .start => {
            const start = try reader.takeLeb128(u32);
            try writer.print("\nstart {d}\n", .{start});
        },
        .data_count => {
            const count = try reader.takeLeb128(u32);
            try writer.print("\ncount {d}\n", .{count});
        },
        else => {}, // skip unknown sections
    }
}
2474
/// Parses the `entries` items of a known vector section from `data` and
/// writes a textual description of each item to `writer`.
///
/// `data` starts right after the section's entry count. The code section
/// is treated as opaque and produces no output. `section` must be one of
/// the vector sections handled below; any other value is unreachable.
///
/// Returns `error.EndOfStream` when a name length read from the input
/// exceeds the bytes left in `data`.
fn parseSection(step: *Step, section: std.wasm.Section, data: []const u8, entries: u32, writer: anytype) !void {
    var reader: std.Io.Reader = .fixed(data);

    switch (section) {
        .type => {
            for (0..entries) |_| {
                const func_type = try reader.takeByte();
                if (func_type != std.wasm.function_type) {
                    return step.fail("expected function type, found byte '{d}'", .{func_type});
                }
                const params = try reader.takeLeb128(u32);
                try writer.print("params {d}\n", .{params});
                for (0..params) |_| {
                    _ = try parseDumpType(step, std.wasm.Valtype, &reader, writer);
                }
                const returns = try reader.takeLeb128(u32);
                try writer.print("returns {d}\n", .{returns});
                for (0..returns) |_| {
                    _ = try parseDumpType(step, std.wasm.Valtype, &reader, writer);
                }
            }
        },
        .import => {
            for (0..entries) |_| {
                const module_name_len = try reader.takeLeb128(u32);
                // Lengths come from the file; check them before slicing.
                if (module_name_len > data.len - reader.seek) return error.EndOfStream;
                const module_name = data[reader.seek..][0..module_name_len];
                reader.seek += module_name_len;

                const name_len = try reader.takeLeb128(u32);
                if (name_len > data.len - reader.seek) return error.EndOfStream;
                const name = data[reader.seek..][0..name_len];
                reader.seek += name_len;

                const kind = std.enums.fromInt(std.wasm.ExternalKind, try reader.takeByte()) orelse {
                    return step.fail("invalid import kind", .{});
                };

                try writer.print(
                    \\module {s}
                    \\name {s}
                    \\kind {s}
                , .{ module_name, name, @tagName(kind) });
                try writer.writeByte('\n');
                switch (kind) {
                    .function => {
                        try writer.print("index {d}\n", .{try reader.takeLeb128(u32)});
                    },
                    .memory => {
                        try parseDumpLimits(&reader, writer);
                    },
                    .global => {
                        _ = try parseDumpType(step, std.wasm.Valtype, &reader, writer);
                        try writer.print("mutable {}\n", .{0x01 == try reader.takeLeb128(u32)});
                    },
                    .table => {
                        _ = try parseDumpType(step, std.wasm.RefType, &reader, writer);
                        try parseDumpLimits(&reader, writer);
                    },
                }
            }
        },
        .function => {
            for (0..entries) |_| {
                try writer.print("index {d}\n", .{try reader.takeLeb128(u32)});
            }
        },
        .table => {
            for (0..entries) |_| {
                _ = try parseDumpType(step, std.wasm.RefType, &reader, writer);
                try parseDumpLimits(&reader, writer);
            }
        },
        .memory => {
            for (0..entries) |_| {
                try parseDumpLimits(&reader, writer);
            }
        },
        .global => {
            for (0..entries) |_| {
                _ = try parseDumpType(step, std.wasm.Valtype, &reader, writer);
                try writer.print("mutable {}\n", .{0x01 == try reader.takeLeb128(u1)});
                try parseDumpInit(step, &reader, writer);
            }
        },
        .@"export" => {
            for (0..entries) |_| {
                const name_len = try reader.takeLeb128(u32);
                if (name_len > data.len - reader.seek) return error.EndOfStream;
                const name = data[reader.seek..][0..name_len];
                reader.seek += name_len;

                const kind_byte = try reader.takeLeb128(u8);
                const kind = std.enums.fromInt(std.wasm.ExternalKind, kind_byte) orelse {
                    return step.fail("invalid export kind value '{d}'", .{kind_byte});
                };
                const index = try reader.takeLeb128(u32);
                try writer.print(
                    \\name {s}
                    \\kind {s}
                    \\index {d}
                , .{ name, @tagName(kind), index });
                try writer.writeByte('\n');
            }
        },
        .element => {
            for (0..entries) |_| {
                try writer.print("table index {d}\n", .{try reader.takeLeb128(u32)});
                try parseDumpInit(step, &reader, writer);

                const function_indexes = try reader.takeLeb128(u32);
                try writer.print("indexes {d}\n", .{function_indexes});
                for (0..function_indexes) |_| {
                    try writer.print("index {d}\n", .{try reader.takeLeb128(u32)});
                }
            }
        },
        .code => {}, // code section is considered opaque to linker
        .data => {
            for (0..entries) |_| {
                const flags = try reader.takeLeb128(u32);
                // Bit 1: the segment names its memory index explicitly.
                const index = if (flags & 0x02 != 0)
                    try reader.takeLeb128(u32)
                else
                    0;
                try writer.print("memory index 0x{x}\n", .{index});
                // Bit 0 marks a passive segment. Every active segment,
                // with or without an explicit memory index, carries an
                // offset expression that has to be consumed here.
                if (flags & 0x01 == 0) {
                    try parseDumpInit(step, &reader, writer);
                }

                const size = try reader.takeLeb128(u32);
                try writer.print("size {d}\n", .{size});
                try reader.discardAll(size); // we do not care about the content of the segments
            }
        },
        else => unreachable,
    }
}
2619
/// Reads one byte, interprets it as a value of the enum `E`, writes
/// "type <tag name>" and returns the tag.
/// Fails the step when the byte does not correspond to any value of `E`.
fn parseDumpType(step: *Step, comptime E: type, reader: *std.Io.Reader, writer: *std.Io.Writer) !E {
    const raw = try reader.takeByte();
    if (std.enums.fromInt(E, raw)) |tag| {
        try writer.print("type {s}\n", .{@tagName(tag)});
        return tag;
    }
    return step.fail("invalid wasm type value '{d}'", .{raw});
}
2628
/// Reads a limits record (a flags byte, a minimum and an optional maximum)
/// and writes "min" and, when present, "max" in hexadecimal.
///
/// Only bit 0 of the flags says that a maximum follows. The other bits
/// carry unrelated properties and must not cause a maximum to be read.
fn parseDumpLimits(reader: anytype, writer: anytype) !void {
    const flags = try reader.takeLeb128(u8);
    const min = try reader.takeLeb128(u32);

    try writer.print("min {x}\n", .{min});
    const has_max = flags & 0x01 != 0;
    if (has_max) {
        try writer.print("max {x}\n", .{try reader.takeLeb128(u32)});
    }
}
2638
/// Reads a constant initializer expression (one instruction followed by
/// the `end` opcode) and writes the instruction with its operand in
/// hexadecimal.
///
/// Supported instructions are `i32.const`, `i64.const`, `f32.const`,
/// `f64.const` and `global.get`. Any other opcode, a byte that is not an
/// opcode at all, or a missing `end` fails the step.
fn parseDumpInit(step: *Step, reader: *std.Io.Reader, writer: *std.Io.Writer) !void {
    const byte = try reader.takeByte();
    const opcode = std.enums.fromInt(std.wasm.Opcode, byte) orelse {
        return step.fail("invalid wasm opcode '{d}'", .{byte});
    };
    switch (opcode) {
        .i32_const => try writer.print("i32.const {x}\n", .{try reader.takeLeb128(i32)}),
        .i64_const => try writer.print("i64.const {x}\n", .{try reader.takeLeb128(i64)}),
        .f32_const => try writer.print("f32.const {x}\n", .{@as(f32, @bitCast(try reader.takeInt(u32, .little)))}),
        .f64_const => try writer.print("f64.const {x}\n", .{@as(f64, @bitCast(try reader.takeInt(u64, .little)))}),
        .global_get => try writer.print("global.get {x}\n", .{try reader.takeLeb128(u32)}),
        // The opcode comes from the input file, so this branch can be
        // reached; report it instead of declaring it unreachable.
        else => return step.fail("unsupported opcode '{d}' in init expression", .{byte}),
    }
    // Opcodes are single bytes, not LEB128-encoded integers.
    const end_opcode = try reader.takeByte();
    if (end_opcode != @intFromEnum(std.wasm.Opcode.end)) {
        return step.fail("expected 'end' opcode in init expression", .{});
    }
}
2657
/// Dumps the subsections of the custom "name" section.
/// https://webassembly.github.io/spec/core/appendix/custom.html
///
/// `reader` reads from `data` and is positioned at the first subsection.
/// Module, function, global and data segment subsections are dumped;
/// local subsections and any other subsection type fail the step.
fn parseDumpNames(step: *Step, reader: *std.Io.Reader, writer: *std.Io.Writer, data: []const u8) !void {
    while (reader.seek < data.len) {
        switch (try parseDumpType(step, std.wasm.NameSubsection, reader, writer)) {
            // The module name subsection ... consists of a single name
            // that is assigned to the module itself.
            .module => {
                const size = try reader.takeLeb128(u32);
                // The subsection holds the encoded name length followed by
                // the name. That length may take more than one byte, so
                // measure how many bytes it actually occupied.
                const payload_start = reader.seek;
                const name_len = try reader.takeLeb128(u32);
                const name_len_bytes = reader.seek - payload_start;
                if (size != name_len_bytes + name_len) return error.BadSubsectionSize;
                if (reader.seek + name_len > data.len) return error.UnexpectedEndOfStream;
                try writer.print("name {s}\n", .{data[reader.seek..][0..name_len]});
                reader.seek += name_len;
            },

            // The function name subsection ... consists of a name map
            // assigning function names to function indices.
            .function, .global, .data_segment => {
                const size = try reader.takeLeb128(u32);
                const entries = try reader.takeLeb128(u32);
                try writer.print(
                    \\size {d}
                    \\names {d}
                    \\
                , .{ size, entries });
                for (0..entries) |_| {
                    const index = try reader.takeLeb128(u32);
                    const name_len = try reader.takeLeb128(u32);
                    if (reader.seek + name_len > data.len) return error.UnexpectedEndOfStream;
                    const name = data[reader.seek..][0..name_len];
                    reader.seek += name.len;

                    try writer.print(
                        \\index {d}
                        \\name {s}
                        \\
                    , .{ index, name });
                }
            },

            // The local name subsection ... consists of an indirect name
            // map assigning local names to local indices grouped by
            // function indices.
            .local => {
                return step.fail("TODO implement parseDumpNames for local subsections", .{});
            },

            else => |t| return step.fail("invalid subsection type: {s}", .{@tagName(t)}),
        }
    }
}
2709
/// Dumps the custom "producers" section: the number of fields, then for
/// each field its name, its value count and every value name/version pair.
///
/// `reader` reads from `data`. Returns `error.EndOfStream` when a string
/// length read from the input exceeds the bytes left in `data`.
fn parseDumpProducers(reader: *std.Io.Reader, writer: *std.Io.Writer, data: []const u8) !void {
    const field_count = try reader.takeLeb128(u32);
    try writer.print("fields {d}\n", .{field_count});
    for (0..field_count) |_| {
        const field_name_length = try reader.takeLeb128(u32);
        // Lengths come from the file; check them before slicing.
        if (field_name_length > data.len - reader.seek) return error.EndOfStream;
        const field_name = data[reader.seek..][0..field_name_length];
        reader.seek += field_name_length;

        const value_count = try reader.takeLeb128(u32);
        try writer.print(
            \\field_name {s}
            \\values {d}
        , .{ field_name, value_count });
        try writer.writeByte('\n');
        for (0..value_count) |_| {
            const value_length = try reader.takeLeb128(u32);
            if (value_length > data.len - reader.seek) return error.EndOfStream;
            const value = data[reader.seek..][0..value_length];
            reader.seek += value_length;

            const version_length = try reader.takeLeb128(u32);
            if (version_length > data.len - reader.seek) return error.EndOfStream;
            const version = data[reader.seek..][0..version_length];
            reader.seek += version_length;

            try writer.print(
                \\value_name {s}
                \\version {s}
            , .{ value, version });
            try writer.writeByte('\n');
        }
    }
}
2743
/// Dumps the custom "target_features" section: the number of features,
/// then one line per feature holding its prefix character and its name.
///
/// `reader` reads from `data`. Returns `error.EndOfStream` when a name
/// length read from the input exceeds the bytes left in `data`.
fn parseDumpFeatures(reader: *std.Io.Reader, writer: *std.Io.Writer, data: []const u8) !void {
    const feature_count = try reader.takeLeb128(u32);
    try writer.print("features {d}\n", .{feature_count});

    for (0..feature_count) |_| {
        const prefix_byte = try reader.takeLeb128(u8);
        const name_length = try reader.takeLeb128(u32);
        // The length comes from the file; check it before slicing.
        if (name_length > data.len - reader.seek) return error.EndOfStream;
        const feature_name = data[reader.seek..][0..name_length];
        reader.seek += name_length;

        try writer.print("{c} {s}\n", .{ prefix_byte, feature_name });
    }
}
2758};