Commit 04d44db6cc

Andrew Kelley <andrew@ziglang.org>
2021-12-09 07:45:25
tools/gen_stubs: consolidate symbol properties into MultiSym
1 parent 808b923
Changed files (1)
tools/gen_stubs.zig
@@ -6,51 +6,143 @@
 //!
 //! ...each with 'lib/libc.so' inside of them.
 
+// TODO: pick the best index to put them into instead of at the end
+//       - e.g. find a common previous symbol and put it after that one
+//       - they definitely need to go into the correct section
+// TODO: emit MultiSyms to use the preprocessor
+
 const std = @import("std");
 const builtin = std.builtin;
 const mem = std.mem;
+const log = std.log;
 const elf = std.elf;
 const native_endian = @import("builtin").target.cpu.arch.endian();
 
+/// The architectures whose libc.so files are processed, stored so that
+/// `arches[archIndex(a)] == a` for every listed architecture `a`.
+const arches: [6]std.Target.Cpu.Arch = blk: {
+    const supported = [_]std.Target.Cpu.Arch{ .riscv64, .mips, .i386, .x86_64, .powerpc, .powerpc64 };
+    var by_index: [supported.len]std.Target.Cpu.Arch = undefined;
+    // Place each arch at the slot archIndex assigns to it.
+    for (supported) |arch| by_index[archIndex(arch)] = arch;
+    break :blk by_index;
+};
+
+/// Properties of one dynamic symbol, with per-architecture slots (indexed by
+/// `archIndex`) for the fields that are tracked separately for each arch.
+/// When parseElf finds an existing entry, a mismatch in `section`, `ty`,
+/// `binding`, or `visib` is reported as a fatal error.
+const MultiSym = struct {
+    /// Symbol size (`st_size`) for each arch; initialized to 0.
+    size: [arches.len]u64,
+    /// Whether the symbol has been recorded for each arch.
+    present: [arches.len]bool,
+    /// Index into the section-name map (`Parse.sections`), not the ELF file's
+    /// own section header index.
+    section: u16,
+    /// Symbol type: the low 4 bits of `st_info`.
+    ty: u4,
+    /// Symbol binding: the high 4 bits of `st_info`.
+    binding: u4,
+    /// Symbol visibility: the low 2 bits of `st_other`.
+    visib: elf.STV,
+};
+
+/// Bundle of inputs handed to parseElf for one architecture's ELF file.
+const Parse = struct {
+    /// Allocator used for everything parseElf allocates (duplicated names, index maps).
+    arena: mem.Allocator,
+    /// Symbol name -> accumulated properties; parseElf inserts new entries and
+    /// checks existing ones against the symbol being parsed.
+    sym_table: *std.StringArrayHashMap(MultiSym),
+    /// Set of section names; an entry's index is the value stored in `MultiSym.section`.
+    sections: *std.StringArrayHashMap(void),
+    /// Entire contents of the ELF file being parsed.
+    elf_bytes: []align(@alignOf(elf.Elf64_Ehdr)) u8,
+    /// Header parsed from the start of `elf_bytes`.
+    header: elf.Header,
+    /// Architecture this file belongs to; used for `archIndex` and in log messages.
+    arch: std.Target.Cpu.Arch,
+};
+
 pub fn main() !void {
     var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
     defer arena_instance.deinit();
     const arena = arena_instance.allocator();
 
     const args = try std.process.argsAlloc(arena);
-    const libc_so_path = args[1];
-
-    // Read the ELF header.
-    const elf_bytes = try std.fs.cwd().readFileAllocOptions(
-        arena,
-        libc_so_path,
-        100 * 1024 * 1024,
-        1 * 1024 * 1024,
-        @alignOf(elf.Elf64_Ehdr),
-        null,
-    );
-    const header = try elf.Header.parse(elf_bytes[0..@sizeOf(elf.Elf64_Ehdr)]);
-
-    switch (header.is_64) {
-        true => switch (header.endian) {
-            .Big => return finishMain(arena, elf_bytes, header, true, .Big),
-            .Little => return finishMain(arena, elf_bytes, header, true, .Little),
-        },
-        false => switch (header.endian) {
-            .Big => return finishMain(arena, elf_bytes, header, false, .Big),
-            .Little => return finishMain(arena, elf_bytes, header, false, .Little),
-        },
+    const build_all_path = args[1];
+
+    var build_all_dir = try std.fs.cwd().openDir(build_all_path, .{});
+
+    for (arches) |arch| {
+        const libc_so_path = try std.fmt.allocPrint(arena, "{s}/lib/libc.so", .{@tagName(arch)});
+
+        // Read the ELF header.
+        const elf_bytes = try build_all_dir.readFileAllocOptions(
+            arena,
+            libc_so_path,
+            100 * 1024 * 1024,
+            1 * 1024 * 1024,
+            @alignOf(elf.Elf64_Ehdr),
+            null,
+        );
+        const header = try elf.Header.parse(elf_bytes[0..@sizeOf(elf.Elf64_Ehdr)]);
+
+        var sym_table = std.StringArrayHashMap(MultiSym).init(arena);
+        var sections = std.StringArrayHashMap(void).init(arena);
+
+        const parse: Parse = .{
+            .arena = arena,
+            .sym_table = &sym_table,
+            .sections = &sections,
+            .elf_bytes = elf_bytes,
+            .header = header,
+            .arch = arch,
+        };
+
+        switch (header.is_64) {
+            true => switch (header.endian) {
+                .Big => try parseElf(parse, true, .Big),
+                .Little => try parseElf(parse, true, .Little),
+            },
+            false => switch (header.endian) {
+                .Big => try parseElf(parse, false, .Big),
+                .Little => try parseElf(parse, false, .Little),
+            },
+        }
     }
+
+    const stdout = std.io.getStdOut().writer();
+    _ = stdout;
+
+    //var prev_section: u16 = 0;
+    //for (all_syms) |sym| {
+    //    const this_section = s(sym.st_shndx);
+    //    if (this_section != prev_section) {
+    //        prev_section = this_section;
+    //        const sh_name = mem.sliceTo(shstrtab[s(shdrs[this_section].sh_name)..], 0);
+    //        try stdout.print("{s}\n", .{sh_name});
+    //    }
+
+    //    switch (binding) {
+    //        elf.STB_GLOBAL => {
+    //            try stdout.print(".globl {s}\n", .{name});
+    //        },
+    //        elf.STB_WEAK => {
+    //            try stdout.print(".weak {s}\n", .{name});
+    //        },
+    //        else => unreachable,
+    //    }
+
+    //    switch (ty) {
+    //        elf.STT_NOTYPE => {},
+    //        elf.STT_FUNC => {
+    //            try stdout.print(".type {s}, %function;\n", .{name});
+    //            // omitting the size is OK for functions
+    //        },
+    //        elf.STT_OBJECT => {
+    //            try stdout.print(".type {s}, %object;\n", .{name});
+    //            if (size != 0) {
+    //                try stdout.print(".size {s}, {d}\n", .{ name, size });
+    //            }
+    //        },
+    //        else => unreachable,
+    //    }
+
+    //    switch (visib) {
+    //        .DEFAULT => {},
+    //        .PROTECTED => try stdout.print(".protected {s}\n", .{name}),
+    //        .INTERNAL, .HIDDEN => unreachable,
+    //    }
+
+    //    try stdout.print("{s}:\n", .{name});
+    //}
 }
 
-fn finishMain(
-    arena: mem.Allocator,
-    elf_bytes: []align(@alignOf(elf.Elf64_Ehdr)) u8,
-    header: elf.Header,
-    comptime is_64: bool,
-    comptime endian: builtin.Endian,
-) !void {
-    _ = arena;
+fn parseElf(parse: Parse, comptime is_64: bool, comptime endian: builtin.Endian) !void {
+    const arena = parse.arena;
+    const elf_bytes = parse.elf_bytes;
+    const header = parse.header;
     const Sym = if (is_64) elf.Elf64_Sym else elf.Elf32_Sym;
     const S = struct {
         fn endianSwap(x: anytype) @TypeOf(x) {
@@ -73,17 +165,26 @@ fn finishMain(
 
     // Obtain the section header string table.
     const shstrtab_offset = s(shdrs[header.shstrndx].sh_offset);
-    std.log.debug("shstrtab is at offset {d}", .{shstrtab_offset});
+    log.debug("shstrtab is at offset {d}", .{shstrtab_offset});
     const shstrtab = elf_bytes[shstrtab_offset..];
 
+    // Maps this ELF file's section header index to the multi arch section ArrayHashMap index.
+    const section_index_map = try arena.alloc(u16, shdrs.len);
+
     // Find the offset of the dynamic symbol table.
-    const dynsym_index = for (shdrs) |shdr, i| {
-        const sh_name = mem.sliceTo(shstrtab[s(shdr.sh_name)..], 0);
-        std.log.debug("found section: {s}", .{sh_name});
-        if (mem.eql(u8, sh_name, ".dynsym")) break @intCast(u16, i);
-    } else @panic("did not find the .dynsym section");
+    var dynsym_index: u16 = 0;
+    for (shdrs) |shdr, i| {
+        const sh_name = try arena.dupe(u8, mem.sliceTo(shstrtab[s(shdr.sh_name)..], 0));
+        log.debug("found section: {s}", .{sh_name});
+        if (mem.eql(u8, sh_name, ".dynsym")) {
+            dynsym_index = @intCast(u16, i);
+        }
+        const gop = try parse.sections.getOrPut(sh_name);
+        section_index_map[i] = @intCast(u16, gop.index);
+    }
+    if (dynsym_index == 0) @panic("did not find the .dynsym section");
 
-    std.log.debug("found .dynsym section at index {d}", .{dynsym_index});
+    log.debug("found .dynsym section at index {d}", .{dynsym_index});
 
     // Read the dynamic symbols into a list.
     const dyn_syms_off = s(shdrs[dynsym_index].sh_offset);
@@ -96,25 +197,25 @@ fn finishMain(
     // Sort the list by address, ascending.
     std.sort.sort(Sym, dyn_syms, {}, S.symbolAddrLessThan);
 
-    const stdout = std.io.getStdOut().writer();
-
-    var prev_section: u16 = 0;
     for (dyn_syms) |sym| {
-        const name = mem.sliceTo(dynstr[s(sym.st_name)..], 0);
+        const this_section = s(sym.st_shndx);
+        const name = try arena.dupe(u8, mem.sliceTo(dynstr[s(sym.st_name)..], 0));
         const ty = @truncate(u4, sym.st_info);
         const binding = @truncate(u4, sym.st_info >> 4);
         const visib = @intToEnum(elf.STV, @truncate(u2, sym.st_other));
         const size = s(sym.st_size);
 
         if (size == 0) {
-            std.log.warn("symbol '{s}' has size 0", .{name});
+            log.warn("{s}: symbol '{s}' has size 0", .{ @tagName(parse.arch), name });
             continue;
         }
 
         switch (binding) {
             elf.STB_GLOBAL, elf.STB_WEAK => {},
             else => {
-                std.log.debug("skipping '{s}' due to it having binding '{d}'", .{ name, binding });
+                log.debug("{s}: skipping '{s}' due to it having binding '{d}'", .{
+                    @tagName(parse.arch), name, binding,
+                });
                 continue;
             },
         }
@@ -122,7 +223,9 @@ fn finishMain(
         switch (ty) {
             elf.STT_NOTYPE, elf.STT_FUNC, elf.STT_OBJECT => {},
             else => {
-                std.log.debug("skipping '{s}' due to it having type '{d}'", .{ name, ty });
+                log.debug("{s}: skipping '{s}' due to it having type '{d}'", .{
+                    @tagName(parse.arch), name, ty,
+                });
                 continue;
             },
         }
@@ -130,51 +233,79 @@ fn finishMain(
         switch (visib) {
             .DEFAULT, .PROTECTED => {},
             .INTERNAL, .HIDDEN => {
-                std.log.debug("skipping '{s}' due to it having visibility '{s}'", .{
-                    name, @tagName(visib),
+                log.debug("{s}: skipping '{s}' due to it having visibility '{s}'", .{
+                    @tagName(parse.arch), name, @tagName(visib),
                 });
                 continue;
             },
         }
 
-        const this_section = s(sym.st_shndx);
-        if (this_section != prev_section) {
-            prev_section = this_section;
-            const sh_name = mem.sliceTo(shstrtab[s(shdrs[this_section].sh_name)..], 0);
-            try stdout.print("{s}\n", .{sh_name});
-        }
-
-        switch (binding) {
-            elf.STB_GLOBAL => {
-                try stdout.print(".globl {s}\n", .{name});
-            },
-            elf.STB_WEAK => {
-                try stdout.print(".weak {s}\n", .{name});
-            },
-            else => unreachable,
+        const gop = try parse.sym_table.getOrPut(name);
+        if (gop.found_existing) {
+            if (gop.value_ptr.section != section_index_map[this_section]) {
+                const sh_name = mem.sliceTo(shstrtab[s(shdrs[this_section].sh_name)..], 0);
+                fatal("symbol '{s}' in arch {s} is in section {s} but in arch {s} is in section {s}", .{
+                    name,                               @tagName(parse.arch),                         sh_name,
+                    archSetName(gop.value_ptr.present), parse.sections.keys()[gop.value_ptr.section],
+                });
+            }
+            if (gop.value_ptr.ty != ty) {
+                fatal("symbol '{s}' in arch {s} has type {d} but in arch {s} has type {d}", .{
+                    name,                               @tagName(parse.arch), ty,
+                    archSetName(gop.value_ptr.present), gop.value_ptr.ty,
+                });
+            }
+            if (gop.value_ptr.binding != binding) {
+                fatal("symbol '{s}' in arch {s} has binding {d} but in arch {s} has binding {d}", .{
+                    name,                               @tagName(parse.arch),  binding,
+                    archSetName(gop.value_ptr.present), gop.value_ptr.binding,
+                });
+            }
+            if (gop.value_ptr.visib != visib) {
+                fatal("symbol '{s}' in arch {s} has visib {s} but in arch {s} has visib {s}", .{
+                    name,                               @tagName(parse.arch),          @tagName(visib),
+                    archSetName(gop.value_ptr.present), @tagName(gop.value_ptr.visib),
+                });
+            }
+        } else {
+            gop.value_ptr.* = .{
+                .present = [1]bool{false} ** arches.len,
+                .section = section_index_map[this_section],
+                .ty = ty,
+                .binding = binding,
+                .visib = visib,
+                .size = [1]u64{0} ** arches.len,
+            };
         }
+        gop.value_ptr.present[archIndex(parse.arch)] = true;
+        gop.value_ptr.size[archIndex(parse.arch)] = size;
+    }
+}
 
-        switch (ty) {
-            elf.STT_NOTYPE => {},
-            elf.STT_FUNC => {
-                try stdout.print(".type {s}, %function;\n", .{name});
-                // omitting the size is OK for functions
-            },
-            elf.STT_OBJECT => {
-                try stdout.print(".type {s}, %object;\n", .{name});
-                if (size != 0) {
-                    try stdout.print(".size {s}, {d}\n", .{ name, size });
-                }
-            },
-            else => unreachable,
-        }
+/// Returns the slot assigned to `arch` in `arches` and in the per-arch arrays
+/// of `MultiSym`. Any architecture not listed below is treated as unreachable.
+fn archIndex(arch: std.Target.Cpu.Arch) u8 {
+    // zig fmt: off
+    switch (arch) {
+        .riscv64   => return 0,
+        .mips      => return 1,
+        .i386      => return 2,
+        .x86_64    => return 3,
+        .powerpc   => return 4,
+        .powerpc64 => return 5,
+        else       => unreachable,
+    }
+    // zig fmt: on
+}
 
-        switch (visib) {
-            .DEFAULT => {},
-            .PROTECTED => try stdout.print(".protected {s}\n", .{name}),
-            .INTERNAL, .HIDDEN => unreachable,
+/// Returns the tag name of the first architecture (in `arches` order) whose
+/// flag is set in `arch_set`, or "(none)" when no flag is set.
+/// Only one architecture is named even if several flags are set.
+fn archSetName(arch_set: [arches.len]bool) []const u8 {
+    for (arches) |arch, i| {
+        if (arch_set[i]) {
+            return @tagName(arch);
         }
-
-        try stdout.print("{s}:\n", .{name});
     }
+    return "(none)";
+}
+
+/// Logs the formatted message at error level, then terminates the process
+/// with exit status 1. Never returns.
+fn fatal(comptime format: []const u8, args: anytype) noreturn {
+    log.err(format, args);
+    std.process.exit(1);
 }