Commit d5c2f8ed32

Jakub Konka <kubkon@jakubkonka.com>
2021-04-02 07:45:11
zld: store a single global symtab
1 parent 1b5bcee
Changed files (3)
src/link/MachO/Object.zig
@@ -237,7 +237,23 @@ pub fn parseSymtab(self: *Object) !void {
             error.EndOfStream => break,
             else => |e| return e,
         };
+        const tag: Symbol.Tag = tag: {
+            if (Symbol.isLocal(symbol)) {
+                if (Symbol.isStab(symbol))
+                    break :tag .Stab
+                else
+                    break :tag .Local;
+            } else if (Symbol.isGlobal(symbol)) {
+                if (Symbol.isWeakDef(symbol))
+                    break :tag .Weak
+                else
+                    break :tag .Strong;
+            } else {
+                break :tag .Undef;
+            }
+        };
         self.symtab.appendAssumeCapacity(.{
+            .tag = tag,
             .inner = symbol,
         });
     }
src/link/MachO/Symbol.zig
@@ -3,6 +3,17 @@ const Symbol = @This();
 const std = @import("std");
 const macho = std.macho;
 
+pub const Tag = enum {
+    Stab,
+    Local,
+    Weak,
+    Strong,
+    Import,
+    Undef,
+};
+
+tag: Tag,
+
 /// MachO representation of this symbol.
 inner: macho.nlist_64,
 
@@ -13,43 +24,43 @@ file: ?u16 = null,
 /// Index of this symbol within the file's symbol table.
 index: ?u32 = null,
 
-pub fn isStab(self: Symbol) bool {
-    return (macho.N_STAB & self.inner.n_type) != 0;
+pub fn isStab(sym: macho.nlist_64) bool {
+    return (macho.N_STAB & sym.n_type) != 0;
 }
 
-pub fn isPext(self: Symbol) bool {
-    return (macho.N_PEXT & self.inner.n_type) != 0;
+pub fn isPext(sym: macho.nlist_64) bool {
+    return (macho.N_PEXT & sym.n_type) != 0;
 }
 
-pub fn isExt(self: Symbol) bool {
-    return (macho.N_EXT & self.inner.n_type) != 0;
+pub fn isExt(sym: macho.nlist_64) bool {
+    return (macho.N_EXT & sym.n_type) != 0;
 }
 
-pub fn isSect(self: Symbol) bool {
-    const type_ = macho.N_TYPE & self.inner.n_type;
+pub fn isSect(sym: macho.nlist_64) bool {
+    const type_ = macho.N_TYPE & sym.n_type;
     return type_ == macho.N_SECT;
 }
 
-pub fn isUndf(self: Symbol) bool {
-    const type_ = macho.N_TYPE & self.inner.n_type;
+pub fn isUndf(sym: macho.nlist_64) bool {
+    const type_ = macho.N_TYPE & sym.n_type;
     return type_ == macho.N_UNDF;
 }
 
-pub fn isWeakDef(self: Symbol) bool {
-    return self.inner.n_desc == macho.N_WEAK_DEF;
+pub fn isWeakDef(sym: macho.nlist_64) bool {
+    return sym.n_desc == macho.N_WEAK_DEF;
 }
 
 /// Symbol is local if it is either a stab or it is defined and not an extern.
-pub fn isLocal(self: Symbol) bool {
-    return self.isStab() or (self.isSect() and !self.isExt());
+pub fn isLocal(sym: macho.nlist_64) bool {
+    return isStab(sym) or (isSect(sym) and !isExt(sym));
 }
 
 /// Symbol is global if it is defined and an extern.
-pub fn isGlobal(self: Symbol) bool {
-    return self.isSect() and self.isExt();
+pub fn isGlobal(sym: macho.nlist_64) bool {
+    return isSect(sym) and isExt(sym);
 }
 
 /// Symbol is undefined if it is not defined and an extern.
-pub fn isUndef(self: Symbol) bool {
-    return self.isUndf() and self.isExt();
+pub fn isUndef(sym: macho.nlist_64) bool {
+    return isUndf(sym) and isExt(sym);
 }
src/link/MachO/Zld.zig
@@ -73,9 +73,7 @@ la_symbol_ptr_section_index: ?u16 = null,
 data_section_index: ?u16 = null,
 bss_section_index: ?u16 = null,
 
-globals: std.StringArrayHashMapUnmanaged(Symbol) = .{},
-undefs: std.StringArrayHashMapUnmanaged(Symbol) = .{},
-externs: std.StringArrayHashMapUnmanaged(Symbol) = .{},
+symtab: std.StringArrayHashMapUnmanaged(Symbol) = .{},
 strtab: std.ArrayListUnmanaged(u8) = .{},
 
 threadlocal_offsets: std.ArrayListUnmanaged(u64) = .{},
@@ -210,20 +208,11 @@ pub fn deinit(self: *Zld) void {
     self.mappings.deinit(self.allocator);
     self.unhandled_sections.deinit(self.allocator);
 
-    for (self.globals.items()) |*entry| {
+    for (self.symtab.items()) |*entry| {
         self.allocator.free(entry.key);
     }
-    self.globals.deinit(self.allocator);
-
-    for (self.undefs.items()) |*entry| {
-        self.allocator.free(entry.key);
-    }
-    self.undefs.deinit(self.allocator);
-
-    for (self.externs.items()) |*entry| {
-        self.allocator.free(entry.key);
-    }
-    self.externs.deinit(self.allocator);
+    self.symtab.deinit(self.allocator);
+    self.strtab.deinit(self.allocator);
 }
 
 pub fn closeFiles(self: Zld) void {
@@ -276,10 +265,11 @@ pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8) !void {
     try self.resolveSymbols();
     try self.updateMetadata();
     try self.sortSections();
-    try self.allocateTextSegment();
-    try self.allocateDataConstSegment();
-    try self.allocateDataSegment();
-    self.allocateLinkeditSegment();
+    self.printSymtab();
+    // try self.allocateTextSegment();
+    // try self.allocateDataConstSegment();
+    // try self.allocateDataSegment();
+    // self.allocateLinkeditSegment();
     // try self.writeStubHelperCommon();
     // try self.doRelocs();
     // try self.flush();
@@ -1216,48 +1206,64 @@ fn resolveSymbolsInObject(self: *Zld, object_id: u16) !void {
     log.warn("resolving symbols in '{s}'", .{object.name});
 
     for (object.symtab.items) |sym, sym_id| {
-        if (sym.isLocal()) continue; // If symbol is local to CU, we don't put it in the global symbol table.
-
-        const sym_name = object.getString(sym.inner.n_strx);
-        if (sym.isGlobal()) {
-            const global = self.globals.getEntry(sym_name) orelse {
-                const name = try self.allocator.dupe(u8, sym_name);
-                try self.globals.putNoClobber(self.allocator, name, .{
-                    .inner = sym.inner,
-                    .file = object_id,
-                    .index = @intCast(u32, sym_id),
-                });
+        switch (sym.tag) {
+            .Local, .Stab => continue, // If symbol is local to CU, we don't put it in the global symbol table.
+            .Weak, .Strong => {
+                const sym_name = object.getString(sym.inner.n_strx);
+                const global = self.symtab.getEntry(sym_name) orelse {
+                    // Put new global symbol into the symbol table.
+                    const name = try self.allocator.dupe(u8, sym_name);
+                    try self.symtab.putNoClobber(self.allocator, name, .{
+                        .tag = sym.tag,
+                        .inner = .{
+                            .n_strx = 0, // This will be populated later.
+                            .n_value = 0, // This will be populated later.
+                            .n_type = macho.N_SECT | macho.N_EXT,
+                            .n_desc = 0,
+                            .n_sect = 0, // This will be populated later.
+                        },
+                        .file = object_id,
+                        .index = @intCast(u32, sym_id),
+                    });
+                    continue;
+                };
 
-                if (self.undefs.swapRemove(sym_name)) |undef| {
-                    self.allocator.free(undef.key);
+                if (sym.tag == .Weak) continue; // If symbol is weak, nothing to do.
+                if (global.value.tag == .Strong) { // If both symbols are strong, we have a collision.
+                    log.err("symbol '{s}' defined multiple times", .{sym_name});
+                    return error.MultipleSymbolDefinitions;
                 }
 
-                continue;
-            };
-
-            if (sym.isWeakDef()) continue; // If symbol is weak, nothing to do.
-            if (!global.value.isWeakDef()) { // If both symbols are strong, we have a collision.
-                log.err("symbol '{s}' defined multiple times", .{sym_name});
-                return error.MultipleSymbolDefinitions;
-            }
-
-            global.value = .{
-                .inner = sym.inner,
-                .file = object_id,
-                .index = @intCast(u32, sym_id),
-            };
-        } else if (sym.isUndef()) {
-            if (self.globals.contains(sym_name)) continue; // Nothing to do if we already found a definition.
-            if (self.undefs.contains(sym_name)) continue; // No need to reinsert the undef ref.
+                global.value = .{
+                    .tag = .Strong,
+                    .inner = .{
+                        .n_strx = 0, // This will be populated later.
+                        .n_value = 0, // This will be populated later.
+                        .n_type = macho.N_SECT | macho.N_EXT,
+                        .n_desc = 0,
+                        .n_sect = 0, // This will be populated later.
+                    },
+                    .file = object_id,
+                    .index = @intCast(u32, sym_id),
+                };
+            },
+            .Undef => {
+                const sym_name = object.getString(sym.inner.n_strx);
+                if (self.symtab.contains(sym_name)) continue; // Nothing to do if we already found a definition.
 
-            const name = try self.allocator.dupe(u8, sym_name);
-            try self.undefs.putNoClobber(self.allocator, name, .{
-                .inner = sym.inner,
-            });
-        } else {
-            // Oh no, unhandled symbol type, report back to the user.
-            log.err("unhandled symbol type for symbol {any}", .{sym});
-            return error.UnhandledSymbolType;
+                const name = try self.allocator.dupe(u8, sym_name);
+                try self.symtab.putNoClobber(self.allocator, name, .{
+                    .tag = .Undef,
+                    .inner = .{
+                        .n_strx = 0,
+                        .n_value = 0,
+                        .n_type = 0,
+                        .n_desc = 0,
+                        .n_sect = 0,
+                    },
+                });
+            },
+            .Import => unreachable, // We don't expect any imports just yet.
         }
     }
 }
@@ -1274,7 +1280,9 @@ fn resolveSymbols(self: *Zld) !void {
         var archive = &self.archives.items[next];
         var hit: bool = false;
 
-        for (self.undefs.items()) |entry| {
+        for (self.symtab.items()) |entry| {
+            if (entry.value.tag != .Undef) continue;
+
             const sym_name = entry.key;
 
             // Check if the entry exists in a static archive.
@@ -1306,9 +1314,10 @@ fn resolveSymbols(self: *Zld) !void {
     // Third pass, resolve symbols in dynamic libraries.
     // TODO Implement libSystem as a hard-coded library, or ship with
     // a libSystem.B.tbd definition file?
-    while (self.undefs.items().len > 0) {
-        const entry = self.undefs.pop();
-        try self.externs.putNoClobber(self.allocator, entry.key, .{
+    for (self.symtab.items()) |*entry| {
+        if (entry.value.tag != .Undef) continue;
+        entry.value = .{
+            .tag = .Import,
             .inner = .{
                 .n_strx = 0, // This will be populated once we write the string table.
                 .n_type = macho.N_UNDF | macho.N_EXT,
@@ -1317,30 +1326,19 @@ fn resolveSymbols(self: *Zld) !void {
                 .n_value = 0,
             },
             .file = 0,
-        });
+        };
     }
 
     // If there are any undefs left, flag an error.
-    if (self.undefs.items().len > 0) {
-        for (self.undefs.items()) |entry| {
-            log.err("undefined reference to symbol '{s}'", .{entry.key});
-        }
-
+    var has_unresolved = false;
+    for (self.symtab.items()) |entry| {
+        if (entry.value.tag != .Undef) continue;
+        has_unresolved = true;
+        log.err("undefined reference to symbol '{s}'", .{entry.key});
+    }
+    if (has_unresolved) {
         return error.UndefinedSymbolReference;
     }
-
-    // Finally, put in a reference to 'dyld_stub_binder'.
-    const name = try self.allocator.dupe(u8, "dyld_stub_binder");
-    try self.externs.putNoClobber(self.allocator, name, .{
-        .inner = .{
-            .n_strx = 0, // This will be populated once we write the string table.
-            .n_type = std.macho.N_UNDF | std.macho.N_EXT,
-            .n_sect = 0,
-            .n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER,
-            .n_value = 0,
-        },
-        .file = 0,
-    });
 }
 
 fn doRelocs(self: *Zld) !void {
@@ -3261,18 +3259,8 @@ fn aarch64IsArithmetic(inst: *const [4]u8) callconv(.Inline) bool {
 }
 
 fn printSymtab(self: Zld) void {
-    log.warn("globals", .{});
-    for (self.globals.items()) |entry| {
-        log.warn("    | {s} => {any}", .{ entry.key, entry.value });
-    }
-
-    log.warn("externs", .{});
-    for (self.externs.items()) |entry| {
-        log.warn("    | {s} => {any}", .{ entry.key, entry.value });
-    }
-
-    log.warn("undefs", .{});
-    for (self.undefs.items()) |entry| {
+    log.warn("symtab", .{});
+    for (self.symtab.items()) |entry| {
         log.warn("    | {s} => {any}", .{ entry.key, entry.value });
     }
 }