Commit 2831d6e9b8

Jakub Konka <kubkon@jakubkonka.com>
2021-08-30 15:43:20
macho: add first pass at allocating parsed atoms in objects
This commit makes it possible to link code produced by the self-hosted compiler together with a pre-compiled C object file, e.g.: ``` zig-out/bin/zig build-exe hello.zig add.o ``` where `add.o` is a pre-compiled C object file.
1 parent a14e98f
Changed files (4)
src/link/MachO/commands.zig
@@ -337,7 +337,7 @@ pub const SegmentCommand = struct {
         return null;
     }
 
-    pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u16, start: ?u64) u64 {
+    pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u32, start: ?u64) u64 {
         var st: u64 = if (start) |v| v else self.inner.fileoff;
         while (self.detectAllocCollision(st, object_size)) |item_end| {
             st = mem.alignForwardGeneric(u64, item_end, min_alignment);
src/link/MachO/Object.zig
@@ -504,7 +504,6 @@ pub fn parseTextBlocks(
             log.debug("unhandled section", .{});
             continue;
         };
-        // TODO allocate section here.
 
         // Read section's code
         var code = try allocator.alloc(u8, @intCast(usize, sect.size));
@@ -569,12 +568,6 @@ pub fn parseTextBlocks(
                     const block_size = block_code.len;
                     const block = try macho_file.createEmptyAtom(block_local_sym_index, block_size, sect.@"align");
 
-                    if (use_stage1) {
-                        try macho_file.allocateAtomStage1(block, match);
-                    } else {
-                        _ = try macho_file.allocateAtom(block, match);
-                    }
-
                     mem.copy(u8, block.code.items, block_code);
 
                     try block.parseRelocs(relocs, .{
@@ -597,6 +590,11 @@ pub fn parseTextBlocks(
                         }
                     }
 
+                    if (use_stage1) {
+                        try macho_file.allocateAtomStage1(block, match);
+                    } else {
+                        _ = try macho_file.allocateAtom(block, match);
+                    }
                     try self.text_blocks.append(allocator, block);
                 }
 
@@ -648,7 +646,6 @@ pub fn parseTextBlocks(
                     } else {
                         _ = try macho_file.allocateAtom(block, match);
                     }
-
                     try self.text_blocks.append(allocator, block);
                 }
 
@@ -679,12 +676,6 @@ pub fn parseTextBlocks(
             };
             const block = try macho_file.createEmptyAtom(block_local_sym_index, sect.size, sect.@"align");
 
-            if (use_stage1) {
-                try macho_file.allocateAtomStage1(block, match);
-            } else {
-                _ = try macho_file.allocateAtom(block, match);
-            }
-
             mem.copy(u8, block.code.items, code);
 
             try block.parseRelocs(relocs, .{
@@ -743,6 +734,11 @@ pub fn parseTextBlocks(
                 });
             }
 
+            if (use_stage1) {
+                try macho_file.allocateAtomStage1(block, match);
+            } else {
+                _ = try macho_file.allocateAtom(block, match);
+            }
             try self.text_blocks.append(allocator, block);
         }
     }
src/link/MachO/TextBlock.zig
@@ -1183,9 +1183,22 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void {
                 },
                 .undef => {
                     const atom = macho_file.stubs_map.get(rel.where_index) orelse {
+                        // TODO this is required for incremental when we don't have every symbol
+                        // resolved when creating relocations. In this case, we will insert a branch
+                        // reloc to an undef symbol which may happen to be defined within the binary.
+                        // Then, the undef we point at will be a null symbol (free symbol) which we
+                        // should remove/repurpose. To circumvent this (for now), we check if the symbol
+                        // we point to is garbage, and if so we fall back to symbol resolver to find by name.
+                        const n_strx = macho_file.undefs.items[rel.where_index].n_strx;
+                        if (macho_file.symbol_resolver.get(n_strx)) |resolv| inner: {
+                            if (resolv.where != .global) break :inner;
+                            break :blk macho_file.globals.items[resolv.where_index].n_value;
+                        }
+
                         // TODO verify in TextBlock that the symbol is indeed dynamically bound.
                         break :blk 0; // Dynamically bound by dyld.
                     };
+
                     break :blk macho_file.locals.items[atom.local_sym_index].n_value;
                 },
             }
src/link/MachO.zig
@@ -789,6 +789,31 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
             try self.allocateTextBlocks();
             try self.flushZld();
         } else {
+            try self.parseTextBlocks();
+            try self.allocateGlobalSymbols();
+            {
+                log.debug("locals:", .{});
+                for (self.locals.items) |sym| {
+                    log.debug("  {s}: {}", .{ self.getString(sym.n_strx), sym });
+                }
+                log.debug("globals:", .{});
+                for (self.globals.items) |sym| {
+                    log.debug("  {s}: {}", .{ self.getString(sym.n_strx), sym });
+                }
+                log.debug("undefs:", .{});
+                for (self.undefs.items) |sym| {
+                    log.debug("  {s}: {}", .{ self.getString(sym.n_strx), sym });
+                }
+                log.debug("unresolved:", .{});
+                for (self.unresolved.keys()) |key| {
+                    log.debug("  {d} => {s}", .{ key, self.unresolved.get(key).? });
+                }
+                log.debug("resolved:", .{});
+                var it = self.symbol_resolver.iterator();
+                while (it.next()) |entry| {
+                    log.debug("  {s} => {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* });
+                }
+            }
             try self.writeAtoms();
             try self.flushModule(comp);
         }
@@ -1114,12 +1139,14 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
     const segname = commands.segmentName(sect);
     const sectname = commands.sectionName(sect);
 
+    var needs_allocation = false;
     const res: ?MatchingSection = blk: {
         switch (commands.sectionType(sect)) {
             macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => {
                 if (self.text_const_section_index == null) {
                     self.text_const_section_index = @intCast(u16, text_seg.sections.items.len);
                     try text_seg.addSection(self.base.allocator, "__const", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1136,6 +1163,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         try text_seg.addSection(self.base.allocator, "__objc_methname", .{
                             .flags = macho.S_CSTRING_LITERALS,
                         });
+                        needs_allocation = true;
                     }
 
                     break :blk .{
@@ -1148,6 +1176,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         try text_seg.addSection(self.base.allocator, "__objc_methtype", .{
                             .flags = macho.S_CSTRING_LITERALS,
                         });
+                        needs_allocation = true;
                     }
 
                     break :blk .{
@@ -1158,6 +1187,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     if (self.objc_classname_section_index == null) {
                         self.objc_classname_section_index = @intCast(u16, text_seg.sections.items.len);
                         try text_seg.addSection(self.base.allocator, "__objc_classname", .{});
+                        needs_allocation = true;
                     }
 
                     break :blk .{
@@ -1171,6 +1201,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try text_seg.addSection(self.base.allocator, "__cstring", .{
                         .flags = macho.S_CSTRING_LITERALS,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1185,6 +1216,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         try data_seg.addSection(self.base.allocator, "__objc_selrefs", .{
                             .flags = macho.S_LITERAL_POINTERS,
                         });
+                        needs_allocation = true;
                     }
 
                     break :blk .{
@@ -1202,6 +1234,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try data_const_seg.addSection(self.base.allocator, "__mod_init_func", .{
                         .flags = macho.S_MOD_INIT_FUNC_POINTERS,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1215,6 +1248,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try data_const_seg.addSection(self.base.allocator, "__mod_term_func", .{
                         .flags = macho.S_MOD_TERM_FUNC_POINTERS,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1228,6 +1262,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try data_seg.addSection(self.base.allocator, "__bss", .{
                         .flags = macho.S_ZEROFILL,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1241,6 +1276,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try data_seg.addSection(self.base.allocator, "__thread_vars", .{
                         .flags = macho.S_THREAD_LOCAL_VARIABLES,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1254,6 +1290,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try data_seg.addSection(self.base.allocator, "__thread_data", .{
                         .flags = macho.S_THREAD_LOCAL_REGULAR,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1267,6 +1304,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try data_seg.addSection(self.base.allocator, "__thread_bss", .{
                         .flags = macho.S_THREAD_LOCAL_ZEROFILL,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1281,6 +1319,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     if (self.eh_frame_section_index == null) {
                         self.eh_frame_section_index = @intCast(u16, text_seg.sections.items.len);
                         try text_seg.addSection(self.base.allocator, "__eh_frame", .{});
+                        needs_allocation = true;
                     }
 
                     break :blk .{
@@ -1293,6 +1332,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.data_const_section_index == null) {
                     self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len);
                     try data_const_seg.addSection(self.base.allocator, "__const", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1307,6 +1347,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         try text_seg.addSection(self.base.allocator, "__text", .{
                             .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
                         });
+                        needs_allocation = true;
                     }
 
                     break :blk .{
@@ -1329,6 +1370,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         if (self.ustring_section_index == null) {
                             self.ustring_section_index = @intCast(u16, text_seg.sections.items.len);
                             try text_seg.addSection(self.base.allocator, "__ustring", .{});
+                            needs_allocation = true;
                         }
 
                         break :blk .{
@@ -1339,6 +1381,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         if (self.gcc_except_tab_section_index == null) {
                             self.gcc_except_tab_section_index = @intCast(u16, text_seg.sections.items.len);
                             try text_seg.addSection(self.base.allocator, "__gcc_except_tab", .{});
+                            needs_allocation = true;
                         }
 
                         break :blk .{
@@ -1349,6 +1392,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         if (self.objc_methlist_section_index == null) {
                             self.objc_methlist_section_index = @intCast(u16, text_seg.sections.items.len);
                             try text_seg.addSection(self.base.allocator, "__objc_methlist", .{});
+                            needs_allocation = true;
                         }
 
                         break :blk .{
@@ -1364,6 +1408,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         if (self.data_const_section_index == null) {
                             self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len);
                             try data_const_seg.addSection(self.base.allocator, "__const", .{});
+                            needs_allocation = true;
                         }
 
                         break :blk .{
@@ -1374,6 +1419,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         if (self.text_const_section_index == null) {
                             self.text_const_section_index = @intCast(u16, text_seg.sections.items.len);
                             try text_seg.addSection(self.base.allocator, "__const", .{});
+                            needs_allocation = true;
                         }
 
                         break :blk .{
@@ -1387,6 +1433,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     if (self.data_const_section_index == null) {
                         self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len);
                         try data_const_seg.addSection(self.base.allocator, "__const", .{});
+                        needs_allocation = true;
                     }
 
                     break :blk .{
@@ -1400,6 +1447,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         if (self.data_const_section_index == null) {
                             self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len);
                             try data_const_seg.addSection(self.base.allocator, "__const", .{});
+                            needs_allocation = true;
                         }
 
                         break :blk .{
@@ -1410,6 +1458,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         if (self.objc_cfstring_section_index == null) {
                             self.objc_cfstring_section_index = @intCast(u16, data_const_seg.sections.items.len);
                             try data_const_seg.addSection(self.base.allocator, "__cfstring", .{});
+                            needs_allocation = true;
                         }
 
                         break :blk .{
@@ -1420,6 +1469,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         if (self.objc_classlist_section_index == null) {
                             self.objc_classlist_section_index = @intCast(u16, data_const_seg.sections.items.len);
                             try data_const_seg.addSection(self.base.allocator, "__objc_classlist", .{});
+                            needs_allocation = true;
                         }
 
                         break :blk .{
@@ -1430,6 +1480,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         if (self.objc_imageinfo_section_index == null) {
                             self.objc_imageinfo_section_index = @intCast(u16, data_const_seg.sections.items.len);
                             try data_const_seg.addSection(self.base.allocator, "__objc_imageinfo", .{});
+                            needs_allocation = true;
                         }
 
                         break :blk .{
@@ -1440,6 +1491,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         if (self.objc_const_section_index == null) {
                             self.objc_const_section_index = @intCast(u16, data_seg.sections.items.len);
                             try data_seg.addSection(self.base.allocator, "__objc_const", .{});
+                            needs_allocation = true;
                         }
 
                         break :blk .{
@@ -1450,6 +1502,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         if (self.objc_classrefs_section_index == null) {
                             self.objc_classrefs_section_index = @intCast(u16, data_seg.sections.items.len);
                             try data_seg.addSection(self.base.allocator, "__objc_classrefs", .{});
+                            needs_allocation = true;
                         }
 
                         break :blk .{
@@ -1460,6 +1513,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         if (self.objc_data_section_index == null) {
                             self.objc_data_section_index = @intCast(u16, data_seg.sections.items.len);
                             try data_seg.addSection(self.base.allocator, "__objc_data", .{});
+                            needs_allocation = true;
                         }
 
                         break :blk .{
@@ -1470,6 +1524,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                         if (self.data_section_index == null) {
                             self.data_section_index = @intCast(u16, data_seg.sections.items.len);
                             try data_seg.addSection(self.base.allocator, "__data", .{});
+                            needs_allocation = true;
                         }
 
                         break :blk .{
@@ -1494,6 +1549,36 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
     if (res) |match| {
         _ = try self.section_ordinals.getOrPut(self.base.allocator, match);
         _ = try self.block_free_lists.getOrPutValue(self.base.allocator, match, .{});
+
+        const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1;
+        if (!use_stage1) {
+            const target_seg = &self.load_commands.items[match.seg].Segment;
+            const target_sect = &target_seg.sections.items[match.sect];
+
+            // Update section's alignment
+            // TODO if sect.@"align" > target_sect.@"align", should we move the entire
+            // section to match the required alignment?
+            target_sect.@"align" = math.max(target_sect.@"align", sect.@"align");
+
+            if (needs_allocation) {
+                const alignment = try math.powi(u32, 2, target_sect.@"align");
+                const needed_size = sect.size;
+                const off = target_seg.findFreeSpace(needed_size, alignment, self.header_pad);
+                assert(off + needed_size <= target_seg.inner.fileoff + target_seg.inner.filesize); // TODO expand
+
+                log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{
+                    segname,
+                    sectname,
+                    off,
+                    off + needed_size,
+                });
+
+                target_sect.addr = target_seg.inner.vmaddr + off;
+                target_sect.size = needed_size;
+                target_sect.offset = @intCast(u32, off);
+                self.load_commands_dirty = true;
+            }
+        }
     }
 
     return res;
@@ -1759,23 +1844,41 @@ pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment:
 }
 
 pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 {
-    // TODO converge with `allocateTextBlock`
-    const seg = self.load_commands.items[match.seg].Segment;
-    const sect = seg.sections.items[match.sect];
+    const seg = &self.load_commands.items[match.seg].Segment;
+    const sect = &seg.sections.items[match.sect];
     const sym = &self.locals.items[atom.local_sym_index];
-    const base_addr = if (self.blocks.get(match)) |last| blk: {
+
+    var atom_placement: ?*TextBlock = null;
+
+    // TODO converge with `allocateTextBlock` and handle free list
+    const vaddr = if (self.blocks.get(match)) |last| blk: {
         const last_atom_sym = self.locals.items[last.local_sym_index];
-        break :blk last_atom_sym.n_value + last.size;
+        const ideal_capacity = padToIdeal(last.size);
+        const ideal_capacity_end_vaddr = last_atom_sym.n_value + ideal_capacity;
+        const last_atom_alignment = try math.powi(u32, 2, atom.alignment);
+        const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, last_atom_alignment);
+        atom_placement = last;
+        break :blk new_start_vaddr;
     } else sect.addr;
-    const atom_alignment = try math.powi(u32, 2, atom.alignment);
-    const vaddr = mem.alignForwardGeneric(u64, base_addr, atom_alignment);
+
     log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr });
 
-    const expand_section = true;
+    const expand_section = atom_placement == null or atom_placement.?.next == null;
     if (expand_section) {
-        // Expand the section, possibly shifting all the atoms for the sections following it.
-        // It might also be needed to shift entire segments too if there is not enough
-        // padding left.
+        const needed_size = (vaddr + atom.size) - sect.addr;
+        const end_addr = blk: {
+            const next_ordinal = self.section_ordinals.getIndex(match).?; // Ordinals are +1 to begin with.
+            const end_addr = if (self.section_ordinals.keys().len > next_ordinal) inner: {
+                const next_match = self.section_ordinals.keys()[next_ordinal];
+                const next_seg = self.load_commands.items[next_match.seg].Segment;
+                const next_sect = next_seg.sections.items[next_match.sect];
+                break :inner next_sect.addr;
+            } else seg.inner.filesize;
+            break :blk end_addr;
+        };
+        assert(needed_size <= end_addr); // TODO must expand the section
+        sect.size = needed_size;
+        self.load_commands_dirty = true;
     }
     const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1);
     sym.n_value = vaddr;
@@ -1828,6 +1931,21 @@ pub fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void {
     try self.writeLocalSymbol(atom.local_sym_index);
 }
 
+fn allocateGlobalSymbols(self: *MachO) !void {
+    // Copies each resolved global's address and section from its backing local symbol.
+    // TODO should we do this in `allocateAtom` (or similar)? Then, we would need to store the link atom -> globals somewhere.
+    var sym_it = self.symbol_resolver.valueIterator();
+    while (sym_it.next()) |resolv| {
+        if (resolv.where != .global) continue; // only entries resolved to a global are updated
+
+        assert(resolv.local_sym_index != 0); // index 0 is not accepted as a backing local here
+        const local_sym = self.locals.items[resolv.local_sym_index]; // by-value copy; only read below
+        const sym = &self.globals.items[resolv.where_index]; // pointer, so the writes land in `globals`
+        sym.n_value = local_sym.n_value;
+        sym.n_sect = local_sym.n_sect;
+    }
+}
+
 pub fn allocateAtomStage1(self: *MachO, atom: *TextBlock, match: MatchingSection) !void {
     // Update target section's metadata
     // TODO should we update segment's size here too?
@@ -2313,14 +2431,14 @@ fn resolveSymbolsInObject(
                     continue;
                 },
                 .undef => {
-                    const undef = &self.undefs.items[resolv.where_index];
-                    undef.* = .{
-                        .n_strx = 0,
-                        .n_type = macho.N_UNDF,
-                        .n_sect = 0,
-                        .n_desc = 0,
-                        .n_value = 0,
-                    };
+                    // const undef = &self.undefs.items[resolv.where_index];
+                    // undef.* = .{
+                    //     .n_strx = 0,
+                    //     .n_type = macho.N_UNDF,
+                    //     .n_sect = 0,
+                    //     .n_desc = 0,
+                    //     .n_value = 0,
+                    // };
                     _ = self.unresolved.fetchSwapRemove(resolv.where_index);
                 },
             }
@@ -2457,18 +2575,9 @@ fn resolveSymbols(self: *MachO) !void {
     // text blocks for each tentative definition.
     while (tentatives.popOrNull()) |entry| {
         const sym = &self.globals.items[entry.key];
-        const match: MatchingSection = blk: {
-            if (self.bss_section_index == null) {
-                const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
-                self.bss_section_index = @intCast(u16, data_seg.sections.items.len);
-                try data_seg.addSection(self.base.allocator, "__bss", .{
-                    .flags = macho.S_ZEROFILL,
-                });
-            }
-            break :blk .{
-                .seg = self.data_segment_cmd_index.?,
-                .sect = self.bss_section_index.?,
-            };
+        const match = MatchingSection{
+            .seg = self.data_segment_cmd_index.?,
+            .sect = self.bss_section_index.?,
         };
         _ = try self.section_ordinals.getOrPut(self.base.allocator, match);