Commit 9306dbd619

Jakub Konka <kubkon@jakubkonka.com>
2020-09-07 07:18:44
Fix bug where __text section would get overwritten
Fixes a bug where the last written load command would accidentally overwrite the beginning of the __text section. Also defines missing MachO constants and relocation structs/enums. Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
1 parent 42b1b6b
Changed files (2)
lib
src-self-hosted
lib/std/macho.zig
@@ -647,6 +647,32 @@ pub const nlist_64 = extern struct {
     n_value: u64,
 };
 
+/// Format of a relocation entry of a Mach-O file.  Modified from the 4.3BSD
+/// format.  The modifications from the original format were changing the value
+/// of the r_symbolnum field for "local" (r_extern == 0) relocation entries.
+/// This modification is required to support symbols in an arbitrary number of
+/// sections not just the three sections (text, data and bss) in a 4.3BSD file.
+/// Also the last 4 bits have had the r_type tag added to them.
+///
+/// The declared field widths add up to 64 bits (32 + 24 + 1 + 2 + 1 + 4).
+pub const relocation_info = packed struct {
+    /// offset in the section to what is being relocated
+    r_address: i32,
+
+    /// symbol index if r_extern == 1 or section ordinal if r_extern == 0
+    r_symbolnum: u24,
+
+    /// was relocated pc relative already
+    r_pcrel: u1,
+
+    /// 0=byte, 1=word, 2=long, 3=quad
+    r_length: u2,
+
+    /// does not include value of sym referenced
+    r_extern: u1,
+
+    /// if not 0, machine specific relocation type
+    r_type: u4,
+};
+
 /// After MacOS X 10.1 when a new load command is added that is required to be
 /// understood by the dynamic linker for the image to execute properly the
 /// LC_REQ_DYLD bit will be or'ed into the load command constant.  If the dynamic
@@ -1086,13 +1112,58 @@ pub const N_ECOML = 0xe8;
 /// second stab entry with length information
 pub const N_LENG = 0xfe;
 
-/// If a segment contains any sections marked with S_ATTR_DEBUG then all
-/// sections in that segment must have this attribute.  No section other than
-/// a section marked with this attribute may reference the contents of this
-/// section.  A section with this attribute may contain no symbols and must have
-/// a section type S_REGULAR.  The static linker will not copy section contents
-/// from sections with this attribute into its output file.  These sections
-/// generally contain DWARF debugging info.
+// For the two types of symbol pointers sections and the symbol stubs section
+// they have indirect symbol table entries.  For each of the entries in the
+// section the indirect symbol table entries, in corresponding order in the
+// indirect symbol table, start at the index stored in the reserved1 field
+// of the section structure.  Since the indirect symbol table entries
+// correspond to the entries in the section the number of indirect symbol table
+// entries is inferred from the size of the section divided by the size of the
+// entries in the section.  For symbol pointers sections the size of the entries
+// in the section is 4 bytes and for symbol stubs sections the byte size of the
+// stubs is stored in the reserved2 field of the section structure.
+
+/// section with only non-lazy symbol pointers
+pub const S_NON_LAZY_SYMBOL_POINTERS = 0x6;
+
+/// section with only lazy symbol pointers
+pub const S_LAZY_SYMBOL_POINTERS = 0x7;
+
+/// section with only symbol stubs, byte size of stub in the reserved2 field
+pub const S_SYMBOL_STUBS = 0x8;
+
+/// section with only function pointers for initialization
+pub const S_MOD_INIT_FUNC_POINTERS = 0x9;
+
+/// section with only function pointers for termination
+pub const S_MOD_TERM_FUNC_POINTERS = 0xa;
+
+/// section contains symbols that are to be coalesced
+pub const S_COALESCED = 0xb;
+
+/// zero fill on demand section (that can be larger than 4 gigabytes)
+pub const S_GB_ZEROFILL = 0xc;
+
+/// section with only pairs of function pointers for interposing
+pub const S_INTERPOSING = 0xd;
+
+/// section with only 16 byte literals
+pub const S_16BYTE_LITERALS = 0xe;
+
+/// section contains DTrace Object Format
+pub const S_DTRACE_DOF = 0xf;
+
+/// section with only lazy symbol pointers to lazy loaded dylibs
+pub const S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10;
+
+// If a segment contains any sections marked with S_ATTR_DEBUG then all
+// sections in that segment must have this attribute.  No section other than
+// a section marked with this attribute may reference the contents of this
+// section.  A section with this attribute may contain no symbols and must have
+// a section type S_REGULAR.  The static linker will not copy section contents
+// from sections with this attribute into its output file.  These sections
+// generally contain DWARF debugging info.
+
 /// a debug section
 pub const S_ATTR_DEBUG = 0x02000000;
 
@@ -1154,3 +1225,35 @@ pub const VM_PROT_WRITE: vm_prot_t = 0x2;
 
 /// VM execute permission
 pub const VM_PROT_EXECUTE: vm_prot_t = 0x4;
+
+/// Relocation types for x86_64, encoded in 4 bits.
+/// Only the first tag has an explicit value (0); the remaining tags take
+/// consecutive values in declaration order, so X86_64_RELOC_TLV is 9.
+/// NOTE(review): presumably these are the values stored in the 4-bit
+/// `r_type` field of `relocation_info` — confirm against the Mach-O headers.
+pub const reloc_type_x86_64 = packed enum(u4) {
+    /// for absolute addresses
+    X86_64_RELOC_UNSIGNED = 0,
+
+    /// for signed 32-bit displacement
+    X86_64_RELOC_SIGNED,
+
+    /// a CALL/JMP instruction with 32-bit displacement
+    X86_64_RELOC_BRANCH,
+
+    /// a MOVQ load of a GOT entry
+    X86_64_RELOC_GOT_LOAD,
+
+    /// other GOT references
+    X86_64_RELOC_GOT,
+
+    /// must be followed by a X86_64_RELOC_UNSIGNED
+    X86_64_RELOC_SUBTRACTOR,
+
+    /// for signed 32-bit displacement with a -1 addend
+    X86_64_RELOC_SIGNED_1,
+
+    /// for signed 32-bit displacement with a -2 addend
+    X86_64_RELOC_SIGNED_2,
+
+    /// for signed 32-bit displacement with a -4 addend
+    X86_64_RELOC_SIGNED_4,
+
+    /// for thread local variables
+    X86_64_RELOC_TLV,
+};
src-self-hosted/link/MachO.zig
@@ -32,6 +32,20 @@ const LoadCommand = union(enum) {
             .Dysymtab => |x| x.cmdsize,
         };
     }
+
+    /// Writes this load command to `file` at byte position `offset`.
+    /// Dispatches on the active union tag; every variant forwards its payload
+    /// to `writeGeneric`. Any error from the underlying write is returned to
+    /// the caller.
+    pub fn write(self: LoadCommand, file: *fs.File, offset: u64) !void {
+        return switch (self) {
+            .Segment => |cmd| writeGeneric(cmd, file, offset),
+            .LinkeditData => |cmd| writeGeneric(cmd, file, offset),
+            .Symtab => |cmd| writeGeneric(cmd, file, offset),
+            .Dysymtab => |cmd| writeGeneric(cmd, file, offset),
+        };
+    }
+
+    /// Writes the in-memory bytes of `cmd` (whatever its concrete type) to
+    /// `file` at byte position `offset` via `pwriteAll`, returning its error
+    /// on failure.
+    fn writeGeneric(cmd: anytype, file: *fs.File, offset: u64) !void {
+        // Wrap the value in a one-element array so it can be viewed as a
+        // byte slice with `mem.sliceAsBytes`.
+        const slice = [1]@TypeOf(cmd){cmd};
+        return file.pwriteAll(mem.sliceAsBytes(slice[0..1]), offset);
+    }
 };
 
 base: File,
@@ -258,8 +272,7 @@ pub fn flush(self: *MachO, module: *Module) !void {
 
             var last_cmd_offset: usize = @sizeOf(macho.mach_header_64);
             for (self.load_commands.items) |cmd| {
-                const cmd_to_write = [1]@TypeOf(cmd){cmd};
-                try self.base.file.?.pwriteAll(mem.sliceAsBytes(cmd_to_write[0..1]), last_cmd_offset);
+                try cmd.write(&self.base.file.?, last_cmd_offset);
                 last_cmd_offset += cmd.cmdsize();
             }
             const off = @sizeOf(macho.mach_header_64) + @sizeOf(macho.segment_command_64);
@@ -346,19 +359,18 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
         .n_desc = 0,
         .n_value = addr,
     };
-    self.offset_table.items[decl.link.macho.offset_table_index.?] = addr;
 
+    // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated.
+    const decl_exports = module.decl_exports.get(decl) orelse &[0]*Module.Export{};
+    try self.updateDeclExports(module, decl, decl_exports);
     try self.writeSymbol(decl.link.macho.symbol_table_index.?);
 
     const text_section = self.sections.items[self.text_section_index.?];
     const section_offset = symbol.n_value - text_section.addr;
     const file_offset = text_section.offset + section_offset;
     log.debug("file_offset 0x{x}\n", .{file_offset});
-    try self.base.file.?.pwriteAll(code, file_offset);
 
-    // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated.
-    const decl_exports = module.decl_exports.get(decl) orelse &[0]*Module.Export{};
-    return self.updateDeclExports(module, decl, decl_exports);
+    try self.base.file.?.pwriteAll(code, file_offset);
 }
 
 pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void {}
@@ -374,7 +386,7 @@ pub fn updateDeclExports(
 
     if (decl.link.macho.symbol_table_index == null) return;
 
-    var decl_sym = self.symbol_table.items[decl.link.macho.symbol_table_index.?];
+    const decl_sym = &self.symbol_table.items[decl.link.macho.symbol_table_index.?];
     // TODO implement
     if (exports.len == 0) return;
 
@@ -504,10 +516,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64,
         const text_capacity = self.allocatedSize(text_section.offset);
         const needed_size = (addr + new_block_size) - text_section.addr;
         log.debug("text capacity 0x{x}, needed size 0x{x}\n", .{ text_capacity, needed_size });
-
-        if (needed_size > text_capacity) {
-            // TODO handle growth
-        }
+        assert(needed_size <= text_capacity); // TODO handle growth
 
         self.last_text_block = text_block;
         text_section.size = needed_size;
@@ -659,7 +668,7 @@ fn writeSymbol(self: *MachO, index: usize) !void {
     defer tracy.end();
 
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
-    var sym = [1]macho.nlist_64{self.symbol_table.items[index]};
+    const sym = [1]macho.nlist_64{self.symbol_table.items[index]};
     const off = symtab.symoff + @sizeOf(macho.nlist_64) * index;
     log.debug("writing symbol {} at 0x{x}\n", .{ sym[0], off });
     try self.base.file.?.pwriteAll(mem.sliceAsBytes(sym[0..1]), off);