Commit 2cc1623925

Jakub Konka <kubkon@jakubkonka.com>
2024-04-19 00:10:43
link/elf: fix parsing SHF_STRINGS section
1 parent 63a40bf
src/link/Elf/merge_section.zig
@@ -56,9 +56,9 @@ pub const MergeSection = struct {
         return msec.insert(allocator, with_null);
     }
 
+    /// Finalizes the merge section and clears hash table.
     /// Sorts all owned subsections.
-    /// Clears string table.
-    pub fn sort(msec: *MergeSection, elf_file: *Elf) !void {
+    pub fn finalize(msec: *MergeSection, elf_file: *Elf) !void {
         const gpa = elf_file.base.comp.gpa;
         try msec.subsections.ensureTotalCapacityPrecise(gpa, msec.table.count());
 
@@ -270,14 +270,6 @@ pub const InputMergeSection = struct {
         try imsec.strings.append(allocator, .{ .pos = index, .len = @intCast(string.len) });
     }
 
-    pub fn insertZ(imsec: *InputMergeSection, allocator: Allocator, string: []const u8) !void {
-        const index: u32 = @intCast(imsec.bytes.items.len);
-        try imsec.bytes.ensureUnusedCapacity(allocator, string.len + 1);
-        imsec.bytes.appendSliceAssumeCapacity(string);
-        imsec.bytes.appendAssumeCapacity(0);
-        try imsec.strings.append(allocator, .{ .pos = index, .len = @intCast(string.len + 1) });
-    }
-
     pub const Index = u32;
 };
 
src/link/Elf/Object.zig
@@ -683,39 +683,40 @@ pub fn initMergeSections(self: *Object, elf_file: *Elf) !void {
 
         const data = try self.codeDecompressAlloc(elf_file, atom_index);
         defer gpa.free(data);
-        const sh_entsize: u32 = @intCast(shdr.sh_entsize);
 
         if (shdr.sh_flags & elf.SHF_STRINGS != 0) {
-            var pos: u32 = 0;
-            while (pos < data.len) switch (sh_entsize) {
-                0, 1 => {
-                    // According to mold's source code, GHC emits MS sections with sh_entsize = 0.
-                    // This actually can also happen for output created with `-r` mode.
-                    const string = mem.sliceTo(@as([*:0]const u8, @ptrCast(data.ptr + pos)), 0);
-                    if (pos + string.len == data.len) {
-                        var err = try elf_file.addErrorWithNotes(1);
-                        try err.addMsg(elf_file, "string not null terminated", .{});
-                        try err.addNote(elf_file, "in {}:{s}", .{ self.fmtPath(), atom_ptr.name(elf_file) });
-                        return error.MalformedObject;
-                    }
-                    try imsec.insertZ(gpa, string);
-                    try imsec.offsets.append(gpa, pos);
-                    pos += @as(u32, @intCast(string.len)) + 1; // account for null
-                },
-                else => |entsize| {
-                    const string = data.ptr[pos..][0..entsize];
-                    if (string[string.len - 1] != 0) {
-                        var err = try elf_file.addErrorWithNotes(1);
-                        try err.addMsg(elf_file, "string not null terminated", .{});
-                        try err.addNote(elf_file, "in {}:{s}", .{ self.fmtPath(), atom_ptr.name(elf_file) });
-                        return error.MalformedObject;
-                    }
-                    try imsec.insert(gpa, string);
-                    try imsec.offsets.append(gpa, pos);
-                    pos += @as(u32, @intCast(string.len));
-                },
+            const sh_entsize: u32 = switch (shdr.sh_entsize) {
+                // According to mold's source code, GHC emits MS sections with sh_entsize = 0.
+                // This actually can also happen for output created with `-r` mode.
+                0 => 1,
+                else => |x| @intCast(x),
             };
+
+            // Reports whether every byte of `slice` is zero, i.e. the entry is a null terminator.
+            const isNull = struct {
+                fn isNull(slice: []const u8) bool {
+                    return mem.allEqual(u8, slice, 0);
+                }
+            }.isNull;
+
+            // Split `data` into strings that each end with an all-zero entry of `sh_entsize` bytes.
+            var start: u32 = 0;
+            while (start < data.len) {
+                var end = start; // Invariant: end <= data.len, so `data.len - end` cannot underflow.
+                while (data.len - end >= sh_entsize and !isNull(data[end..][0..sh_entsize])) : (end += sh_entsize) {}
+                if (data.len - end < sh_entsize) { // No whole entry left: the terminator is missing or truncated.
+                    var err = try elf_file.addErrorWithNotes(1);
+                    try err.addMsg(elf_file, "string not null terminated", .{});
+                    try err.addNote(elf_file, "in {}:{s}", .{ self.fmtPath(), atom_ptr.name(elf_file) });
+                    return error.MalformedObject;
+                }
+                end += sh_entsize;
+                try imsec.insert(gpa, data[start..end]);
+                try imsec.offsets.append(gpa, start);
+                start = end;
+            }
         } else {
+            const sh_entsize: u32 = @intCast(shdr.sh_entsize);
             if (sh_entsize == 0) continue; // Malformed, don't split but don't error out
             if (shdr.sh_size % sh_entsize != 0) {
                 var err = try elf_file.addErrorWithNotes(1);
src/link/Elf/relocatable.zig
@@ -181,7 +181,7 @@ pub fn flushObject(elf_file: *Elf, comp: *Compilation, module_obj_path: ?[]const
     elf_file.markEhFrameAtomsDead();
     try elf_file.resolveMergeSections();
     try elf_file.addCommentString();
-    try elf_file.sortMergeSections();
+    try elf_file.finalizeMergeSections();
     claimUnresolved(elf_file);
 
     try initSections(elf_file);
src/link/Elf.zig
@@ -1340,7 +1340,7 @@ pub fn flushModule(self: *Elf, arena: Allocator, prog_node: *std.Progress.Node)
     };
 
     try self.addCommentString();
-    try self.sortMergeSections();
+    try self.finalizeMergeSections();
     try self.initOutputSections();
     try self.addLinkerDefinedSymbols();
     self.claimUnresolved();
@@ -3369,9 +3369,9 @@ pub fn resolveMergeSections(self: *Elf) !void {
     if (has_errors) return error.FlushFailure;
 }
 
-pub fn sortMergeSections(self: *Elf) !void {
+pub fn finalizeMergeSections(self: *Elf) !void {
     for (self.merge_sections.items) |*msec| {
-        try msec.sort(self);
+        try msec.finalize(self);
     }
 }