Commit aca911ca18

Luuk de Gram <luuk@degram.dev>
2022-08-19 19:26:44
wasm/archive: correctly parse long file names
Wasm archive files are encoded the same way as GNU archives. This means that, for a long file name, the header's name field holds the character index at which the name starts within the long file name table, rather than the length of the name. Each name in that table is terminated by an LF character (0x0a). This commit also makes a cosmetic change: the `self` parameter is renamed to `archive`.
1 parent e5e6eb9
Changed files (1)
src
link
src/link/Wasm/Archive.zig
@@ -15,6 +15,12 @@ name: []const u8,
 
 header: ar_hdr = undefined,
 
+/// The table of long file names, each delimited by an LF character (0x0a).
+/// This is stored as a single slice of bytes, because a header name
+/// refers to the character index at which a file name starts, rather
+/// than to the position of that name in the list.
+long_file_names: []const u8 = undefined,
+
 /// Parsed table of contents.
 /// Each symbol name points to a list of all definition
 /// sites within the current static archive.
@@ -53,32 +59,33 @@ const ar_hdr = extern struct {
     /// Always contains ARFMAG.
     ar_fmag: [2]u8,
 
-    const NameOrLength = union(enum) {
-        Name: []const u8,
-        Length: u32,
+    const NameOrIndex = union(enum) {
+        name: []const u8,
+        index: u32,
     };
-    fn nameOrLength(self: ar_hdr) !NameOrLength {
-        const value = getValue(&self.ar_name);
+
+    fn nameOrIndex(archive: ar_hdr) !NameOrIndex {
+        const value = getValue(&archive.ar_name);
         const slash_index = mem.indexOfScalar(u8, value, '/') orelse return error.MalformedArchive;
         const len = value.len;
         if (slash_index == len - 1) {
             // Name stored directly
-            return NameOrLength{ .Name = value };
+            return NameOrIndex{ .name = value };
         } else {
             // Name follows the header directly and its length is encoded in
             // the name field.
-            const length = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10);
-            return NameOrLength{ .Length = length };
+            const index = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10);
+            return NameOrIndex{ .index = index };
         }
     }
 
-    fn date(self: ar_hdr) !u64 {
-        const value = getValue(&self.ar_date);
+    fn date(archive: ar_hdr) !u64 {
+        const value = getValue(&archive.ar_date);
         return std.fmt.parseInt(u64, value, 10);
     }
 
-    fn size(self: ar_hdr) !u32 {
-        const value = getValue(&self.ar_size);
+    fn size(archive: ar_hdr) !u32 {
+        const value = getValue(&archive.ar_size);
         return std.fmt.parseInt(u32, value, 10);
     }
 
@@ -87,18 +94,19 @@ const ar_hdr = extern struct {
     }
 };
 
-pub fn deinit(self: *Archive, allocator: Allocator) void {
-    for (self.toc.keys()) |*key| {
+pub fn deinit(archive: *Archive, allocator: Allocator) void {
+    for (archive.toc.keys()) |*key| {
         allocator.free(key.*);
     }
-    for (self.toc.values()) |*value| {
+    for (archive.toc.values()) |*value| {
         value.deinit(allocator);
     }
-    self.toc.deinit(allocator);
+    archive.toc.deinit(allocator);
+    allocator.free(archive.long_file_names);
 }
 
-pub fn parse(self: *Archive, allocator: Allocator) !void {
-    const reader = self.file.reader();
+pub fn parse(archive: *Archive, allocator: Allocator) !void {
+    const reader = archive.file.reader();
 
     const magic = try reader.readBytesNoEof(SARMAG);
     if (!mem.eql(u8, &magic, ARMAG)) {
@@ -106,38 +114,31 @@ pub fn parse(self: *Archive, allocator: Allocator) !void {
         return error.NotArchive;
     }
 
-    self.header = try reader.readStruct(ar_hdr);
-    if (!mem.eql(u8, &self.header.ar_fmag, ARFMAG)) {
-        log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, self.header.ar_fmag });
+    archive.header = try reader.readStruct(ar_hdr);
+    if (!mem.eql(u8, &archive.header.ar_fmag, ARFMAG)) {
+        log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, archive.header.ar_fmag });
         return error.NotArchive;
     }
 
-    try self.parseTableOfContents(allocator, reader);
+    try archive.parseTableOfContents(allocator, reader);
+    try archive.parseNameTable(allocator, reader);
 }
 
-fn parseName(allocator: Allocator, header: ar_hdr, reader: anytype) ![]u8 {
-    const name_or_length = try header.nameOrLength();
-    var name: []u8 = undefined;
-    switch (name_or_length) {
-        .Name => |n| {
-            name = try allocator.dupe(u8, n);
-        },
-        .Length => |len| {
-            var n = try allocator.alloc(u8, len);
-            defer allocator.free(n);
-            try reader.readNoEof(n);
-            const actual_len = mem.indexOfScalar(u8, n, @as(u8, 0)) orelse n.len;
-            name = try allocator.dupe(u8, n[0..actual_len]);
+fn parseName(archive: *const Archive, header: ar_hdr) ![]const u8 {
+    const name_or_index = try header.nameOrIndex();
+    switch (name_or_index) {
+        .name => |name| return name,
+        .index => |index| {
+            const name = mem.sliceTo(archive.long_file_names[index..], 0x0a);
+            return mem.trimRight(u8, name, "/");
         },
     }
-    return name;
 }
 
-fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) !void {
-    log.debug("parsing table of contents for archive file '{s}'", .{self.name});
+fn parseTableOfContents(archive: *Archive, allocator: Allocator, reader: anytype) !void {
     // size field can have extra spaces padded in front as well as the end,
     // so we trim those first before parsing the ASCII value.
-    const size_trimmed = std.mem.trim(u8, &self.header.ar_size, " ");
+    const size_trimmed = mem.trim(u8, &archive.header.ar_size, " ");
     const sym_tab_size = try std.fmt.parseInt(u32, size_trimmed, 10);
 
     const num_symbols = try reader.readIntBig(u32);
@@ -157,7 +158,7 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) !
 
     var i: usize = 0;
     while (i < sym_tab.len) {
-        const string = std.mem.sliceTo(sym_tab[i..], 0);
+        const string = mem.sliceTo(sym_tab[i..], 0);
         if (string.len == 0) {
             i += 1;
             continue;
@@ -165,7 +166,7 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) !
         i += string.len;
         const name = try allocator.dupe(u8, string);
         errdefer allocator.free(name);
-        const gop = try self.toc.getOrPut(allocator, name);
+        const gop = try archive.toc.getOrPut(allocator, name);
         if (gop.found_existing) {
             allocator.free(name);
         } else {
@@ -175,31 +176,46 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) !
     }
 }
 
+fn parseNameTable(archive: *Archive, allocator: Allocator, reader: anytype) !void {
+    const header: ar_hdr = try reader.readStruct(ar_hdr);
+    if (!mem.eql(u8, &header.ar_fmag, ARFMAG)) {
+        log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, header.ar_fmag });
+        return error.MalformedArchive;
+    }
+    if (!mem.eql(u8, header.ar_name[0..2], "//")) {
+        log.err("invalid archive. Long name table missing", .{});
+        return error.MalformedArchive;
+    }
+    const table_size = try header.size();
+    const long_file_names = try allocator.alloc(u8, table_size);
+    errdefer allocator.free(long_file_names);
+    try reader.readNoEof(long_file_names);
+    archive.long_file_names = long_file_names;
+}
+
 /// From a given file offset, starts reading for a file header.
 /// When found, parses the object file into an `Object` and returns it.
-pub fn parseObject(self: Archive, allocator: Allocator, file_offset: u32) !Object {
-    try self.file.seekTo(file_offset);
-    const reader = self.file.reader();
+pub fn parseObject(archive: Archive, allocator: Allocator, file_offset: u32) !Object {
+    try archive.file.seekTo(file_offset);
+    const reader = archive.file.reader();
     const header = try reader.readStruct(ar_hdr);
-    const current_offset = try self.file.getPos();
-    try self.file.seekTo(0);
+    const current_offset = try archive.file.getPos();
+    try archive.file.seekTo(0);
 
     if (!mem.eql(u8, &header.ar_fmag, ARFMAG)) {
         log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, header.ar_fmag });
         return error.MalformedArchive;
     }
 
-    const object_name = try parseName(allocator, header, reader);
-    defer allocator.free(object_name);
-
+    const object_name = try archive.parseName(header);
     const name = name: {
         var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
-        const path = try std.os.realpath(self.name, &buffer);
+        const path = try std.os.realpath(archive.name, &buffer);
         break :name try std.fmt.allocPrint(allocator, "{s}({s})", .{ path, object_name });
     };
     defer allocator.free(name);
 
-    const object_file = try std.fs.cwd().openFile(self.name, .{});
+    const object_file = try std.fs.cwd().openFile(archive.name, .{});
     errdefer object_file.close();
 
     try object_file.seekTo(current_offset);