zig/src/link/Wasm/Archive.zig at master

  1/// A list of long file names, delimited by a LF character (0x0a).
  2/// This is stored as a single slice of bytes, as the header-names
  3/// point to the character index of a file name, rather than the index
  4/// in the list.
  5/// Points into `file_contents`.
  6long_file_names: RelativeSlice,
  7
  8/// Parsed table of contents.
  9/// Each symbol name points to a list of all definition
 10/// sites within the current static archive.
 11toc: Toc,
 12
 13/// Key points into `LazyArchive` `file_contents`.
 14/// Value is allocated with gpa.
 15const Toc = std.StringArrayHashMapUnmanaged(std.ArrayList(u32));
 16
 17const ARMAG = std.elf.ARMAG;
 18const ARFMAG = std.elf.ARFMAG;
 19
 20const RelativeSlice = struct {
 21    off: u32,
 22    len: u32,
 23};
 24
 25const Header = extern struct {
 26    /// Member file name, sometimes / terminated.
 27    name: [16]u8,
 28    /// File date, decimal seconds since Epoch.
 29    date: [12]u8,
 30    /// User ID, in ASCII format.
 31    uid: [6]u8,
 32    /// Group ID, in ASCII format.
 33    gid: [6]u8,
 34    /// File mode, in ASCII octal.
 35    mode: [8]u8,
 36    /// File size, in ASCII decimal.
 37    size: [10]u8,
 38    /// Always contains ARFMAG.
 39    fmag: [2]u8,
 40
 41    const NameOrIndex = union(enum) {
 42        name: []const u8,
 43        index: u32,
 44    };
 45
 46    fn nameOrIndex(archive: Header) !NameOrIndex {
 47        const value = getValue(&archive.name);
 48        const slash_index = mem.indexOfScalar(u8, value, '/') orelse return error.MalformedArchive;
 49        const len = value.len;
 50        if (slash_index == len - 1) {
 51            // Name stored directly
 52            return .{ .name = value };
 53        } else {
 54            // Name follows the header directly and its length is encoded in
 55            // the name field.
 56            const index = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10);
 57            return .{ .index = index };
 58        }
 59    }
 60
 61    fn parsedSize(archive: Header) !u32 {
 62        const value = getValue(&archive.size);
 63        return std.fmt.parseInt(u32, value, 10);
 64    }
 65
 66    fn getValue(raw: []const u8) []const u8 {
 67        return mem.trimEnd(u8, raw, &[_]u8{@as(u8, 0x20)});
 68    }
 69};
 70
 71pub fn deinit(archive: *Archive, gpa: Allocator) void {
 72    deinitToc(gpa, &archive.toc);
 73    archive.* = undefined;
 74}
 75
 76fn deinitToc(gpa: Allocator, toc: *Toc) void {
 77    for (toc.values()) |*value| value.deinit(gpa);
 78    toc.deinit(gpa);
 79}
 80
 81pub fn parse(gpa: Allocator, file_contents: []const u8) !Archive {
 82    var pos: usize = 0;
 83
 84    if (!mem.eql(u8, file_contents[0..ARMAG.len], ARMAG)) return error.BadArchiveMagic;
 85    pos += ARMAG.len;
 86
 87    const header = mem.bytesAsValue(Header, file_contents[pos..][0..@sizeOf(Header)]);
 88    if (!mem.eql(u8, &header.fmag, ARFMAG)) return error.BadHeaderDelimiter;
 89    pos += @sizeOf(Header);
 90
 91    // The size field can have extra spaces padded in front as well as
 92    // the end, so we trim those first before parsing the ASCII value.
 93    const size_trimmed = mem.trim(u8, &header.size, " ");
 94    const sym_tab_size = try std.fmt.parseInt(u32, size_trimmed, 10);
 95
 96    const num_symbols = mem.readInt(u32, file_contents[pos..][0..4], .big);
 97    pos += 4;
 98
 99    const symbol_positions_size = @sizeOf(u32) * num_symbols;
100    const symbol_positions_be = mem.bytesAsSlice(u32, file_contents[pos..][0..symbol_positions_size]);
101    pos += symbol_positions_size;
102
103    const sym_tab = file_contents[pos..][0 .. sym_tab_size - 4 - symbol_positions_size];
104    pos += sym_tab.len;
105
106    var toc: Toc = .empty;
107    errdefer deinitToc(gpa, &toc);
108
109    var sym_tab_pos: usize = 0;
110    for (0..num_symbols) |i| {
111        const name = mem.sliceTo(sym_tab[sym_tab_pos..], 0);
112        sym_tab_pos += name.len + 1;
113        if (name.len == 0) continue;
114
115        const gop = try toc.getOrPut(gpa, name);
116        if (!gop.found_existing) gop.value_ptr.* = .empty;
117        try gop.value_ptr.append(gpa, switch (native_endian) {
118            .big => symbol_positions_be[i],
119            .little => @byteSwap(symbol_positions_be[i]),
120        });
121    }
122
123    const long_file_names: RelativeSlice = s: {
124        const sub_header = mem.bytesAsValue(Header, file_contents[pos..][0..@sizeOf(Header)]);
125        pos += @sizeOf(Header);
126
127        if (!mem.eql(u8, &header.fmag, ARFMAG)) return error.BadHeaderDelimiter;
128        if (!mem.eql(u8, sub_header.name[0..2], "//")) return error.MissingTableName;
129        const table_size = try sub_header.parsedSize();
130
131        break :s .{
132            .off = @intCast(pos),
133            .len = table_size,
134        };
135    };
136
137    return .{
138        .toc = toc,
139        .long_file_names = long_file_names,
140    };
141}
142
/// From a given file offset, starts reading for a file header.
/// When found, parses the object file into an `Object` and returns it.
///
/// `object_offset` is the offset within `file_contents` at which the
/// member's header starts. The object name is taken either directly from
/// the header or, when the header holds an index, from the long file names
/// table recorded in `archive`.
///
/// Errors:
/// * `error.BadHeaderDelimiter` - the header does not end with `ARFMAG`.
/// * `error.MalformedArchive` - the header, the long file name or the
///   member contents would lie outside of `file_contents`, or the name
///   field is invalid.
/// * `std.fmt.parseInt` errors for non-numeric fields, plus whatever
///   `Object.parse` returns.
pub fn parseObject(
    archive: Archive,
    wasm: *Wasm,
    file_contents: []const u8,
    object_offset: u32,
    path: Path,
    host_name: Wasm.OptionalString,
    scratch_space: *Object.ScratchSpace,
    must_link: bool,
    gc_sections: bool,
) !Object {
    if (object_offset > file_contents.len) return error.MalformedArchive;
    const member_bytes = file_contents[object_offset..];
    if (member_bytes.len < @sizeOf(Header)) return error.MalformedArchive;

    const header = mem.bytesAsValue(Header, member_bytes[0..@sizeOf(Header)]);
    if (!mem.eql(u8, &header.fmag, ARFMAG)) return error.BadHeaderDelimiter;

    const object_name = switch (try header.nameOrIndex()) {
        // The inline name is the space-trimmed prefix of the name field.
        // Re-slice it from the header inside `file_contents` so the name is
        // guaranteed to outlive this call regardless of how `nameOrIndex`
        // received the header.
        .name => |name| header.name[0..name.len],
        .index => |index| n: {
            const table = archive.long_file_names;
            if (table.off > file_contents.len or table.len > file_contents.len - table.off)
                return error.MalformedArchive;
            const long_file_names = file_contents[table.off..][0..table.len];

            if (index > long_file_names.len) return error.MalformedArchive;
            // Entries are delimited by LF and carry a trailing slash.
            const name = mem.sliceTo(long_file_names[index..], 0x0a);
            break :n mem.trimEnd(u8, name, "/");
        },
    };

    const object_file_size = try header.parsedSize();
    // Slicing relative to `member_bytes` avoids doing the offset arithmetic
    // in u32, where `object_offset + @sizeOf(Header)` could overflow.
    const payload = member_bytes[@sizeOf(Header)..];
    if (object_file_size > payload.len) return error.MalformedArchive;
    const contents = payload[0..object_file_size];

    return Object.parse(wasm, contents, path, object_name, host_name, scratch_space, must_link, gc_sections);
}
174
175const Archive = @This();
176
177const builtin = @import("builtin");
178const native_endian = builtin.cpu.arch.endian();
179
180const std = @import("std");
181const mem = std.mem;
182const Allocator = std.mem.Allocator;
183const Path = std.Build.Cache.Path;
184
185const Wasm = @import("../Wasm.zig");
186const Object = @import("Object.zig");