master
//! A tar archive is a single ordinary file which can contain many files (or
//! directories, symlinks, ...). It is built from a series of blocks, each 512
//! bytes in size. The first block of each entry is a header which defines the
//! type, name, size, permissions and other attributes. The header is followed
//! by a series of blocks of file content, if the entry has any content. Content
//! is padded to the block size, so the next header always starts at a block
//! boundary.
   7//!
   8//! This simple format is extended by GNU and POSIX pax extensions to support
   9//! file names longer than 256 bytes and additional attributes.
  10//!
//! This is not a comprehensive tar parser. Here we only handle the file types
//! needed to support the Zig package manager: normal file, directory, symbolic
//! link. And a subset of attributes: name, size, permissions.
  14//!
  15//! GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
  16//! pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13
  17
  18const std = @import("std");
  19const assert = std.debug.assert;
  20const testing = std.testing;
  21
  22pub const Writer = @import("tar/Writer.zig");
  23
  24/// Provide this to receive detailed error messages.
  25/// When this is provided, some errors which would otherwise be returned
  26/// immediately will instead be added to this structure. The API user must check
  27/// the errors in diagnostics to know whether the operation succeeded or failed.
  28pub const Diagnostics = struct {
  29    allocator: std.mem.Allocator,
  30    errors: std.ArrayList(Error) = .empty,
  31
  32    entries: usize = 0,
  33    root_dir: []const u8 = "",
  34
  35    pub const Error = union(enum) {
  36        unable_to_create_sym_link: struct {
  37            code: anyerror,
  38            file_name: []const u8,
  39            link_name: []const u8,
  40        },
  41        unable_to_create_file: struct {
  42            code: anyerror,
  43            file_name: []const u8,
  44        },
  45        unsupported_file_type: struct {
  46            file_name: []const u8,
  47            file_type: Header.Kind,
  48        },
  49        components_outside_stripped_prefix: struct {
  50            file_name: []const u8,
  51        },
  52    };
  53
  54    fn findRoot(d: *Diagnostics, kind: FileKind, path: []const u8) !void {
  55        if (path.len == 0) return;
  56
  57        d.entries += 1;
  58        const root_dir = rootDir(path, kind);
  59        if (d.entries == 1) {
  60            d.root_dir = try d.allocator.dupe(u8, root_dir);
  61            return;
  62        }
  63        if (d.root_dir.len == 0 or std.mem.eql(u8, root_dir, d.root_dir))
  64            return;
  65        d.allocator.free(d.root_dir);
  66        d.root_dir = "";
  67    }
  68
  69    // Returns root dir of the path, assumes non empty path.
  70    fn rootDir(path: []const u8, kind: FileKind) []const u8 {
  71        const start_index: usize = if (path[0] == '/') 1 else 0;
  72        const end_index: usize = if (path[path.len - 1] == '/') path.len - 1 else path.len;
  73        const buf = path[start_index..end_index];
  74        if (std.mem.indexOfScalarPos(u8, buf, 0, '/')) |idx| {
  75            return buf[0..idx];
  76        }
  77
  78        return switch (kind) {
  79            .file => "",
  80            .sym_link => "",
  81            .directory => buf,
  82        };
  83    }
  84
  85    test rootDir {
  86        const expectEqualStrings = testing.expectEqualStrings;
  87        try expectEqualStrings("", rootDir("a", .file));
  88        try expectEqualStrings("a", rootDir("a", .directory));
  89        try expectEqualStrings("b", rootDir("b", .directory));
  90        try expectEqualStrings("c", rootDir("/c", .directory));
  91        try expectEqualStrings("d", rootDir("/d/", .directory));
  92        try expectEqualStrings("a", rootDir("a/b", .directory));
  93        try expectEqualStrings("a", rootDir("a/b", .file));
  94        try expectEqualStrings("a", rootDir("a/b/c", .directory));
  95    }
  96
  97    pub fn deinit(d: *Diagnostics) void {
  98        for (d.errors.items) |item| {
  99            switch (item) {
 100                .unable_to_create_sym_link => |info| {
 101                    d.allocator.free(info.file_name);
 102                    d.allocator.free(info.link_name);
 103                },
 104                .unable_to_create_file => |info| {
 105                    d.allocator.free(info.file_name);
 106                },
 107                .unsupported_file_type => |info| {
 108                    d.allocator.free(info.file_name);
 109                },
 110                .components_outside_stripped_prefix => |info| {
 111                    d.allocator.free(info.file_name);
 112                },
 113            }
 114        }
 115        d.errors.deinit(d.allocator);
 116        d.allocator.free(d.root_dir);
 117        d.* = undefined;
 118    }
 119};
 120
/// Options controlling how `pipeToFileSystem` unpacks an archive.
pub const PipeOptions = struct {
    /// Number of directory levels to skip when extracting files.
    /// A non-directory entry whose path is empty after stripping is not
    /// extracted; it is reported as `components_outside_stripped_prefix`
    /// (or returned as an error when no diagnostics are provided).
    strip_components: u32 = 0,
    /// How to handle the "mode" property of files from within the tar file.
    mode_mode: ModeMode = .executable_bit_only,
    /// Prevents creation of empty directories.
    /// When set, directory entries are not created on their own.
    exclude_empty_directories: bool = false,
    /// Collects error messages during unpacking
    diagnostics: ?*Diagnostics = null,

    pub const ModeMode = enum {
        /// The mode from the tar file is completely ignored. Files are created
        /// with the default mode when creating files.
        ignore,
        /// The mode from the tar file is inspected for the owner executable bit
        /// only. This bit is copied to the group and other executable bits.
        /// Other bits of the mode are left as the default when creating files.
        executable_bit_only,
    };
};
 142
 143const Header = struct {
 144    const SIZE = 512;
 145    const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155)
 146    const LINK_NAME_SIZE = 100;
 147
 148    bytes: *const [SIZE]u8,
 149
 150    const Kind = enum(u8) {
 151        normal_alias = 0,
 152        normal = '0',
 153        hard_link = '1',
 154        symbolic_link = '2',
 155        character_special = '3',
 156        block_special = '4',
 157        directory = '5',
 158        fifo = '6',
 159        contiguous = '7',
 160        global_extended_header = 'g',
 161        extended_header = 'x',
 162        // Types 'L' and 'K' are used by the GNU format for a meta file
 163        // used to store the path or link name for the next file.
 164        gnu_long_name = 'L',
 165        gnu_long_link = 'K',
 166        gnu_sparse = 'S',
 167        solaris_extended_header = 'X',
 168        _,
 169    };
 170
 171    /// Includes prefix concatenated, if any.
 172    /// TODO: check against "../" and other nefarious things
 173    pub fn fullName(header: Header, buffer: []u8) ![]const u8 {
 174        const n = name(header);
 175        const p = prefix(header);
 176        if (buffer.len < n.len + p.len + 1) return error.TarInsufficientBuffer;
 177        if (!is_ustar(header) or p.len == 0) {
 178            @memcpy(buffer[0..n.len], n);
 179            return buffer[0..n.len];
 180        }
 181        @memcpy(buffer[0..p.len], p);
 182        buffer[p.len] = '/';
 183        @memcpy(buffer[p.len + 1 ..][0..n.len], n);
 184        return buffer[0 .. p.len + 1 + n.len];
 185    }
 186
 187    /// When kind is symbolic_link linked-to name (target_path) is specified in
 188    /// the linkname field.
 189    pub fn linkName(header: Header, buffer: []u8) ![]const u8 {
 190        const link_name = header.str(157, 100);
 191        if (link_name.len == 0) {
 192            return buffer[0..0];
 193        }
 194        if (buffer.len < link_name.len) return error.TarInsufficientBuffer;
 195        const buf = buffer[0..link_name.len];
 196        @memcpy(buf, link_name);
 197        return buf;
 198    }
 199
 200    pub fn name(header: Header) []const u8 {
 201        return header.str(0, 100);
 202    }
 203
 204    pub fn mode(header: Header) !u32 {
 205        return @intCast(try header.octal(100, 8));
 206    }
 207
 208    pub fn size(header: Header) !u64 {
 209        const start = 124;
 210        const len = 12;
 211        const raw = header.bytes[start..][0..len];
 212        //  If the leading byte is 0xff (255), all the bytes of the field
 213        //  (including the leading byte) are concatenated in big-endian order,
 214        //  with the result being a negative number expressed in two’s
 215        //  complement form.
 216        if (raw[0] == 0xff) return error.TarNumericValueNegative;
 217        // If the leading byte is 0x80 (128), the non-leading bytes of the
 218        // field are concatenated in big-endian order.
 219        if (raw[0] == 0x80) {
 220            if (raw[1] != 0 or raw[2] != 0 or raw[3] != 0) return error.TarNumericValueTooBig;
 221            return std.mem.readInt(u64, raw[4..12], .big);
 222        }
 223        return try header.octal(start, len);
 224    }
 225
 226    pub fn chksum(header: Header) !u64 {
 227        return header.octal(148, 8);
 228    }
 229
 230    pub fn is_ustar(header: Header) bool {
 231        const magic = header.bytes[257..][0..6];
 232        return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' ');
 233    }
 234
 235    pub fn prefix(header: Header) []const u8 {
 236        return header.str(345, 155);
 237    }
 238
 239    pub fn kind(header: Header) Kind {
 240        const result: Kind = @enumFromInt(header.bytes[156]);
 241        if (result == .normal_alias) return .normal;
 242        return result;
 243    }
 244
 245    fn str(header: Header, start: usize, len: usize) []const u8 {
 246        return nullStr(header.bytes[start .. start + len]);
 247    }
 248
 249    fn octal(header: Header, start: usize, len: usize) !u64 {
 250        const raw = header.bytes[start..][0..len];
 251        // Zero-filled octal number in ASCII. Each numeric field of width w
 252        // contains w minus 1 digits, and a null
 253        const ltrimmed = std.mem.trimStart(u8, raw, "0 ");
 254        const rtrimmed = std.mem.trimEnd(u8, ltrimmed, " \x00");
 255        if (rtrimmed.len == 0) return 0;
 256        return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader;
 257    }
 258
 259    const Chksums = struct {
 260        unsigned: u64,
 261        signed: i64,
 262    };
 263
 264    // Sum of all bytes in the header block. The chksum field is treated as if
 265    // it were filled with spaces (ASCII 32).
 266    fn computeChksum(header: Header) Chksums {
 267        var cs: Chksums = .{ .signed = 0, .unsigned = 0 };
 268        for (header.bytes, 0..) |v, i| {
 269            const b = if (148 <= i and i < 156) 32 else v; // Treating chksum bytes as spaces.
 270            cs.unsigned += b;
 271            cs.signed += @as(i8, @bitCast(b));
 272        }
 273        return cs;
 274    }
 275
 276    // Checks calculated chksum with value of chksum field.
 277    // Returns error or valid chksum value.
 278    // Zero value indicates empty block.
 279    pub fn checkChksum(header: Header) !u64 {
 280        const field = try header.chksum();
 281        const cs = header.computeChksum();
 282        if (field == 0 and cs.unsigned == 256) return 0;
 283        if (field != cs.unsigned and field != cs.signed) return error.TarHeaderChksum;
 284        return field;
 285    }
 286};
 287
 288// Breaks string on first null character.
 289fn nullStr(str: []const u8) []const u8 {
 290    for (str, 0..) |c, i| {
 291        if (c == 0) return str[0..i];
 292    }
 293    return str;
 294}
 295
/// Type of the file returned by iterator `next` method.
/// Only these three kinds are surfaced by `Iterator.next`; other header types
/// are either consumed internally or reported as unsupported.
pub const FileKind = enum {
    // Entry with a `directory` header type.
    directory,
    // Entry with a `symbolic_link` header type.
    sym_link,
    // Entry with a `normal` header type.
    file,
};
 302
 303/// Iterator over entries in the tar file represented by reader.
 304pub const Iterator = struct {
 305    reader: *std.Io.Reader,
 306    diagnostics: ?*Diagnostics = null,
 307
 308    // buffers for heeader and file attributes
 309    header_buffer: [Header.SIZE]u8 = undefined,
 310    file_name_buffer: []u8,
 311    link_name_buffer: []u8,
 312
 313    // bytes of padding to the end of the block
 314    padding: usize = 0,
 315    // not consumed bytes of file from last next iteration
 316    unread_file_bytes: u64 = 0,
 317
 318    /// Options for iterator.
 319    /// Buffers should be provided by the caller.
 320    pub const Options = struct {
 321        /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
 322        file_name_buffer: []u8,
 323        /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
 324        link_name_buffer: []u8,
 325        /// Collects error messages during unpacking
 326        diagnostics: ?*Diagnostics = null,
 327    };
 328
 329    /// Iterates over files in tar archive.
 330    /// `next` returns each file in tar archive.
 331    pub fn init(reader: *std.Io.Reader, options: Options) Iterator {
 332        return .{
 333            .reader = reader,
 334            .diagnostics = options.diagnostics,
 335            .file_name_buffer = options.file_name_buffer,
 336            .link_name_buffer = options.link_name_buffer,
 337        };
 338    }
 339
 340    pub const File = struct {
 341        name: []const u8, // name of file, symlink or directory
 342        link_name: []const u8, // target name of symlink
 343        size: u64 = 0, // size of the file in bytes
 344        mode: u32 = 0,
 345        kind: FileKind = .file,
 346    };
 347
 348    fn readHeader(self: *Iterator) !?Header {
 349        if (self.padding > 0) {
 350            try self.reader.discardAll(self.padding);
 351        }
 352        const n = try self.reader.readSliceShort(&self.header_buffer);
 353        if (n == 0) return null;
 354        if (n < Header.SIZE) return error.UnexpectedEndOfStream;
 355        const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
 356        if (try header.checkChksum() == 0) return null;
 357        return header;
 358    }
 359
 360    fn readString(self: *Iterator, size: usize, buffer: []u8) ![]const u8 {
 361        if (size > buffer.len) return error.TarInsufficientBuffer;
 362        const buf = buffer[0..size];
 363        try self.reader.readSliceAll(buf);
 364        return nullStr(buf);
 365    }
 366
 367    fn newFile(self: *Iterator) File {
 368        return .{
 369            .name = self.file_name_buffer[0..0],
 370            .link_name = self.link_name_buffer[0..0],
 371        };
 372    }
 373
 374    // Number of padding bytes in the last file block.
 375    fn blockPadding(size: u64) usize {
 376        const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to te block boundary
 377        return @intCast(block_rounded - size);
 378    }
 379
 380    /// Iterates through the tar archive as if it is a series of files.
 381    /// Internally, the tar format often uses entries (header with optional
 382    /// content) to add meta data that describes the next file. These
 383    /// entries should not normally be visible to the outside. As such, this
 384    /// loop iterates through one or more entries until it collects a all
 385    /// file attributes.
 386    pub fn next(self: *Iterator) !?File {
 387        if (self.unread_file_bytes > 0) {
 388            // If file content was not consumed by caller
 389            try self.reader.discardAll64(self.unread_file_bytes);
 390            self.unread_file_bytes = 0;
 391        }
 392        var file: File = self.newFile();
 393
 394        while (try self.readHeader()) |header| {
 395            const kind = header.kind();
 396            const size: u64 = try header.size();
 397            self.padding = blockPadding(size);
 398
 399            switch (kind) {
 400                // File types to return upstream
 401                .directory, .normal, .symbolic_link => {
 402                    file.kind = switch (kind) {
 403                        .directory => .directory,
 404                        .normal => .file,
 405                        .symbolic_link => .sym_link,
 406                        else => unreachable,
 407                    };
 408                    file.mode = try header.mode();
 409
 410                    // set file attributes if not already set by prefix/extended headers
 411                    if (file.size == 0) {
 412                        file.size = size;
 413                    }
 414                    if (file.link_name.len == 0) {
 415                        file.link_name = try header.linkName(self.link_name_buffer);
 416                    }
 417                    if (file.name.len == 0) {
 418                        file.name = try header.fullName(self.file_name_buffer);
 419                    }
 420
 421                    self.padding = blockPadding(file.size);
 422                    self.unread_file_bytes = file.size;
 423                    return file;
 424                },
 425                // Prefix header types
 426                .gnu_long_name => {
 427                    file.name = try self.readString(@intCast(size), self.file_name_buffer);
 428                },
 429                .gnu_long_link => {
 430                    file.link_name = try self.readString(@intCast(size), self.link_name_buffer);
 431                },
 432                .extended_header => {
 433                    // Use just attributes from last extended header.
 434                    file = self.newFile();
 435
 436                    var rdr: PaxIterator = .{
 437                        .reader = self.reader,
 438                        .size = @intCast(size),
 439                    };
 440                    while (try rdr.next()) |attr| {
 441                        switch (attr.kind) {
 442                            .path => {
 443                                file.name = try attr.value(self.file_name_buffer);
 444                            },
 445                            .linkpath => {
 446                                file.link_name = try attr.value(self.link_name_buffer);
 447                            },
 448                            .size => {
 449                                var buf: [pax_max_size_attr_len]u8 = undefined;
 450                                file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
 451                            },
 452                        }
 453                    }
 454                },
 455                // Ignored header type
 456                .global_extended_header => {
 457                    self.reader.discardAll64(size) catch return error.TarHeadersTooBig;
 458                },
 459                // All other are unsupported header types
 460                else => {
 461                    const d = self.diagnostics orelse return error.TarUnsupportedHeader;
 462                    try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
 463                        .file_name = try d.allocator.dupe(u8, header.name()),
 464                        .file_type = kind,
 465                    } });
 466                    if (kind == .gnu_sparse) {
 467                        try self.skipGnuSparseExtendedHeaders(header);
 468                    }
 469                    self.reader.discardAll64(size) catch return error.TarHeadersTooBig;
 470                },
 471            }
 472        }
 473        return null;
 474    }
 475
 476    pub fn streamRemaining(it: *Iterator, file: File, w: *std.Io.Writer) std.Io.Reader.StreamError!void {
 477        try it.reader.streamExact64(w, file.size);
 478        it.unread_file_bytes = 0;
 479    }
 480
 481    fn skipGnuSparseExtendedHeaders(self: *Iterator, header: Header) !void {
 482        var is_extended = header.bytes[482] > 0;
 483        while (is_extended) {
 484            var buf: [Header.SIZE]u8 = undefined;
 485            try self.reader.readSliceAll(&buf);
 486            is_extended = buf[504] > 0;
 487        }
 488    }
 489};
 490
// Pax extended header keywords that `PaxIterator.next` returns to the caller;
// records with any other keyword are skipped.
const PaxAttributeKind = enum {
    // "path" keyword
    path,
    // "linkpath" keyword
    linkpath,
    // "size" keyword
    size,
};
 496
// Maximum accepted length of a pax `size` attribute value; longer values are
// rejected with error.PaxSizeAttrOverflow.
// maxInt(u64) has 20 chars in base 10; in practice we got 24 chars.
const pax_max_size_attr_len = 64;
 499
 500pub const PaxIterator = struct {
 501    size: usize, // cumulative size of all pax attributes
 502    reader: *std.Io.Reader,
 503
 504    const Self = @This();
 505
 506    const Attribute = struct {
 507        kind: PaxAttributeKind,
 508        len: usize, // length of the attribute value
 509        reader: *std.Io.Reader, // reader positioned at value start
 510
 511        // Copies pax attribute value into destination buffer.
 512        // Must be called with destination buffer of size at least Attribute.len.
 513        pub fn value(self: Attribute, dst: []u8) ![]const u8 {
 514            if (self.len > dst.len) return error.TarInsufficientBuffer;
 515            // assert(self.len <= dst.len);
 516            const buf = dst[0..self.len];
 517            const n = try self.reader.readSliceShort(buf);
 518            if (n < self.len) return error.UnexpectedEndOfStream;
 519            try validateAttributeEnding(self.reader);
 520            if (hasNull(buf)) return error.PaxNullInValue;
 521            return buf;
 522        }
 523    };
 524
 525    // Iterates over pax attributes. Returns known only known attributes.
 526    // Caller has to call value in Attribute, to advance reader across value.
 527    pub fn next(self: *Self) !?Attribute {
 528        // Pax extended header consists of one or more attributes, each constructed as follows:
 529        // "%d %s=%s\n", <length>, <keyword>, <value>
 530        while (self.size > 0) {
 531            const length_buf = try self.reader.takeSentinel(' ');
 532            const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes
 533
 534            const keyword = try self.reader.takeSentinel('=');
 535            if (hasNull(keyword)) return error.PaxNullInKeyword;
 536
 537            // calculate value_len
 538            const value_start = length_buf.len + keyword.len + 2; // 2 separators
 539            if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
 540            const value_len = length - value_start - 1; // \n separator at end
 541            self.size -= length;
 542
 543            const kind: PaxAttributeKind = if (eql(keyword, "path"))
 544                .path
 545            else if (eql(keyword, "linkpath"))
 546                .linkpath
 547            else if (eql(keyword, "size"))
 548                .size
 549            else {
 550                try self.reader.discardAll(value_len);
 551                try validateAttributeEnding(self.reader);
 552                continue;
 553            };
 554            if (kind == .size and value_len > pax_max_size_attr_len) {
 555                return error.PaxSizeAttrOverflow;
 556            }
 557            return .{
 558                .kind = kind,
 559                .len = value_len,
 560                .reader = self.reader,
 561            };
 562        }
 563
 564        return null;
 565    }
 566
 567    fn eql(a: []const u8, b: []const u8) bool {
 568        return std.mem.eql(u8, a, b);
 569    }
 570
 571    fn hasNull(str: []const u8) bool {
 572        return (std.mem.indexOfScalar(u8, str, 0)) != null;
 573    }
 574
 575    // Checks that each record ends with new line.
 576    fn validateAttributeEnding(reader: *std.Io.Reader) !void {
 577        if (try reader.takeByte() != '\n') return error.PaxInvalidAttributeEnd;
 578    }
 579};
 580
 581/// Saves tar file content to the file systems.
 582pub fn pipeToFileSystem(dir: std.fs.Dir, reader: *std.Io.Reader, options: PipeOptions) !void {
 583    var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
 584    var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
 585    var file_contents_buffer: [1024]u8 = undefined;
 586    var it: Iterator = .init(reader, .{
 587        .file_name_buffer = &file_name_buffer,
 588        .link_name_buffer = &link_name_buffer,
 589        .diagnostics = options.diagnostics,
 590    });
 591
 592    while (try it.next()) |file| {
 593        const file_name = stripComponents(file.name, options.strip_components);
 594        if (file_name.len == 0 and file.kind != .directory) {
 595            const d = options.diagnostics orelse return error.TarComponentsOutsideStrippedPrefix;
 596            try d.errors.append(d.allocator, .{ .components_outside_stripped_prefix = .{
 597                .file_name = try d.allocator.dupe(u8, file.name),
 598            } });
 599            continue;
 600        }
 601        if (options.diagnostics) |d| {
 602            try d.findRoot(file.kind, file_name);
 603        }
 604
 605        switch (file.kind) {
 606            .directory => {
 607                if (file_name.len > 0 and !options.exclude_empty_directories) {
 608                    try dir.makePath(file_name);
 609                }
 610            },
 611            .file => {
 612                if (createDirAndFile(dir, file_name, fileMode(file.mode, options))) |fs_file| {
 613                    defer fs_file.close();
 614                    var file_writer = fs_file.writer(&file_contents_buffer);
 615                    try it.streamRemaining(file, &file_writer.interface);
 616                    try file_writer.interface.flush();
 617                } else |err| {
 618                    const d = options.diagnostics orelse return err;
 619                    try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
 620                        .code = err,
 621                        .file_name = try d.allocator.dupe(u8, file_name),
 622                    } });
 623                }
 624            },
 625            .sym_link => {
 626                const link_name = file.link_name;
 627                createDirAndSymlink(dir, link_name, file_name) catch |err| {
 628                    const d = options.diagnostics orelse return error.UnableToCreateSymLink;
 629                    try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{
 630                        .code = err,
 631                        .file_name = try d.allocator.dupe(u8, file_name),
 632                        .link_name = try d.allocator.dupe(u8, link_name),
 633                    } });
 634                };
 635            },
 636        }
 637    }
 638}
 639
 640fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8, mode: std.fs.File.Mode) !std.fs.File {
 641    const fs_file = dir.createFile(file_name, .{ .exclusive = true, .mode = mode }) catch |err| {
 642        if (err == error.FileNotFound) {
 643            if (std.fs.path.dirname(file_name)) |dir_name| {
 644                try dir.makePath(dir_name);
 645                return try dir.createFile(file_name, .{ .exclusive = true, .mode = mode });
 646            }
 647        }
 648        return err;
 649    };
 650    return fs_file;
 651}
 652
 653// Creates a symbolic link at path `file_name` which points to `link_name`.
 654fn createDirAndSymlink(dir: std.fs.Dir, link_name: []const u8, file_name: []const u8) !void {
 655    dir.symLink(link_name, file_name, .{}) catch |err| {
 656        if (err == error.FileNotFound) {
 657            if (std.fs.path.dirname(file_name)) |dir_name| {
 658                try dir.makePath(dir_name);
 659                return try dir.symLink(link_name, file_name, .{});
 660            }
 661        }
 662        return err;
 663    };
 664}
 665
 666fn stripComponents(path: []const u8, count: u32) []const u8 {
 667    var i: usize = 0;
 668    var c = count;
 669    while (c > 0) : (c -= 1) {
 670        if (std.mem.indexOfScalarPos(u8, path, i, '/')) |pos| {
 671            i = pos + 1;
 672        } else {
 673            i = path.len;
 674            break;
 675        }
 676    }
 677    return path[i..];
 678}
 679
 680test stripComponents {
 681    const expectEqualStrings = testing.expectEqualStrings;
 682    try expectEqualStrings("a/b/c", stripComponents("a/b/c", 0));
 683    try expectEqualStrings("b/c", stripComponents("a/b/c", 1));
 684    try expectEqualStrings("c", stripComponents("a/b/c", 2));
 685    try expectEqualStrings("", stripComponents("a/b/c", 3));
 686    try expectEqualStrings("", stripComponents("a/b/c", 4));
 687}
 688
 689test PaxIterator {
 690    const Attr = struct {
 691        kind: PaxAttributeKind,
 692        value: []const u8 = undefined,
 693        err: ?anyerror = null,
 694    };
 695    const cases = [_]struct {
 696        data: []const u8,
 697        attrs: []const Attr,
 698        err: ?anyerror = null,
 699    }{
 700        .{ // valid but unknown keys
 701            .data =
 702            \\30 mtime=1350244992.023960108
 703            \\6 k=1
 704            \\13 key1=val1
 705            \\10 a=name
 706            \\9 a=name
 707            \\
 708            ,
 709            .attrs = &[_]Attr{},
 710        },
 711        .{ // mix of known and unknown keys
 712            .data =
 713            \\6 k=1
 714            \\13 path=name
 715            \\17 linkpath=link
 716            \\13 key1=val1
 717            \\12 size=123
 718            \\13 key2=val2
 719            \\
 720            ,
 721            .attrs = &[_]Attr{
 722                .{ .kind = .path, .value = "name" },
 723                .{ .kind = .linkpath, .value = "link" },
 724                .{ .kind = .size, .value = "123" },
 725            },
 726        },
 727        .{ // too short size of the second key-value pair
 728            .data =
 729            \\13 path=name
 730            \\10 linkpath=value
 731            \\
 732            ,
 733            .attrs = &[_]Attr{
 734                .{ .kind = .path, .value = "name" },
 735            },
 736            .err = error.UnexpectedEndOfStream,
 737        },
 738        .{ // too long size of the second key-value pair
 739            .data =
 740            \\13 path=name
 741            \\6 k=1
 742            \\19 linkpath=value
 743            \\
 744            ,
 745            .attrs = &[_]Attr{
 746                .{ .kind = .path, .value = "name" },
 747            },
 748            .err = error.UnexpectedEndOfStream,
 749        },
 750
 751        .{ // too long size of the second key-value pair
 752            .data =
 753            \\13 path=name
 754            \\19 linkpath=value
 755            \\6 k=1
 756            \\
 757            ,
 758            .attrs = &[_]Attr{
 759                .{ .kind = .path, .value = "name" },
 760                .{ .kind = .linkpath, .err = error.PaxInvalidAttributeEnd },
 761            },
 762        },
 763        .{ // null in keyword is not valid
 764            .data = "13 path=name\n" ++ "7 k\x00b=1\n",
 765            .attrs = &[_]Attr{
 766                .{ .kind = .path, .value = "name" },
 767            },
 768            .err = error.PaxNullInKeyword,
 769        },
 770        .{ // null in value is not valid
 771            .data = "23 path=name\x00with null\n",
 772            .attrs = &[_]Attr{
 773                .{ .kind = .path, .err = error.PaxNullInValue },
 774            },
 775        },
 776        .{ // 1000 characters path
 777            .data = "1011 path=" ++ "0123456789" ** 100 ++ "\n",
 778            .attrs = &[_]Attr{
 779                .{ .kind = .path, .value = "0123456789" ** 100 },
 780            },
 781        },
 782    };
 783    var buffer: [1024]u8 = undefined;
 784
 785    outer: for (cases) |case| {
 786        var reader: std.Io.Reader = .fixed(case.data);
 787        var it: PaxIterator = .{
 788            .size = case.data.len,
 789            .reader = &reader,
 790        };
 791
 792        var i: usize = 0;
 793        while (it.next() catch |err| {
 794            if (case.err) |e| {
 795                try testing.expectEqual(e, err);
 796                continue;
 797            }
 798            return err;
 799        }) |attr| : (i += 1) {
 800            const exp = case.attrs[i];
 801            try testing.expectEqual(exp.kind, attr.kind);
 802            const value = attr.value(&buffer) catch |err| {
 803                if (exp.err) |e| {
 804                    try testing.expectEqual(e, err);
 805                    break :outer;
 806                }
 807                return err;
 808            };
 809            try testing.expectEqualStrings(exp.value, value);
 810        }
 811        try testing.expectEqual(case.attrs.len, i);
 812        try testing.expect(case.err == null);
 813    }
 814}
 815
 816test "header parse size" {
 817    const cases = [_]struct {
 818        in: []const u8,
 819        want: u64 = 0,
 820        err: ?anyerror = null,
 821    }{
 822        // Test base-256 (binary) encoded values.
 823        .{ .in = "", .want = 0 },
 824        .{ .in = "\x80", .want = 0 },
 825        .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", .want = 1 },
 826        .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02", .want = 0x0102 },
 827        .{ .in = "\x80\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08", .want = 0x0102030405060708 },
 828        .{ .in = "\x80\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09", .err = error.TarNumericValueTooBig },
 829        .{ .in = "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", .want = 537795476381659745 },
 830        .{ .in = "\x80\x80\x80\x00\x01\x02\x03\x04\x05\x06\x07\x08", .err = error.TarNumericValueTooBig },
 831
 832        // // Test base-8 (octal) encoded values.
 833        .{ .in = "00000000227\x00", .want = 0o227 },
 834        .{ .in = "  000000227\x00", .want = 0o227 },
 835        .{ .in = "00000000228\x00", .err = error.TarHeader },
 836        .{ .in = "11111111111\x00", .want = 0o11111111111 },
 837    };
 838
 839    for (cases) |case| {
 840        var bytes = [_]u8{0} ** Header.SIZE;
 841        @memcpy(bytes[124 .. 124 + case.in.len], case.in);
 842        var header = Header{ .bytes = &bytes };
 843        if (case.err) |err| {
 844            try testing.expectError(err, header.size());
 845        } else {
 846            try testing.expectEqual(case.want, try header.size());
 847        }
 848    }
 849}
 850
 851test "header parse mode" {
 852    const cases = [_]struct {
 853        in: []const u8,
 854        want: u64 = 0,
 855        err: ?anyerror = null,
 856    }{
 857        .{ .in = "0000644\x00", .want = 0o644 },
 858        .{ .in = "0000777\x00", .want = 0o777 },
 859        .{ .in = "7777777\x00", .want = 0o7777777 },
 860        .{ .in = "7777778\x00", .err = error.TarHeader },
 861        .{ .in = "77777777", .want = 0o77777777 },
 862        .{ .in = "777777777777", .want = 0o77777777 },
 863    };
 864    for (cases) |case| {
 865        var bytes = [_]u8{0} ** Header.SIZE;
 866        @memcpy(bytes[100 .. 100 + case.in.len], case.in);
 867        var header = Header{ .bytes = &bytes };
 868        if (case.err) |err| {
 869            try testing.expectError(err, header.mode());
 870        } else {
 871            try testing.expectEqual(case.want, try header.mode());
 872        }
 873    }
 874}
 875
 876test "create file and symlink" {
 877    var root = testing.tmpDir(.{});
 878    defer root.cleanup();
 879
 880    var file = try createDirAndFile(root.dir, "file1", default_mode);
 881    file.close();
 882    file = try createDirAndFile(root.dir, "a/b/c/file2", default_mode);
 883    file.close();
 884
 885    createDirAndSymlink(root.dir, "a/b/c/file2", "symlink1") catch |err| {
 886        // On Windows when developer mode is not enabled
 887        if (err == error.AccessDenied) return error.SkipZigTest;
 888        return err;
 889    };
 890    try createDirAndSymlink(root.dir, "../../../file1", "d/e/f/symlink2");
 891
 892    // Danglink symlnik, file created later
 893    try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3");
 894    file = try createDirAndFile(root.dir, "g/h/i/file4", default_mode);
 895    file.close();
 896}
 897
 898test Iterator {
 899    // Example tar file is created from this tree structure:
 900    // $ tree example
 901    //    example
 902    //    ├── a
 903    //    │   └── file
 904    //    ├── b
 905    //    │   └── symlink -> ../a/file
 906    //    └── empty
 907    // $ cat example/a/file
 908    //   content
 909    // $ tar -cf example.tar example
 910    // $ tar -tvf example.tar
 911    //    example/
 912    //    example/b/
 913    //    example/b/symlink -> ../a/file
 914    //    example/a/
 915    //    example/a/file
 916    //    example/empty/
 917
 918    const data = @embedFile("tar/testdata/example.tar");
 919    var reader: std.Io.Reader = .fixed(data);
 920
 921    // User provided buffers to the iterator
 922    var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
 923    var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
 924    // Create iterator
 925    var it: Iterator = .init(&reader, .{
 926        .file_name_buffer = &file_name_buffer,
 927        .link_name_buffer = &link_name_buffer,
 928    });
 929    // Iterate over files in example.tar
 930    var file_no: usize = 0;
 931    while (try it.next()) |file| : (file_no += 1) {
 932        switch (file.kind) {
 933            .directory => {
 934                switch (file_no) {
 935                    0 => try testing.expectEqualStrings("example/", file.name),
 936                    1 => try testing.expectEqualStrings("example/b/", file.name),
 937                    3 => try testing.expectEqualStrings("example/a/", file.name),
 938                    5 => try testing.expectEqualStrings("example/empty/", file.name),
 939                    else => unreachable,
 940                }
 941            },
 942            .file => {
 943                try testing.expectEqualStrings("example/a/file", file.name);
 944                var buf: [16]u8 = undefined;
 945                var w: std.Io.Writer = .fixed(&buf);
 946                try it.streamRemaining(file, &w);
 947                try testing.expectEqualStrings("content\n", w.buffered());
 948            },
 949            .sym_link => {
 950                try testing.expectEqualStrings("example/b/symlink", file.name);
 951                try testing.expectEqualStrings("../a/file", file.link_name);
 952            },
 953        }
 954    }
 955}
 956
 957test pipeToFileSystem {
 958    // Example tar file is created from this tree structure:
 959    // $ tree example
 960    //    example
 961    //    ├── a
 962    //    │   └── file
 963    //    ├── b
 964    //    │   └── symlink -> ../a/file
 965    //    └── empty
 966    // $ cat example/a/file
 967    //   content
 968    // $ tar -cf example.tar example
 969    // $ tar -tvf example.tar
 970    //    example/
 971    //    example/b/
 972    //    example/b/symlink -> ../a/file
 973    //    example/a/
 974    //    example/a/file
 975    //    example/empty/
 976
 977    const data = @embedFile("tar/testdata/example.tar");
 978    var reader: std.Io.Reader = .fixed(data);
 979
 980    var tmp = testing.tmpDir(.{ .follow_symlinks = false });
 981    defer tmp.cleanup();
 982    const dir = tmp.dir;
 983
 984    // Save tar from reader to the file system `dir`
 985    pipeToFileSystem(dir, &reader, .{
 986        .mode_mode = .ignore,
 987        .strip_components = 1,
 988        .exclude_empty_directories = true,
 989    }) catch |err| {
 990        // Skip on platform which don't support symlinks
 991        if (err == error.UnableToCreateSymLink) return error.SkipZigTest;
 992        return err;
 993    };
 994
 995    try testing.expectError(error.FileNotFound, dir.statFile("empty"));
 996    try testing.expect((try dir.statFile("a/file")).kind == .file);
 997    try testing.expect((try dir.statFile("b/symlink")).kind == .file); // statFile follows symlink
 998
 999    var buf: [32]u8 = undefined;
1000    try testing.expectEqualSlices(
1001        u8,
1002        "../a/file",
1003        normalizePath(try dir.readLink("b/symlink", &buf)),
1004    );
1005}
1006
test "pipeToFileSystem root_dir" {
    const data = @embedFile("tar/testdata/example.tar");

    // strip_components = 1: no root_dir is reported and five entries are counted.
    {
        var reader: std.Io.Reader = .fixed(data);
        var tmp = testing.tmpDir(.{ .follow_symlinks = false });
        defer tmp.cleanup();
        var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
        defer diagnostics.deinit();

        pipeToFileSystem(tmp.dir, &reader, .{
            .strip_components = 1,
            .diagnostics = &diagnostics,
        }) catch |err|
            // Skip on platforms which don't support symlinks
            return if (err == error.UnableToCreateSymLink) error.SkipZigTest else err;

        try testing.expectEqual(0, diagnostics.root_dir.len);
        try testing.expectEqual(5, diagnostics.entries);
    }

    // strip_components = 0: "example" is reported as root_dir and six entries are counted.
    {
        var reader: std.Io.Reader = .fixed(data);
        var tmp = testing.tmpDir(.{ .follow_symlinks = false });
        defer tmp.cleanup();
        var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
        defer diagnostics.deinit();

        pipeToFileSystem(tmp.dir, &reader, .{
            .strip_components = 0,
            .diagnostics = &diagnostics,
        }) catch |err|
            // Skip on platforms which don't support symlinks
            return if (err == error.UnableToCreateSymLink) error.SkipZigTest else err;

        try testing.expectEqualStrings("example", diagnostics.root_dir);
        try testing.expectEqual(6, diagnostics.entries);
    }
}
1054
test "findRoot with single file archive" {
    var reader: std.Io.Reader = .fixed(@embedFile("tar/testdata/22752.tar"));

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();

    var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
    defer diagnostics.deinit();

    // Extract with default options, collecting diagnostics.
    try pipeToFileSystem(tmp.dir, &reader, .{ .diagnostics = &diagnostics });

    // No root directory is expected to be reported for this archive.
    try testing.expectEqualStrings("", diagnostics.root_dir);
}
1068
test "findRoot without explicit root dir" {
    var reader: std.Io.Reader = .fixed(@embedFile("tar/testdata/19820.tar"));

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();

    var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
    defer diagnostics.deinit();

    // Extract with default options, collecting diagnostics.
    try pipeToFileSystem(tmp.dir, &reader, .{ .diagnostics = &diagnostics });

    // "root" is expected to be reported as the root directory of this archive.
    try testing.expectEqualStrings("root", diagnostics.root_dir);
}
1082
test "pipeToFileSystem strip_components" {
    var reader: std.Io.Reader = .fixed(@embedFile("tar/testdata/example.tar"));

    var tmp = testing.tmpDir(.{ .follow_symlinks = false });
    defer tmp.cleanup();
    var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
    defer diagnostics.deinit();

    // Strip three leading path components while extracting.
    pipeToFileSystem(tmp.dir, &reader, .{
        .strip_components = 3,
        .diagnostics = &diagnostics,
    }) catch |err|
        // Skip on platforms which don't support symlinks
        return if (err == error.UnableToCreateSymLink) error.SkipZigTest else err;

    // Exactly two entries are expected to be reported in diagnostics, in
    // archive order, each tagged `components_outside_stripped_prefix`.
    const reported = diagnostics.errors.items;
    try testing.expectEqual(2, reported.len);
    try testing.expectEqualStrings(
        "example/b/symlink",
        reported[0].components_outside_stripped_prefix.file_name,
    );
    try testing.expectEqualStrings(
        "example/a/file",
        reported[1].components_outside_stripped_prefix.file_name,
    );
}
1105
/// Rewrites `bytes` in place so that every native path separator becomes the
/// POSIX separator, and returns the same slice. When the native separator
/// already is the POSIX one the slice is returned untouched.
fn normalizePath(bytes: []u8) []u8 {
    const native_sep = std.fs.path.sep;
    const posix_sep = std.fs.path.sep_posix;
    if (native_sep != posix_sep) {
        std.mem.replaceScalar(u8, bytes, native_sep, posix_sep);
    }
    return bytes;
}
1112
/// Mode returned by `fileMode` whenever the tar header mode is not honored.
const default_mode = std.fs.File.default_mode;

/// Computes the file system mode from the tar header `mode` and the
/// `mode_mode` option.
///
/// Returns `default_mode` when the target has no executable bit, when
/// `options.mode_mode` is `.ignore`, or when the owner executable bit is not
/// set in `mode`. Otherwise returns `default_mode` with the executable bit set
/// for owner, group and others.
fn fileMode(mode: u32, options: PipeOptions) std.fs.File.Mode {
    if (!std.fs.has_executable_bit or options.mode_mode == .ignore) {
        return default_mode;
    }

    const S = std.posix.S;

    // Only the owner executable bit of the tar header mode is inspected.
    const owner_can_execute = (mode & S.IXUSR) != 0;

    // That single bit is propagated to group and others; every other bit
    // stays as in the default mode.
    return if (owner_can_execute)
        default_mode | S.IXUSR | S.IXGRP | S.IXOTH
    else
        default_mode;
}
1130
test fileMode {
    if (!std.fs.has_executable_bit) return error.SkipZigTest;

    // With `.ignore` the header mode has no influence on the result.
    try testing.expectEqual(default_mode, fileMode(0o744, .{ .mode_mode = .ignore }));

    // Owner executable bit set: executable for owner, group and others.
    try testing.expectEqual(0o777, fileMode(0o744, .{}));

    // Owner executable bit clear: result is 0o666, even when the group and
    // other executable bits are set in the header.
    try testing.expectEqual(0o666, fileMode(0o644, .{}));
    try testing.expectEqual(0o666, fileMode(0o655, .{}));
}
1138
test "executable bit" {
    if (!std.fs.has_executable_bit) return error.SkipZigTest;

    const S = std.posix.S;
    const data = @embedFile("tar/testdata/example.tar");

    // Extract the same archive once per mode handling option and inspect the
    // resulting permission bits of the regular file.
    for ([_]PipeOptions.ModeMode{ .ignore, .executable_bit_only }) |opt| {
        var reader: std.Io.Reader = .fixed(data);

        // Each iteration gets its own temporary directory. The defer runs at
        // the end of every iteration, so no directory is left behind.
        var tmp = testing.tmpDir(.{ .follow_symlinks = false });
        defer tmp.cleanup();

        pipeToFileSystem(tmp.dir, &reader, .{
            .strip_components = 1,
            .exclude_empty_directories = true,
            .mode_mode = opt,
        }) catch |err| {
            // Skip on platform which don't support symlinks
            if (err == error.UnableToCreateSymLink) return error.SkipZigTest;
            return err;
        };

        const fs = try tmp.dir.statFile("a/file");
        try testing.expect(fs.kind == .file);

        if (opt == .executable_bit_only) {
            // Executable bit is set for user, group and others
            try testing.expect(fs.mode & S.IXUSR > 0);
            try testing.expect(fs.mode & S.IXGRP > 0);
            try testing.expect(fs.mode & S.IXOTH > 0);
        }
        if (opt == .ignore) {
            // Header mode is ignored, so no executable bit is set
            try testing.expect(fs.mode & S.IXUSR == 0);
            try testing.expect(fs.mode & S.IXGRP == 0);
            try testing.expect(fs.mode & S.IXOTH == 0);
        }
    }
}
1177
test {
    // Reference these declarations so that they are analyzed as part of this
    // file's test build.
    _ = Diagnostics;
    _ = Writer;
    _ = @import("tar/test.zig");
}