Commit c07527abac

Igor Anić <igor.anic@gmail.com>
2023-12-11 22:00:49
tar: reorganize file, functions before tests
1 parent c76abe0
Changed files (1)
lib
lib/std/tar.zig
@@ -15,8 +15,7 @@
 /// GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
 /// pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13
 ///
-//const std = @import("std.zig");
-const std = @import("std");
+const std = @import("std.zig");
 const assert = std.debug.assert;
 
 pub const Options = struct {
@@ -226,6 +225,276 @@ fn nullStr(str: []const u8) []const u8 {
     return str;
 }
 
+pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) { // builds a TarReader over `reader`
+    return .{
+        .reader = reader, // archive bytes are read from here
+        .diagnostics = diagnostics, // optional; when null, next() returns error.TarUnsupportedHeader on unsupported header types
+    };
+}
+
+fn TarReader(comptime ReaderType: type) type { // tar archive iterator type, generic over the type of the underlying reader
+    return struct {
+        reader: ReaderType, // source of the raw archive bytes
+        diagnostics: ?*Options.Diagnostics, // optional; next() records unsupported header types here
+
+        // buffers for header and file attributes
+        header_buffer: [Header.SIZE]u8 = undefined,
+        file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, // backs File.name
+        link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, // backs File.link_name
+
+        // bytes of padding to the end of the block; skipped by readHeader before the next header is read
+        padding: usize = 0,
+        // current tar file; reset by initFile() and returned by next()
+        file: File = undefined,
+
+        pub const File = struct {
+            name: []const u8, // name of file, symlink or directory
+            link_name: []const u8, // target name of symlink
+            size: usize, // size of the file in bytes
+            mode: u32, // value of header.mode()
+            kind: Header.Kind, // .directory, .normal or .symbolic_link when returned by next()
+
+            reader: ReaderType, // copy of the TarReader's reader, assigned in initFile()
+
+            // Writes file content to writer: copies `size` bytes from reader in chunks of at most 4096 bytes.
+            pub fn write(self: File, writer: anytype) !void {
+                var buffer: [4096]u8 = undefined;
+
+                var n: usize = 0; // bytes copied so far
+                while (n < self.size) {
+                    const buf = buffer[0..@min(buffer.len, self.size - n)]; // last chunk may be shorter than the buffer
+                    try self.reader.readNoEof(buf);
+                    try writer.writeAll(buf);
+                    n += buf.len;
+                }
+            }
+
+            // Skips file content. Advances reader by `size` bytes.
+            pub fn skip(self: File) !void {
+                try self.reader.skipBytes(self.size, .{});
+            }
+        };
+
+        const Self = @This();
+
+        fn readHeader(self: *Self) !?Header { // reads the next Header.SIZE bytes as a header; null means no more entries
+            if (self.padding > 0) { // skip padding left over from the previous entry
+                try self.reader.skipBytes(self.padding, .{});
+            }
+            const n = try self.reader.readAll(&self.header_buffer);
+            if (n == 0) return null; // stream ended exactly at an entry boundary
+            if (n < Header.SIZE) return error.UnexpectedEndOfStream; // partial header
+            const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
+            if (try header.checkChksum() == 0) return null; // NOTE(review): presumably 0 marks the all-zero end-of-archive block — confirm in Header.checkChksum
+            return header;
+        }
+
+        inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 { // reads `size` bytes into `buffer`, returns them passed through nullStr
+            assert(buffer.len >= size); // NOTE(review): next() passes header.size() here; an oversized value trips this assert instead of returning an error
+            const buf = buffer[0..size];
+            try self.reader.readNoEof(buf);
+            return nullStr(buf);
+        }
+
+        inline fn initFile(self: *Self) void { // resets self.file: empty name and link_name, size 0, mode 0, kind .normal
+            self.file = File{
+                .name = self.file_name_buffer[0..0],
+                .link_name = self.link_name_buffer[0..0],
+                .size = 0,
+                .kind = .normal,
+                .mode = 0,
+                .reader = self.reader,
+            };
+        }
+
+        // Number of padding bytes in the last file block.
+        inline fn blockPadding(size: usize) usize {
+            const block_rounded = std.mem.alignForward(usize, size, Header.SIZE); // size rounded to the block boundary
+            return block_rounded - size;
+        }
+
+        /// Iterates through the tar archive as if it is a series of files.
+        /// Internally, the tar format often uses entries (header with optional
+        /// content) to add metadata that describes the next file. These
+        /// entries should not normally be visible to the outside. As such, this
+        /// loop iterates through one or more entries until it collects all
+        /// file attributes. Returns null when readHeader reports no more entries.
+        pub fn next(self: *Self) !?File {
+            self.initFile();
+
+            while (try self.readHeader()) |header| {
+                const kind = header.kind();
+                const size: usize = @intCast(try header.size()); // NOTE(review): @intCast assumes the header size fits in usize — confirm for 32-bit targets
+                self.padding = blockPadding(size);
+
+                switch (kind) {
+                    // File types to return upstream
+                    .directory, .normal, .symbolic_link => {
+                        self.file.kind = kind;
+                        self.file.mode = try header.mode();
+
+                        // set file attributes if not already set by prefix/extended headers
+                        if (self.file.size == 0) { // NOTE(review): a pax size attribute of 0 is indistinguishable from "not set" here
+                            self.file.size = size;
+                        }
+                        if (self.file.link_name.len == 0) {
+                            self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]);
+                        }
+                        if (self.file.name.len == 0) {
+                            self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]);
+                        }
+
+                        self.padding = blockPadding(self.file.size); // recompute: size may have come from a pax attribute
+                        return self.file;
+                    },
+                    // Prefix header types: their content is the name / link name of the file that follows
+                    .gnu_long_name => {
+                        self.file.name = try self.readString(size, &self.file_name_buffer);
+                    },
+                    .gnu_long_link => {
+                        self.file.link_name = try self.readString(size, &self.link_name_buffer);
+                    },
+                    .extended_header => {
+                        // Use just attributes from last extended header.
+                        self.initFile();
+
+                        var rdr = paxReader(self.reader, size);
+                        while (try rdr.next()) |attr| {
+                            switch (attr.kind) {
+                                .path => {
+                                    self.file.name = try attr.value(&self.file_name_buffer);
+                                },
+                                .linkpath => {
+                                    self.file.link_name = try attr.value(&self.link_name_buffer);
+                                },
+                                .size => {
+                                    var buf: [64]u8 = undefined; // NOTE(review): attr.value asserts that the value fits in this buffer
+                                    self.file.size = try std.fmt.parseInt(usize, try attr.value(&buf), 10);
+                                },
+                            }
+                        }
+                    },
+                    // Ignored header type: its content is skipped
+                    .global_extended_header => {
+                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
+                    },
+                    // All others are unsupported header types: an error without diagnostics, otherwise recorded and the loop continues
+                    else => {
+                        const d = self.diagnostics orelse return error.TarUnsupportedHeader;
+                        try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
+                            .file_name = try d.allocator.dupe(u8, header.name()),
+                            .file_type = kind,
+                        } });
+                    },
+                }
+            }
+            return null;
+        }
+    };
+}
+
+// Creates a pax attributes reader over `reader`.
+// `size` is the length in bytes of the pax extended header content still to be read from reader.
+fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) {
+    return PaxReader(@TypeOf(reader)){
+        .reader = reader,
+        .size = size,
+    };
+}
+
+const PaxAttributeKind = enum { // pax keywords that PaxReader.next returns; other keywords are skipped there
+    path, // keyword "path"
+    linkpath, // keyword "linkpath"
+    size, // keyword "size"
+};
+
+fn PaxReader(comptime ReaderType: type) type { // pax extended header attribute iterator, generic over the reader type
+    return struct {
+        size: usize, // cumulative size of all pax attributes; reduced by each record's length in next()
+        reader: ReaderType,
+        // scratch buffer used for reading attribute length and keyword
+        scratch: [128]u8 = undefined,
+
+        const Self = @This();
+
+        const Attribute = struct {
+            kind: PaxAttributeKind,
+            len: usize, // length of the attribute value
+            reader: ReaderType, // reader positioned at value start
+
+            // Copies pax attribute value into destination buffer.
+            // Must be called with destination buffer of size at least Attribute.len.
+            pub fn value(self: Attribute, dst: []u8) ![]const u8 {
+                assert(self.len <= dst.len);
+                const buf = dst[0..self.len];
+                const n = try self.reader.readAll(buf);
+                if (n < self.len) return error.UnexpectedEndOfStream;
+                try validateAttributeEnding(self.reader); // consume the record's trailing '\n'
+                if (hasNull(buf)) return error.PaxNullInValue;
+                return buf;
+            }
+        };
+
+        // Iterates over pax attributes. Returns only known attributes (path, linkpath, size).
+        // Caller has to call value() on the returned Attribute to advance reader across the value.
+        pub fn next(self: *Self) !?Attribute {
+            // Pax extended header consists of one or more attributes, each constructed as follows:
+            // "%d %s=%s\n", <length>, <keyword>, <value>
+            while (self.size > 0) {
+                const length_buf = try self.readUntil(' ');
+                const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes
+
+                const keyword = try self.readUntil('='); // reuses scratch: length_buf bytes are overwritten, only length_buf.len is used below
+                if (hasNull(keyword)) return error.PaxNullInKeyword;
+
+                // calculate value_len
+                const value_start = length_buf.len + keyword.len + 2; // 2 separators: ' ' and '='
+                if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream; // record must hold at least the '\n' and fit in the remaining header
+                const value_len = length - value_start - 1; // \n separator at end
+                self.size -= length;
+
+                const kind: PaxAttributeKind = if (eql(keyword, "path"))
+                    .path
+                else if (eql(keyword, "linkpath"))
+                    .linkpath
+                else if (eql(keyword, "size"))
+                    .size
+                else { // unknown keyword: skip its value and record ending, then try the next record
+                    try self.reader.skipBytes(value_len, .{});
+                    try validateAttributeEnding(self.reader);
+                    continue;
+                };
+                return Attribute{
+                    .kind = kind,
+                    .len = value_len,
+                    .reader = self.reader,
+                };
+            }
+
+            return null;
+        }
+
+        inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 { // streams bytes up to `delimiter` into scratch; returned slice aliases scratch
+            var fbs = std.io.fixedBufferStream(&self.scratch);
+            try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null);
+            return fbs.getWritten();
+        }
+
+        inline fn eql(a: []const u8, b: []const u8) bool { // byte-wise slice equality
+            return std.mem.eql(u8, a, b);
+        }
+
+        inline fn hasNull(str: []const u8) bool { // true if str contains a 0 byte
+            return (std.mem.indexOfScalar(u8, str, 0)) != null;
+        }
+
+        // Checks that each record ends with new line. Consumes one byte from reader.
+        inline fn validateAttributeEnding(reader: ReaderType) !void {
+            if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
+        }
+    };
+}
+
 pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
     switch (options.mode_mode) {
         .ignore => {},
@@ -639,170 +908,70 @@ test "tar run Go test cases" {
         .{
             // Size in gnu extended format, and name in pax attribute.
             .path = "writer-big-long.tar",
-            .files = &[_]Case.File{
-                .{
-                    .name = "longname/" ** 15 ++ "16gig.txt",
-                    .size = 16 * 1024 * 1024 * 1024,
-                    .mode = 0o644,
-                    .truncated = true,
-                },
-            },
-        },
-    };
-
-    for (cases) |case| {
-        var fs_file = try test_dir.openFile(case.path, .{});
-        defer fs_file.close();
-
-        //var iter = iterator(fs_file.reader(), null);
-        var iter = tarReader(fs_file.reader(), null);
-        var i: usize = 0;
-        while (iter.next() catch |err| {
-            if (case.err) |e| {
-                try std.testing.expectEqual(e, err);
-                continue;
-            } else {
-                return err;
-            }
-        }) |actual| : (i += 1) {
-            const expected = case.files[i];
-            try std.testing.expectEqualStrings(expected.name, actual.name);
-            try std.testing.expectEqual(expected.size, actual.size);
-            try std.testing.expectEqual(expected.kind, actual.kind);
-            try std.testing.expectEqual(expected.mode, actual.mode);
-            try std.testing.expectEqualStrings(expected.link_name, actual.link_name);
-
-            if (case.chksums.len > i) {
-                var md5writer = Md5Writer{};
-                try actual.write(&md5writer);
-                const chksum = md5writer.chksum();
-                try std.testing.expectEqualStrings(case.chksums[i], &chksum);
-            } else {
-                if (!expected.truncated) try actual.skip(); // skip file content
-            }
-        }
-        try std.testing.expectEqual(case.files.len, i);
-    }
-}
-
-// used in test to calculate file chksum
-const Md5Writer = struct {
-    h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}),
-
-    pub fn writeAll(self: *Md5Writer, buf: []const u8) !void {
-        self.h.update(buf);
-    }
-
-    pub fn writeByte(self: *Md5Writer, byte: u8) !void {
-        self.h.update(&[_]u8{byte});
-    }
-
-    pub fn chksum(self: *Md5Writer) [32]u8 {
-        var s = [_]u8{0} ** 16;
-        self.h.final(&s);
-        return std.fmt.bytesToHex(s, .lower);
-    }
-};
-
-fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) {
-    return PaxReader(@TypeOf(reader)){
-        .reader = reader,
-        .size = size,
-    };
-}
-
-const PaxAttributeKind = enum {
-    path,
-    linkpath,
-    size,
-};
-
-fn PaxReader(comptime ReaderType: type) type {
-    return struct {
-        size: usize, // cumulative size of all pax attributes
-        reader: ReaderType,
-        // scratch buffer used for reading attribute length and keyword
-        scratch: [128]u8 = undefined,
-
-        const Self = @This();
-
-        const Attribute = struct {
-            kind: PaxAttributeKind,
-            len: usize, // length of the attribute value
-            reader: ReaderType, // reader positioned at value start
-
-            // Copies pax attribute value into destination buffer.
-            // Must be called with destination buffer of size at least Attribute.len.
-            pub fn value(self: Attribute, dst: []u8) ![]const u8 {
-                assert(self.len <= dst.len);
-                const buf = dst[0..self.len];
-                const n = try self.reader.readAll(buf);
-                if (n < self.len) return error.UnexpectedEndOfStream;
-                try validateAttributeEnding(self.reader);
-                if (hasNull(buf)) return error.PaxNullInValue;
-                return buf;
-            }
-        };
-
-        // Iterates over pax attributes. Returns known only known attributes.
-        // Caller has to call value in Attribute, to advance reader across value.
-        pub fn next(self: *Self) !?Attribute {
-            // Pax extended header consists of one or more attributes, each constructed as follows:
-            // "%d %s=%s\n", <length>, <keyword>, <value>
-            while (self.size > 0) {
-                const length_buf = try self.readUntil(' ');
-                const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes
-
-                const keyword = try self.readUntil('=');
-                if (hasNull(keyword)) return error.PaxNullInKeyword;
+            .files = &[_]Case.File{
+                .{
+                    .name = "longname/" ** 15 ++ "16gig.txt",
+                    .size = 16 * 1024 * 1024 * 1024,
+                    .mode = 0o644,
+                    .truncated = true,
+                },
+            },
+        },
+    };
 
-                // calculate value_len
-                const value_start = length_buf.len + keyword.len + 2; // 2 separators
-                if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
-                const value_len = length - value_start - 1; // \n separator at end
-                self.size -= length;
+    for (cases) |case| {
+        var fs_file = try test_dir.openFile(case.path, .{});
+        defer fs_file.close();
 
-                const kind: PaxAttributeKind = if (eql(keyword, "path"))
-                    .path
-                else if (eql(keyword, "linkpath"))
-                    .linkpath
-                else if (eql(keyword, "size"))
-                    .size
-                else {
-                    try self.reader.skipBytes(value_len, .{});
-                    try validateAttributeEnding(self.reader);
-                    continue;
-                };
-                return Attribute{
-                    .kind = kind,
-                    .len = value_len,
-                    .reader = self.reader,
-                };
+        //var iter = iterator(fs_file.reader(), null);
+        var iter = tarReader(fs_file.reader(), null);
+        var i: usize = 0;
+        while (iter.next() catch |err| {
+            if (case.err) |e| {
+                try std.testing.expectEqual(e, err);
+                continue;
+            } else {
+                return err;
             }
+        }) |actual| : (i += 1) {
+            const expected = case.files[i];
+            try std.testing.expectEqualStrings(expected.name, actual.name);
+            try std.testing.expectEqual(expected.size, actual.size);
+            try std.testing.expectEqual(expected.kind, actual.kind);
+            try std.testing.expectEqual(expected.mode, actual.mode);
+            try std.testing.expectEqualStrings(expected.link_name, actual.link_name);
 
-            return null;
+            if (case.chksums.len > i) {
+                var md5writer = Md5Writer{};
+                try actual.write(&md5writer);
+                const chksum = md5writer.chksum();
+                try std.testing.expectEqualStrings(case.chksums[i], &chksum);
+            } else {
+                if (!expected.truncated) try actual.skip(); // skip file content
+            }
         }
+        try std.testing.expectEqual(case.files.len, i);
+    }
+}
 
-        inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
-            var fbs = std.io.fixedBufferStream(&self.scratch);
-            try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null);
-            return fbs.getWritten();
-        }
+// used in test to calculate file chksum
+const Md5Writer = struct {
+    h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}),
 
-        inline fn eql(a: []const u8, b: []const u8) bool {
-            return std.mem.eql(u8, a, b);
-        }
+    pub fn writeAll(self: *Md5Writer, buf: []const u8) !void {
+        self.h.update(buf);
+    }
 
-        inline fn hasNull(str: []const u8) bool {
-            return (std.mem.indexOfScalar(u8, str, 0)) != null;
-        }
+    pub fn writeByte(self: *Md5Writer, byte: u8) !void {
+        self.h.update(&[_]u8{byte});
+    }
 
-        // Checks that each record ends with new line.
-        inline fn validateAttributeEnding(reader: ReaderType) !void {
-            if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
-        }
-    };
-}
+    pub fn chksum(self: *Md5Writer) [32]u8 {
+        var s = [_]u8{0} ** 16;
+        self.h.final(&s);
+        return std.fmt.bytesToHex(s, .lower);
+    }
+};
 
 test "tar PaxReader" {
     const Attr = struct {
@@ -927,171 +1096,3 @@ test "tar PaxReader" {
         try std.testing.expect(case.err == null);
     }
 }
-
-pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) {
-    return .{
-        .reader = reader,
-        .diagnostics = diagnostics,
-    };
-}
-
-fn TarReader(comptime ReaderType: type) type {
-    return struct {
-        reader: ReaderType,
-        diagnostics: ?*Options.Diagnostics,
-
-        // buffers for heeader and file attributes
-        header_buffer: [Header.SIZE]u8 = undefined,
-        file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
-        link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
-
-        // bytes of padding to the end of the block
-        padding: usize = 0,
-        // current tar file
-        file: File = undefined,
-
-        pub const File = struct {
-            name: []const u8, // name of file, symlink or directory
-            link_name: []const u8, // target name of symlink
-            size: usize, // size of the file in bytes
-            mode: u32,
-            kind: Header.Kind,
-
-            reader: ReaderType,
-
-            // Writes file content to writer.
-            pub fn write(self: File, writer: anytype) !void {
-                var buffer: [4096]u8 = undefined;
-
-                var n: usize = 0;
-                while (n < self.size) {
-                    const buf = buffer[0..@min(buffer.len, self.size - n)];
-                    try self.reader.readNoEof(buf);
-                    try writer.writeAll(buf);
-                    n += buf.len;
-                }
-            }
-
-            // Skips file content. Advances reader.
-            pub fn skip(self: File) !void {
-                try self.reader.skipBytes(self.size, .{});
-            }
-        };
-
-        const Self = @This();
-
-        fn readHeader(self: *Self) !?Header {
-            if (self.padding > 0) {
-                try self.reader.skipBytes(self.padding, .{});
-            }
-            const n = try self.reader.readAll(&self.header_buffer);
-            if (n == 0) return null;
-            if (n < Header.SIZE) return error.UnexpectedEndOfStream;
-            const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
-            if (try header.checkChksum() == 0) return null;
-            return header;
-        }
-
-        inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
-            assert(buffer.len >= size);
-            const buf = buffer[0..size];
-            try self.reader.readNoEof(buf);
-            return nullStr(buf);
-        }
-
-        inline fn initFile(self: *Self) void {
-            self.file = File{
-                .name = self.file_name_buffer[0..0],
-                .link_name = self.link_name_buffer[0..0],
-                .size = 0,
-                .kind = .normal,
-                .mode = 0,
-                .reader = self.reader,
-            };
-        }
-
-        // Number of padding bytes in the last file block.
-        inline fn blockPadding(size: usize) usize {
-            const block_rounded = std.mem.alignForward(usize, size, Header.SIZE); // size rounded to te block boundary
-            return block_rounded - size;
-        }
-
-        // Externally, `next` iterates through the tar archive as if it is a
-        // series of files. Internally, the tar format often uses fake "files"
-        // to add meta data that describes the next file. These meta data
-        // "files" should not normally be visible to the outside. As such, this
-        // loop iterates through one or more "header files" until it finds a
-        // "normal file".
-        pub fn next(self: *Self) !?File {
-            self.initFile();
-
-            while (try self.readHeader()) |header| {
-                const kind = header.kind();
-                const size: usize = @intCast(try header.size());
-                self.padding = blockPadding(size);
-
-                switch (kind) {
-                    // File types to retrun upstream
-                    .directory, .normal, .symbolic_link => {
-                        self.file.kind = kind;
-                        self.file.mode = try header.mode();
-
-                        // set file attributes if not already set by prefix/extended headers
-                        if (self.file.size == 0) {
-                            self.file.size = size;
-                        }
-                        if (self.file.link_name.len == 0) {
-                            self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]);
-                        }
-                        if (self.file.name.len == 0) {
-                            self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]);
-                        }
-
-                        self.padding = blockPadding(self.file.size);
-                        return self.file;
-                    },
-                    // Prefix header types
-                    .gnu_long_name => {
-                        self.file.name = try self.readString(size, &self.file_name_buffer);
-                    },
-                    .gnu_long_link => {
-                        self.file.link_name = try self.readString(size, &self.link_name_buffer);
-                    },
-                    .extended_header => {
-                        // Use just attributes from last extended header.
-                        self.initFile();
-
-                        var rdr = paxReader(self.reader, size);
-                        while (try rdr.next()) |attr| {
-                            switch (attr.kind) {
-                                .path => {
-                                    self.file.name = try attr.value(&self.file_name_buffer);
-                                },
-                                .linkpath => {
-                                    self.file.link_name = try attr.value(&self.link_name_buffer);
-                                },
-                                .size => {
-                                    var buf: [64]u8 = undefined;
-                                    self.file.size = try std.fmt.parseInt(usize, try attr.value(&buf), 10);
-                                },
-                            }
-                        }
-                    },
-                    // Ignored header type
-                    .global_extended_header => {
-                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
-                    },
-                    // All other are unsupported header types
-                    else => {
-                        const d = self.diagnostics orelse return error.TarUnsupportedHeader;
-                        try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
-                            .file_name = try d.allocator.dupe(u8, header.name()),
-                            .file_type = kind,
-                        } });
-                    },
-                }
-            }
-            return null;
-        }
-    };
-}