Commit c07527abac
Changed files (1)
lib/std/tar.zig
@@ -15,8 +15,7 @@
/// GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
/// pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13
///
-//const std = @import("std.zig");
-const std = @import("std");
+const std = @import("std.zig");
const assert = std.debug.assert;
pub const Options = struct {
@@ -226,6 +225,276 @@ fn nullStr(str: []const u8) []const u8 {
return str;
}
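+/// Creates a tar reader over `reader`. If `diagnostics` is provided,
+/// unsupported entry types are collected there instead of failing the read.
+/// A minimal usage sketch (`out_writer` is a placeholder for any destination
+/// writer):
+///
+///     var tar = tarReader(file.reader(), null);
+///     while (try tar.next()) |entry| {
+///         switch (entry.kind) {
+///             .normal => try entry.write(out_writer),
+///             else => try entry.skip(),
+///         }
+///     }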
+pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) {
+ return .{
+ .reader = reader,
+ .diagnostics = diagnostics,
+ };
+}
+
+fn TarReader(comptime ReaderType: type) type {
+ return struct {
+ reader: ReaderType,
+ diagnostics: ?*Options.Diagnostics,
+
+ // buffers for header and file attributes
+ header_buffer: [Header.SIZE]u8 = undefined,
+ file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
+ link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
+
+ // bytes of padding to the end of the block
+ padding: usize = 0,
+ // current tar file
+ file: File = undefined,
+
+ pub const File = struct {
+ name: []const u8, // name of file, symlink or directory
+ link_name: []const u8, // target name of symlink
+ size: usize, // size of the file in bytes
+ mode: u32,
+ kind: Header.Kind,
+
+ reader: ReaderType,
+
+ // Writes file content to writer.
+ pub fn write(self: File, writer: anytype) !void {
+ var buffer: [4096]u8 = undefined;
+
+ var n: usize = 0;
+ while (n < self.size) {
+ const buf = buffer[0..@min(buffer.len, self.size - n)];
+ try self.reader.readNoEof(buf);
+ try writer.writeAll(buf);
+ n += buf.len;
+ }
+ }
+
+ // Skips file content. Advances reader.
+ pub fn skip(self: File) !void {
+ try self.reader.skipBytes(self.size, .{});
+ }
+ };
+
+ const Self = @This();
+
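+ // Reads the next header block, first skipping any padding left over from
+ // the previous file. Returns null at the end of the archive: end of
+ // stream or a header block whose checksum is zero (the empty terminating block).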
+ fn readHeader(self: *Self) !?Header {
+ if (self.padding > 0) {
+ try self.reader.skipBytes(self.padding, .{});
+ }
+ const n = try self.reader.readAll(&self.header_buffer);
+ if (n == 0) return null;
+ if (n < Header.SIZE) return error.UnexpectedEndOfStream;
+ const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
+ if (try header.checkChksum() == 0) return null;
+ return header;
+ }
+
+ inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
+ assert(buffer.len >= size);
+ const buf = buffer[0..size];
+ try self.reader.readNoEof(buf);
+ return nullStr(buf);
+ }
+
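+ // Resets the current file to empty defaults before attributes for the
+ // next entry are collected.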
+ inline fn initFile(self: *Self) void {
+ self.file = File{
+ .name = self.file_name_buffer[0..0],
+ .link_name = self.link_name_buffer[0..0],
+ .size = 0,
+ .kind = .normal,
+ .mode = 0,
+ .reader = self.reader,
+ };
+ }
+
+ // Number of padding bytes in the last file block.
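+ // E.g. with 512-byte blocks (Header.SIZE): blockPadding(10) == 502,
+ // blockPadding(512) == 0, blockPadding(0) == 0.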
+ inline fn blockPadding(size: usize) usize {
+ const block_rounded = std.mem.alignForward(usize, size, Header.SIZE); // size rounded to the block boundary
+ return block_rounded - size;
+ }
+
+ /// Iterates through the tar archive as if it is a series of files.
+ /// Internally, the tar format often uses entries (header with optional
+ /// content) to add metadata that describes the next file. These
+ /// entries should not normally be visible to the outside. As such, this
+ /// function iterates through one or more entries until it has collected
+ /// all file attributes.
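+ ///
+ /// The returned File's name and link_name point into the reader's
+ /// internal buffers and remain valid only until the next call to `next`.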
+ pub fn next(self: *Self) !?File {
+ self.initFile();
+
+ while (try self.readHeader()) |header| {
+ const kind = header.kind();
+ const size: usize = @intCast(try header.size());
+ self.padding = blockPadding(size);
+
+ switch (kind) {
+ // File types to return upstream
+ .directory, .normal, .symbolic_link => {
+ self.file.kind = kind;
+ self.file.mode = try header.mode();
+
+ // set file attributes if not already set by prefix/extended headers
+ if (self.file.size == 0) {
+ self.file.size = size;
+ }
+ if (self.file.link_name.len == 0) {
+ self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]);
+ }
+ if (self.file.name.len == 0) {
+ self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]);
+ }
+
+ self.padding = blockPadding(self.file.size);
+ return self.file;
+ },
+ // Prefix header types
+ .gnu_long_name => {
+ self.file.name = try self.readString(size, &self.file_name_buffer);
+ },
+ .gnu_long_link => {
+ self.file.link_name = try self.readString(size, &self.link_name_buffer);
+ },
+ .extended_header => {
+ // Use only the attributes from the last extended header.
+ self.initFile();
+
+ var rdr = paxReader(self.reader, size);
+ while (try rdr.next()) |attr| {
+ switch (attr.kind) {
+ .path => {
+ self.file.name = try attr.value(&self.file_name_buffer);
+ },
+ .linkpath => {
+ self.file.link_name = try attr.value(&self.link_name_buffer);
+ },
+ .size => {
+ var buf: [64]u8 = undefined;
+ self.file.size = try std.fmt.parseInt(usize, try attr.value(&buf), 10);
+ },
+ }
+ }
+ },
+ // Ignored header type
+ .global_extended_header => {
+ self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
+ },
+ // All other header types are unsupported
+ else => {
+ const d = self.diagnostics orelse return error.TarUnsupportedHeader;
+ try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
+ .file_name = try d.allocator.dupe(u8, header.name()),
+ .file_type = kind,
+ } });
+ },
+ }
+ }
+ return null;
+ }
+ };
+}
+
+// Pax attributes reader.
+// Size is the length of the pax extended header in the reader.
+fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) {
+ return PaxReader(@TypeOf(reader)){
+ .reader = reader,
+ .size = size,
+ };
+}
+
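+// Pax keywords that this reader acts on; records with any other keyword
+// are skipped in PaxReader.next.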
+const PaxAttributeKind = enum {
+ path,
+ linkpath,
+ size,
+};
+
+fn PaxReader(comptime ReaderType: type) type {
+ return struct {
+ size: usize, // cumulative size of all pax attributes
+ reader: ReaderType,
+ // scratch buffer used for reading attribute length and keyword
+ scratch: [128]u8 = undefined,
+
+ const Self = @This();
+
+ const Attribute = struct {
+ kind: PaxAttributeKind,
+ len: usize, // length of the attribute value
+ reader: ReaderType, // reader positioned at value start
+
+ // Copies the pax attribute value into the destination buffer.
+ // The destination buffer must be at least `len` bytes long.
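+ // The value and its trailing newline are consumed from the reader, so
+ // this should be called exactly once per returned attribute.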
+ pub fn value(self: Attribute, dst: []u8) ![]const u8 {
+ assert(self.len <= dst.len);
+ const buf = dst[0..self.len];
+ const n = try self.reader.readAll(buf);
+ if (n < self.len) return error.UnexpectedEndOfStream;
+ try validateAttributeEnding(self.reader);
+ if (hasNull(buf)) return error.PaxNullInValue;
+ return buf;
+ }
+ };
+
+ // Iterates over pax attributes. Returns only known attribute kinds.
+ // The caller must call `value` on each returned Attribute to advance the reader past its value.
+ pub fn next(self: *Self) !?Attribute {
+ // Pax extended header consists of one or more attributes, each constructed as follows:
+ // "%d %s=%s\n", <length>, <keyword>, <value>
+ while (self.size > 0) {
+ const length_buf = try self.readUntil(' ');
+ const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes
+
+ const keyword = try self.readUntil('=');
+ if (hasNull(keyword)) return error.PaxNullInKeyword;
+
+ // calculate value_len
+ const value_start = length_buf.len + keyword.len + 2; // 2 separators
+ if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
+ const value_len = length - value_start - 1; // \n separator at end
+ self.size -= length;
+
+ const kind: PaxAttributeKind = if (eql(keyword, "path"))
+ .path
+ else if (eql(keyword, "linkpath"))
+ .linkpath
+ else if (eql(keyword, "size"))
+ .size
+ else {
+ try self.reader.skipBytes(value_len, .{});
+ try validateAttributeEnding(self.reader);
+ continue;
+ };
+ return Attribute{
+ .kind = kind,
+ .len = value_len,
+ .reader = self.reader,
+ };
+ }
+
+ return null;
+ }
+
+ inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
+ var fbs = std.io.fixedBufferStream(&self.scratch);
+ try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null);
+ return fbs.getWritten();
+ }
+
+ inline fn eql(a: []const u8, b: []const u8) bool {
+ return std.mem.eql(u8, a, b);
+ }
+
+ inline fn hasNull(str: []const u8) bool {
+ return (std.mem.indexOfScalar(u8, str, 0)) != null;
+ }
+
+ // Checks that each record ends with a newline.
+ inline fn validateAttributeEnding(reader: ReaderType) !void {
+ if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
+ }
+ };
+}
+
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
switch (options.mode_mode) {
.ignore => {},
@@ -639,170 +908,70 @@ test "tar run Go test cases" {
.{
// Size in gnu extended format, and name in pax attribute.
.path = "writer-big-long.tar",
- .files = &[_]Case.File{
- .{
- .name = "longname/" ** 15 ++ "16gig.txt",
- .size = 16 * 1024 * 1024 * 1024,
- .mode = 0o644,
- .truncated = true,
- },
- },
- },
- };
-
- for (cases) |case| {
- var fs_file = try test_dir.openFile(case.path, .{});
- defer fs_file.close();
-
- //var iter = iterator(fs_file.reader(), null);
- var iter = tarReader(fs_file.reader(), null);
- var i: usize = 0;
- while (iter.next() catch |err| {
- if (case.err) |e| {
- try std.testing.expectEqual(e, err);
- continue;
- } else {
- return err;
- }
- }) |actual| : (i += 1) {
- const expected = case.files[i];
- try std.testing.expectEqualStrings(expected.name, actual.name);
- try std.testing.expectEqual(expected.size, actual.size);
- try std.testing.expectEqual(expected.kind, actual.kind);
- try std.testing.expectEqual(expected.mode, actual.mode);
- try std.testing.expectEqualStrings(expected.link_name, actual.link_name);
-
- if (case.chksums.len > i) {
- var md5writer = Md5Writer{};
- try actual.write(&md5writer);
- const chksum = md5writer.chksum();
- try std.testing.expectEqualStrings(case.chksums[i], &chksum);
- } else {
- if (!expected.truncated) try actual.skip(); // skip file content
- }
- }
- try std.testing.expectEqual(case.files.len, i);
- }
-}
-
-// used in test to calculate file chksum
-const Md5Writer = struct {
- h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}),
-
- pub fn writeAll(self: *Md5Writer, buf: []const u8) !void {
- self.h.update(buf);
- }
-
- pub fn writeByte(self: *Md5Writer, byte: u8) !void {
- self.h.update(&[_]u8{byte});
- }
-
- pub fn chksum(self: *Md5Writer) [32]u8 {
- var s = [_]u8{0} ** 16;
- self.h.final(&s);
- return std.fmt.bytesToHex(s, .lower);
- }
-};
-
-fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) {
- return PaxReader(@TypeOf(reader)){
- .reader = reader,
- .size = size,
- };
-}
-
-const PaxAttributeKind = enum {
- path,
- linkpath,
- size,
-};
-
-fn PaxReader(comptime ReaderType: type) type {
- return struct {
- size: usize, // cumulative size of all pax attributes
- reader: ReaderType,
- // scratch buffer used for reading attribute length and keyword
- scratch: [128]u8 = undefined,
-
- const Self = @This();
-
- const Attribute = struct {
- kind: PaxAttributeKind,
- len: usize, // length of the attribute value
- reader: ReaderType, // reader positioned at value start
-
- // Copies pax attribute value into destination buffer.
- // Must be called with destination buffer of size at least Attribute.len.
- pub fn value(self: Attribute, dst: []u8) ![]const u8 {
- assert(self.len <= dst.len);
- const buf = dst[0..self.len];
- const n = try self.reader.readAll(buf);
- if (n < self.len) return error.UnexpectedEndOfStream;
- try validateAttributeEnding(self.reader);
- if (hasNull(buf)) return error.PaxNullInValue;
- return buf;
- }
- };
-
- // Iterates over pax attributes. Returns known only known attributes.
- // Caller has to call value in Attribute, to advance reader across value.
- pub fn next(self: *Self) !?Attribute {
- // Pax extended header consists of one or more attributes, each constructed as follows:
- // "%d %s=%s\n", <length>, <keyword>, <value>
- while (self.size > 0) {
- const length_buf = try self.readUntil(' ');
- const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes
-
- const keyword = try self.readUntil('=');
- if (hasNull(keyword)) return error.PaxNullInKeyword;
+ .files = &[_]Case.File{
+ .{
+ .name = "longname/" ** 15 ++ "16gig.txt",
+ .size = 16 * 1024 * 1024 * 1024,
+ .mode = 0o644,
+ .truncated = true,
+ },
+ },
+ },
+ };
- // calculate value_len
- const value_start = length_buf.len + keyword.len + 2; // 2 separators
- if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
- const value_len = length - value_start - 1; // \n separator at end
- self.size -= length;
+ for (cases) |case| {
+ var fs_file = try test_dir.openFile(case.path, .{});
+ defer fs_file.close();
- const kind: PaxAttributeKind = if (eql(keyword, "path"))
- .path
- else if (eql(keyword, "linkpath"))
- .linkpath
- else if (eql(keyword, "size"))
- .size
- else {
- try self.reader.skipBytes(value_len, .{});
- try validateAttributeEnding(self.reader);
- continue;
- };
- return Attribute{
- .kind = kind,
- .len = value_len,
- .reader = self.reader,
- };
+ //var iter = iterator(fs_file.reader(), null);
+ var iter = tarReader(fs_file.reader(), null);
+ var i: usize = 0;
+ while (iter.next() catch |err| {
+ if (case.err) |e| {
+ try std.testing.expectEqual(e, err);
+ continue;
+ } else {
+ return err;
+ }
+ }) |actual| : (i += 1) {
+ const expected = case.files[i];
+ try std.testing.expectEqualStrings(expected.name, actual.name);
+ try std.testing.expectEqual(expected.size, actual.size);
+ try std.testing.expectEqual(expected.kind, actual.kind);
+ try std.testing.expectEqual(expected.mode, actual.mode);
+ try std.testing.expectEqualStrings(expected.link_name, actual.link_name);
- return null;
+ if (case.chksums.len > i) {
+ var md5writer = Md5Writer{};
+ try actual.write(&md5writer);
+ const chksum = md5writer.chksum();
+ try std.testing.expectEqualStrings(case.chksums[i], &chksum);
+ } else {
+ if (!expected.truncated) try actual.skip(); // skip file content
+ }
+ }
+ try std.testing.expectEqual(case.files.len, i);
+ }
+}
- inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
- var fbs = std.io.fixedBufferStream(&self.scratch);
- try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null);
- return fbs.getWritten();
- }
+// used in tests to calculate the file checksum
+const Md5Writer = struct {
+ h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}),
- inline fn eql(a: []const u8, b: []const u8) bool {
- return std.mem.eql(u8, a, b);
- }
+ pub fn writeAll(self: *Md5Writer, buf: []const u8) !void {
+ self.h.update(buf);
+ }
- inline fn hasNull(str: []const u8) bool {
- return (std.mem.indexOfScalar(u8, str, 0)) != null;
- }
+ pub fn writeByte(self: *Md5Writer, byte: u8) !void {
+ self.h.update(&[_]u8{byte});
+ }
- // Checks that each record ends with new line.
- inline fn validateAttributeEnding(reader: ReaderType) !void {
- if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
- }
- };
-}
+ pub fn chksum(self: *Md5Writer) [32]u8 {
+ var s = [_]u8{0} ** 16;
+ self.h.final(&s);
+ return std.fmt.bytesToHex(s, .lower);
+ }
+};
test "tar PaxReader" {
const Attr = struct {
@@ -927,171 +1096,3 @@ test "tar PaxReader" {
try std.testing.expect(case.err == null);
}
}
-
-pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) {
- return .{
- .reader = reader,
- .diagnostics = diagnostics,
- };
-}
-
-fn TarReader(comptime ReaderType: type) type {
- return struct {
- reader: ReaderType,
- diagnostics: ?*Options.Diagnostics,
-
- // buffers for heeader and file attributes
- header_buffer: [Header.SIZE]u8 = undefined,
- file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
- link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
-
- // bytes of padding to the end of the block
- padding: usize = 0,
- // current tar file
- file: File = undefined,
-
- pub const File = struct {
- name: []const u8, // name of file, symlink or directory
- link_name: []const u8, // target name of symlink
- size: usize, // size of the file in bytes
- mode: u32,
- kind: Header.Kind,
-
- reader: ReaderType,
-
- // Writes file content to writer.
- pub fn write(self: File, writer: anytype) !void {
- var buffer: [4096]u8 = undefined;
-
- var n: usize = 0;
- while (n < self.size) {
- const buf = buffer[0..@min(buffer.len, self.size - n)];
- try self.reader.readNoEof(buf);
- try writer.writeAll(buf);
- n += buf.len;
- }
- }
-
- // Skips file content. Advances reader.
- pub fn skip(self: File) !void {
- try self.reader.skipBytes(self.size, .{});
- }
- };
-
- const Self = @This();
-
- fn readHeader(self: *Self) !?Header {
- if (self.padding > 0) {
- try self.reader.skipBytes(self.padding, .{});
- }
- const n = try self.reader.readAll(&self.header_buffer);
- if (n == 0) return null;
- if (n < Header.SIZE) return error.UnexpectedEndOfStream;
- const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
- if (try header.checkChksum() == 0) return null;
- return header;
- }
-
- inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
- assert(buffer.len >= size);
- const buf = buffer[0..size];
- try self.reader.readNoEof(buf);
- return nullStr(buf);
- }
-
- inline fn initFile(self: *Self) void {
- self.file = File{
- .name = self.file_name_buffer[0..0],
- .link_name = self.link_name_buffer[0..0],
- .size = 0,
- .kind = .normal,
- .mode = 0,
- .reader = self.reader,
- };
- }
-
- // Number of padding bytes in the last file block.
- inline fn blockPadding(size: usize) usize {
- const block_rounded = std.mem.alignForward(usize, size, Header.SIZE); // size rounded to te block boundary
- return block_rounded - size;
- }
-
- // Externally, `next` iterates through the tar archive as if it is a
- // series of files. Internally, the tar format often uses fake "files"
- // to add meta data that describes the next file. These meta data
- // "files" should not normally be visible to the outside. As such, this
- // loop iterates through one or more "header files" until it finds a
- // "normal file".
- pub fn next(self: *Self) !?File {
- self.initFile();
-
- while (try self.readHeader()) |header| {
- const kind = header.kind();
- const size: usize = @intCast(try header.size());
- self.padding = blockPadding(size);
-
- switch (kind) {
- // File types to retrun upstream
- .directory, .normal, .symbolic_link => {
- self.file.kind = kind;
- self.file.mode = try header.mode();
-
- // set file attributes if not already set by prefix/extended headers
- if (self.file.size == 0) {
- self.file.size = size;
- }
- if (self.file.link_name.len == 0) {
- self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]);
- }
- if (self.file.name.len == 0) {
- self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]);
- }
-
- self.padding = blockPadding(self.file.size);
- return self.file;
- },
- // Prefix header types
- .gnu_long_name => {
- self.file.name = try self.readString(size, &self.file_name_buffer);
- },
- .gnu_long_link => {
- self.file.link_name = try self.readString(size, &self.link_name_buffer);
- },
- .extended_header => {
- // Use just attributes from last extended header.
- self.initFile();
-
- var rdr = paxReader(self.reader, size);
- while (try rdr.next()) |attr| {
- switch (attr.kind) {
- .path => {
- self.file.name = try attr.value(&self.file_name_buffer);
- },
- .linkpath => {
- self.file.link_name = try attr.value(&self.link_name_buffer);
- },
- .size => {
- var buf: [64]u8 = undefined;
- self.file.size = try std.fmt.parseInt(usize, try attr.value(&buf), 10);
- },
- }
- }
- },
- // Ignored header type
- .global_extended_header => {
- self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
- },
- // All other are unsupported header types
- else => {
- const d = self.diagnostics orelse return error.TarUnsupportedHeader;
- try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
- .file_name = try d.allocator.dupe(u8, header.name()),
- .file_type = kind,
- } });
- },
- }
- }
- return null;
- }
- };
-}