Commit 6d5283e835
Changed files (1)
lib
std
lib/std/tar.zig
@@ -62,8 +62,10 @@ pub const Options = struct {
};
};
+const block_size = 512;
+
pub const Header = struct {
- bytes: *const [512]u8,
+ bytes: *const [block_size]u8,
pub const FileType = enum(u8) {
normal_alias = 0,
@@ -135,7 +137,7 @@ pub const Header = struct {
return header.bytes[start..i];
}
- pub fn isZeroBlock(header: Header) bool {
+ pub fn isZero(header: Header) bool {
for (header.bytes) |b| {
if (b != 0) return false;
}
@@ -146,7 +148,7 @@ pub const Header = struct {
fn BufferedReader(comptime ReaderType: type) type {
return struct {
unbuffered_reader: ReaderType,
- buffer: [512 * 8]u8 = undefined,
+ buffer: [block_size * 8]u8 = undefined,
start: usize = 0,
end: usize = 0,
@@ -161,6 +163,17 @@ fn BufferedReader(comptime ReaderType: type) type {
return self.buffer[self.start..self.end];
}
+ pub fn readBlock(self: *Self) !?[]const u8 {
+ const block_bytes = try self.readChunk(block_size * 2);
+ switch (block_bytes.len) {
+ 0 => return null,
+ 1...(block_size - 1) => return error.UnexpectedEndOfStream,
+ else => {},
+ }
+ self.advance(block_size);
+ return block_bytes[0..block_size];
+ }
+
pub fn advance(self: *Self, count: usize) void {
self.start += count;
assert(self.start <= self.end);
@@ -175,6 +188,14 @@ fn BufferedReader(comptime ReaderType: type) type {
}
}
+ pub fn skipPadding(self: *Self, file_size: usize) !void {
+ return self.skip(filePadding(file_size));
+ }
+
+ pub fn skipFile(self: *Self, file_size: usize) !void {
+ return self.skip(roundedFileSize(file_size));
+ }
+
inline fn ensureCapacity(self: *Self, count: usize) void {
if (self.buffer.len - self.start < count) {
const dest_end = self.end - self.start;
@@ -185,179 +206,200 @@ fn BufferedReader(comptime ReaderType: type) type {
}
pub fn write(self: *Self, writer: anytype, size: usize) !void {
- const rounded_file_size = std.mem.alignForward(usize, size, 512);
- const chunk_size = rounded_file_size + 512;
- const pad_len: usize = rounded_file_size - size;
-
- var file_off: usize = 0;
- while (true) {
- const temp = try self.readChunk(chunk_size - file_off);
- if (temp.len == 0) return error.UnexpectedEndOfStream;
- const slice = temp[0..@min(size - file_off, temp.len)];
+ var rdr = self.sliceReader(size, true);
+ while (try rdr.next()) |slice| {
try writer.writeAll(slice);
+ }
+ }
- file_off += slice.len;
- self.advance(slice.len);
- if (file_off >= size) {
- self.advance(pad_len);
- return;
- }
+ // Copies exactly dst.len bytes from the stream into dst; returns dst.
+ pub fn copy(self: *Self, dst: []u8) ![]const u8 {
+ var rdr = self.sliceReader(dst.len, true);
+ var pos: usize = 0;
+ while (try rdr.next()) |slice| : (pos += slice.len) {
+ @memcpy(dst[pos .. pos + slice.len], slice);
}
+ return dst;
}
- pub fn copy(self: *Self, dst_buffer: []u8, size: usize) !void {
- const rounded_file_size = std.mem.alignForward(usize, size, 512);
- const chunk_size = rounded_file_size + 512;
-
- var i: usize = 0;
- while (i < size) {
- const slice = try self.readChunk(chunk_size - i);
- if (slice.len == 0) return error.UnexpectedEndOfStream;
- const copy_size: usize = @min(size - i, slice.len);
- @memcpy(dst_buffer[i .. i + copy_size], slice[0..copy_size]);
- self.advance(copy_size);
- i += copy_size;
+ const SliceReader = struct {
+ size: usize,
+ chunk_size: usize,
+ offset: usize,
+ reader: *Self,
+ auto_advance: bool,
+
+ fn next(self: *@This()) !?[]const u8 {
+ if (self.offset >= self.size) return null;
+
+ const temp = try self.reader.readChunk(self.chunk_size - self.offset);
+ if (temp.len == 0) return error.UnexpectedEndOfStream;
+ const slice = temp[0..@min(self.remainingSize(), temp.len)];
+ if (self.auto_advance) try self.advance(slice.len);
+ return slice;
+ }
+
+ fn advance(self: *@This(), len: usize) !void {
+ self.offset += len;
+ try self.reader.skip(len);
}
+
+ fn copy(self: *@This(), dst: []u8) ![]const u8 {
+ _ = try self.reader.copy(dst);
+ self.offset += dst.len;
+ return dst;
+ }
+
+ fn remainingSize(self: *@This()) usize {
+ return self.size - self.offset;
+ }
+ };
+
+ pub fn sliceReader(self: *Self, size: usize, auto_advance: bool) Self.SliceReader {
+ return .{
+ .size = size,
+ .chunk_size = roundedFileSize(size) + block_size,
+ .offset = 0,
+ .reader = self,
+ .auto_advance = auto_advance,
+ };
}
};
}
+// file_size rounded to the block boundary
+inline fn roundedFileSize(file_size: usize) usize {
+ return std.mem.alignForward(usize, file_size, block_size);
+}
+
+// number of padding bytes at the last file block
+inline fn filePadding(file_size: usize) usize {
+ return roundedFileSize(file_size) - file_size;
+}
+
fn Iterator(comptime ReaderType: type) type {
+ const BufferedReaderType = BufferedReader(ReaderType);
return struct {
- file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
- file_name_len: usize = 0,
- link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
- link_name_len: usize = 0,
+ attrs: struct {
+ buffer: [std.fs.MAX_PATH_BYTES * 2]u8 = undefined,
+ tail: usize = 0,
+
+ fn alloc(self: *@This(), size: usize) ![]u8 {
+ if (size > self.len()) return error.NameTooLong;
+ const head = self.tail;
+ self.tail += size;
+ assert(self.tail <= self.buffer.len);
+ return self.buffer[head..self.tail];
+ }
+
+ fn free(self: *@This()) void {
+ self.tail = 0;
+ }
+
+ fn len(self: *@This()) usize {
+ return self.buffer.len - self.tail;
+ }
+ } = .{},
- reader: BufferedReader(ReaderType),
+ reader: BufferedReaderType,
diagnostics: ?*Options.Diagnostics,
const Self = @This();
const File = struct {
- name: []const u8,
- link_name: []const u8,
- size: usize,
- file_type: Header.FileType,
- iter: *Self,
+ name: []const u8 = &[_]u8{},
+ link_name: []const u8 = &[_]u8{},
+ size: usize = 0,
+ file_type: Header.FileType = .normal,
+ reader: *BufferedReaderType,
pub fn write(self: File, writer: anytype) !void {
- try self.iter.reader.write(writer, self.size);
+ try self.reader.write(writer, self.size);
+ try self.skipPadding();
}
pub fn skip(self: File) !void {
- const rounded_file_size = std.mem.alignForward(usize, self.size, 512);
- try self.iter.reader.skip(rounded_file_size);
+ try self.reader.skip(roundedFileSize(self.size));
+ }
+
+ fn skipPadding(self: File) !void {
+ try self.reader.skip(filePadding(self.size));
}
fn chksum(self: File) ![16]u8 {
- var cs = [_]u8{0} ** 16;
- if (self.size == 0) return cs;
+ var sum = [_]u8{0} ** 16;
+ if (self.size == 0) return sum;
- var buffer: [512]u8 = undefined;
+ var rdr = self.reader.sliceReader(self.size, true);
var h = std.crypto.hash.Md5.init(.{});
-
- var remaining_bytes: usize = self.size;
- while (remaining_bytes > 0) {
- const copy_size = @min(buffer.len, remaining_bytes);
- try self.iter.reader.copy(&buffer, copy_size);
- h.update(buffer[0..copy_size]);
- remaining_bytes -= copy_size;
+ while (try rdr.next()) |slice| {
+ h.update(slice);
}
- h.final(&cs);
+ h.final(&sum);
try self.skipPadding();
- return cs;
- }
-
- fn skipPadding(self: File) !void {
- const rounded_file_size = std.mem.alignForward(usize, self.size, 512);
- const pad_len: usize = rounded_file_size - self.size;
- self.iter.reader.advance(pad_len);
+ return sum;
}
};
+ // Externally, next() iterates through the tar archive as if it is a series of
+ // files. Internally, the tar format often uses fake "files" to add metadata
+ // that describes the next file. This metadata should not
+ // normally be visible to the outside. As such, this loop iterates through
+ // one or more "header files" until it finds a "normal file".
pub fn next(self: *Self) !?File {
- self.file_name_len = 0;
- self.link_name_len = 0;
-
- while (true) {
- const chunk = try self.reader.readChunk(1024);
- switch (chunk.len) {
- 0 => return null,
- 1...511 => return error.UnexpectedEndOfStream,
- else => {},
- }
- self.reader.advance(512);
-
- const header: Header = .{ .bytes = chunk[0..512] };
- if (header.isZeroBlock()) return null;
- const file_size = try header.fileSize();
- const rounded_file_size: usize = std.mem.alignForward(usize, file_size, 512);
- const file_type = header.fileType();
- const link_name = if (self.link_name_len == 0)
- header.linkName()
- else
- self.link_name_buffer[0..self.link_name_len];
- const file_name = if (self.file_name_len == 0)
- try header.fullFileName(&self.file_name_buffer)
- else
- self.file_name_buffer[0..self.file_name_len];
+ var file: File = .{ .reader = &self.reader };
+ self.attrs.free();
+
+ while (try self.reader.readBlock()) |block_bytes| {
+ const block: Header = .{ .bytes = block_bytes[0..block_size] };
+ if (block.isZero()) return null;
+ const file_type = block.fileType();
+ const file_size = try block.fileSize();
switch (file_type) {
.directory, .normal, .symbolic_link => {
- return File{
- .name = file_name,
- .size = file_size,
- .file_type = file_type,
- .link_name = link_name,
- .iter = self,
- };
+ if (file.size == 0) file.size = file_size;
+ if (file.name.len == 0)
+ file.name = try block.fullFileName((try self.attrs.alloc(std.fs.MAX_PATH_BYTES))[0..std.fs.MAX_PATH_BYTES]);
+ if (file.link_name.len == 0) file.link_name = block.linkName();
+ file.file_type = file_type;
+ return file;
},
.global_extended_header => {
- self.reader.skip(rounded_file_size) catch return error.TarHeadersTooBig;
+ self.reader.skipFile(file_size) catch return error.TarHeadersTooBig;
},
.extended_header => {
if (file_size == 0) continue;
- const chunk_size: usize = rounded_file_size + 512;
- var data_off: usize = 0;
- while (data_off < file_size) {
- const slice = try self.reader.readChunk(chunk_size - data_off);
- if (slice.len == 0) return error.UnexpectedEndOfStream;
- const remaining_size: usize = file_size - data_off;
- const attr_info = try parsePaxAttribute(slice[0..@min(remaining_size, slice.len)], remaining_size);
-
- if (std.mem.eql(u8, attr_info.key, "path")) {
- if (attr_info.value_len > self.file_name_buffer.len) return error.NameTooLong;
- self.reader.advance(attr_info.value_off);
- try self.reader.copy(&self.file_name_buffer, attr_info.value_len);
- self.file_name_len = attr_info.value_len;
- self.reader.advance(1);
- } else if (std.mem.eql(u8, attr_info.key, "linkpath")) {
- if (attr_info.value_len > self.link_name_buffer.len) return error.NameTooLong;
- self.reader.advance(attr_info.value_off);
- try self.reader.copy(&self.link_name_buffer, attr_info.value_len);
- self.link_name_len = attr_info.value_len;
- self.reader.advance(1);
+ var rdr = self.reader.sliceReader(file_size, false);
+ while (try rdr.next()) |slice| {
+ const attr = try parsePaxAttribute(slice, rdr.remainingSize());
+ try rdr.advance(attr.value_off);
+ if (attr.is("path")) {
+ file.name = try rdr.copy(try self.attrs.alloc(attr.value_len));
+ } else if (attr.is("linkpath")) {
+ file.link_name = try rdr.copy(try self.attrs.alloc(attr.value_len));
+ } else if (attr.is("size")) {
+ var buf = [_]u8{'0'} ** 32;
+ file.size = try std.fmt.parseInt(usize, try rdr.copy(buf[0..attr.value_len]), 10);
} else {
- try self.reader.skip(attr_info.size);
+ try rdr.advance(attr.value_len);
}
- data_off += attr_info.size;
+ try rdr.advance(1);
}
- try self.reader.skip(rounded_file_size - data_off);
-
- continue;
+ try self.reader.skipPadding(file_size);
},
.hard_link => return error.TarUnsupportedFileType,
else => {
const d = self.diagnostics orelse return error.TarUnsupportedFileType;
try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
- .file_name = try d.allocator.dupe(u8, file_name),
+ .file_name = try d.allocator.dupe(u8, block.name()),
.file_type = file_type,
} });
},
}
}
+ return null;
}
};
}
@@ -481,6 +523,10 @@ const PaxAttributeInfo = struct {
key: []const u8,
value_off: usize,
value_len: usize,
+
+ inline fn is(self: @This(), key: []const u8) bool {
+ return (std.mem.eql(u8, self.key, key));
+ }
};
fn parsePaxAttribute(data: []const u8, max_size: usize) !PaxAttributeInfo {
@@ -515,7 +561,7 @@ test parsePaxAttribute {
try expectError(error.InvalidPaxAttribute, parsePaxAttribute("", 0));
}
-const std = @import("std.zig");
+const std = @import("std");
const assert = std.debug.assert;
const TestCase = struct {
@@ -628,19 +674,19 @@ test "Go test cases" {
// },
//
// TODO: giving wrong result because we are not reading pax size header
- // .{
- // .path = "pax-pos-size-file.tar",
- // .files = &[_]TestCase.File{
- // .{
- // .name = "foo",
- // .size = 999,
- // .file_type = .normal,
- // },
- // },
- // .chksums = &[_][]const u8{
- // "0afb597b283fe61b5d4879669a350556",
- // },
- // },
+ .{
+ .path = "pax-pos-size-file.tar",
+ .files = &[_]TestCase.File{
+ .{
+ .name = "foo",
+ .size = 999,
+ .file_type = .normal,
+ },
+ },
+ .chksums = &[_][]const u8{
+ "0afb597b283fe61b5d4879669a350556",
+ },
+ },
.{
// has pax records which we are not interested in
.path = "pax-records.tar",