master
//! A tar archive is a single ordinary file which can contain many files (or
//! directories, symlinks, ...). It is built from a series of blocks, each 512
//! bytes in size. The first block of each entry is a header which defines the
//! type, name, size, permissions and other attributes. The header is followed
//! by a series of blocks of file content, if the entry has any content. Content
//! is padded to the block size, so the next header always starts at a block
//! boundary.
7//!
8//! This simple format is extended by GNU and POSIX pax extensions to support
9//! file names longer than 256 bytes and additional attributes.
10//!
//! This is not a comprehensive tar parser. Here we only handle the file types
//! needed to support the Zig package manager: normal file, directory, symbolic
//! link. And a subset of attributes: name, size, permissions.
14//!
15//! GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
16//! pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13
17
18const std = @import("std");
19const assert = std.debug.assert;
20const testing = std.testing;
21
22pub const Writer = @import("tar/Writer.zig");
23
/// Provide this to receive detailed error messages.
/// When this is provided, some errors which would otherwise be returned
/// immediately will instead be added to this structure. The API user must check
/// the errors in diagnostics to know whether the operation succeeded or failed.
pub const Diagnostics = struct {
    /// Used for the `errors` list, for the strings stored inside each error,
    /// and for `root_dir`. `deinit` releases all of them.
    allocator: std.mem.Allocator,
    errors: std.ArrayList(Error) = .empty,

    /// Number of non-empty paths passed to `findRoot` so far.
    entries: usize = 0,
    /// Root directory shared by all entries seen so far, or "" when the
    /// entries do not share a single root directory.
    root_dir: []const u8 = "",

    pub const Error = union(enum) {
        unable_to_create_sym_link: struct {
            code: anyerror,
            file_name: []const u8,
            link_name: []const u8,
        },
        unable_to_create_file: struct {
            code: anyerror,
            file_name: []const u8,
        },
        unsupported_file_type: struct {
            file_name: []const u8,
            file_type: Header.Kind,
        },
        components_outside_stripped_prefix: struct {
            file_name: []const u8,
        },
    };

    /// Updates `entries` and `root_dir` with one more archive entry.
    /// The first entry's root directory is remembered (duplicated with
    /// `allocator`); as soon as a later entry has a different root directory,
    /// `root_dir` is reset to "". Empty paths are ignored.
    fn findRoot(d: *Diagnostics, kind: FileKind, path: []const u8) !void {
        if (path.len == 0) return;

        d.entries += 1;
        const root_dir = rootDir(path, kind);
        if (d.entries == 1) {
            d.root_dir = try d.allocator.dupe(u8, root_dir);
            return;
        }
        if (d.root_dir.len == 0 or std.mem.eql(u8, root_dir, d.root_dir))
            return;
        d.allocator.free(d.root_dir);
        d.root_dir = "";
    }

    // Returns root dir of the path, assumes non empty path.
    // One leading and one trailing '/' are ignored. A path without any further
    // '/' is its own root only when it is a directory; files and symlinks at
    // the top level have no root dir ("").
    fn rootDir(path: []const u8, kind: FileKind) []const u8 {
        const start_index: usize = if (path[0] == '/') 1 else 0;
        const end_index: usize = if (path[path.len - 1] == '/') path.len - 1 else path.len;
        // The path "/" gives start_index == 1 and end_index == 0; slicing with
        // start > end is illegal, so treat such paths as having no root dir.
        if (start_index >= end_index) return "";
        const buf = path[start_index..end_index];
        if (std.mem.indexOfScalarPos(u8, buf, 0, '/')) |idx| {
            return buf[0..idx];
        }

        return switch (kind) {
            .file => "",
            .sym_link => "",
            .directory => buf,
        };
    }

    test rootDir {
        const expectEqualStrings = testing.expectEqualStrings;
        try expectEqualStrings("", rootDir("a", .file));
        try expectEqualStrings("a", rootDir("a", .directory));
        try expectEqualStrings("b", rootDir("b", .directory));
        try expectEqualStrings("c", rootDir("/c", .directory));
        try expectEqualStrings("d", rootDir("/d/", .directory));
        try expectEqualStrings("a", rootDir("a/b", .directory));
        try expectEqualStrings("a", rootDir("a/b", .file));
        try expectEqualStrings("a", rootDir("a/b/c", .directory));
        // Paths made only of separators must not crash.
        try expectEqualStrings("", rootDir("/", .directory));
        try expectEqualStrings("", rootDir("//", .directory));
    }

    /// Frees every string owned by the collected errors, the error list itself
    /// and `root_dir`, then invalidates the structure.
    pub fn deinit(d: *Diagnostics) void {
        for (d.errors.items) |item| {
            switch (item) {
                .unable_to_create_sym_link => |info| {
                    d.allocator.free(info.file_name);
                    d.allocator.free(info.link_name);
                },
                .unable_to_create_file => |info| {
                    d.allocator.free(info.file_name);
                },
                .unsupported_file_type => |info| {
                    d.allocator.free(info.file_name);
                },
                .components_outside_stripped_prefix => |info| {
                    d.allocator.free(info.file_name);
                },
            }
        }
        d.errors.deinit(d.allocator);
        d.allocator.free(d.root_dir);
        d.* = undefined;
    }
};
120
/// Options accepted by `pipeToFileSystem`.
pub const PipeOptions = struct {
    /// How many leading path components are removed from every entry name
    /// before it is written to the file system.
    strip_components: u32 = 0,
    /// Selects how the "mode" stored in the tar header influences the mode of
    /// the files that get created.
    mode_mode: ModeMode = .executable_bit_only,
    /// When set, directory entries are not created on their own.
    exclude_empty_directories: bool = false,
    /// Optional sink for error messages collected while unpacking.
    diagnostics: ?*Diagnostics = null,

    pub const ModeMode = enum {
        /// The mode from the tar file is completely ignored. Files are created
        /// with the default mode when creating files.
        ignore,
        /// The mode from the tar file is inspected for the owner executable bit
        /// only. This bit is copied to the group and other executable bits.
        /// Other bits of the mode are left as the default when creating files.
        executable_bit_only,
    };
};
142
// View over one 512-byte tar header block with accessors for the fields
// used by this parser.
const Header = struct {
    const SIZE = 512;
    const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155)
    const LINK_NAME_SIZE = 100;

    bytes: *const [SIZE]u8,

    const Kind = enum(u8) {
        normal_alias = 0,
        normal = '0',
        hard_link = '1',
        symbolic_link = '2',
        character_special = '3',
        block_special = '4',
        directory = '5',
        fifo = '6',
        contiguous = '7',
        global_extended_header = 'g',
        extended_header = 'x',
        // Types 'L' and 'K' are used by the GNU format for a meta file
        // used to store the path or link name for the next file.
        gnu_long_name = 'L',
        gnu_long_link = 'K',
        gnu_sparse = 'S',
        solaris_extended_header = 'X',
        _,
    };

    /// Writes the entry name into `buffer` and returns the written part.
    /// For ustar headers with a non-empty prefix the result is
    /// "<prefix>/<name>", otherwise just the name.
    /// Fails with `error.TarInsufficientBuffer` when `buffer` cannot hold
    /// prefix, separator and name.
    /// TODO: check against "../" and other nefarious things
    pub fn fullName(header: Header, buffer: []u8) ![]const u8 {
        const base = header.name();
        const dir_prefix = header.prefix();
        if (buffer.len < base.len + dir_prefix.len + 1) return error.TarInsufficientBuffer;

        if (header.is_ustar() and dir_prefix.len != 0) {
            const joined = buffer[0 .. dir_prefix.len + 1 + base.len];
            @memcpy(joined[0..dir_prefix.len], dir_prefix);
            joined[dir_prefix.len] = '/';
            @memcpy(joined[dir_prefix.len + 1 ..], base);
            return joined;
        }

        const out = buffer[0..base.len];
        @memcpy(out, base);
        return out;
    }

    /// Copies the linkname field (the target path of a symbolic link) into
    /// `buffer` and returns the copied part; empty when the field is empty.
    pub fn linkName(header: Header, buffer: []u8) ![]const u8 {
        const target = header.str(157, 100);
        if (target.len == 0) return buffer[0..0];
        if (target.len > buffer.len) return error.TarInsufficientBuffer;
        const out = buffer[0..target.len];
        @memcpy(out, target);
        return out;
    }

    /// The name field, without the ustar prefix.
    pub fn name(header: Header) []const u8 {
        return header.str(0, 100);
    }

    /// The mode field, parsed as octal.
    pub fn mode(header: Header) !u32 {
        const value = try header.octal(100, 8);
        return @intCast(value);
    }

    /// The size field. Supports the octal encoding and the base-256 (binary)
    /// encoding that is marked by a leading 0x80 byte.
    pub fn size(header: Header) !u64 {
        const offset = 124;
        const width = 12;
        const field = header.bytes[offset..][0..width];
        switch (field[0]) {
            // A leading 0xff byte marks a negative number in two's complement
            // form, with all bytes of the field concatenated in big-endian
            // order.
            0xff => return error.TarNumericValueNegative,
            // A leading 0x80 byte means the non-leading bytes of the field are
            // concatenated in big-endian order. Only the last 8 bytes fit in
            // a u64, so the 3 bytes before them have to be zero.
            0x80 => {
                const too_big = field[1] != 0 or field[2] != 0 or field[3] != 0;
                if (too_big) return error.TarNumericValueTooBig;
                return std.mem.readInt(u64, field[4..12], .big);
            },
            else => return try header.octal(offset, width),
        }
    }

    /// The chksum field, parsed as octal.
    pub fn chksum(header: Header) !u64 {
        return header.octal(148, 8);
    }

    /// True when the magic field is "ustar" followed by NUL or a space.
    pub fn is_ustar(header: Header) bool {
        const magic = header.bytes[257..][0..6];
        if (!std.mem.eql(u8, magic[0..5], "ustar")) return false;
        return magic[5] == 0 or magic[5] == ' ';
    }

    /// The ustar prefix field.
    pub fn prefix(header: Header) []const u8 {
        return header.str(345, 155);
    }

    /// The entry type from the typeflag byte; a NUL byte is reported as `.normal`.
    pub fn kind(header: Header) Kind {
        const raw: Kind = @enumFromInt(header.bytes[156]);
        return if (raw == .normal_alias) .normal else raw;
    }

    // Field of `len` bytes at `start`, cut at the first NUL byte.
    fn str(header: Header, start: usize, len: usize) []const u8 {
        const field = header.bytes[start .. start + len];
        return nullStr(field);
    }

    // Parses the field of `len` bytes at `start` as an octal number.
    // Zero-filled octal number in ASCII. Each numeric field of width w
    // contains w minus 1 digits, and a null. Leading zeros/spaces and
    // trailing spaces/NULs are stripped first; a field with nothing left is 0.
    fn octal(header: Header, start: usize, len: usize) !u64 {
        const raw = header.bytes[start..][0..len];
        const without_leading = std.mem.trimStart(u8, raw, "0 ");
        const digits = std.mem.trimEnd(u8, without_leading, " \x00");
        if (digits.len == 0) return 0;
        return std.fmt.parseInt(u64, digits, 8) catch return error.TarHeader;
    }

    const Chksums = struct {
        unsigned: u64,
        signed: i64,
    };

    // Sum of all bytes in the header block, once with bytes taken as unsigned
    // and once as signed values. The chksum field is treated as if it were
    // filled with spaces (ASCII 32).
    fn computeChksum(header: Header) Chksums {
        var sums: Chksums = .{ .signed = 0, .unsigned = 0 };
        for (header.bytes, 0..) |byte, pos| {
            const in_chksum_field = pos >= 148 and pos < 156;
            const b: u8 = if (in_chksum_field) ' ' else byte;
            sums.unsigned += b;
            sums.signed += @as(i8, @bitCast(b));
        }
        return sums;
    }

    // Checks calculated chksum with value of chksum field.
    // Returns error or valid chksum value.
    // Zero value indicates empty block.
    pub fn checkChksum(header: Header) !u64 {
        const stored = try header.chksum();
        const computed = header.computeChksum();
        // An all-zero block sums to 8 * ' ' == 256 and stores 0.
        if (stored == 0 and computed.unsigned == 256) return 0;
        const matches = stored == computed.unsigned or stored == computed.signed;
        if (!matches) return error.TarHeaderChksum;
        return stored;
    }
};
287
// Returns the part of `str` before its first NUL byte, or all of `str` when
// it contains no NUL byte.
fn nullStr(str: []const u8) []const u8 {
    const end = std.mem.indexOfScalar(u8, str, 0) orelse str.len;
    return str[0..end];
}
295
/// Kind of entry reported by the iterator's `next` method.
pub const FileKind = enum {
    /// Directory entry.
    directory,
    /// Symbolic link entry.
    sym_link,
    /// Regular file entry.
    file,
};
302
/// Iterator over entries in the tar file represented by reader.
pub const Iterator = struct {
    reader: *std.Io.Reader,
    diagnostics: ?*Diagnostics = null,

    // buffers for header and file attributes
    header_buffer: [Header.SIZE]u8 = undefined,
    file_name_buffer: []u8,
    link_name_buffer: []u8,

    // bytes of padding to the end of the block, not yet discarded
    padding: usize = 0,
    // not consumed bytes of file from last next iteration
    unread_file_bytes: u64 = 0,

    /// Options for iterator.
    /// Buffers should be provided by the caller.
    pub const Options = struct {
        /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
        file_name_buffer: []u8,
        /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
        link_name_buffer: []u8,
        /// Collects error messages during unpacking
        diagnostics: ?*Diagnostics = null,
    };

    /// Iterates over files in tar archive.
    /// `next` returns each file in tar archive.
    pub fn init(reader: *std.Io.Reader, options: Options) Iterator {
        return .{
            .reader = reader,
            .diagnostics = options.diagnostics,
            .file_name_buffer = options.file_name_buffer,
            .link_name_buffer = options.link_name_buffer,
        };
    }

    pub const File = struct {
        name: []const u8, // name of file, symlink or directory
        link_name: []const u8, // target name of symlink
        size: u64 = 0, // size of the file in bytes
        mode: u32 = 0,
        kind: FileKind = .file,
    };

    // Skips pending padding and reads the next header block.
    // Returns null at end of stream or when an empty (all zero) block is read.
    fn readHeader(self: *Iterator) !?Header {
        if (self.padding > 0) {
            try self.reader.discardAll(self.padding);
            // The padding is consumed now; clear it so it can never be
            // discarded a second time.
            self.padding = 0;
        }
        const n = try self.reader.readSliceShort(&self.header_buffer);
        if (n == 0) return null;
        if (n < Header.SIZE) return error.UnexpectedEndOfStream;
        const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
        if (try header.checkChksum() == 0) return null;
        return header;
    }

    // Reads `size` bytes into `buffer` and returns them up to the first NUL.
    // `size` stays u64 until it is known to fit into `buffer`, so that a huge
    // value from the header is rejected instead of being cast to usize.
    fn readString(self: *Iterator, size: u64, buffer: []u8) ![]const u8 {
        if (size > buffer.len) return error.TarInsufficientBuffer;
        const len: usize = @intCast(size);
        const buf = buffer[0..len];
        try self.reader.readSliceAll(buf);
        return nullStr(buf);
    }

    // File with empty name and link name, other attributes at their defaults.
    fn newFile(self: *Iterator) File {
        return .{
            .name = self.file_name_buffer[0..0],
            .link_name = self.link_name_buffer[0..0],
        };
    }

    // Number of padding bytes in the last file block.
    // Uses the remainder instead of rounding `size` up to the block boundary:
    // rounding up overflows u64 for sizes within 511 of maxInt(u64), which a
    // base-256 encoded size field can contain.
    fn blockPadding(size: u64) usize {
        const remainder: usize = @intCast(size % Header.SIZE);
        return if (remainder == 0) 0 else Header.SIZE - remainder;
    }

    /// Iterates through the tar archive as if it is a series of files.
    /// Internally, the tar format often uses entries (header with optional
    /// content) to add meta data that describes the next file. These
    /// entries should not normally be visible to the outside. As such, this
    /// loop iterates through one or more entries until it collects all
    /// file attributes.
    ///
    /// Content of the previously returned file that was not consumed is
    /// skipped first. Returns null when there are no more entries.
    /// `name` and `link_name` of the returned file point into the buffers
    /// given in `Options`.
    pub fn next(self: *Iterator) !?File {
        if (self.unread_file_bytes > 0) {
            // If file content was not consumed by caller
            try self.reader.discardAll64(self.unread_file_bytes);
            self.unread_file_bytes = 0;
        }
        var file: File = self.newFile();

        while (try self.readHeader()) |header| {
            const kind = header.kind();
            const size: u64 = try header.size();
            self.padding = blockPadding(size);

            switch (kind) {
                // File types to return upstream
                .directory, .normal, .symbolic_link => {
                    file.kind = switch (kind) {
                        .directory => .directory,
                        .normal => .file,
                        .symbolic_link => .sym_link,
                        else => unreachable,
                    };
                    file.mode = try header.mode();

                    // set file attributes if not already set by prefix/extended headers
                    if (file.size == 0) {
                        file.size = size;
                    }
                    if (file.link_name.len == 0) {
                        file.link_name = try header.linkName(self.link_name_buffer);
                    }
                    if (file.name.len == 0) {
                        file.name = try header.fullName(self.file_name_buffer);
                    }

                    self.padding = blockPadding(file.size);
                    self.unread_file_bytes = file.size;
                    return file;
                },
                // Prefix header types
                .gnu_long_name => {
                    file.name = try self.readString(size, self.file_name_buffer);
                },
                .gnu_long_link => {
                    file.link_name = try self.readString(size, self.link_name_buffer);
                },
                .extended_header => {
                    // Use just attributes from last extended header.
                    file = self.newFile();

                    var rdr: PaxIterator = .{
                        .reader = self.reader,
                        // Checked conversion: the size comes from the archive
                        // and may not fit into usize on 32-bit targets.
                        .size = std.math.cast(usize, size) orelse return error.TarHeadersTooBig,
                    };
                    while (try rdr.next()) |attr| {
                        switch (attr.kind) {
                            .path => {
                                file.name = try attr.value(self.file_name_buffer);
                            },
                            .linkpath => {
                                file.link_name = try attr.value(self.link_name_buffer);
                            },
                            .size => {
                                var buf: [pax_max_size_attr_len]u8 = undefined;
                                file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
                            },
                        }
                    }
                },
                // Ignored header type
                .global_extended_header => {
                    self.reader.discardAll64(size) catch return error.TarHeadersTooBig;
                },
                // All other are unsupported header types
                else => {
                    const d = self.diagnostics orelse return error.TarUnsupportedHeader;
                    {
                        // Scoped so the errdefer only covers the append: once
                        // the error is stored, `Diagnostics.deinit` owns the
                        // name and it must not be freed here.
                        const file_name = try d.allocator.dupe(u8, header.name());
                        errdefer d.allocator.free(file_name);
                        try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
                            .file_name = file_name,
                            .file_type = kind,
                        } });
                    }
                    if (kind == .gnu_sparse) {
                        try self.skipGnuSparseExtendedHeaders(header);
                    }
                    self.reader.discardAll64(size) catch return error.TarHeadersTooBig;
                },
            }
        }
        return null;
    }

    /// Streams `file.size` bytes from the reader to `w` and marks the file
    /// content as consumed.
    pub fn streamRemaining(it: *Iterator, file: File, w: *std.Io.Writer) std.Io.Reader.StreamError!void {
        try it.reader.streamExact64(w, file.size);
        it.unread_file_bytes = 0;
    }

    // Reads and drops the extension blocks following a GNU sparse header, for
    // as long as the "is extended" byte of the current block is non-zero
    // (offset 482 in the header, offset 504 in each extension block).
    fn skipGnuSparseExtendedHeaders(self: *Iterator, header: Header) !void {
        var is_extended = header.bytes[482] > 0;
        while (is_extended) {
            var buf: [Header.SIZE]u8 = undefined;
            try self.reader.readSliceAll(&buf);
            is_extended = buf[504] > 0;
        }
    }
};
490
// Pax extended header keywords that this parser understands.
const PaxAttributeKind = enum {
    // "path" keyword
    path,
    // "linkpath" keyword
    linkpath,
    // "size" keyword
    size,
};
496
// Upper bound for the length of a pax "size" attribute value.
// maxInt(u64) has 20 chars in base 10; in practice we got 24 chars.
const pax_max_size_attr_len = 64;
499
/// Iterator over the attribute records of one pax extended header.
pub const PaxIterator = struct {
    size: usize, // cumulative size of all pax attributes
    reader: *std.Io.Reader,

    const Self = @This();

    const Attribute = struct {
        kind: PaxAttributeKind,
        len: usize, // length of the attribute value
        reader: *std.Io.Reader, // reader positioned at value start

        // Reads the attribute value into the front of `dst` and consumes the
        // record's terminating new line.
        // `dst` has to be at least `len` bytes long, otherwise
        // error.TarInsufficientBuffer is returned. Values that contain a NUL
        // byte are rejected.
        pub fn value(self: Attribute, dst: []u8) ![]const u8 {
            if (dst.len < self.len) return error.TarInsufficientBuffer;
            const out = dst[0..self.len];
            const got = try self.reader.readSliceShort(out);
            if (got < self.len) return error.UnexpectedEndOfStream;
            try validateAttributeEnding(self.reader);
            if (hasNull(out)) return error.PaxNullInValue;
            return out;
        }
    };

    // Iterates over pax attributes. Returns only known attributes; records
    // with any other keyword are skipped.
    // Caller has to call value in Attribute, to advance reader across value.
    pub fn next(self: *Self) !?Attribute {
        // Pax extended header consists of one or more attributes, each constructed as follows:
        // "%d %s=%s\n", <length>, <keyword>, <value>
        while (self.size > 0) {
            const length_buf = try self.reader.takeSentinel(' ');
            const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes

            const keyword = try self.reader.takeSentinel('=');
            if (hasNull(keyword)) return error.PaxNullInKeyword;

            // The value starts after "<length> <keyword>=" and is followed by
            // the one byte "\n" record terminator.
            const value_start = length_buf.len + keyword.len + 2; // 2 separators
            const too_short = length < value_start + 1;
            const beyond_header = self.size < length;
            if (too_short or beyond_header) return error.UnexpectedEndOfStream;
            const value_len = length - value_start - 1;
            self.size -= length;

            const kind = std.meta.stringToEnum(PaxAttributeKind, keyword) orelse {
                // Unknown keyword: skip value and record terminator.
                try self.reader.discardAll(value_len);
                try validateAttributeEnding(self.reader);
                continue;
            };
            if (kind == .size and value_len > pax_max_size_attr_len) {
                return error.PaxSizeAttrOverflow;
            }
            return .{
                .kind = kind,
                .len = value_len,
                .reader = self.reader,
            };
        }

        return null;
    }

    fn eql(a: []const u8, b: []const u8) bool {
        return std.mem.eql(u8, a, b);
    }

    // True when `str` contains a NUL byte.
    fn hasNull(str: []const u8) bool {
        return std.mem.indexOfScalar(u8, str, 0) != null;
    }

    // Checks that each record ends with new line.
    fn validateAttributeEnding(reader: *std.Io.Reader) !void {
        const terminator = try reader.takeByte();
        if (terminator != '\n') return error.PaxInvalidAttributeEnd;
    }
};
580
/// Saves tar file content to the file system.
///
/// Every entry read from `reader` is created below `dir`, after
/// `options.strip_components` leading path components were removed from its
/// name. When `options.diagnostics` is provided, entries that cannot be
/// created are recorded there and unpacking continues; otherwise the first
/// such failure is returned as an error.
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: *std.Io.Reader, options: PipeOptions) !void {
    var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
    var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
    var file_contents_buffer: [1024]u8 = undefined;
    var it: Iterator = .init(reader, .{
        .file_name_buffer = &file_name_buffer,
        .link_name_buffer = &link_name_buffer,
        .diagnostics = options.diagnostics,
    });

    while (try it.next()) |file| {
        const file_name = stripComponents(file.name, options.strip_components);
        if (file_name.len == 0 and file.kind != .directory) {
            const d = options.diagnostics orelse return error.TarComponentsOutsideStrippedPrefix;
            // errdefer: do not leak the copy when storing the error fails.
            const name_copy = try d.allocator.dupe(u8, file.name);
            errdefer d.allocator.free(name_copy);
            try d.errors.append(d.allocator, .{ .components_outside_stripped_prefix = .{
                .file_name = name_copy,
            } });
            continue;
        }
        if (options.diagnostics) |d| {
            try d.findRoot(file.kind, file_name);
        }

        switch (file.kind) {
            .directory => {
                if (file_name.len > 0 and !options.exclude_empty_directories) {
                    try dir.makePath(file_name);
                }
            },
            .file => {
                if (createDirAndFile(dir, file_name, fileMode(file.mode, options))) |fs_file| {
                    defer fs_file.close();
                    var file_writer = fs_file.writer(&file_contents_buffer);
                    try it.streamRemaining(file, &file_writer.interface);
                    try file_writer.interface.flush();
                } else |err| {
                    const d = options.diagnostics orelse return err;
                    const name_copy = try d.allocator.dupe(u8, file_name);
                    errdefer d.allocator.free(name_copy);
                    try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
                        .code = err,
                        .file_name = name_copy,
                    } });
                }
            },
            .sym_link => {
                const link_name = file.link_name;
                createDirAndSymlink(dir, link_name, file_name) catch |err| {
                    const d = options.diagnostics orelse return error.UnableToCreateSymLink;
                    // Each copy is released again if a later allocation in
                    // this block fails.
                    const name_copy = try d.allocator.dupe(u8, file_name);
                    errdefer d.allocator.free(name_copy);
                    const link_copy = try d.allocator.dupe(u8, link_name);
                    errdefer d.allocator.free(link_copy);
                    try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{
                        .code = err,
                        .file_name = name_copy,
                        .link_name = link_copy,
                    } });
                };
            },
        }
    }
}
639
// Creates the file `file_name` in `dir`; the file must not exist yet.
// When creation fails with error.FileNotFound and `file_name` has a directory
// part, that directory path is created and file creation is tried once more.
fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8, mode: std.fs.File.Mode) !std.fs.File {
    return dir.createFile(file_name, .{ .exclusive = true, .mode = mode }) catch |err| switch (err) {
        error.FileNotFound => {
            const parent = std.fs.path.dirname(file_name) orelse return err;
            try dir.makePath(parent);
            return try dir.createFile(file_name, .{ .exclusive = true, .mode = mode });
        },
        else => return err,
    };
}
652
// Creates a symbolic link at path `file_name` which points to `link_name`.
// When that fails with error.FileNotFound and `file_name` has a directory
// part, that directory path is created and the link creation is tried once
// more.
fn createDirAndSymlink(dir: std.fs.Dir, link_name: []const u8, file_name: []const u8) !void {
    dir.symLink(link_name, file_name, .{}) catch |err| switch (err) {
        error.FileNotFound => {
            const parent = std.fs.path.dirname(file_name) orelse return err;
            try dir.makePath(parent);
            try dir.symLink(link_name, file_name, .{});
        },
        else => return err,
    };
}
665
// Removes the first `count` '/'-terminated components from `path`.
// Returns the empty tail of `path` when it has fewer than `count` separators.
fn stripComponents(path: []const u8, count: u32) []const u8 {
    var rest = path;
    var left = count;
    while (left != 0) : (left -= 1) {
        const slash = std.mem.indexOfScalar(u8, rest, '/') orelse return path[path.len..];
        rest = rest[slash + 1 ..];
    }
    return rest;
}
679
test stripComponents {
    // Expected result of stripping `count` components from "a/b/c".
    const cases = [_]struct { count: u32, want: []const u8 }{
        .{ .count = 0, .want = "a/b/c" },
        .{ .count = 1, .want = "b/c" },
        .{ .count = 2, .want = "c" },
        .{ .count = 3, .want = "" },
        .{ .count = 4, .want = "" },
    };
    for (cases) |case| {
        try testing.expectEqualStrings(case.want, stripComponents("a/b/c", case.count));
    }
}
688
test PaxIterator {
    // Expected attribute: either its value, or the error returned when the
    // value is read.
    const Attr = struct {
        kind: PaxAttributeKind,
        value: []const u8 = undefined,
        err: ?anyerror = null,
    };
    // `err` on a case is the error expected from `PaxIterator.next` after all
    // listed attributes were returned.
    const cases = [_]struct {
        data: []const u8,
        attrs: []const Attr,
        err: ?anyerror = null,
    }{
        .{ // valid but unknown keys
            .data =
            \\30 mtime=1350244992.023960108
            \\6 k=1
            \\13 key1=val1
            \\10 a=name
            \\9 a=name
            \\
            ,
            .attrs = &[_]Attr{},
        },
        .{ // mix of known and unknown keys
            .data =
            \\6 k=1
            \\13 path=name
            \\17 linkpath=link
            \\13 key1=val1
            \\12 size=123
            \\13 key2=val2
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
                .{ .kind = .linkpath, .value = "link" },
                .{ .kind = .size, .value = "123" },
            },
        },
        .{ // too short size of the second key-value pair
            .data =
            \\13 path=name
            \\10 linkpath=value
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.UnexpectedEndOfStream,
        },
        .{ // too long size of the second key-value pair
            .data =
            \\13 path=name
            \\6 k=1
            \\19 linkpath=value
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.UnexpectedEndOfStream,
        },

        .{ // too long size of the second key-value pair
            .data =
            \\13 path=name
            \\19 linkpath=value
            \\6 k=1
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
                .{ .kind = .linkpath, .err = error.PaxInvalidAttributeEnd },
            },
        },
        .{ // null in keyword is not valid
            .data = "13 path=name\n" ++ "7 k\x00b=1\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.PaxNullInKeyword,
        },
        .{ // null in value is not valid
            .data = "23 path=name\x00with null\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .err = error.PaxNullInValue },
            },
        },
        .{ // 1000 characters path
            .data = "1011 path=" ++ "0123456789" ** 100 ++ "\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "0123456789" ** 100 },
            },
        },
    };
    var buffer: [1024]u8 = undefined;

    outer: for (cases) |case| {
        var reader: std.Io.Reader = .fixed(case.data);
        var it: PaxIterator = .{
            .size = case.data.len,
            .reader = &reader,
        };

        var i: usize = 0;
        while (it.next() catch |err| {
            if (case.err) |e| {
                // Expected iterator error: this case is done.
                try testing.expectEqual(e, err);
                continue :outer;
            }
            return err;
        }) |attr| : (i += 1) {
            const exp = case.attrs[i];
            try testing.expectEqual(exp.kind, attr.kind);
            const value = attr.value(&buffer) catch |err| {
                if (exp.err) |e| {
                    try testing.expectEqual(e, err);
                    // Move on to the next case. Breaking out of `outer` here
                    // would silently skip all remaining cases.
                    continue :outer;
                }
                return err;
            };
            // Reading the value succeeded, so no error may have been expected.
            try testing.expect(exp.err == null);
            try testing.expectEqualStrings(exp.value, value);
        }
        try testing.expectEqual(case.attrs.len, i);
        try testing.expect(case.err == null);
    }
}
815
test "header parse size" {
    // `in` is copied to the start of the size field (offset 124) of an
    // otherwise zeroed header block.
    const Case = struct {
        in: []const u8,
        want: u64 = 0,
        err: ?anyerror = null,
    };
    const cases = [_]Case{
        // Test base-256 (binary) encoded values.
        .{ .in = "", .want = 0 },
        .{ .in = "\x80", .want = 0 },
        .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", .want = 1 },
        .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02", .want = 0x0102 },
        .{ .in = "\x80\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08", .want = 0x0102030405060708 },
        .{ .in = "\x80\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09", .err = error.TarNumericValueTooBig },
        .{ .in = "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", .want = 537795476381659745 },
        .{ .in = "\x80\x80\x80\x00\x01\x02\x03\x04\x05\x06\x07\x08", .err = error.TarNumericValueTooBig },

        // Test base-8 (octal) encoded values.
        .{ .in = "00000000227\x00", .want = 0o227 },
        .{ .in = " 000000227\x00", .want = 0o227 },
        .{ .in = "00000000228\x00", .err = error.TarHeader },
        .{ .in = "11111111111\x00", .want = 0o11111111111 },
    };

    for (cases) |case| {
        var block = [_]u8{0} ** Header.SIZE;
        @memcpy(block[124..][0..case.in.len], case.in);
        const header: Header = .{ .bytes = &block };
        const got = header.size();
        if (case.err) |expected_err| {
            try testing.expectError(expected_err, got);
        } else {
            try testing.expectEqual(case.want, try got);
        }
    }
}
850
test "header parse mode" {
    // `in` is copied to the start of the mode field (offset 100) of an
    // otherwise zeroed header block. Only the first 8 bytes belong to the
    // mode field, so longer inputs spill into the bytes that follow it.
    const Case = struct {
        in: []const u8,
        want: u64 = 0,
        err: ?anyerror = null,
    };
    const cases = [_]Case{
        .{ .in = "0000644\x00", .want = 0o644 },
        .{ .in = "0000777\x00", .want = 0o777 },
        .{ .in = "7777777\x00", .want = 0o7777777 },
        .{ .in = "7777778\x00", .err = error.TarHeader },
        .{ .in = "77777777", .want = 0o77777777 },
        .{ .in = "777777777777", .want = 0o77777777 },
    };
    for (cases) |case| {
        var block = [_]u8{0} ** Header.SIZE;
        @memcpy(block[100..][0..case.in.len], case.in);
        const header: Header = .{ .bytes = &block };
        const got = header.mode();
        if (case.err) |expected_err| {
            try testing.expectError(expected_err, got);
        } else {
            try testing.expectEqual(case.want, try got);
        }
    }
}
875
test "create file and symlink" {
    var root = testing.tmpDir(.{});
    defer root.cleanup();

    // File in the root and file whose parent directories do not exist yet.
    for ([_][]const u8{ "file1", "a/b/c/file2" }) |path| {
        const created = try createDirAndFile(root.dir, path, default_mode);
        created.close();
    }

    createDirAndSymlink(root.dir, "a/b/c/file2", "symlink1") catch |err| switch (err) {
        // On Windows when developer mode is not enabled
        error.AccessDenied => return error.SkipZigTest,
        else => return err,
    };
    try createDirAndSymlink(root.dir, "../../../file1", "d/e/f/symlink2");

    // Dangling symlink, file created later
    try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3");
    const late = try createDirAndFile(root.dir, "g/h/i/file4", default_mode);
    late.close();
}
897
test Iterator {
    // Example tar file is created from this tree structure:
    // $ tree example
    //    example
    //    ├── a
    //    │   └── file
    //    ├── b
    //    │   └── symlink -> ../a/file
    //    └── empty
    // $ cat example/a/file
    //   content
    // $ tar -cf example.tar example
    // $ tar -tvf example.tar
    //    example/
    //    example/b/
    //    example/b/symlink -> ../a/file
    //    example/a/
    //    example/a/file
    //    example/empty/

    const data = @embedFile("tar/testdata/example.tar");
    var reader: std.Io.Reader = .fixed(data);

    // User provided buffers to the iterator
    var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
    var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
    // Create iterator
    var it: Iterator = .init(&reader, .{
        .file_name_buffer = &file_name_buffer,
        .link_name_buffer = &link_name_buffer,
    });
    // Iterate over files in example.tar
    var file_no: usize = 0;
    while (try it.next()) |file| : (file_no += 1) {
        switch (file.kind) {
            .directory => {
                switch (file_no) {
                    0 => try testing.expectEqualStrings("example/", file.name),
                    1 => try testing.expectEqualStrings("example/b/", file.name),
                    3 => try testing.expectEqualStrings("example/a/", file.name),
                    5 => try testing.expectEqualStrings("example/empty/", file.name),
                    else => unreachable,
                }
            },
            .file => {
                try testing.expectEqualStrings("example/a/file", file.name);
                var buf: [16]u8 = undefined;
                var w: std.Io.Writer = .fixed(&buf);
                try it.streamRemaining(file, &w);
                try testing.expectEqualStrings("content\n", w.buffered());
            },
            .sym_link => {
                try testing.expectEqualStrings("example/b/symlink", file.name);
                try testing.expectEqualStrings("../a/file", file.link_name);
            },
        }
    }
    // All six entries of the listing above have to be visited; without this
    // check the test would also pass if the iterator stopped early.
    try testing.expectEqual(6, file_no);
}
956
test pipeToFileSystem {
    // Example tar file is created from this tree structure:
    // $ tree example
    //    example
    //    ├── a
    //    │   └── file
    //    ├── b
    //    │   └── symlink -> ../a/file
    //    └── empty
    // $ cat example/a/file
    //   content
    // $ tar -cf example.tar example
    // $ tar -tvf example.tar
    //    example/
    //    example/b/
    //    example/b/symlink -> ../a/file
    //    example/a/
    //    example/a/file
    //    example/empty/

    const data = @embedFile("tar/testdata/example.tar");
    var reader: std.Io.Reader = .fixed(data);

    var tmp = testing.tmpDir(.{ .follow_symlinks = false });
    defer tmp.cleanup();
    const dir = tmp.dir;

    // Unpack the archive into `dir`, dropping the leading "example/"
    // component and not creating directories for directory entries.
    const options: PipeOptions = .{
        .mode_mode = .ignore,
        .strip_components = 1,
        .exclude_empty_directories = true,
    };
    pipeToFileSystem(dir, &reader, options) catch |err| switch (err) {
        // Skip on platform which don't support symlinks
        error.UnableToCreateSymLink => return error.SkipZigTest,
        else => return err,
    };

    try testing.expectError(error.FileNotFound, dir.statFile("empty"));
    const file_stat = try dir.statFile("a/file");
    try testing.expect(file_stat.kind == .file);
    const link_stat = try dir.statFile("b/symlink"); // statFile follows symlink
    try testing.expect(link_stat.kind == .file);

    var buf: [32]u8 = undefined;
    const link_target = try dir.readLink("b/symlink", &buf);
    try testing.expectEqualSlices(u8, "../a/file", normalizePath(link_target));
}
1006
test "pipeToFileSystem root_dir" {
    const archive = @embedFile("tar/testdata/example.tar");

    // Case 1: strip_components = 1.
    {
        var reader: std.Io.Reader = .fixed(archive);
        var tmp_dir = testing.tmpDir(.{ .follow_symlinks = false });
        defer tmp_dir.cleanup();
        var diag: Diagnostics = .{ .allocator = testing.allocator };
        defer diag.deinit();

        pipeToFileSystem(tmp_dir.dir, &reader, .{
            .strip_components = 1,
            .diagnostics = &diag,
        }) catch |err| switch (err) {
            // Platforms that cannot create symlinks skip this test.
            error.UnableToCreateSymLink => return error.SkipZigTest,
            else => return err,
        };

        // No root_dir is reported in this configuration.
        try testing.expectEqual(0, diag.root_dir.len);
        try testing.expectEqual(5, diag.entries);
    }

    // Case 2: strip_components = 0, reading the same archive from the start.
    {
        var reader: std.Io.Reader = .fixed(archive);
        var tmp_dir = testing.tmpDir(.{ .follow_symlinks = false });
        defer tmp_dir.cleanup();
        var diag: Diagnostics = .{ .allocator = testing.allocator };
        defer diag.deinit();

        pipeToFileSystem(tmp_dir.dir, &reader, .{
            .strip_components = 0,
            .diagnostics = &diag,
        }) catch |err| switch (err) {
            // Platforms that cannot create symlinks skip this test.
            error.UnableToCreateSymLink => return error.SkipZigTest,
            else => return err,
        };

        // The common top-level directory is reported as root_dir.
        try testing.expectEqualStrings("example", diag.root_dir);
        try testing.expectEqual(6, diag.entries);
    }
}
1054
test "findRoot with single file archive" {
    var archive_reader: std.Io.Reader = .fixed(@embedFile("tar/testdata/22752.tar"));

    var tmp_dir = testing.tmpDir(.{});
    defer tmp_dir.cleanup();

    var diag: Diagnostics = .{ .allocator = testing.allocator };
    defer diag.deinit();

    try pipeToFileSystem(tmp_dir.dir, &archive_reader, .{ .diagnostics = &diag });

    // This archive is expected to yield no root directory.
    try testing.expectEqualStrings("", diag.root_dir);
}
1068
test "findRoot without explicit root dir" {
    var archive_reader: std.Io.Reader = .fixed(@embedFile("tar/testdata/19820.tar"));

    var tmp_dir = testing.tmpDir(.{});
    defer tmp_dir.cleanup();

    var diag: Diagnostics = .{ .allocator = testing.allocator };
    defer diag.deinit();

    try pipeToFileSystem(tmp_dir.dir, &archive_reader, .{ .diagnostics = &diag });

    // This archive is expected to report "root" as its root directory.
    try testing.expectEqualStrings("root", diag.root_dir);
}
1082
test "pipeToFileSystem strip_components" {
    var archive_reader: std.Io.Reader = .fixed(@embedFile("tar/testdata/example.tar"));

    var tmp_dir = testing.tmpDir(.{ .follow_symlinks = false });
    defer tmp_dir.cleanup();
    var diag: Diagnostics = .{ .allocator = testing.allocator };
    defer diag.deinit();

    pipeToFileSystem(tmp_dir.dir, &archive_reader, .{
        .strip_components = 3,
        .diagnostics = &diag,
    }) catch |err| switch (err) {
        // Platforms that cannot create symlinks skip this test.
        error.UnableToCreateSymLink => return error.SkipZigTest,
        else => return err,
    };

    // Exactly two entries are expected to be reported through diagnostics,
    // in archive order.
    const reported = diag.errors.items;
    try testing.expectEqual(2, reported.len);
    try testing.expectEqualStrings(
        "example/b/symlink",
        reported[0].components_outside_stripped_prefix.file_name,
    );
    try testing.expectEqualStrings(
        "example/a/file",
        reported[1].components_outside_stripped_prefix.file_name,
    );
}
1105
/// Rewrites every native path separator in `bytes` to the POSIX separator,
/// in place, and returns the same slice. When the native separator already is
/// the POSIX one, `bytes` is returned untouched.
fn normalizePath(bytes: []u8) []u8 {
    const native_sep = std.fs.path.sep;
    const posix_sep = std.fs.path.sep_posix;
    if (native_sep != posix_sep) {
        std.mem.replaceScalar(u8, bytes, native_sep, posix_sep);
    }
    return bytes;
}
1112
const default_mode = std.fs.File.default_mode;

/// Computes the file system mode for an extracted file from the tar header
/// `mode` and `options.mode_mode`.
///
/// Returns `default_mode` when the platform has no executable bit, when
/// `mode_mode` is `.ignore`, or when the header does not have the owner
/// executable bit set. Otherwise returns `default_mode` with the executable
/// bit set for user, group and others.
fn fileMode(mode: u32, options: PipeOptions) std.fs.File.Mode {
    if (!std.fs.has_executable_bit) return default_mode;
    if (options.mode_mode == .ignore) return default_mode;

    const S = std.posix.S;

    // Only the owner executable bit of the tar header is inspected.
    const owner_can_execute = (mode & S.IXUSR) != 0;
    if (owner_can_execute) {
        // That bit is propagated to group and others as well; every other bit
        // stays at the default used when creating files.
        return default_mode | S.IXUSR | S.IXGRP | S.IXOTH;
    }
    return default_mode;
}
1130
test fileMode {
    if (!std.fs.has_executable_bit) return error.SkipZigTest;

    // With `.ignore` the header mode has no influence on the result.
    try testing.expectEqual(default_mode, fileMode(0o744, .{ .mode_mode = .ignore }));

    // With default options only the owner executable bit of the header matters.
    const cases = [_]struct { header_mode: u32, expected: std.fs.File.Mode }{
        .{ .header_mode = 0o744, .expected = 0o777 },
        .{ .header_mode = 0o644, .expected = 0o666 },
        .{ .header_mode = 0o655, .expected = 0o666 },
    };
    for (cases) |case| {
        try testing.expectEqual(case.expected, fileMode(case.header_mode, .{}));
    }
}
1138
// Extracts the example archive once per `ModeMode` and checks the executable
// bits of the extracted `a/file`.
test "executable bit" {
    if (!std.fs.has_executable_bit) return error.SkipZigTest;

    const S = std.posix.S;
    const data = @embedFile("tar/testdata/example.tar");

    for ([_]PipeOptions.ModeMode{ .ignore, .executable_bit_only }) |opt| {
        var reader: std.Io.Reader = .fixed(data);

        var tmp = testing.tmpDir(.{ .follow_symlinks = false });
        // `defer` inside the loop body runs at the end of every iteration, so
        // each mode extracts into a fresh directory and none is left behind.
        defer tmp.cleanup();

        pipeToFileSystem(tmp.dir, &reader, .{
            .strip_components = 1,
            .exclude_empty_directories = true,
            .mode_mode = opt,
        }) catch |err| {
            // Skip on platforms which don't support symlinks.
            if (err == error.UnableToCreateSymLink) return error.SkipZigTest;
            return err;
        };

        const stat = try tmp.dir.statFile("a/file");
        try testing.expect(stat.kind == .file);

        if (opt == .executable_bit_only) {
            // Executable bit is set for user, group and others.
            try testing.expect(stat.mode & S.IXUSR > 0);
            try testing.expect(stat.mode & S.IXGRP > 0);
            try testing.expect(stat.mode & S.IXOTH > 0);
        }
        if (opt == .ignore) {
            // Header mode is ignored: no executable bit is set at all.
            try testing.expect(stat.mode & S.IXUSR == 0);
            try testing.expect(stat.mode & S.IXGRP == 0);
            try testing.expect(stat.mode & S.IXOTH == 0);
        }
    }
}
1177
test {
    // Reference these declarations so that they are analyzed, and any tests
    // they contain are run, together with this file.
    _ = Diagnostics;
    _ = Writer;
    _ = @import("tar/test.zig");
}