Commit 88bbec8f9b
Changed files (5)
src/Package/Fetch.zig
@@ -0,0 +1,1012 @@
+//! Represents one independent job whose responsibility is to:
+//!
+//! 1. Check the global zig package cache to see if the hash already exists.
+//! If so, load, parse, and validate the build.zig.zon file therein, and
+//! go to step 8. Likewise, if the location is a relative path, treat this
+//! the same as a cache hit. Otherwise, proceed.
+//! 2. Fetch and unpack a URL into a temporary directory.
+//! 3. Load, parse, and validate the build.zig.zon file therein. It is allowed
+//! for the file to be missing, in which case this fetched package is considered
+//! to be a "naked" package.
+//! 4. Apply the inclusion rules of the build.zig.zon to the temporary directory
+//! by deleting excluded files. Any errors encountered for files that were
+//! ultimately excluded should be ignored, such as failure to create symlinks
+//! that weren't supposed to be included anyway.
+//! 5. Compute the package hash based on the remaining files in the temporary
+//! directory.
+//! 6. Rename the temporary directory into the global zig package cache
+//! directory. If the hash already exists, delete the temporary directory and
+//! leave the zig package cache directory untouched as it may be in use by the
+//! system. This is done even if the hash is invalid, in case the package with
+//! the different hash is used in the future.
+//! 7. Validate the computed hash against the expected hash. If invalid,
+//! this job is done.
+//! 8. Spawn a new fetch job for each dependency in the manifest file. Use
+//! a mutex and a hash map so that redundant jobs do not get queued up.
+//!
+//! All of this must be done while referring only to the state inside this
+//! struct, because this work will be done in a dedicated thread.
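+//!
+//! Usage sketch (illustrative, not a prescribed entry point): a caller fills
+//! in the input fields below, runs the root job (directly or via the thread
+//! pool), waits on `wait_group`, and then inspects `error_bundle` and the
+//! queued dependency jobs.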
+
+/// Try to avoid this as much as possible; prefer `arena`, since it will have less contention.
+gpa: Allocator,
+arena: std.heap.ArenaAllocator,
+location: Location,
+location_tok: std.zig.Ast.TokenIndex,
+hash_tok: std.zig.Ast.TokenIndex,
+global_cache: Cache.Directory,
+parent_package_root: Path,
+parent_manifest_ast: ?*const std.zig.Ast,
+prog_node: *std.Progress.Node,
+http_client: *std.http.Client,
+thread_pool: *ThreadPool,
+job_queue: *JobQueue,
+wait_group: *WaitGroup,
+/// When true, the temporary directory is reopened before hashing to work around
+/// a btrfs issue on Linux (see https://github.com/ziglang/zig/issues/17095).
+work_around_btrfs_bug: bool,
+
+// Above this are fields provided as inputs to `run`.
+// Below this are fields populated by `run`.
+
+/// This will either be relative to `global_cache`, or to the build root of
+/// the root package.
+package_root: Path,
+error_bundle: std.zig.ErrorBundle.Wip,
+manifest: ?Manifest,
+manifest_ast: ?*std.zig.Ast,
+actual_hash: Digest,
+/// Fetch logic notices whether a package has a build.zig file and sets this flag.
+has_build_zig: bool,
+/// Indicates whether the task aborted due to an out-of-memory condition.
+oom_flag: bool,
+
+pub const JobQueue = struct {
+ mutex: std.Thread.Mutex = .{},
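+
+ /// Minimal sketch of the locking helpers used by `queueJobsForDeps`; the
+ /// `add` and `finish` bookkeeping helpers referenced there (de-duplicating
+ /// jobs and recording results) are assumed and not sketched here.
+ pub fn lock(jq: *JobQueue) void {
+ jq.mutex.lock();
+ }
+
+ pub fn unlock(jq: *JobQueue) void {
+ jq.mutex.unlock();
+ }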
+};
+
+pub const Digest = [Manifest.Hash.digest_length]u8;
+pub const MultiHashHexDigest = [hex_multihash_len]u8;
+
+pub const Path = struct {
+ root_dir: Cache.Directory,
+ /// The path, relative to the root dir, that this `Path` represents.
+ /// Empty string means the root_dir is the path.
+ sub_path: []const u8 = "",
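+
+ /// Note: call sites treat `Path` as also providing helpers such as `join`,
+ /// `openFile`, and `readFileAllocOptions`; those are assumed and not shown
+ /// here.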
+};
+
+pub const Location = union(enum) {
+ remote: Remote,
+ relative_path: []const u8,
+
+ pub const Remote = struct {
+ url: []const u8,
+ /// If this is null it means the user omitted the hash field from a dependency.
+ /// It will be an error but the logic should still fetch and print the discovered hash.
+ hash: ?[hex_multihash_len]u8,
+ };
+};
+
+pub const RunError = error{
+ OutOfMemory,
+ /// This error code is intended to be handled by inspecting the
+ /// `error_bundle` field.
+ FetchFailed,
+};
+
+pub fn run(f: *Fetch) RunError!void {
+ const eb = &f.error_bundle;
+ const arena = f.arena.allocator();
+
+ // Check the global zig package cache to see if the hash already exists. If
+ // so, load, parse, and validate the build.zig.zon file therein, and skip
+ // ahead to queuing up jobs for dependencies. Likewise if the location is a
+ // relative path, treat this the same as a cache hit. Otherwise, proceed.
+
+ const remote = switch (f.location) {
+ .relative_path => |sub_path| {
+ if (fs.path.isAbsolute(sub_path)) return f.fail(
+ f.location_tok,
+ "expected path relative to build root; found absolute path",
+ .{},
+ );
+ if (f.hash_tok != 0) return f.fail(
+ f.hash_tok,
+ "path-based dependencies are not hashed",
+ .{},
+ );
+ f.package_root = try f.parent_package_root.join(arena, sub_path);
+ try loadManifest(f, f.package_root);
+ // Package hashes are used as unique identifiers for packages, so
+ // we still need one for relative paths.
+ const hash = h: {
+ var hasher = Manifest.Hash.init(.{});
+ // This hash is a tuple of:
+ // * whether it is relative to the global cache directory or to the root package
+ // * the relative file path from there to the build root of the package
+ hasher.update(if (f.package_root.root_dir.handle == f.global_cache.handle)
+ &package_hash_prefix_cached
+ else
+ &package_hash_prefix_project);
+ hasher.update(f.package_root.sub_path);
+ break :h hasher.finalResult();
+ };
+ return queueJobsForDeps(f, hash);
+ },
+ .remote => |remote| remote,
+ };
+ const s = fs.path.sep_str;
+ if (remote.hash) |expected_hash| {
+ const pkg_sub_path = "p" ++ s ++ expected_hash;
+ if (f.global_cache.handle.access(pkg_sub_path, .{})) |_| {
+ f.package_root = .{
+ .root_dir = f.global_cache,
+ .sub_path = pkg_sub_path,
+ };
+ try loadManifest(f, f.package_root);
+ return queueJobsForDeps(f, expected_hash);
+ } else |err| switch (err) {
+ error.FileNotFound => {},
+ else => |e| {
+ try eb.addRootErrorMessage(.{
+ .msg = try eb.printString("unable to open global package cache directory '{s}': {s}", .{
+ try f.global_cache.join(arena, &.{pkg_sub_path}), @errorName(e),
+ }),
+ .src_loc = .none,
+ .notes_len = 0,
+ });
+ return error.FetchFailed;
+ },
+ }
+ }
+
+ // Fetch and unpack the remote into a temporary directory.
+
+ const uri = std.Uri.parse(remote.url) catch |err| return f.fail(
+ f.location_tok,
+ "invalid URI: {s}",
+ .{@errorName(err)},
+ );
+ const rand_int = std.crypto.random.int(u64);
+ const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int);
+
+ var tmp_directory: Cache.Directory = .{
+ .path = try f.global_cache.join(arena, &.{tmp_dir_sub_path}),
+ .handle = (try f.global_cache.handle.makeOpenPathIterable(tmp_dir_sub_path, .{})).dir,
+ };
+ defer tmp_directory.handle.close();
+
+ var resource = try f.initResource(uri);
+ defer resource.deinit(); // releases more than memory
+
+ try f.unpackResource(&resource, uri.path, tmp_directory);
+
+ // Load, parse, and validate the unpacked build.zig.zon file. It is allowed
+ // for the file to be missing, in which case this fetched package is
+ // considered to be a "naked" package.
+ try loadManifest(f, .{ .root_dir = tmp_directory });
+
+ // Apply the manifest's inclusion rules to the temporary directory by
+ // deleting excluded files. If any error occurred for files that were
+ // ultimately excluded, those errors should be ignored, such as failure to
+ // create symlinks that weren't supposed to be included anyway.
+
+ // Empty directories have already been omitted by `unpackResource`.
+
+ const filter: Filter = .{
+ .include_paths = if (f.manifest) |m| m.paths else .{},
+ };
+
+ // Compute the package hash based on the remaining files in the temporary
+ // directory.
+
+ if (builtin.os.tag == .linux and f.work_around_btrfs_bug) {
+ // https://github.com/ziglang/zig/issues/17095
+ tmp_directory.handle.close();
+ const iterable_dir = f.global_cache.handle.makeOpenPathIterable(tmp_dir_sub_path, .{}) catch
+ @panic("btrfs workaround failed");
+ tmp_directory.handle = iterable_dir.dir;
+ }
+
+ f.actual_hash = try computeHash(f, .{ .dir = tmp_directory.handle }, filter);
+
+ // Rename the temporary directory into the global zig package cache
+ // directory. If the hash already exists, delete the temporary directory
+ // and leave the zig package cache directory untouched as it may be in use
+ // by the system. This is done even if the hash is invalid, in case the
+ // package with the different hash is used in the future.
+
+ const dest_pkg_sub_path = "p" ++ s ++ Manifest.hexDigest(f.actual_hash);
+ try renameTmpIntoCache(f.global_cache.handle, tmp_dir_sub_path, dest_pkg_sub_path);
+
+ // Validate the computed hash against the expected hash. If invalid, this
+ // job is done.
+
+ const actual_hex = Manifest.hexDigest(f.actual_hash);
+ if (remote.hash) |declared_hash| {
+ if (!std.mem.eql(u8, &declared_hash, &actual_hex)) {
+ return f.fail(f.hash_tok, "hash mismatch: manifest declares {s} but the fetched package has {s}", .{
+ &declared_hash, &actual_hex,
+ });
+ }
+ } else {
+ const notes_len = 1;
+ try f.addErrorWithNotes(notes_len, f.location_tok, "dependency is missing hash field");
+ const notes_start = try eb.reserveNotes(notes_len);
+ eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{
+ .msg = try eb.printString("expected .hash = \"{s}\",", .{&actual_hex}),
+ }));
+ return error.FetchFailed;
+ }
+
+ // Spawn a new fetch job for each dependency in the manifest file. Use
+ // a mutex and a hash map so that redundant jobs do not get queued up.
+ return queueJobsForDeps(f, f.actual_hash);
+}
+
+/// This function populates `f.manifest` or leaves it `null`.
+fn loadManifest(f: *Fetch, pkg_root: Path) RunError!void {
+ const eb = &f.error_bundle;
+ const arena = f.arena.allocator();
+ const manifest_bytes = pkg_root.readFileAllocOptions(
+ arena,
+ Manifest.basename,
+ Manifest.max_bytes,
+ null,
+ 1,
+ 0,
+ ) catch |err| switch (err) {
+ error.FileNotFound => return,
+ else => |e| {
+ const file_path = try pkg_root.join(arena, Manifest.basename);
+ try eb.addRootErrorMessage(.{
+ .msg = try eb.printString("unable to load package manifest '{s}': {s}", .{
+ file_path, @errorName(e),
+ }),
+ .src_loc = .none,
+ .notes_len = 0,
+ });
+ return error.FetchFailed;
+ },
+ };
+
+ const ast = try arena.create(std.zig.Ast);
+ ast.* = try std.zig.Ast.parse(arena, manifest_bytes, .zon);
+ f.manifest_ast = ast;
+
+ if (ast.errors.len > 0) {
+ const file_path = try pkg_root.join(arena, Manifest.basename);
+ try main.putAstErrorsIntoBundle(arena, ast.*, file_path, eb);
+ return error.FetchFailed;
+ }
+
+ f.manifest = try Manifest.parse(arena, ast.*);
+ const manifest = &f.manifest.?;
+
+ if (manifest.errors.len > 0) {
+ const file_path = try pkg_root.join(arena, Manifest.basename);
+ const token_starts = ast.tokens.items(.start);
+
+ for (manifest.errors) |msg| {
+ const start_loc = ast.tokenLocation(0, msg.tok);
+
+ try eb.addRootErrorMessage(.{
+ .msg = try eb.addString(msg.msg),
+ .src_loc = try eb.addSourceLocation(.{
+ .src_path = try eb.addString(file_path),
+ .span_start = token_starts[msg.tok],
+ .span_end = @intCast(token_starts[msg.tok] + ast.tokenSlice(msg.tok).len),
+ .span_main = token_starts[msg.tok] + msg.off,
+ .line = @intCast(start_loc.line),
+ .column = @intCast(start_loc.column),
+ .source_line = try eb.addString(ast.source[start_loc.line_start..start_loc.line_end]),
+ }),
+ .notes_len = 0,
+ });
+ }
+ return error.FetchFailed;
+ }
+}
+
+fn queueJobsForDeps(f: *Fetch, hash: Digest) RunError!void {
+ // If the package does not have a build.zig.zon file then there are no dependencies.
+ const manifest = f.manifest orelse return;
+
+ const new_fetches = nf: {
+ // Grab the new tasks into a temporary buffer so we can unlock the mutex
+ // as fast as possible. The buffer is over-allocated; entries skipped by
+ // the `continue` in the loop below are simply left unused.
+ const fetch_buf = try f.arena.allocator().alloc(Fetch, manifest.dependencies.count());
+ var new_fetch_index: usize = 0;
+
+ f.job_queue.lock();
+ defer f.job_queue.unlock();
+
+ // It is impossible for there to be a collision here. Consider all three cases:
+ // * Correct hash is provided by manifest.
+ // - Redundant jobs are skipped in the loop below.
+ // * Incorrect hash is provided by manifest.
+ // - Hash mismatch error emitted; `queueJobsForDeps` is not called.
+ // * Hash is not provided by manifest.
+ // - Hash missing error emitted; `queueJobsForDeps` is not called.
+ try f.job_queue.finish(hash, f, fetch_buf.len);
+
+ for (manifest.dependencies.values()) |dep| {
+ const location: Location = switch (dep.location) {
+ .url => |url| .{ .remote = .{
+ .url = url,
+ .hash = if (dep.hash) |h| h[0..hex_multihash_len].* else null,
+ } },
+ .path => |path| .{ .relative_path = path },
+ };
+ const new_fetch = &fetch_buf[new_fetch_index];
+ const already_done = f.job_queue.add(location, new_fetch);
+ if (already_done) continue;
+ new_fetch_index += 1;
+
+ new_fetch.* = .{
+ .gpa = f.gpa,
+ .arena = std.heap.ArenaAllocator.init(f.gpa),
+ .location = location,
+ .location_tok = dep.location_tok,
+ .hash_tok = dep.hash_tok,
+ .global_cache = f.global_cache,
+ .parent_package_root = f.package_root,
+ .parent_manifest_ast = f.manifest_ast.?,
+ .prog_node = f.prog_node,
+ .http_client = f.http_client,
+ .thread_pool = f.thread_pool,
+ .job_queue = f.job_queue,
+ .wait_group = f.wait_group,
+ .work_around_btrfs_bug = f.work_around_btrfs_bug,
+
+ .package_root = undefined,
+ .error_bundle = .{},
+ .manifest = null,
+ .manifest_ast = null,
+ .actual_hash = undefined,
+ .has_build_zig = false,
+ .oom_flag = false,
+ };
+ }
+
+ break :nf fetch_buf[0..new_fetch_index];
+ };
+
+ // Now it's time to give tasks to the thread pool.
+ for (new_fetches) |*new_fetch| {
+ f.wait_group.start();
+ f.thread_pool.spawn(workerRun, .{new_fetch}) catch |err| switch (err) {
+ error.OutOfMemory => {
+ new_fetch.oom_flag = true;
+ f.wait_group.finish();
+ continue;
+ },
+ };
+ }
+}
+
+fn workerRun(f: *Fetch) void {
+ defer f.wait_group.finish();
+ run(f) catch |err| switch (err) {
+ error.OutOfMemory => f.oom_flag = true,
+ error.FetchFailed => {}, // See `error_bundle`.
+ };
+}
+
+fn fail(f: *Fetch, msg_tok: std.zig.Ast.TokenIndex, comptime fmt_string: []const u8, fmt_args: anytype) RunError {
+ const ast = f.parent_manifest_ast.?;
+ const token_starts = ast.tokens.items(.start);
+ const start_loc = ast.tokenLocation(0, msg_tok);
+ const eb = &f.error_bundle;
+ const file_path = try f.parent_package_root.join(f.arena.allocator(), Manifest.basename);
+ const msg_off = 0;
+
+ try eb.addRootErrorMessage(.{
+ .msg = try eb.printString(fmt_string, fmt_args),
+ .src_loc = try eb.addSourceLocation(.{
+ .src_path = try eb.addString(file_path),
+ .span_start = token_starts[msg_tok],
+ .span_end = @intCast(token_starts[msg_tok] + ast.tokenSlice(msg_tok).len),
+ .span_main = token_starts[msg_tok] + msg_off,
+ .line = @intCast(start_loc.line),
+ .column = @intCast(start_loc.column),
+ .source_line = try eb.addString(ast.source[start_loc.line_start..start_loc.line_end]),
+ }),
+ .notes_len = 0,
+ });
+
+ return error.FetchFailed;
+}
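+
+/// Minimal sketch (an assumption, mirroring `fail` above): several call sites
+/// report a root error at `msg_tok` and reserve `notes_len` notes on it.
+fn addErrorWithNotes(f: *Fetch, notes_len: u32, msg_tok: std.zig.Ast.TokenIndex, msg: []const u8) RunError!void {
+ const ast = f.parent_manifest_ast.?;
+ const token_starts = ast.tokens.items(.start);
+ const start_loc = ast.tokenLocation(0, msg_tok);
+ const eb = &f.error_bundle;
+ const file_path = try f.parent_package_root.join(f.arena.allocator(), Manifest.basename);
+
+ try eb.addRootErrorMessage(.{
+ .msg = try eb.addString(msg),
+ .src_loc = try eb.addSourceLocation(.{
+ .src_path = try eb.addString(file_path),
+ .span_start = token_starts[msg_tok],
+ .span_end = @intCast(token_starts[msg_tok] + ast.tokenSlice(msg_tok).len),
+ .span_main = token_starts[msg_tok],
+ .line = @intCast(start_loc.line),
+ .column = @intCast(start_loc.column),
+ .source_line = try eb.addString(ast.source[start_loc.line_start..start_loc.line_end]),
+ }),
+ .notes_len = notes_len,
+ });
+}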
+
+const Resource = union(enum) {
+ file: fs.File,
+ http_request: std.http.Client.Request,
+ git_fetch_stream: git.Session.FetchStream,
+ dir: fs.IterableDir,
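+
+ /// Sketch (assumption): call sites also rely on a `reader()` helper, which
+ /// is not sketched here since each variant exposes a different reader type.
+ /// `deinit` releases the underlying handle, request, or stream.
+ fn deinit(resource: *Resource) void {
+ switch (resource.*) {
+ .file => |file| file.close(),
+ .http_request => |*req| req.deinit(),
+ .git_fetch_stream => |*stream| stream.deinit(),
+ .dir => |*dir| dir.close(),
+ }
+ resource.* = undefined;
+ }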
+};
+
+const FileType = enum {
+ tar,
+ @"tar.gz",
+ @"tar.xz",
+ git_pack,
+
+ fn fromPath(file_path: []const u8) ?FileType {
+ if (ascii.endsWithIgnoreCase(file_path, ".tar")) return .tar;
+ if (ascii.endsWithIgnoreCase(file_path, ".tar.gz")) return .@"tar.gz";
+ if (ascii.endsWithIgnoreCase(file_path, ".tar.xz")) return .@"tar.xz";
+ return null;
+ }
+
+ /// Parameter is a content-disposition header value.
+ fn fromContentDisposition(cd_header: []const u8) ?FileType {
+ const attach_end = ascii.indexOfIgnoreCase(cd_header, "attachment;") orelse
+ return null;
+
+ var value_start = ascii.indexOfIgnoreCasePos(cd_header, attach_end + 1, "filename") orelse
+ return null;
+ value_start += "filename".len;
+ if (cd_header[value_start] == '*') {
+ value_start += 1;
+ }
+ if (cd_header[value_start] != '=') return null;
+ value_start += 1;
+
+ var value_end = std.mem.indexOfPos(u8, cd_header, value_start, ";") orelse cd_header.len;
+ if (cd_header[value_end - 1] == '\"') {
+ value_end -= 1;
+ }
+ return fromPath(cd_header[value_start..value_end]);
+ }
+
+ test fromContentDisposition {
+ try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attaChment; FILENAME=\"stuff.tar.gz\"; size=42"));
+ try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attachment; filename*=\"stuff.tar.gz\""));
+ try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("ATTACHMENT; filename=\"stuff.tar.xz\""));
+ try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("attachment; FileName=\"stuff.tar.xz\""));
+ try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz"));
+
+ try std.testing.expect(fromContentDisposition("attachment FileName=\"stuff.tar.gz\"") == null);
+ try std.testing.expect(fromContentDisposition("attachment; FileName=\"stuff.tar\"") == null);
+ try std.testing.expect(fromContentDisposition("attachment; FileName\"stuff.gz\"") == null);
+ try std.testing.expect(fromContentDisposition("attachment; size=42") == null);
+ try std.testing.expect(fromContentDisposition("inline; size=42") == null);
+ try std.testing.expect(fromContentDisposition("FileName=\"stuff.tar.gz\"; attachment;") == null);
+ try std.testing.expect(fromContentDisposition("FileName=\"stuff.tar.gz\";") == null);
+ }
+};
+
+fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource {
+ const gpa = f.gpa;
+ const arena = f.arena.allocator();
+ const eb = &f.error_bundle;
+
+ if (ascii.eqlIgnoreCase(uri.scheme, "file")) return .{
+ .file = try f.parent_package_root.openFile(uri.path, .{}),
+ };
+
+ if (ascii.eqlIgnoreCase(uri.scheme, "http") or
+ ascii.eqlIgnoreCase(uri.scheme, "https"))
+ {
+ var h = std.http.Headers{ .allocator = gpa };
+ defer h.deinit();
+
+ var req = try f.http_client.request(.GET, uri, h, .{});
+ errdefer req.deinit(); // releases more than memory
+
+ try req.start(.{});
+ try req.wait();
+
+ if (req.response.status != .ok) {
+ return f.fail(f.location_tok, "expected response status '200 OK' got '{s} {s}'", .{
+ @intFromEnum(req.response.status), req.response.status.phrase() orelse "",
+ });
+ }
+
+ return .{ .http_request = req };
+ }
+
+ if (ascii.eqlIgnoreCase(uri.scheme, "git+http") or
+ ascii.eqlIgnoreCase(uri.scheme, "git+https"))
+ {
+ var transport_uri = uri;
+ transport_uri.scheme = uri.scheme["git+".len..];
+ var redirect_uri: []u8 = undefined;
+ var session: git.Session = .{ .transport = f.http_client, .uri = transport_uri };
+ session.discoverCapabilities(gpa, &redirect_uri) catch |e| switch (e) {
+ error.Redirected => {
+ defer gpa.free(redirect_uri);
+ return f.fail(f.location_tok, "repository moved to {s}", .{redirect_uri});
+ },
+ else => |other| return other,
+ };
+
+ const want_oid = want_oid: {
+ const want_ref = uri.fragment orelse "HEAD";
+ if (git.parseOid(want_ref)) |oid| break :want_oid oid else |_| {}
+
+ const want_ref_head = try std.fmt.allocPrint(arena, "refs/heads/{s}", .{want_ref});
+ const want_ref_tag = try std.fmt.allocPrint(arena, "refs/tags/{s}", .{want_ref});
+
+ var ref_iterator = try session.listRefs(gpa, .{
+ .ref_prefixes = &.{ want_ref, want_ref_head, want_ref_tag },
+ .include_peeled = true,
+ });
+ defer ref_iterator.deinit();
+ while (try ref_iterator.next()) |ref| {
+ if (std.mem.eql(u8, ref.name, want_ref) or
+ std.mem.eql(u8, ref.name, want_ref_head) or
+ std.mem.eql(u8, ref.name, want_ref_tag))
+ {
+ break :want_oid ref.peeled orelse ref.oid;
+ }
+ }
+ return f.fail(f.location_tok, "ref not found: {s}", .{want_ref});
+ };
+ if (uri.fragment == null) {
+ const notes_len = 1;
+ try f.addErrorWithNotes(notes_len, f.location_tok, "url field is missing an explicit ref");
+ const notes_start = try eb.reserveNotes(notes_len);
+ eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{
+ .msg = try eb.printString("try .url = \"{+/}#{}\",", .{
+ uri, std.fmt.fmtSliceHexLower(&want_oid),
+ }),
+ }));
+ return error.FetchFailed;
+ }
+
+ var want_oid_buf: [git.fmt_oid_length]u8 = undefined;
+ _ = std.fmt.bufPrint(&want_oid_buf, "{}", .{
+ std.fmt.fmtSliceHexLower(&want_oid),
+ }) catch unreachable;
+ var fetch_stream = try session.fetch(gpa, &.{&want_oid_buf});
+ errdefer fetch_stream.deinit();
+
+ return .{ .git_fetch_stream = fetch_stream };
+ }
+
+ return f.fail(f.location_tok, "unsupported URL scheme: {s}", .{uri.scheme});
+}
+
+fn unpackResource(
+ f: *Fetch,
+ resource: *Resource,
+ uri_path: []const u8,
+ tmp_directory: Cache.Directory,
+) RunError!void {
+ const file_type = switch (resource.*) {
+ .file => FileType.fromPath(uri_path) orelse
+ return f.fail(f.location_tok, "unknown file type: '{s}'", .{uri_path}),
+
+ .http_request => |req| ft: {
+ // The Content-Type header is consulted first.
+ const content_type = req.response.headers.getFirstValue("Content-Type") orelse
+ return f.fail(f.location_tok, "missing 'Content-Type' header", .{});
+
+ if (ascii.eqlIgnoreCase(content_type, "application/x-tar"))
+ break :ft .tar;
+
+ if (ascii.eqlIgnoreCase(content_type, "application/gzip") or
+ ascii.eqlIgnoreCase(content_type, "application/x-gzip") or
+ ascii.eqlIgnoreCase(content_type, "application/tar+gzip"))
+ {
+ break :ft .@"tar.gz";
+ }
+
+ if (ascii.eqlIgnoreCase(content_type, "application/x-xz"))
+ break :ft .@"tar.xz";
+
+ if (!ascii.eqlIgnoreCase(content_type, "application/octet-stream")) {
+ return f.fail(f.location_tok, "unrecognized 'Content-Type' header: '{s}'", .{
+ content_type,
+ });
+ }
+
+ // For 'application/octet-stream', the filename from 'Content-Disposition: attachment' is consulted next.
+ if (req.response.headers.getFirstValue("Content-Disposition")) |cd_header| {
+ break :ft FileType.fromContentDisposition(cd_header) orelse
+ return f.fail(
+ f.location_tok,
+ "unsupported Content-Disposition header value: '{s}' for Content-Type=application/octet-stream",
+ .{cd_header},
+ );
+ }
+
+ // Finally, the path from the URI is used.
+ break :ft FileType.fromPath(uri_path) orelse
+ return f.fail(f.location_tok, "unknown file type: '{s}'", .{uri_path});
+ },
+ .git_fetch_stream => .git_pack,
+ .dir => |dir| {
+ try f.recursiveDirectoryCopy(dir, tmp_directory.handle);
+ return;
+ },
+ };
+
+ switch (file_type) {
+ .tar => try unpackTarball(f, tmp_directory.handle, resource.reader()),
+ .@"tar.gz" => try unpackTarballCompressed(f, tmp_directory.handle, resource, std.compress.gzip),
+ .@"tar.xz" => try unpackTarballCompressed(f, tmp_directory.handle, resource, std.compress.xz),
+ .git_pack => try unpackGitPack(f, tmp_directory.handle, resource),
+ }
+}
+
+fn unpackTarballCompressed(
+ f: *Fetch,
+ out_dir: fs.Dir,
+ resource: *Resource,
+ comptime Compression: type,
+) RunError!void {
+ const gpa = f.gpa;
+ const reader = resource.reader();
+ var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader);
+
+ var decompress = try Compression.decompress(gpa, br.reader());
+ defer decompress.deinit();
+
+ return unpackTarball(f, out_dir, decompress.reader());
+}
+
+fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void {
+ const eb = &f.error_bundle;
+
+ var diagnostics: std.tar.Options.Diagnostics = .{ .allocator = f.gpa };
+ defer diagnostics.deinit();
+
+ try std.tar.pipeToFileSystem(out_dir, reader, .{
+ .diagnostics = &diagnostics,
+ .strip_components = 1,
+ // TODO: we would like to set this to executable_bit_only, but two
+ // things need to happen before that:
+ // 1. the tar implementation needs to support it
+ // 2. the hashing algorithm here needs to support detecting the is_executable
+ // bit on Windows from the ACLs (see the isExecutable function).
+ .mode_mode = .ignore,
+ .filter = .{ .exclude_empty_directories = true },
+ });
+
+ if (diagnostics.errors.items.len > 0) {
+ const notes_len: u32 = @intCast(diagnostics.errors.items.len);
+ try f.addErrorWithNotes(notes_len, f.location_tok, "unable to unpack tarball");
+ const notes_start = try eb.reserveNotes(notes_len);
+ for (diagnostics.errors.items, notes_start..) |item, note_i| {
+ switch (item) {
+ .unable_to_create_sym_link => |info| {
+ eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{
+ .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{
+ info.file_name, info.link_name, @errorName(info.code),
+ }),
+ }));
+ },
+ .unsupported_file_type => |info| {
+ eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{
+ .msg = try eb.printString("file '{s}' has unsupported type '{c}'", .{
+ info.file_name, @intFromEnum(info.file_type),
+ }),
+ }));
+ },
+ }
+ }
+ return error.FetchFailed;
+ }
+}
+
+fn unpackGitPack(
+ f: *Fetch,
+ out_dir: fs.Dir,
+ resource: *Resource,
+ want_oid: git.Oid,
+) !void {
+ const eb = &f.error_bundle;
+ const gpa = f.gpa;
+ const reader = resource.reader();
+ // The .git directory is used to store the packfile and associated index, but
+ // we do not attempt to replicate the exact structure of a real .git
+ // directory, since that isn't relevant for fetching a package.
+ {
+ var pack_dir = try out_dir.makeOpenPath(".git", .{});
+ defer pack_dir.close();
+ var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true });
+ defer pack_file.close();
+ var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
+ try fifo.pump(reader, pack_file.writer());
+ try pack_file.sync();
+
+ var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true });
+ defer index_file.close();
+ {
+ var index_prog_node = f.prog_node.start("Index pack", 0);
+ defer index_prog_node.end();
+ index_prog_node.activate();
+ var index_buffered_writer = std.io.bufferedWriter(index_file.writer());
+ try git.indexPack(gpa, pack_file, index_buffered_writer.writer());
+ try index_buffered_writer.flush();
+ try index_file.sync();
+ }
+
+ {
+ var checkout_prog_node = f.prog_node.start("Checkout", 0);
+ defer checkout_prog_node.end();
+ checkout_prog_node.activate();
+ var repository = try git.Repository.init(gpa, pack_file, index_file);
+ defer repository.deinit();
+ var diagnostics: git.Diagnostics = .{ .allocator = gpa };
+ defer diagnostics.deinit();
+ try repository.checkout(out_dir, want_oid, &diagnostics);
+
+ if (diagnostics.errors.items.len > 0) {
+ const notes_len: u32 = @intCast(diagnostics.errors.items.len);
+ try f.addErrorWithNotes(notes_len, f.location_tok, "unable to unpack packfile");
+ const notes_start = try eb.reserveNotes(notes_len);
+ for (diagnostics.errors.items, notes_start..) |item, note_i| {
+ switch (item) {
+ .unable_to_create_sym_link => |info| {
+ eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{
+ .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{
+ info.file_name, info.link_name, @errorName(info.code),
+ }),
+ }));
+ },
+ }
+ }
+ return error.InvalidGitPack;
+ }
+ }
+ }
+
+ try out_dir.deleteTree(".git");
+}
+
+fn recursiveDirectoryCopy(f: *Fetch, dir: fs.IterableDir, tmp_dir: fs.Dir) RunError!void {
+ // Recursive directory copy.
+ var it = try dir.walk(f.gpa);
+ defer it.deinit();
+ while (try it.next()) |entry| {
+ switch (entry.kind) {
+ .directory => {}, // omit empty directories
+ .file => {
+ dir.dir.copyFile(
+ entry.path,
+ tmp_dir,
+ entry.path,
+ .{},
+ ) catch |err| switch (err) {
+ error.FileNotFound => {
+ if (fs.path.dirname(entry.path)) |dirname| try tmp_dir.makePath(dirname);
+ try dir.dir.copyFile(entry.path, tmp_dir, entry.path, .{});
+ },
+ else => |e| return e,
+ };
+ },
+ .sym_link => {
+ var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
+ const link_name = try dir.dir.readLink(entry.path, &buf);
+ // TODO: if this would create a symlink to outside
+ // the destination directory, fail with an error instead.
+ try tmp_dir.symLink(link_name, entry.path, .{});
+ },
+ else => return error.IllegalFileTypeInPackage,
+ }
+ }
+}
+
+pub fn renameTmpIntoCache(
+ cache_dir: fs.Dir,
+ tmp_dir_sub_path: []const u8,
+ dest_dir_sub_path: []const u8,
+) !void {
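+ // `dest_dir_sub_path` is expected to look like "p/<hex digest>" (or
+ // "o/<hex digest>"): a single-character namespace directory followed by a
+ // path separator, which is what the assert below checks and what the
+ // FileNotFound branch creates on demand.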
+ assert(dest_dir_sub_path[1] == fs.path.sep);
+ var handled_missing_dir = false;
+ while (true) {
+ cache_dir.rename(tmp_dir_sub_path, dest_dir_sub_path) catch |err| switch (err) {
+ error.FileNotFound => {
+ if (handled_missing_dir) return err;
+ cache_dir.makeDir(dest_dir_sub_path[0..1]) catch |mkd_err| switch (mkd_err) {
+ error.PathAlreadyExists => handled_missing_dir = true,
+ else => |e| return e,
+ };
+ continue;
+ },
+ error.PathAlreadyExists, error.AccessDenied => {
+ // Package has been already downloaded and may already be in use on the system.
+ cache_dir.deleteTree(tmp_dir_sub_path) catch {
+ // Garbage files left over in zig-cache/tmp/ are, as they say
+ // on Star Trek, "operating within normal parameters".
+ };
+ },
+ else => |e| return e,
+ };
+ break;
+ }
+}
+
+/// Assumes that files not included in the package have already been filtered
+/// prior to calling this function. This ensures that files not protected by
+/// the hash are not present on the file system. Empty directories are *not
+/// hashed* and must not be present on the file system when calling this
+/// function.
+fn computeHash(f: *Fetch, pkg_dir: fs.IterableDir, filter: Filter) RunError!Digest {
+ // All the path name strings need to be in memory for sorting.
+ const arena = f.arena.allocator();
+ const gpa = f.gpa;
+
+ // Collect all files, recursively, then sort.
+ var all_files = std.ArrayList(*HashedFile).init(gpa);
+ defer all_files.deinit();
+
+ var walker = try pkg_dir.walk(gpa);
+ defer walker.deinit();
+
+ {
+ // The final hash will be a hash of each file hashed independently. This
+ // allows hashing in parallel.
+ var wait_group: WaitGroup = .{};
+ // `computeHash` is called from a worker thread so there must not be
+ // any waiting without working or a deadlock could occur.
+ defer wait_group.waitAndWork();
+
+ while (try walker.next()) |entry| {
+ _ = filter; // TODO: apply filter rules here
+
+ const kind: HashedFile.Kind = switch (entry.kind) {
+ .directory => continue,
+ .file => .file,
+ .sym_link => .sym_link,
+ else => return error.IllegalFileTypeInPackage,
+ };
+
+ if (std.mem.eql(u8, entry.path, build_zig_basename))
+ f.has_build_zig = true;
+
+ const hashed_file = try arena.create(HashedFile);
+ const fs_path = try arena.dupe(u8, entry.path);
+ hashed_file.* = .{
+ .fs_path = fs_path,
+ .normalized_path = try normalizePath(arena, fs_path),
+ .kind = kind,
+ .hash = undefined, // to be populated by the worker
+ .failure = undefined, // to be populated by the worker
+ };
+ wait_group.start();
+ try f.thread_pool.spawn(workerHashFile, .{ pkg_dir.dir, hashed_file, &wait_group });
+
+ try all_files.append(hashed_file);
+ }
+ }
+
+ std.mem.sortUnstable(*HashedFile, all_files.items, {}, HashedFile.lessThan);
+
+ var hasher = Manifest.Hash.init(.{});
+ var any_failures = false;
+ const eb = &f.error_bundle;
+ for (all_files.items) |hashed_file| {
+ hashed_file.failure catch |err| {
+ any_failures = true;
+ try eb.addRootErrorMessage(.{
+ .msg = try eb.printString("unable to hash: {s}", .{@errorName(err)}),
+ .src_loc = try eb.addSourceLocation(.{
+ .src_path = try eb.addString(hashed_file.fs_path),
+ .span_start = 0,
+ .span_end = 0,
+ .span_main = 0,
+ .line = 0,
+ .column = 0,
+ }),
+ .notes_len = 0,
+ });
+ };
+ hasher.update(&hashed_file.hash);
+ }
+ if (any_failures) return error.FetchFailed;
+ return hasher.finalResult();
+}
+
+fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile, wg: *WaitGroup) void {
+ defer wg.finish();
+ hashed_file.failure = hashFileFallible(dir, hashed_file);
+}
+
+fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void {
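+ // Each file's sub-hash covers: the normalized (forward-slash) path, then for
+ // regular files a 0 byte plus the executable bit followed by the file
+ // contents, or for symlinks the link target.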
+ var buf: [8000]u8 = undefined;
+ var hasher = Manifest.Hash.init(.{});
+ hasher.update(hashed_file.normalized_path);
+ switch (hashed_file.kind) {
+ .file => {
+ var file = try dir.openFile(hashed_file.fs_path, .{});
+ defer file.close();
+ hasher.update(&.{ 0, @intFromBool(try isExecutable(file)) });
+ while (true) {
+ const bytes_read = try file.read(&buf);
+ if (bytes_read == 0) break;
+ hasher.update(buf[0..bytes_read]);
+ }
+ },
+ .sym_link => {
+ const link_name = try dir.readLink(hashed_file.fs_path, &buf);
+ hasher.update(link_name);
+ },
+ }
+ hasher.final(&hashed_file.hash);
+}
+
+fn isExecutable(file: fs.File) !bool {
+ if (builtin.os.tag == .windows) {
+ // TODO check the ACL on Windows.
+ // Until this is implemented, this could be a false negative on
+ // Windows, which is why we do not yet set executable_bit_only above
+ // when unpacking the tarball.
+ return false;
+ } else {
+ const stat = try file.stat();
+ return (stat.mode & std.os.S.IXUSR) != 0;
+ }
+}
+
+const HashedFile = struct {
+ fs_path: []const u8,
+ normalized_path: []const u8,
+ hash: Digest,
+ failure: Error!void,
+ kind: Kind,
+
+ const Error =
+ fs.File.OpenError ||
+ fs.File.ReadError ||
+ fs.File.StatError ||
+ fs.Dir.ReadLinkError;
+
+ const Kind = enum { file, sym_link };
+
+ fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool {
+ _ = context;
+ return std.mem.lessThan(u8, lhs.normalized_path, rhs.normalized_path);
+ }
+};
+
+/// Makes a file system path identical regardless of operating system path
+/// inconsistencies by converting backslashes into forward slashes.
+fn normalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 {
+ const canonical_sep = '/';
+
+ if (fs.path.sep == canonical_sep)
+ return fs_path;
+
+ const normalized = try arena.dupe(u8, fs_path);
+ for (normalized) |*byte| {
+ switch (byte.*) {
+ fs.path.sep => byte.* = canonical_sep,
+ else => continue,
+ }
+ }
+ return normalized;
+}
+
+pub const Filter = struct {
+ include_paths: std.StringArrayHashMapUnmanaged(void) = .{},
+
+ /// sub_path is relative to the tarball root.
+ pub fn includePath(self: Filter, sub_path: []const u8) bool {
+ if (self.include_paths.count() == 0) return true;
+ if (self.include_paths.contains("")) return true;
+ if (self.include_paths.contains(sub_path)) return true;
+
+ // Check whether any included path is a parent directory of sub_path.
+ var dirname = sub_path;
+ while (std.fs.path.dirname(dirname)) |next_dirname| {
+ if (self.include_paths.contains(next_dirname)) return true;
+ dirname = next_dirname;
+ }
+
+ return false;
+ }
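+
+ // Illustrative test of the intended inclusion semantics: an empty filter
+ // includes everything; otherwise a path is included when it or one of its
+ // parent directories is listed.
+ test includePath {
+ const gpa = std.testing.allocator;
+ var filter: Filter = .{};
+ try std.testing.expect(filter.includePath("src/main.zig"));
+
+ try filter.include_paths.put(gpa, "src", {});
+ defer filter.include_paths.deinit(gpa);
+ try std.testing.expect(filter.includePath("src"));
+ try std.testing.expect(filter.includePath("src/main.zig"));
+ try std.testing.expect(!filter.includePath("docs/index.html"));
+ }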
+};
+
+const build_zig_basename = @import("../Package.zig").build_zig_basename;
+const hex_multihash_len = 2 * Manifest.multihash_len;
+
+// These are random bytes.
+const package_hash_prefix_cached: [8]u8 = .{ 0x53, 0x7e, 0xfa, 0x94, 0x65, 0xe9, 0xf8, 0x73 };
+const package_hash_prefix_project: [8]u8 = .{ 0xe1, 0x25, 0xee, 0xfa, 0xa6, 0x17, 0x38, 0xcc };
+
+const builtin = @import("builtin");
+const std = @import("std");
+const fs = std.fs;
+const assert = std.debug.assert;
+const ascii = std.ascii;
+const Allocator = std.mem.Allocator;
+const Cache = std.Build.Cache;
+const ThreadPool = std.Thread.Pool;
+const WaitGroup = std.Thread.WaitGroup;
+const Manifest = @import("../Manifest.zig");
+const Fetch = @This();
+const main = @import("../main.zig");
+const git = @import("../git.zig");
src/Package/hash.zig
@@ -1,153 +0,0 @@
-const builtin = @import("builtin");
-const std = @import("std");
-const fs = std.fs;
-const ThreadPool = std.Thread.Pool;
-const WaitGroup = std.Thread.WaitGroup;
-const Allocator = std.mem.Allocator;
-
-const Hash = @import("../Manifest.zig").Hash;
-
-pub fn compute(thread_pool: *ThreadPool, pkg_dir: fs.IterableDir) ![Hash.digest_length]u8 {
- const gpa = thread_pool.allocator;
-
- // We'll use an arena allocator for the path name strings since they all
- // need to be in memory for sorting.
- var arena_instance = std.heap.ArenaAllocator.init(gpa);
- defer arena_instance.deinit();
- const arena = arena_instance.allocator();
-
- // TODO: delete files not included in the package prior to computing the package hash.
- // for example, if the ini file has directives to include/not include certain files,
- // apply those rules directly to the filesystem right here. This ensures that files
- // not protected by the hash are not present on the file system.
-
- // Collect all files, recursively, then sort.
- var all_files = std.ArrayList(*HashedFile).init(gpa);
- defer all_files.deinit();
-
- var walker = try pkg_dir.walk(gpa);
- defer walker.deinit();
-
- {
- // The final hash will be a hash of each file hashed independently. This
- // allows hashing in parallel.
- var wait_group: WaitGroup = .{};
- defer wait_group.wait();
-
- while (try walker.next()) |entry| {
- const kind: HashedFile.Kind = switch (entry.kind) {
- .directory => continue,
- .file => .file,
- .sym_link => .sym_link,
- else => return error.IllegalFileTypeInPackage,
- };
- const hashed_file = try arena.create(HashedFile);
- const fs_path = try arena.dupe(u8, entry.path);
- hashed_file.* = .{
- .fs_path = fs_path,
- .normalized_path = try normalizePath(arena, fs_path),
- .kind = kind,
- .hash = undefined, // to be populated by the worker
- .failure = undefined, // to be populated by the worker
- };
- wait_group.start();
- try thread_pool.spawn(workerHashFile, .{ pkg_dir.dir, hashed_file, &wait_group });
-
- try all_files.append(hashed_file);
- }
- }
-
- std.mem.sortUnstable(*HashedFile, all_files.items, {}, HashedFile.lessThan);
-
- var hasher = Hash.init(.{});
- var any_failures = false;
- for (all_files.items) |hashed_file| {
- hashed_file.failure catch |err| {
- any_failures = true;
- std.log.err("unable to hash '{s}': {s}", .{ hashed_file.fs_path, @errorName(err) });
- };
- hasher.update(&hashed_file.hash);
- }
- if (any_failures) return error.PackageHashUnavailable;
- return hasher.finalResult();
-}
-
-const HashedFile = struct {
- fs_path: []const u8,
- normalized_path: []const u8,
- hash: [Hash.digest_length]u8,
- failure: Error!void,
- kind: Kind,
-
- const Error =
- fs.File.OpenError ||
- fs.File.ReadError ||
- fs.File.StatError ||
- fs.Dir.ReadLinkError;
-
- const Kind = enum { file, sym_link };
-
- fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool {
- _ = context;
- return std.mem.lessThan(u8, lhs.normalized_path, rhs.normalized_path);
- }
-};
-
-/// Make a file system path identical independently of operating system path inconsistencies.
-/// This converts backslashes into forward slashes.
-fn normalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 {
- const canonical_sep = '/';
-
- if (fs.path.sep == canonical_sep)
- return fs_path;
-
- const normalized = try arena.dupe(u8, fs_path);
- for (normalized) |*byte| {
- switch (byte.*) {
- fs.path.sep => byte.* = canonical_sep,
- else => continue,
- }
- }
- return normalized;
-}
-
-fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile, wg: *WaitGroup) void {
- defer wg.finish();
- hashed_file.failure = hashFileFallible(dir, hashed_file);
-}
-
-fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void {
- var buf: [8000]u8 = undefined;
- var hasher = Hash.init(.{});
- hasher.update(hashed_file.normalized_path);
- switch (hashed_file.kind) {
- .file => {
- var file = try dir.openFile(hashed_file.fs_path, .{});
- defer file.close();
- hasher.update(&.{ 0, @intFromBool(try isExecutable(file)) });
- while (true) {
- const bytes_read = try file.read(&buf);
- if (bytes_read == 0) break;
- hasher.update(buf[0..bytes_read]);
- }
- },
- .sym_link => {
- const link_name = try dir.readLink(hashed_file.fs_path, &buf);
- hasher.update(link_name);
- },
- }
- hasher.final(&hashed_file.hash);
-}
-
-fn isExecutable(file: fs.File) !bool {
- if (builtin.os.tag == .windows) {
- // TODO check the ACL on Windows.
- // Until this is implemented, this could be a false negative on
- // Windows, which is why we do not yet set executable_bit_only above
- // when unpacking the tarball.
- return false;
- } else {
- const stat = try file.stat();
- return (stat.mode & std.os.S.IXUSR) != 0;
- }
-}
src/main.zig
@@ -4714,7 +4714,7 @@ pub fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi
defer if (cleanup_build_dir) |*dir| dir.close();
const cwd_path = try process.getCwdAlloc(arena);
- const build_zig_basename = if (build_file) |bf| fs.path.basename(bf) else "build.zig";
+ const build_zig_basename = if (build_file) |bf| fs.path.basename(bf) else Package.build_zig_basename;
const build_directory: Compilation.Directory = blk: {
if (build_file) |bf| {
if (fs.path.dirname(bf)) |dirname| {
src/Package.zig
@@ -15,9 +15,9 @@ const Compilation = @import("Compilation.zig");
const Module = @import("Module.zig");
const Cache = std.Build.Cache;
const build_options = @import("build_options");
-const git = @import("git.zig");
-const computePackageHash = @import("Package/hash.zig").compute;
+const Fetch = @import("Package/Fetch.zig");
+pub const build_zig_basename = "build.zig";
pub const Manifest = @import("Manifest.zig");
pub const Table = std.StringHashMapUnmanaged(*Package);
@@ -213,223 +213,6 @@ pub fn getName(target: *const Package, gpa: Allocator, mod: Module) ![]const u8
return buf.toOwnedSlice();
}
-pub const build_zig_basename = "build.zig";
-
-/// Fetches a package and all of its dependencies recursively. Writes the
-/// corresponding datastructures for the build runner into `dependencies_source`.
-pub fn fetchAndAddDependencies(
- pkg: *Package,
- deps_pkg: *Package,
- arena: Allocator,
- thread_pool: *ThreadPool,
- http_client: *std.http.Client,
- directory: Compilation.Directory,
- global_cache_directory: Compilation.Directory,
- local_cache_directory: Compilation.Directory,
- dependencies_source: *std.ArrayList(u8),
- error_bundle: *std.zig.ErrorBundle.Wip,
- all_modules: *AllModules,
- root_prog_node: *std.Progress.Node,
- /// null for the root package
- this_hash: ?[]const u8,
-) !void {
- const max_bytes = 10 * 1024 * 1024;
- const gpa = thread_pool.allocator;
- const build_zig_zon_bytes = directory.handle.readFileAllocOptions(
- arena,
- Manifest.basename,
- max_bytes,
- null,
- 1,
- 0,
- ) catch |err| switch (err) {
- error.FileNotFound => {
- // Handle the same as no dependencies.
- if (this_hash) |hash| {
- try dependencies_source.writer().print(
- \\ pub const {} = struct {{
- \\ pub const build_root = "{}";
- \\ pub const build_zig = @import("{}");
- \\ pub const deps: []const struct {{ []const u8, []const u8 }} = &.{{}};
- \\ }};
- \\
- , .{
- std.zig.fmtId(hash),
- std.zig.fmtEscapes(pkg.root_src_directory.path.?),
- std.zig.fmtEscapes(hash),
- });
- } else {
- try dependencies_source.writer().writeAll(
- \\pub const packages = struct {};
- \\pub const root_deps: []const struct { []const u8, []const u8 } = &.{};
- \\
- );
- }
- return;
- },
- else => |e| return e,
- };
-
- var ast = try std.zig.Ast.parse(gpa, build_zig_zon_bytes, .zon);
- defer ast.deinit(gpa);
-
- if (ast.errors.len > 0) {
- const file_path = try directory.join(arena, &.{Manifest.basename});
- try main.putAstErrorsIntoBundle(gpa, ast, file_path, error_bundle);
- return error.PackageFetchFailed;
- }
-
- var manifest = try Manifest.parse(gpa, ast);
- defer manifest.deinit(gpa);
-
- if (manifest.errors.len > 0) {
- const file_path = try directory.join(arena, &.{Manifest.basename});
- for (manifest.errors) |msg| {
- const str = try error_bundle.addString(msg.msg);
- try Report.addErrorMessage(&ast, file_path, error_bundle, 0, str, msg.tok, msg.off);
- }
- return error.PackageFetchFailed;
- }
-
- const report: Report = .{
- .ast = &ast,
- .directory = directory,
- .error_bundle = error_bundle,
- };
-
- for (manifest.dependencies.values()) |dep| {
- // If the hash is invalid, let errors happen later
- // We only want to add these for progress reporting
- const hash = dep.hash orelse continue;
- if (hash.len != hex_multihash_len) continue;
- const gop = try all_modules.getOrPut(gpa, hash[0..hex_multihash_len].*);
- if (!gop.found_existing) gop.value_ptr.* = null;
- }
-
- root_prog_node.setEstimatedTotalItems(all_modules.count());
-
- if (this_hash == null) {
- try dependencies_source.writer().writeAll("pub const packages = struct {\n");
- }
-
- for (manifest.dependencies.keys(), manifest.dependencies.values()) |name, *dep| {
- var fetch_location = try FetchLocation.init(gpa, dep.*, directory, report);
- defer fetch_location.deinit(gpa);
-
- // Directories do not provide a hash in build.zig.zon.
- // Hash the path to the module rather than its contents.
- const sub_mod, const found_existing = if (fetch_location == .directory)
- try getDirectoryModule(gpa, fetch_location, directory, all_modules, dep, report)
- else
- try getCachedPackage(
- gpa,
- global_cache_directory,
- dep.*,
- all_modules,
- root_prog_node,
- ) orelse .{
- try fetchAndUnpack(
- fetch_location,
- thread_pool,
- http_client,
- directory,
- global_cache_directory,
- dep.*,
- report,
- all_modules,
- root_prog_node,
- name,
- ),
- false,
- };
-
- assert(dep.hash != null);
-
- switch (sub_mod) {
- .zig_pkg => |sub_pkg| {
- if (!found_existing) {
- try sub_pkg.fetchAndAddDependencies(
- deps_pkg,
- arena,
- thread_pool,
- http_client,
- sub_pkg.root_src_directory,
- global_cache_directory,
- local_cache_directory,
- dependencies_source,
- error_bundle,
- all_modules,
- root_prog_node,
- dep.hash.?,
- );
- }
-
- try pkg.add(gpa, name, sub_pkg);
- if (deps_pkg.table.get(dep.hash.?)) |other_sub| {
- // This should be the same package (and hence module) since it's the same hash
- // TODO: dedup multiple versions of the same package
- assert(other_sub == sub_pkg);
- } else {
- try deps_pkg.add(gpa, dep.hash.?, sub_pkg);
- }
- },
- .non_zig_pkg => |sub_pkg| {
- if (!found_existing) {
- try dependencies_source.writer().print(
- \\ pub const {} = struct {{
- \\ pub const build_root = "{}";
- \\ pub const deps: []const struct {{ []const u8, []const u8 }} = &.{{}};
- \\ }};
- \\
- , .{
- std.zig.fmtId(dep.hash.?),
- std.zig.fmtEscapes(sub_pkg.root_src_directory.path.?),
- });
- }
- },
- }
- }
-
- if (this_hash) |hash| {
- try dependencies_source.writer().print(
- \\ pub const {} = struct {{
- \\ pub const build_root = "{}";
- \\ pub const build_zig = @import("{}");
- \\ pub const deps: []const struct {{ []const u8, []const u8 }} = &.{{
- \\
- , .{
- std.zig.fmtId(hash),
- std.zig.fmtEscapes(pkg.root_src_directory.path.?),
- std.zig.fmtEscapes(hash),
- });
- for (manifest.dependencies.keys(), manifest.dependencies.values()) |name, dep| {
- try dependencies_source.writer().print(
- " .{{ \"{}\", \"{}\" }},\n",
- .{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(dep.hash.?) },
- );
- }
- try dependencies_source.writer().writeAll(
- \\ };
- \\ };
- \\
- );
- } else {
- try dependencies_source.writer().writeAll(
- \\};
- \\
- \\pub const root_deps: []const struct { []const u8, []const u8 } = &.{
- \\
- );
- for (manifest.dependencies.keys(), manifest.dependencies.values()) |name, dep| {
- try dependencies_source.writer().print(
- " .{{ \"{}\", \"{}\" }},\n",
- .{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(dep.hash.?) },
- );
- }
- try dependencies_source.writer().writeAll("};\n");
- }
-}
-
pub fn createFilePkg(
gpa: Allocator,
cache_directory: Compilation.Directory,
@@ -450,484 +233,11 @@ pub fn createFilePkg(
const hex_digest = hh.final();
const o_dir_sub_path = "o" ++ fs.path.sep_str ++ hex_digest;
- try renameTmpIntoCache(cache_directory.handle, tmp_dir_sub_path, o_dir_sub_path);
+ try Fetch.renameTmpIntoCache(cache_directory.handle, tmp_dir_sub_path, o_dir_sub_path);
return createWithDir(gpa, cache_directory, o_dir_sub_path, basename);
}
-pub const Report = struct {
- ast: ?*const std.zig.Ast,
- directory: Compilation.Directory,
- error_bundle: *std.zig.ErrorBundle.Wip,
-
- fn fail(
- report: Report,
- tok: std.zig.Ast.TokenIndex,
- comptime fmt_string: []const u8,
- fmt_args: anytype,
- ) error{ PackageFetchFailed, OutOfMemory } {
- const msg = try report.error_bundle.printString(fmt_string, fmt_args);
- return failMsg(report, tok, msg);
- }
-
- fn failMsg(
- report: Report,
- tok: std.zig.Ast.TokenIndex,
- msg: u32,
- ) error{ PackageFetchFailed, OutOfMemory } {
- const gpa = report.error_bundle.gpa;
-
- const file_path = try report.directory.join(gpa, &.{Manifest.basename});
- defer gpa.free(file_path);
-
- const eb = report.error_bundle;
-
- if (report.ast) |ast| {
- try addErrorMessage(ast, file_path, eb, 0, msg, tok, 0);
- } else {
- try eb.addRootErrorMessage(.{
- .msg = msg,
- .src_loc = .none,
- .notes_len = 0,
- });
- }
-
- return error.PackageFetchFailed;
- }
-
- fn addErrorWithNotes(
- report: Report,
- notes_len: u32,
- msg: Manifest.ErrorMessage,
- ) error{OutOfMemory}!void {
- const eb = report.error_bundle;
- const msg_str = try eb.addString(msg.msg);
- if (report.ast) |ast| {
- const gpa = eb.gpa;
- const file_path = try report.directory.join(gpa, &.{Manifest.basename});
- defer gpa.free(file_path);
- return addErrorMessage(ast, file_path, eb, notes_len, msg_str, msg.tok, msg.off);
- } else {
- return eb.addRootErrorMessage(.{
- .msg = msg_str,
- .src_loc = .none,
- .notes_len = notes_len,
- });
- }
- }
-
- fn addErrorMessage(
- ast: *const std.zig.Ast,
- file_path: []const u8,
- eb: *std.zig.ErrorBundle.Wip,
- notes_len: u32,
- msg_str: u32,
- msg_tok: std.zig.Ast.TokenIndex,
- msg_off: u32,
- ) error{OutOfMemory}!void {
- const token_starts = ast.tokens.items(.start);
- const start_loc = ast.tokenLocation(0, msg_tok);
-
- try eb.addRootErrorMessage(.{
- .msg = msg_str,
- .src_loc = try eb.addSourceLocation(.{
- .src_path = try eb.addString(file_path),
- .span_start = token_starts[msg_tok],
- .span_end = @as(u32, @intCast(token_starts[msg_tok] + ast.tokenSlice(msg_tok).len)),
- .span_main = token_starts[msg_tok] + msg_off,
- .line = @intCast(start_loc.line),
- .column = @as(u32, @intCast(start_loc.column)),
- .source_line = try eb.addString(ast.source[start_loc.line_start..start_loc.line_end]),
- }),
- .notes_len = notes_len,
- });
- }
-};
-
-pub const FetchLocation = union(enum) {
- /// The relative path to a file or directory.
- /// This may be a file that requires unpacking (such as a .tar.gz),
- /// or the path to the root directory of a package.
- file: []const u8,
- directory: []const u8,
- http_request: std.Uri,
- git_request: std.Uri,
-
- pub fn init(
- gpa: Allocator,
- dep: Manifest.Dependency,
- root_dir: Compilation.Directory,
- report: Report,
- ) !FetchLocation {
- switch (dep.location) {
- .url => |url| {
- const uri = std.Uri.parse(url) catch |err| switch (err) {
- error.UnexpectedCharacter => return report.fail(dep.location_tok, "failed to parse dependency location as URI", .{}),
- else => return err,
- };
- return initUri(uri, dep.location_tok, report);
- },
- .path => |path| {
- if (fs.path.isAbsolute(path)) {
- return report.fail(dep.location_tok, "absolute paths are not allowed. Use a relative path instead", .{});
- }
-
- const is_dir = isDirectory(root_dir, path) catch |err| switch (err) {
- error.FileNotFound => return report.fail(dep.location_tok, "file not found: {s}", .{path}),
- else => return err,
- };
-
- return if (is_dir)
- .{ .directory = try gpa.dupe(u8, path) }
- else
- .{ .file = try gpa.dupe(u8, path) };
- },
- }
- }
-
- pub fn initUri(uri: std.Uri, location_tok: std.zig.Ast.TokenIndex, report: Report) !FetchLocation {
- if (ascii.eqlIgnoreCase(uri.scheme, "file")) {
- return report.fail(location_tok, "'file' scheme is not allowed for URLs. Use '.path' instead", .{});
- } else if (ascii.eqlIgnoreCase(uri.scheme, "http") or ascii.eqlIgnoreCase(uri.scheme, "https")) {
- return .{ .http_request = uri };
- } else if (ascii.eqlIgnoreCase(uri.scheme, "git+http") or ascii.eqlIgnoreCase(uri.scheme, "git+https")) {
- return .{ .git_request = uri };
- } else {
- return report.fail(location_tok, "unsupported URL scheme: {s}", .{uri.scheme});
- }
- }
-
- pub fn deinit(f: *FetchLocation, gpa: Allocator) void {
- switch (f.*) {
- .file, .directory => |path| gpa.free(path),
- .http_request, .git_request => {},
- }
- f.* = undefined;
- }
-
- pub fn fetch(
- f: FetchLocation,
- gpa: Allocator,
- root_dir: Compilation.Directory,
- http_client: *std.http.Client,
- dep_location_tok: std.zig.Ast.TokenIndex,
- report: Report,
- ) !ReadableResource {
- switch (f) {
- .file => |file| {
- const owned_path = try gpa.dupe(u8, file);
- errdefer gpa.free(owned_path);
- return .{
- .path = owned_path,
- .resource = .{ .file = try root_dir.handle.openFile(file, .{}) },
- };
- },
- .http_request => |uri| {
- var h = std.http.Headers{ .allocator = gpa };
- defer h.deinit();
-
- var req = try http_client.request(.GET, uri, h, .{});
- errdefer req.deinit();
-
- try req.start(.{});
- try req.wait();
-
- if (req.response.status != .ok) {
- return report.fail(dep_location_tok, "expected response status '200 OK' got '{} {s}'", .{
- @intFromEnum(req.response.status),
- req.response.status.phrase() orelse "",
- });
- }
-
- return .{
- .path = try gpa.dupe(u8, uri.path),
- .resource = .{ .http_request = req },
- };
- },
- .git_request => |uri| {
- var transport_uri = uri;
- transport_uri.scheme = uri.scheme["git+".len..];
- var redirect_uri: []u8 = undefined;
- var session: git.Session = .{ .transport = http_client, .uri = transport_uri };
- session.discoverCapabilities(gpa, &redirect_uri) catch |e| switch (e) {
- error.Redirected => {
- defer gpa.free(redirect_uri);
- return report.fail(dep_location_tok, "repository moved to {s}", .{redirect_uri});
- },
- else => |other| return other,
- };
-
- const want_oid = want_oid: {
- const want_ref = uri.fragment orelse "HEAD";
- if (git.parseOid(want_ref)) |oid| break :want_oid oid else |_| {}
-
- const want_ref_head = try std.fmt.allocPrint(gpa, "refs/heads/{s}", .{want_ref});
- defer gpa.free(want_ref_head);
- const want_ref_tag = try std.fmt.allocPrint(gpa, "refs/tags/{s}", .{want_ref});
- defer gpa.free(want_ref_tag);
-
- var ref_iterator = try session.listRefs(gpa, .{
- .ref_prefixes = &.{ want_ref, want_ref_head, want_ref_tag },
- .include_peeled = true,
- });
- defer ref_iterator.deinit();
- while (try ref_iterator.next()) |ref| {
- if (mem.eql(u8, ref.name, want_ref) or
- mem.eql(u8, ref.name, want_ref_head) or
- mem.eql(u8, ref.name, want_ref_tag))
- {
- break :want_oid ref.peeled orelse ref.oid;
- }
- }
- return report.fail(dep_location_tok, "ref not found: {s}", .{want_ref});
- };
- if (uri.fragment == null) {
- const notes_len = 1;
- try report.addErrorWithNotes(notes_len, .{
- .tok = dep_location_tok,
- .off = 0,
- .msg = "url field is missing an explicit ref",
- });
- const eb = report.error_bundle;
- const notes_start = try eb.reserveNotes(notes_len);
- eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{
- .msg = try eb.printString("try .url = \"{+/}#{}\",", .{ uri, std.fmt.fmtSliceHexLower(&want_oid) }),
- }));
- return error.PackageFetchFailed;
- }
-
- var want_oid_buf: [git.fmt_oid_length]u8 = undefined;
- _ = std.fmt.bufPrint(&want_oid_buf, "{}", .{std.fmt.fmtSliceHexLower(&want_oid)}) catch unreachable;
- var fetch_stream = try session.fetch(gpa, &.{&want_oid_buf});
- errdefer fetch_stream.deinit();
-
- return .{
- .path = try gpa.dupe(u8, &want_oid_buf),
- .resource = .{ .git_fetch_stream = fetch_stream },
- };
- },
- .directory => unreachable, // Directories do not require fetching
- }
- }
-};
-
-pub const ReadableResource = struct {
- path: []const u8,
- resource: union(enum) {
- file: fs.File,
- http_request: std.http.Client.Request,
- git_fetch_stream: git.Session.FetchStream,
- dir: fs.IterableDir,
- },
-
- /// Unpack the package into the global cache directory.
- /// If `ps` does not require unpacking (for example, if it is a directory), then no caching is performed.
- /// In either case, the hash is computed and returned along with the path to the package.
- pub fn unpack(
- rr: *ReadableResource,
- allocator: Allocator,
- thread_pool: *ThreadPool,
- global_cache_directory: Compilation.Directory,
- dep_location_tok: std.zig.Ast.TokenIndex,
- report: Report,
- pkg_prog_node: *std.Progress.Node,
- ) !PackageLocation {
- switch (rr.resource) {
- inline .file, .http_request, .git_fetch_stream, .dir => |*r, tag| {
- const s = fs.path.sep_str;
- const rand_int = std.crypto.random.int(u64);
- const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int);
-
- const actual_hash = h: {
- var tmp_directory: Compilation.Directory = d: {
- const path = try global_cache_directory.join(allocator, &.{tmp_dir_sub_path});
- errdefer allocator.free(path);
-
- const iterable_dir = try global_cache_directory.handle.makeOpenPathIterable(tmp_dir_sub_path, .{});
- errdefer iterable_dir.close();
-
- break :d .{
- .path = path,
- .handle = iterable_dir.dir,
- };
- };
- defer tmp_directory.closeAndFree(allocator);
-
- if (tag != .dir) {
- const opt_content_length = try rr.getSize();
-
- var prog_reader: ProgressReader(@TypeOf(r.reader())) = .{
- .child_reader = r.reader(),
- .prog_node = pkg_prog_node,
- .unit = if (opt_content_length) |content_length| unit: {
- const kib = content_length / 1024;
- const mib = kib / 1024;
- if (mib > 0) {
- pkg_prog_node.setEstimatedTotalItems(@intCast(mib));
- pkg_prog_node.setUnit("MiB");
- break :unit .mib;
- } else {
- pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib)));
- pkg_prog_node.setUnit("KiB");
- break :unit .kib;
- }
- } else .any,
- };
-
- switch (try rr.getFileType(dep_location_tok, report)) {
- .tar => try unpackTarball(allocator, prog_reader.reader(), tmp_directory.handle, dep_location_tok, report),
- .@"tar.gz" => try unpackTarballCompressed(allocator, prog_reader, tmp_directory.handle, dep_location_tok, report, std.compress.gzip),
- .@"tar.xz" => try unpackTarballCompressed(allocator, prog_reader, tmp_directory.handle, dep_location_tok, report, std.compress.xz),
- .git_pack => try unpackGitPack(allocator, &prog_reader, git.parseOid(rr.path) catch unreachable, tmp_directory.handle, dep_location_tok, report),
- }
- } else {
- // Recursive directory copy.
- var it = try r.walk(allocator);
- defer it.deinit();
- while (try it.next()) |entry| {
- switch (entry.kind) {
- .directory => try tmp_directory.handle.makePath(entry.path),
- .file => try r.dir.copyFile(
- entry.path,
- tmp_directory.handle,
- entry.path,
- .{},
- ),
- .sym_link => {
- var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
- const link_name = try r.dir.readLink(entry.path, &buf);
- // TODO: if this would create a symlink to outside
- // the destination directory, fail with an error instead.
- try tmp_directory.handle.symLink(link_name, entry.path, .{});
- },
- else => return error.IllegalFileTypeInPackage,
- }
- }
- }
-
- break :h try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle });
- };
-
- const pkg_dir_sub_path = "p" ++ s ++ Manifest.hexDigest(actual_hash);
- const unpacked_path = try global_cache_directory.join(allocator, &.{pkg_dir_sub_path});
- defer allocator.free(unpacked_path);
-
- const relative_unpacked_path = try fs.path.relative(allocator, global_cache_directory.path.?, unpacked_path);
- errdefer allocator.free(relative_unpacked_path);
- try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, relative_unpacked_path);
-
- return .{
- .hash = actual_hash,
- .relative_unpacked_path = relative_unpacked_path,
- };
- },
- }
- }
-
- const FileType = enum {
- tar,
- @"tar.gz",
- @"tar.xz",
- git_pack,
- };
-
- pub fn getSize(rr: ReadableResource) !?u64 {
- switch (rr.resource) {
- .file => |f| return (try f.metadata()).size(),
- // TODO: Handle the case of chunked transfer encoding, where no content length is available.
- .http_request => |req| return req.response.content_length,
- .git_fetch_stream => |stream| return stream.request.response.content_length,
- .dir => unreachable,
- }
- }
-
- pub fn getFileType(
- rr: ReadableResource,
- dep_location_tok: std.zig.Ast.TokenIndex,
- report: Report,
- ) !FileType {
- switch (rr.resource) {
- .file => {
- return fileTypeFromPath(rr.path) orelse
- return report.fail(dep_location_tok, "unknown file type", .{});
- },
- .http_request => |req| {
- const content_type = req.response.headers.getFirstValue("Content-Type") orelse
- return report.fail(dep_location_tok, "missing 'Content-Type' header", .{});
-
- // The Content-Type reported by the server takes precedence over any file
- // type implied by the URI.
- if (ascii.eqlIgnoreCase(content_type, "application/x-tar")) return .tar;
-
- return if (ascii.eqlIgnoreCase(content_type, "application/gzip") or
- ascii.eqlIgnoreCase(content_type, "application/x-gzip") or
- ascii.eqlIgnoreCase(content_type, "application/tar+gzip"))
- .@"tar.gz"
- else if (ascii.eqlIgnoreCase(content_type, "application/x-xz"))
- .@"tar.xz"
- else if (ascii.eqlIgnoreCase(content_type, "application/octet-stream")) ty: {
- // Support GitLab tarball URLs such as https://gitlab.com/<namespace>/<project>/-/archive/<sha>/<project>-<sha>.tar.gz,
- // whose Content-Disposition header is 'attachment; filename="<project>-<sha>.tar.gz"'.
- const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse
- return report.fail(dep_location_tok, "missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{});
- break :ty getAttachmentType(content_disposition) orelse
- return report.fail(dep_location_tok, "unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition});
- } else return report.fail(dep_location_tok, "unrecognized value for 'Content-Type' header: {s}", .{content_type});
- },
- .git_fetch_stream => return .git_pack,
- .dir => unreachable,
- }
- }
-
- fn fileTypeFromPath(file_path: []const u8) ?FileType {
- if (ascii.endsWithIgnoreCase(file_path, ".tar")) return .tar;
- if (ascii.endsWithIgnoreCase(file_path, ".tar.gz")) return .@"tar.gz";
- if (ascii.endsWithIgnoreCase(file_path, ".tar.xz")) return .@"tar.xz";
- return null;
- }
-
- fn getAttachmentType(content_disposition: []const u8) ?FileType {
- const disposition_type_end = ascii.indexOfIgnoreCase(content_disposition, "attachment;") orelse return null;
-
- var value_start = ascii.indexOfIgnoreCasePos(content_disposition, disposition_type_end + 1, "filename") orelse return null;
- value_start += "filename".len;
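- // Accept both the plain "filename=" parameter and the extended "filename*=" form.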
- if (content_disposition[value_start] == '*') {
- value_start += 1;
- }
- if (content_disposition[value_start] != '=') return null;
- value_start += 1;
-
- var value_end = mem.indexOfPos(u8, content_disposition, value_start, ";") orelse content_disposition.len;
- if (content_disposition[value_end - 1] == '\"') {
- value_end -= 1;
- }
- return fileTypeFromPath(content_disposition[value_start..value_end]);
- }
-
- pub fn deinit(rr: *ReadableResource, gpa: Allocator) void {
- gpa.free(rr.path);
- switch (rr.resource) {
- .file => |file| file.close(),
- .http_request => |*req| req.deinit(),
- .git_fetch_stream => |*stream| stream.deinit(),
- .dir => |*dir| dir.close(),
- }
- rr.* = undefined;
- }
-};
-
-pub const PackageLocation = struct {
- /// For packages that require unpacking, this is the hash of the package contents.
- /// For directories, this is the hash of the absolute file path.
- hash: [Manifest.Hash.digest_length]u8,
- relative_unpacked_path: []const u8,
-
- pub fn deinit(pl: *PackageLocation, allocator: Allocator) void {
- allocator.free(pl.relative_unpacked_path);
- pl.* = undefined;
- }
-};
-
const hex_multihash_len = 2 * Manifest.multihash_len;
const MultiHashHexDigest = [hex_multihash_len]u8;
@@ -939,411 +249,3 @@ const DependencyModule = union(enum) {
/// If the value is `null`, the package is a known dependency, but has not yet
/// been fetched.
pub const AllModules = std.AutoHashMapUnmanaged(MultiHashHexDigest, ?DependencyModule);
-
-fn ProgressReader(comptime ReaderType: type) type {
- return struct {
- child_reader: ReaderType,
- bytes_read: u64 = 0,
- prog_node: *std.Progress.Node,
- unit: enum {
- kib,
- mib,
- any,
- },
-
- pub const Error = ReaderType.Error;
- pub const Reader = std.io.Reader(*@This(), Error, read);
-
- pub fn read(self: *@This(), buf: []u8) Error!usize {
- const amt = try self.child_reader.read(buf);
- self.bytes_read += amt;
- const kib = self.bytes_read / 1024;
- const mib = kib / 1024;
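- // Update the progress node in the unit chosen when the reader was set up;
- // in .any mode, pick KiB or MiB based on how much has been read so far.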
- switch (self.unit) {
- .kib => self.prog_node.setCompletedItems(@intCast(kib)),
- .mib => self.prog_node.setCompletedItems(@intCast(mib)),
- .any => {
- if (mib > 0) {
- self.prog_node.setUnit("MiB");
- self.prog_node.setCompletedItems(@intCast(mib));
- } else {
- self.prog_node.setUnit("KiB");
- self.prog_node.setCompletedItems(@intCast(kib));
- }
- },
- }
- self.prog_node.activate();
- return amt;
- }
-
- pub fn reader(self: *@This()) Reader {
- return .{ .context = self };
- }
- };
-}
-
-/// Get a cached package if it exists.
-/// Returns `null` if the package has not been cached.
-/// If the package exists in the cache, returns the dependency module and a
-/// boolean indicating whether this package has already been seen in the build
-/// (i.e. whether or not its transitive dependencies have been fetched).
-fn getCachedPackage(
- gpa: Allocator,
- global_cache_directory: Compilation.Directory,
- dep: Manifest.Dependency,
- all_modules: *AllModules,
- root_prog_node: *std.Progress.Node,
-) !?struct { DependencyModule, bool } {
- const s = fs.path.sep_str;
- // Check if the expected_hash is already present in the global package
- // cache, and thereby avoid both fetching and unpacking.
- if (dep.hash) |h| {
- const hex_digest = h[0..hex_multihash_len];
- const pkg_dir_sub_path = "p" ++ s ++ hex_digest;
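- // Cached packages live under "p/<multihash>" in the global cache directory.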
-
- var pkg_dir = global_cache_directory.handle.openDir(pkg_dir_sub_path, .{}) catch |err| switch (err) {
- error.FileNotFound => return null,
- else => |e| return e,
- };
- errdefer pkg_dir.close();
-
- // The compiler has a rule that a file must not be included in multiple modules,
- // so we must detect if a module has been created for this package and reuse it.
- const gop = try all_modules.getOrPut(gpa, hex_digest.*);
- if (gop.found_existing) {
- if (gop.value_ptr.*) |mod| {
- return .{ mod, true };
- }
- }
-
- root_prog_node.completeOne();
-
- const is_zig_mod = if (pkg_dir.access(build_zig_basename, .{})) |_| true else |_| false;
- const basename = if (is_zig_mod) build_zig_basename else "";
- const pkg = try createWithDir(gpa, global_cache_directory, pkg_dir_sub_path, basename);
-
- const module: DependencyModule = if (is_zig_mod)
- .{ .zig_pkg = pkg }
- else
- .{ .non_zig_pkg = pkg };
-
- try all_modules.put(gpa, hex_digest.*, module);
- return .{ module, false };
- }
-
- return null;
-}
-
-fn getDirectoryModule(
- gpa: Allocator,
- fetch_location: FetchLocation,
- directory: Compilation.Directory,
- all_modules: *AllModules,
- dep: *Manifest.Dependency,
- report: Report,
-) !struct { DependencyModule, bool } {
- assert(fetch_location == .directory);
-
- if (dep.hash != null) {
- return report.fail(dep.hash_tok, "hash not allowed for directory package", .{});
- }
-
- const hash = try computePathHash(gpa, directory, fetch_location.directory);
- const hex_digest = Manifest.hexDigest(hash);
- dep.hash = try gpa.dupe(u8, &hex_digest);
-
- // There is no fixed location to check for directory modules.
- // Instead, check whether it is already listed in all_modules.
- if (all_modules.get(hex_digest)) |mod| return .{ mod.?, true };
-
- var pkg_dir = directory.handle.openDir(fetch_location.directory, .{}) catch |err| switch (err) {
- error.FileNotFound => return report.fail(dep.location_tok, "file not found: {s}", .{fetch_location.directory}),
- else => |e| return e,
- };
- defer pkg_dir.close();
-
- const is_zig_mod = if (pkg_dir.access(build_zig_basename, .{})) |_| true else |_| false;
- const basename = if (is_zig_mod) build_zig_basename else "";
-
- const pkg = try createWithDir(gpa, directory, fetch_location.directory, basename);
- const module: DependencyModule = if (is_zig_mod)
- .{ .zig_pkg = pkg }
- else
- .{ .non_zig_pkg = pkg };
-
- try all_modules.put(gpa, hex_digest, module);
- return .{ module, false };
-}
-
-fn fetchAndUnpack(
- fetch_location: FetchLocation,
- thread_pool: *ThreadPool,
- http_client: *std.http.Client,
- directory: Compilation.Directory,
- global_cache_directory: Compilation.Directory,
- dep: Manifest.Dependency,
- report: Report,
- all_modules: *AllModules,
- root_prog_node: *std.Progress.Node,
- /// This does not have to be any form of canonical or fully-qualified name: it
- /// is only intended to be human-readable for progress reporting.
- name_for_prog: []const u8,
-) !DependencyModule {
- assert(fetch_location != .directory);
-
- const gpa = http_client.allocator;
-
- var pkg_prog_node = root_prog_node.start(name_for_prog, 0);
- defer pkg_prog_node.end();
- pkg_prog_node.activate();
-
- var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep.location_tok, report);
- defer readable_resource.deinit(gpa);
-
- var package_location = try readable_resource.unpack(
- gpa,
- thread_pool,
- global_cache_directory,
- dep.location_tok,
- report,
- &pkg_prog_node,
- );
- defer package_location.deinit(gpa);
-
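- // Compare the computed hash with the hash declared in build.zig.zon. A missing
- // hash field is an error, but the discovered hash is still reported so it can be added.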
- const actual_hex = Manifest.hexDigest(package_location.hash);
- if (dep.hash) |h| {
- if (!mem.eql(u8, h, &actual_hex)) {
- return report.fail(dep.hash_tok, "hash mismatch: expected: {s}, found: {s}", .{
- h, actual_hex,
- });
- }
- } else {
- const notes_len = 1;
- try report.addErrorWithNotes(notes_len, .{
- .tok = dep.location_tok,
- .off = 0,
- .msg = "dependency is missing hash field",
- });
- const eb = report.error_bundle;
- const notes_start = try eb.reserveNotes(notes_len);
- eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{
- .msg = try eb.printString("expected .hash = \"{s}\",", .{&actual_hex}),
- }));
- return error.PackageFetchFailed;
- }
-
- const build_zig_path = try fs.path.join(gpa, &.{ package_location.relative_unpacked_path, build_zig_basename });
- defer gpa.free(build_zig_path);
-
- const is_zig_mod = if (global_cache_directory.handle.access(build_zig_path, .{})) |_| true else |_| false;
- const basename = if (is_zig_mod) build_zig_basename else "";
- const pkg = try createWithDir(gpa, global_cache_directory, package_location.relative_unpacked_path, basename);
- const module: DependencyModule = if (is_zig_mod)
- .{ .zig_pkg = pkg }
- else
- .{ .non_zig_pkg = pkg };
-
- try all_modules.put(gpa, actual_hex, module);
- return module;
-}
-
-fn unpackTarballCompressed(
- gpa: Allocator,
- reader: anytype,
- out_dir: fs.Dir,
- dep_location_tok: std.zig.Ast.TokenIndex,
- report: Report,
- comptime Compression: type,
-) !void {
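- // Buffer input reads (sized to the TLS maximum ciphertext record length)
- // before handing them to the decompressor.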
- var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader);
-
- var decompress = try Compression.decompress(gpa, br.reader());
- defer decompress.deinit();
-
- return unpackTarball(gpa, decompress.reader(), out_dir, dep_location_tok, report);
-}
-
-fn unpackTarball(
- gpa: Allocator,
- reader: anytype,
- out_dir: fs.Dir,
- dep_location_tok: std.zig.Ast.TokenIndex,
- report: Report,
-) !void {
- var diagnostics: std.tar.Options.Diagnostics = .{ .allocator = gpa };
- defer diagnostics.deinit();
-
- try std.tar.pipeToFileSystem(out_dir, reader, .{
- .diagnostics = &diagnostics,
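- // Strip the single top-level directory that source tarballs conventionally contain.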
- .strip_components = 1,
- // TODO: we would like to set this to executable_bit_only, but two
- // things need to happen before that:
- // 1. the tar implementation needs to support it
- // 2. the hashing algorithm here needs to support detecting the is_executable
- // bit on Windows from the ACLs (see the isExecutable function).
- .mode_mode = .ignore,
- });
-
- if (diagnostics.errors.items.len > 0) {
- const notes_len: u32 = @intCast(diagnostics.errors.items.len);
- try report.addErrorWithNotes(notes_len, .{
- .tok = dep_location_tok,
- .off = 0,
- .msg = "unable to unpack tarball",
- });
- const eb = report.error_bundle;
- const notes_start = try eb.reserveNotes(notes_len);
- for (diagnostics.errors.items, notes_start..) |item, note_i| {
- switch (item) {
- .unable_to_create_sym_link => |info| {
- eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{
- .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{
- info.file_name, info.link_name, @errorName(info.code),
- }),
- }));
- },
- .unsupported_file_type => |info| {
- eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{
- .msg = try eb.printString("file '{s}' has unsupported type '{c}'", .{
- info.file_name, @intFromEnum(info.file_type),
- }),
- }));
- },
- }
- }
- return error.InvalidTarball;
- }
-}
-
-fn unpackGitPack(
- gpa: Allocator,
- reader: anytype,
- want_oid: git.Oid,
- out_dir: fs.Dir,
- dep_location_tok: std.zig.Ast.TokenIndex,
- report: Report,
-) !void {
- // The .git directory is used to store the packfile and associated index, but
- // we do not attempt to replicate the exact structure of a real .git
- // directory, since that isn't relevant for fetching a package.
- {
- var pack_dir = try out_dir.makeOpenPath(".git", .{});
- defer pack_dir.close();
- var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true });
- defer pack_file.close();
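- // Write the fetched packfile to disk, then index it and check out the wanted commit.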
- var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
- try fifo.pump(reader.reader(), pack_file.writer());
- try pack_file.sync();
-
- var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true });
- defer index_file.close();
- {
- var index_prog_node = reader.prog_node.start("Index pack", 0);
- defer index_prog_node.end();
- index_prog_node.activate();
- var index_buffered_writer = std.io.bufferedWriter(index_file.writer());
- try git.indexPack(gpa, pack_file, index_buffered_writer.writer());
- try index_buffered_writer.flush();
- try index_file.sync();
- }
-
- {
- var checkout_prog_node = reader.prog_node.start("Checkout", 0);
- defer checkout_prog_node.end();
- checkout_prog_node.activate();
- var repository = try git.Repository.init(gpa, pack_file, index_file);
- defer repository.deinit();
- var diagnostics: git.Diagnostics = .{ .allocator = gpa };
- defer diagnostics.deinit();
- try repository.checkout(out_dir, want_oid, &diagnostics);
-
- if (diagnostics.errors.items.len > 0) {
- const notes_len: u32 = @intCast(diagnostics.errors.items.len);
- try report.addErrorWithNotes(notes_len, .{
- .tok = dep_location_tok,
- .off = 0,
- .msg = "unable to unpack packfile",
- });
- const eb = report.error_bundle;
- const notes_start = try eb.reserveNotes(notes_len);
- for (diagnostics.errors.items, notes_start..) |item, note_i| {
- switch (item) {
- .unable_to_create_sym_link => |info| {
- eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{
- .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{
- info.file_name, info.link_name, @errorName(info.code),
- }),
- }));
- },
- }
- }
- return error.InvalidGitPack;
- }
- }
- }
-
- try out_dir.deleteTree(".git");
-}
-
-/// Compute the hash of a file path.
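-/// Used for path-based (directory) dependencies, whose hash is derived from the
-/// resolved absolute path rather than from file contents.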
-fn computePathHash(gpa: Allocator, dir: Compilation.Directory, path: []const u8) ![Manifest.Hash.digest_length]u8 {
- const resolved_path = try std.fs.path.resolve(gpa, &.{ dir.path.?, path });
- defer gpa.free(resolved_path);
- var hasher = Manifest.Hash.init(.{});
- hasher.update(resolved_path);
- return hasher.finalResult();
-}
-
-fn isDirectory(root_dir: Compilation.Directory, path: []const u8) !bool {
- var dir = root_dir.handle.openDir(path, .{}) catch |err| switch (err) {
- error.NotDir => return false,
- else => return err,
- };
- defer dir.close();
- return true;
-}
-
-fn renameTmpIntoCache(
- cache_dir: fs.Dir,
- tmp_dir_sub_path: []const u8,
- dest_dir_sub_path: []const u8,
-) !void {
- assert(dest_dir_sub_path[1] == fs.path.sep);
- var handled_missing_dir = false;
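- // The parent directory of the destination (e.g. "p") may not exist yet; on the
- // first FileNotFound it is created and the rename is retried once.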
- while (true) {
- cache_dir.rename(tmp_dir_sub_path, dest_dir_sub_path) catch |err| switch (err) {
- error.FileNotFound => {
- if (handled_missing_dir) return err;
- cache_dir.makeDir(dest_dir_sub_path[0..1]) catch |mkd_err| switch (mkd_err) {
- error.PathAlreadyExists => handled_missing_dir = true,
- else => |e| return e,
- };
- continue;
- },
- error.PathAlreadyExists, error.AccessDenied => {
- // Package has already been downloaded and may already be in use on the system.
- cache_dir.deleteTree(tmp_dir_sub_path) catch |del_err| {
- std.log.warn("unable to delete temp directory: {s}", .{@errorName(del_err)});
- };
- },
- else => |e| return e,
- };
- break;
- }
-}
-
-test "getAttachmentType" {
- try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.gz"), ReadableResource.getAttachmentType("attaChment; FILENAME=\"stuff.tar.gz\"; size=42"));
- try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.gz"), ReadableResource.getAttachmentType("attachment; filename*=\"stuff.tar.gz\""));
- try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.xz"), ReadableResource.getAttachmentType("ATTACHMENT; filename=\"stuff.tar.xz\""));
- try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.xz"), ReadableResource.getAttachmentType("attachment; FileName=\"stuff.tar.xz\""));
- try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.gz"), ReadableResource.getAttachmentType("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz"));
-
- try std.testing.expect(ReadableResource.getAttachmentType("attachment FileName=\"stuff.tar.gz\"") == null);
- try std.testing.expect(ReadableResource.getAttachmentType("attachment; FileName=\"stuff.tar\"") == null);
- try std.testing.expect(ReadableResource.getAttachmentType("attachment; FileName\"stuff.gz\"") == null);
- try std.testing.expect(ReadableResource.getAttachmentType("attachment; size=42") == null);
- try std.testing.expect(ReadableResource.getAttachmentType("inline; size=42") == null);
- try std.testing.expect(ReadableResource.getAttachmentType("FileName=\"stuff.tar.gz\"; attachment;") == null);
- try std.testing.expect(ReadableResource.getAttachmentType("FileName=\"stuff.tar.gz\";") == null);
-}
CMakeLists.txt
@@ -528,7 +528,7 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/Liveness.zig"
"${CMAKE_SOURCE_DIR}/src/Module.zig"
"${CMAKE_SOURCE_DIR}/src/Package.zig"
- "${CMAKE_SOURCE_DIR}/src/Package/hash.zig"
+ "${CMAKE_SOURCE_DIR}/src/Package/Fetch.zig"
"${CMAKE_SOURCE_DIR}/src/RangeSet.zig"
"${CMAKE_SOURCE_DIR}/src/Sema.zig"
"${CMAKE_SOURCE_DIR}/src/TypedValue.zig"