Commit b3cad98534

Adam Goertz <adambgoertz@gmail.com>
2023-07-12 04:45:51
Support file:/// URIs and relative paths
1 parent ed19ebc
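
Dependencies in build.zig.zon may now use a `path` field as an alternative to
`url`, and `file://` URIs are resolved to local files or directories (on
Windows via a new binding for shlwapi's PathCreateFromUrl). Local directories
are hashed in place rather than unpacked into the global package cache.
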
lib/std/os/windows/shlwapi.zig
@@ -0,0 +1,13 @@
+const std = @import("../../std.zig");
+const windows = std.os.windows;
+
+const DWORD = windows.DWORD;
+const WINAPI = windows.WINAPI;
+const HRESULT = windows.HRESULT;
+const LPCSTR = windows.LPCSTR;
+const LPSTR = windows.LPSTR;
+const LPWSTR = windows.LPWSTR;
+const LPCWSTR = windows.LPCWSTR;
+
+pub extern "shlwapi" fn PathCreateFromUrlW(pszUrl: LPCWSTR, pszPath: LPWSTR, pcchPath: *DWORD, dwFlags: DWORD) callconv(WINAPI) HRESULT;
+pub extern "shlwapi" fn PathCreateFromUrlA(pszUrl: LPCSTR, pszPath: LPSTR, pcchPath: *DWORD, dwFlags: DWORD) callconv(WINAPI) HRESULT;
lib/std/os/windows.zig
@@ -30,6 +30,7 @@ pub const gdi32 = @import("windows/gdi32.zig");
 pub const winmm = @import("windows/winmm.zig");
 pub const crypt32 = @import("windows/crypt32.zig");
 pub const nls = @import("windows/nls.zig");
+pub const shlwapi = @import("windows/shlwapi.zig");
 
 pub const self_process_handle = @as(HANDLE, @ptrFromInt(maxInt(usize)));
 
lib/std/Uri.zig
@@ -134,6 +134,7 @@ pub const ParseError = error{ UnexpectedCharacter, InvalidFormat, InvalidPort };
 /// original `text`. Each component that is provided will be non-`null`.
 pub fn parseWithoutScheme(text: []const u8) ParseError!Uri {
     var reader = SliceReader{ .slice = text };
+
     var uri = Uri{
         .scheme = "",
         .user = null,
@@ -145,13 +146,14 @@ pub fn parseWithoutScheme(text: []const u8) ParseError!Uri {
         .fragment = null,
     };
 
-    if (reader.peekPrefix("//")) { // authority part
+    if (reader.peekPrefix("//")) a: { // authority part
         std.debug.assert(reader.get().? == '/');
         std.debug.assert(reader.get().? == '/');
 
-        const authority = reader.readUntil(isAuthoritySeparator);
-        if (authority.len == 0)
-            return error.InvalidFormat;
+        var authority = reader.readUntil(isAuthoritySeparator);
+        if (authority.len == 0) {
+            if (reader.peekPrefix("/")) break :a else return error.InvalidFormat;
+        }
 
         var start_of_host: usize = 0;
         if (std.mem.indexOf(u8, authority, "@")) |index| {
@@ -224,7 +226,6 @@ pub fn format(
         try writer.writeAll(":");
         if (uri.host) |host| {
             try writer.writeAll("//");
-
             if (uri.user) |user| {
                 try writer.writeAll(user);
                 if (uri.password) |password| {
@@ -486,6 +487,23 @@ test "should fail gracefully" {
     try std.testing.expectEqual(@as(ParseError!Uri, error.InvalidFormat), parse("foobar://"));
 }
 
+test "file" {
+    const parsed = try parse("file:///");
+    try std.testing.expectEqualSlices(u8, "file", parsed.scheme);
+    try std.testing.expectEqual(@as(?[]const u8, null), parsed.host);
+    try std.testing.expectEqualSlices(u8, "/", parsed.path);
+
+    const parsed2 = try parse("file:///an/absolute/path/to/something");
+    try std.testing.expectEqualSlices(u8, "file", parsed2.scheme);
+    try std.testing.expectEqual(@as(?[]const u8, null), parsed2.host);
+    try std.testing.expectEqualSlices(u8, "/an/absolute/path/to/something", parsed2.path);
+
+    const parsed3 = try parse("file://localhost/an/absolute/path/to/another/thing/");
+    try std.testing.expectEqualSlices(u8, "file", parsed3.scheme);
+    try std.testing.expectEqualSlices(u8, "localhost", parsed3.host.?);
+    try std.testing.expectEqualSlices(u8, "/an/absolute/path/to/another/thing/", parsed3.path);
+}
+
 test "scheme" {
     try std.testing.expectEqualSlices(u8, "http", (try parse("http:_")).scheme);
     try std.testing.expectEqualSlices(u8, "scheme-mee", (try parse("scheme-mee:_")).scheme);
@@ -695,3 +713,20 @@ test "URI query escaping" {
     defer std.testing.allocator.free(formatted_uri);
     try std.testing.expectEqualStrings("/?response-content-type=application%2Foctet-stream", formatted_uri);
 }
+
+test "format" {
+    const uri = Uri{
+        .scheme = "file",
+        .user = null,
+        .password = null,
+        .host = null,
+        .port = null,
+        .path = "/foo/bar/baz",
+        .query = null,
+        .fragment = null,
+    };
+    var buf = std.ArrayList(u8).init(std.testing.allocator);
+    defer buf.deinit();
+    try uri.format("+/", .{}, buf.writer());
+    try std.testing.expectEqualSlices(u8, "file:/foo/bar/baz", buf.items);
+}
src/Manifest.zig
@@ -2,8 +2,11 @@ pub const basename = "build.zig.zon";
 pub const Hash = std.crypto.hash.sha2.Sha256;
 
 pub const Dependency = struct {
-    url: []const u8,
-    url_tok: Ast.TokenIndex,
+    location: union(enum) {
+        url: []const u8,
+        path: []const u8,
+    },
+    location_tok: Ast.TokenIndex,
     hash: ?[]const u8,
     hash_tok: Ast.TokenIndex,
 };
@@ -218,12 +221,12 @@ const Parse = struct {
         };
 
         var dep: Dependency = .{
-            .url = undefined,
-            .url_tok = undefined,
+            .location = undefined,
+            .location_tok = undefined,
             .hash = null,
             .hash_tok = undefined,
         };
-        var have_url = false;
+        var has_location = false;
 
         for (struct_init.ast.fields) |field_init| {
             const name_token = ast.firstToken(field_init) - 2;
@@ -232,12 +235,29 @@ const Parse = struct {
             // things manually provides an opportunity to do any additional verification
             // that is desirable on a per-field basis.
             if (mem.eql(u8, field_name, "url")) {
-                dep.url = parseString(p, field_init) catch |err| switch (err) {
-                    error.ParseFailure => continue,
-                    else => |e| return e,
+                if (has_location) {
+                    return fail(p, main_tokens[field_init], "dependency must specify only one of the 'url' and 'path' fields", .{});
+                }
+                dep.location = .{
+                    .url = parseString(p, field_init) catch |err| switch (err) {
+                        error.ParseFailure => continue,
+                        else => |e| return e,
+                    },
+                };
+                has_location = true;
+                dep.location_tok = main_tokens[field_init];
+            } else if (mem.eql(u8, field_name, "path")) {
+                if (has_location) {
+                    return fail(p, main_tokens[field_init], "dependency must specify only one of the 'url' and 'path' fields", .{});
+                }
+                dep.location = .{
+                    .path = parseString(p, field_init) catch |err| switch (err) {
+                        error.ParseFailure => continue,
+                        else => |e| return e,
+                    },
                 };
-                dep.url_tok = main_tokens[field_init];
-                have_url = true;
+                has_location = true;
+                dep.location_tok = main_tokens[field_init];
             } else if (mem.eql(u8, field_name, "hash")) {
                 dep.hash = parseHash(p, field_init) catch |err| switch (err) {
                     error.ParseFailure => continue,
@@ -250,8 +270,8 @@ const Parse = struct {
             }
         }
 
-        if (!have_url) {
-            try appendError(p, main_tokens[node], "dependency is missing 'url' field", .{});
+        if (!has_location) {
+            try appendError(p, main_tokens[node], "dependency is missing a location field; expected one of 'url' or 'path'", .{});
         }
 
         return dep;
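
For illustration (not part of the diff), a build.zig.zon using both dependency
forms; names, versions, and hashes are hypothetical, and the hash requirement
is unchanged (see the generalized error handling in src/Package.zig below):

    .{
        .name = "example",
        .version = "0.1.0",
        .dependencies = .{
            .remote_dep = .{
                .url = "https://example.com/remote_dep.tar.gz",
                .hash = "1220aaaa",
            },
            .local_dep = .{
                .path = "../local_dep",
                .hash = "1220bbbb",
            },
        },
    }
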
src/Package.zig
@@ -316,56 +316,51 @@ pub fn fetchAndAddDependencies(
     for (manifest.dependencies.keys(), 0..) |name, i| {
         const dep = deps_list[i];
 
-        const sub = try fetchAndUnpack(
-            thread_pool,
-            http_client,
+        const sub_pkg = try getCachedPackage(
+            http_client.allocator,
             global_cache_directory,
             dep,
             report,
             all_modules,
             root_prog_node,
-            name,
-        );
+        ) orelse m: {
+            const mod = try fetchAndUnpack(
+                thread_pool,
+                http_client,
+                directory,
+                global_cache_directory,
+                dep,
+                report,
+                all_modules,
+                root_prog_node,
+                name,
+            );
 
-        if (sub.mod) |mod| {
-            if (!sub.found_existing) {
-                try mod.fetchAndAddDependencies(
-                    deps_pkg,
-                    arena,
-                    thread_pool,
-                    http_client,
-                    mod.root_src_directory,
-                    global_cache_directory,
-                    local_cache_directory,
-                    dependencies_source,
-                    error_bundle,
-                    all_modules,
-                    root_prog_node,
-                    dep.hash.?,
-                );
-            }
+            try mod.fetchAndAddDependencies(
+                deps_pkg,
+                arena,
+                thread_pool,
+                http_client,
+                mod.root_src_directory,
+                global_cache_directory,
+                local_cache_directory,
+                dependencies_source,
+                error_bundle,
+                all_modules,
+                root_prog_node,
+                dep.hash.?,
+            );
 
-            try pkg.add(gpa, name, mod);
-            if (deps_pkg.table.get(dep.hash.?)) |other_sub| {
-                // This should be the same package (and hence module) since it's the same hash
-                // TODO: dedup multiple versions of the same package
-                assert(other_sub == mod);
-            } else {
-                try deps_pkg.add(gpa, dep.hash.?, mod);
-            }
-        } else if (!sub.found_existing) {
-            const pkg_dir_sub_path = "p" ++ fs.path.sep_str ++ (dep.hash.?)[0..hex_multihash_len];
-            const build_root = try global_cache_directory.join(arena, &.{pkg_dir_sub_path});
-            try dependencies_source.writer().print(
-                \\    pub const {} = struct {{
-                \\        pub const build_root = "{}";
-                \\        pub const deps: []const struct {{ []const u8, []const u8 }} = &.{{}};
-                \\    }};
-                \\
-            , .{
-                std.zig.fmtId(dep.hash.?),
-                std.zig.fmtEscapes(build_root),
-            });
+            break :m mod;
+        };
+
+        try pkg.add(gpa, name, sub_pkg);
+        if (deps_pkg.table.get(dep.hash.?)) |other_sub| {
+            // This should be the same package (and hence module) since it's the same hash
+            // TODO: dedup multiple versions of the same package
+            assert(other_sub == sub_pkg);
+        } else {
+            try deps_pkg.add(gpa, dep.hash.?, sub_pkg);
         }
     }
 
@@ -490,6 +485,316 @@ const Report = struct {
     }
 };
 
+const FetchLocation = union(SourceType) {
+    /// The absolute path to a file or directory.
+    /// This may be a file that requires unpacking (such as a .tar.gz),
+    /// or the path to the root directory of a package.
+    file: []const u8,
+    http_request: std.Uri,
+
+    pub fn init(gpa: Allocator, uri: std.Uri, directory: Compilation.Directory, dep: Manifest.Dependency, report: Report) !FetchLocation {
+        const source_type = getPackageSourceType(uri) catch
+            return report.fail(dep.location_tok, "Unknown scheme: {s}", .{uri.scheme});
+
+        return switch (source_type) {
+            .file => f: {
+                const path = if (builtin.os.tag == .windows) p: {
+                    var uri_str = std.ArrayList(u8).init(gpa);
+                    defer uri_str.deinit();
+                    try uri.format("+/", .{}, uri_str.writer());
+                    const uri_str_z = try gpa.dupeZ(u8, uri_str.items);
+                    defer gpa.free(uri_str_z);
+
+                    var buf: [std.os.windows.MAX_PATH:0]u8 = undefined;
+                    var buf_len: std.os.windows.DWORD = std.os.windows.MAX_PATH;
+                    const result = std.os.windows.shlwapi.PathCreateFromUrlA(uri_str_z, &buf, &buf_len, 0);
+
+                    if (result != std.os.windows.S_OK) return report.fail(dep.location_tok, "Invalid URI", .{});
+
+                    break :p try gpa.dupe(u8, buf[0..buf_len]);
+                } else try std.Uri.unescapeString(gpa, uri.path);
+                defer gpa.free(path);
+
+                const new_path = try fs.path.resolve(gpa, &.{ directory.path.?, path });
+
+                break :f .{ .file = new_path };
+            },
+            .http_request => r: {
+                break :r .{ .http_request = uri };
+            },
+        };
+    }
+
+    pub fn deinit(f: *FetchLocation, gpa: Allocator) void {
+        switch (f.*) {
+            .file => |path| gpa.free(path),
+            .http_request => {},
+        }
+        f.* = undefined;
+    }
+
+    const SourceType = enum {
+        file,
+        http_request,
+    };
+
+    fn getPackageSourceType(uri: std.Uri) error{UnknownScheme}!SourceType {
+        const package_source_map = std.ComptimeStringMap(
+            SourceType,
+            .{
+                .{ "file", .file },
+                .{ "http", .http_request },
+                .{ "https", .http_request },
+            },
+        );
+        return package_source_map.get(uri.scheme) orelse error.UnknownScheme;
+    }
+
+    pub fn isDirectory(path: []const u8, root_dir: Compilation.Directory) !bool {
+        return if (mem.endsWith(u8, path, std.fs.path.sep_str))
+            true
+        else if (std.fs.path.extension(path).len > 0)
+            false
+        else d: {
+            // It's common to write directories without a trailing '/'.
+            // This is some special casing logic to detect directories if
+            // the file type cannot be determined from the extension.
+            var dir = root_dir.handle.openDir(path, .{}) catch |err| switch (err) {
+                error.NotDir => break :d false,
+                else => break :d err,
+            };
+            defer dir.close();
+            break :d true;
+        };
+    }
+
+    pub fn fetch(
+        f: FetchLocation,
+        gpa: Allocator,
+        root_dir: Compilation.Directory,
+        http_client: *std.http.Client,
+        dep: Manifest.Dependency,
+        report: Report,
+    ) !ReadableResource {
+        switch (f) {
+            .file => |file| {
+                const is_dir = isDirectory(file, root_dir) catch
+                    return report.fail(dep.location_tok, "File not found: {s}", .{file});
+
+                return if (is_dir)
+                    .{
+                        .path = try gpa.dupe(u8, file),
+                        .resource = .{ .directory = try fs.openIterableDirAbsolute(file, .{}) },
+                    }
+                else
+                    .{
+                        .path = try gpa.dupe(u8, file),
+                        .resource = .{ .file = try fs.openFileAbsolute(file, .{}) },
+                    };
+            },
+            .http_request => |uri| {
+                var h = std.http.Headers{ .allocator = gpa };
+                defer h.deinit();
+
+                var req = try http_client.request(.GET, uri, h, .{});
+
+                try req.start(.{});
+                try req.wait();
+
+                if (req.response.status != .ok) {
+                    return report.fail(dep.location_tok, "Expected response status '200 OK' got '{} {s}'", .{
+                        @intFromEnum(req.response.status),
+                        req.response.status.phrase() orelse "",
+                    });
+                }
+
+                return .{
+                    .path = try gpa.dupe(u8, uri.path),
+                    .resource = .{ .http_request = req },
+                };
+            },
+        }
+    }
+};
+
+const ReadableResource = struct {
+    path: []const u8,
+    resource: union(enum) {
+        file: fs.File,
+        directory: fs.IterableDir,
+        http_request: std.http.Client.Request,
+    },
+
+    /// Unpack the package into the global cache directory.
+    /// If `rr` does not require unpacking (for example, if it is a directory), then no caching is performed.
+    /// In either case, the hash is computed and returned along with the path to the package.
+    pub fn unpack(
+        rr: *ReadableResource,
+        allocator: Allocator,
+        thread_pool: *ThreadPool,
+        global_cache_directory: Compilation.Directory,
+        dep: Manifest.Dependency,
+        report: Report,
+        pkg_prog_node: *std.Progress.Node,
+    ) !PackageLocation {
+        switch (rr.resource) {
+            .directory => |dir| {
+                const actual_hash = try computePackageHash(thread_pool, dir);
+                return .{
+                    .hash = actual_hash,
+                    .dir_path = try allocator.dupe(u8, rr.path),
+                };
+            },
+            inline .file, .http_request => |*r| {
+                const s = fs.path.sep_str;
+                const rand_int = std.crypto.random.int(u64);
+                const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int);
+
+                const actual_hash = h: {
+                    var tmp_directory: Compilation.Directory = d: {
+                        const path = try global_cache_directory.join(allocator, &.{tmp_dir_sub_path});
+                        errdefer allocator.free(path);
+
+                        const iterable_dir = try global_cache_directory.handle.makeOpenPathIterable(tmp_dir_sub_path, .{});
+                        errdefer iterable_dir.close();
+
+                        break :d .{
+                            .path = path,
+                            .handle = iterable_dir.dir,
+                        };
+                    };
+                    defer tmp_directory.closeAndFree(allocator);
+
+                    const opt_content_length = try rr.getSize();
+
+                    var prog_reader: ProgressReader(@TypeOf(r.reader())) = .{
+                        .child_reader = r.reader(),
+                        .prog_node = pkg_prog_node,
+                        .unit = if (opt_content_length) |content_length| unit: {
+                            const kib = content_length / 1024;
+                            const mib = kib / 1024;
+                            if (mib > 0) {
+                                pkg_prog_node.setEstimatedTotalItems(@intCast(mib));
+                                pkg_prog_node.setUnit("MiB");
+                                break :unit .mib;
+                            } else {
+                                pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib)));
+                                pkg_prog_node.setUnit("KiB");
+                                break :unit .kib;
+                            }
+                        } else .any,
+                    };
+                    pkg_prog_node.context.refresh();
+
+                    switch (try rr.getFileType(dep, report)) {
+                        .@"tar.gz" => try unpackTarball(allocator, prog_reader, tmp_directory.handle, std.compress.gzip),
+                        // The xz decompressor's default buffer sizes are unverified, so
+                        // unpackTarball buffers the reader for xz just as it does for gzip.
+                        .@"tar.xz" => try unpackTarball(allocator, prog_reader, tmp_directory.handle, std.compress.xz),
+                    }
+
+                    // Unpack completed - stop showing amount as progress
+                    pkg_prog_node.setEstimatedTotalItems(0);
+                    pkg_prog_node.setCompletedItems(0);
+                    pkg_prog_node.context.refresh();
+
+                    // TODO: delete files not included in the package prior to computing the package hash.
+                    // for example, if the ini file has directives to include/not include certain files,
+                    // apply those rules directly to the filesystem right here. This ensures that files
+                    // not protected by the hash are not present on the file system.
+
+                    break :h try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle });
+                };
+
+                const pkg_dir_sub_path = "p" ++ s ++ Manifest.hexDigest(actual_hash);
+                const unpacked_path = try global_cache_directory.join(allocator, &.{pkg_dir_sub_path});
+                errdefer allocator.free(unpacked_path);
+
+                const relative_unpacked_path = try fs.path.relative(allocator, global_cache_directory.path.?, unpacked_path);
+                defer allocator.free(relative_unpacked_path);
+                try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, relative_unpacked_path);
+
+                return .{
+                    .hash = actual_hash,
+                    .dir_path = unpacked_path,
+                };
+            },
+        }
+    }
+
+    const FileType = enum {
+        @"tar.gz",
+        @"tar.xz",
+    };
+
+    pub fn getSize(rr: ReadableResource) !?u64 {
+        switch (rr.resource) {
+            // TODO: Handle case of chunked content-length
+            .http_request => |req| return req.response.content_length,
+            .file => |f| return (try f.metadata()).size(),
+            .directory => unreachable,
+        }
+    }
+
+    pub fn getFileType(rr: ReadableResource, dep: Manifest.Dependency, report: Report) !FileType {
+        switch (rr.resource) {
+            .file => {
+                return if (mem.endsWith(u8, rr.path, ".tar.gz"))
+                    .@"tar.gz"
+                else if (mem.endsWith(u8, rr.path, ".tar.xz"))
+                    .@"tar.xz"
+                else
+                    return report.fail(dep.location_tok, "Unknown file type", .{});
+            },
+            .directory => return error.IsDir,
+            .http_request => |req| {
+                const content_type = req.response.headers.getFirstValue("Content-Type") orelse
+                    return report.fail(dep.location_tok, "Missing 'Content-Type' header", .{});
+
+                // For HTTP responses, the Content-Type header determines the file type,
+                // regardless of any extension in the URI's path.
+                return if (ascii.eqlIgnoreCase(content_type, "application/gzip") or
+                    ascii.eqlIgnoreCase(content_type, "application/x-gzip") or
+                    ascii.eqlIgnoreCase(content_type, "application/tar+gzip"))
+                    .@"tar.gz"
+                else if (ascii.eqlIgnoreCase(content_type, "application/x-xz"))
+                    .@"tar.xz"
+                else if (ascii.eqlIgnoreCase(content_type, "application/octet-stream")) ty: {
+                    // support gitlab tarball urls such as https://gitlab.com/<namespace>/<project>/-/archive/<sha>/<project>-<sha>.tar.gz
+                    // whose content-disposition header is: 'attachment; filename="<project>-<sha>.tar.gz"'
+                    const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse
+                        return report.fail(dep.location_tok, "Missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{});
+                    if (mem.startsWith(u8, content_disposition, "attachment;") and
+                        mem.endsWith(u8, content_disposition, ".tar.gz\""))
+                    {
+                        break :ty .@"tar.gz";
+                    } else return report.fail(dep.location_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition});
+                } else return report.fail(dep.location_tok, "Unrecognized value for 'Content-Type' header: {s}", .{content_type});
+            },
+        }
+    }
+
+    pub fn deinit(rr: *ReadableResource, gpa: Allocator) void {
+        gpa.free(rr.path);
+        switch (rr.resource) {
+            .file => |file| file.close(),
+            .directory => |*dir| dir.close(),
+            .http_request => |*req| req.deinit(),
+        }
+        rr.* = undefined;
+    }
+};
+
+pub const PackageLocation = struct {
+    hash: [Manifest.Hash.digest_length]u8,
+    dir_path: []const u8,
+
+    pub fn deinit(pl: *PackageLocation, allocator: Allocator) void {
+        allocator.free(pl.dir_path);
+        pl.* = undefined;
+    }
+};
+
 const hex_multihash_len = 2 * Manifest.multihash_len;
 const MultiHashHexDigest = [hex_multihash_len]u8;
 /// This is to avoid creating multiple modules for the same build.zig file.
@@ -542,29 +847,24 @@ fn ProgressReader(comptime ReaderType: type) type {
     };
 }
 
-fn fetchAndUnpack(
-    thread_pool: *ThreadPool,
-    http_client: *std.http.Client,
+fn getCachedPackage(
+    gpa: Allocator,
     global_cache_directory: Compilation.Directory,
     dep: Manifest.Dependency,
     report: Report,
     all_modules: *AllModules,
     root_prog_node: *std.Progress.Node,
-    /// This does not have to be any form of canonical or fully-qualified name: it
-    /// is only intended to be human-readable for progress reporting.
-    name_for_prog: []const u8,
-) !struct { mod: ?*Package, found_existing: bool } {
-    const gpa = http_client.allocator;
+) !?*Package {
+    _ = report;
     const s = fs.path.sep_str;
-
     // Check if the expected_hash is already present in the global package
     // cache, and thereby avoid both fetching and unpacking.
-    if (dep.hash) |h| cached: {
+    if (dep.hash) |h| {
         const hex_digest = h[0..hex_multihash_len];
         const pkg_dir_sub_path = "p" ++ s ++ hex_digest;
 
         var pkg_dir = global_cache_directory.handle.openDir(pkg_dir_sub_path, .{}) catch |err| switch (err) {
-            error.FileNotFound => break :cached,
+            error.FileNotFound => return null,
             else => |e| return e,
         };
         errdefer pkg_dir.close();
@@ -574,16 +874,7 @@ fn fetchAndUnpack(
         const gop = try all_modules.getOrPut(gpa, hex_digest.*);
         if (gop.found_existing) {
             if (gop.value_ptr.*) |mod| {
-                return switch (mod) {
-                    .zig_pkg => |pkg| .{
-                        .mod = pkg,
-                        .found_existing = true,
-                    },
-                    .non_zig_pkg => .{
-                        .mod = null,
-                        .found_existing = true,
-                    },
-                };
+                return mod;
             }
         }
 
@@ -615,121 +906,60 @@ fn fetchAndUnpack(
             .root_src_path = owned_src_path,
         };
 
-        gop.value_ptr.* = .{ .zig_pkg = ptr };
-        return .{
-            .mod = ptr,
-            .found_existing = false,
-        };
+        gop.value_ptr.* = ptr;
+        return ptr;
     }
 
+    return null;
+}
+
+fn fetchAndUnpack(
+    thread_pool: *ThreadPool,
+    http_client: *std.http.Client,
+    directory: Compilation.Directory,
+    global_cache_directory: Compilation.Directory,
+    dep: Manifest.Dependency,
+    report: Report,
+    all_modules: *AllModules,
+    root_prog_node: *std.Progress.Node,
+    /// This does not have to be any form of canonical or fully-qualified name: it
+    /// is only intended to be human-readable for progress reporting.
+    name_for_prog: []const u8,
+) !*Package {
+    const gpa = http_client.allocator;
+
     var pkg_prog_node = root_prog_node.start(name_for_prog, 0);
     defer pkg_prog_node.end();
     pkg_prog_node.activate();
     pkg_prog_node.context.refresh();
 
-    const uri = try std.Uri.parse(dep.url);
-
-    const rand_int = std.crypto.random.int(u64);
-    const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int);
-
-    const actual_hash = a: {
-        var tmp_directory: Compilation.Directory = d: {
-            const path = try global_cache_directory.join(gpa, &.{tmp_dir_sub_path});
-            errdefer gpa.free(path);
-
-            const iterable_dir = try global_cache_directory.handle.makeOpenPathIterable(tmp_dir_sub_path, .{});
-            errdefer iterable_dir.close();
-
-            break :d .{
-                .path = path,
-                .handle = iterable_dir.dir,
-            };
-        };
-        defer tmp_directory.closeAndFree(gpa);
-
-        var h = std.http.Headers{ .allocator = gpa };
-        defer h.deinit();
-
-        var req = try http_client.request(.GET, uri, h, .{});
-        defer req.deinit();
-
-        try req.start(.{});
-        try req.wait();
-
-        if (req.response.status != .ok) {
-            return report.fail(dep.url_tok, "Expected response status '200 OK' got '{} {s}'", .{
-                @intFromEnum(req.response.status),
-                req.response.status.phrase() orelse "",
-            });
-        }
-
-        const content_type = req.response.headers.getFirstValue("Content-Type") orelse
-            return report.fail(dep.url_tok, "Missing 'Content-Type' header", .{});
-
-        var prog_reader: ProgressReader(std.http.Client.Request.Reader) = .{
-            .child_reader = req.reader(),
-            .prog_node = &pkg_prog_node,
-            .unit = if (req.response.content_length) |content_length| unit: {
-                const kib = content_length / 1024;
-                const mib = kib / 1024;
-                if (mib > 0) {
-                    pkg_prog_node.setEstimatedTotalItems(@intCast(mib));
-                    pkg_prog_node.setUnit("MiB");
-                    break :unit .mib;
-                } else {
-                    pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib)));
-                    pkg_prog_node.setUnit("KiB");
-                    break :unit .kib;
-                }
-            } else .any,
-        };
-        pkg_prog_node.context.refresh();
-
-        if (ascii.eqlIgnoreCase(content_type, "application/gzip") or
-            ascii.eqlIgnoreCase(content_type, "application/x-gzip") or
-            ascii.eqlIgnoreCase(content_type, "application/tar+gzip"))
-        {
-            // I observed the gzip stream to read 1 byte at a time, so I am using a
-            // buffered reader on the front of it.
-            try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip);
-        } else if (ascii.eqlIgnoreCase(content_type, "application/x-xz")) {
-            // I have not checked what buffer sizes the xz decompression implementation uses
-            // by default, so the same logic applies for buffering the reader as for gzip.
-            try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.xz);
-        } else if (ascii.eqlIgnoreCase(content_type, "application/octet-stream")) {
-            // support gitlab tarball urls such as https://gitlab.com/<namespace>/<project>/-/archive/<sha>/<project>-<sha>.tar.gz
-            // whose content-disposition header is: 'attachment; filename="<project>-<sha>.tar.gz"'
-            const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse
-                return report.fail(dep.url_tok, "Missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{});
-            if (isTarAttachment(content_disposition)) {
-                try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip);
-            } else return report.fail(dep.url_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition});
-        } else {
-            return report.fail(dep.url_tok, "Unsupported 'Content-Type' header value: '{s}'", .{content_type});
-        }
-
-        // Download completed - stop showing downloaded amount as progress
-        pkg_prog_node.setEstimatedTotalItems(0);
-        pkg_prog_node.setCompletedItems(0);
-        pkg_prog_node.context.refresh();
-
-        // TODO: delete files not included in the package prior to computing the package hash.
-        // for example, if the ini file has directives to include/not include certain files,
-        // apply those rules directly to the filesystem right here. This ensures that files
-        // not protected by the hash are not present on the file system.
+    const uri = switch (dep.location) {
+        .url => |url| std.Uri.parse(url) catch |err| switch (err) {
+            error.UnexpectedCharacter => return report.fail(dep.location_tok, "failed to parse dependency location as URI.", .{}),
+            else => return err,
+        },
+        .path => |path| std.Uri{
+            .scheme = "file",
+            .user = null,
+            .password = null,
+            .host = null,
+            .port = null,
+            .path = path,
+            .query = null,
+            .fragment = null,
+        },
+    };
 
-        // TODO: raise an error for files that have illegal paths on some operating systems.
-        // For example, on Linux a path with a backslash should raise an error here.
-        // Of course, if the ignore rules above omit the file from the package, then everything
-        // is fine and no error should be raised.
+    var fetch_location = try FetchLocation.init(gpa, uri, directory, dep, report);
+    defer fetch_location.deinit(gpa);
 
-        break :a try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle });
-    };
+    var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep, report);
+    defer readable_resource.deinit(gpa);
 
-    const pkg_dir_sub_path = "p" ++ s ++ Manifest.hexDigest(actual_hash);
-    try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, pkg_dir_sub_path);
+    var package_location = try readable_resource.unpack(gpa, thread_pool, global_cache_directory, dep, report, &pkg_prog_node);
+    defer package_location.deinit(gpa);
 
-    const actual_hex = Manifest.hexDigest(actual_hash);
+    const actual_hex = Manifest.hexDigest(package_location.hash);
     if (dep.hash) |h| {
         if (!mem.eql(u8, h, &actual_hex)) {
             return report.fail(dep.hash_tok, "hash mismatch: expected: {s}, found: {s}", .{
@@ -743,9 +973,9 @@ fn fetchAndUnpack(
         const eb = report.error_bundle;
         const notes_len = 1;
         try Report.addErrorMessage(report.ast.*, file_path, eb, notes_len, .{
-            .tok = dep.url_tok,
+            .tok = dep.location_tok,
             .off = 0,
-            .msg = "url field is missing corresponding hash field",
+            .msg = "dependency is missing hash field",
         });
         const notes_start = try eb.reserveNotes(notes_len);
         eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{
@@ -754,35 +984,24 @@ fn fetchAndUnpack(
         return error.PackageFetchFailed;
     }
 
-    const build_zig_path = try std.fs.path.join(gpa, &.{ pkg_dir_sub_path, build_zig_basename });
-    defer gpa.free(build_zig_path);
-
-    global_cache_directory.handle.access(build_zig_path, .{}) catch |err| switch (err) {
-        error.FileNotFound => {
-            try all_modules.put(gpa, actual_hex, .non_zig_pkg);
-            return .{
-                .mod = null,
-                .found_existing = false,
-            };
-        },
-        else => return err,
-    };
+    const gop = try all_modules.getOrPut(gpa, actual_hex);
 
-    const mod = try createWithDir(gpa, global_cache_directory, pkg_dir_sub_path, build_zig_basename);
-    try all_modules.put(gpa, actual_hex, .{ .zig_pkg = mod });
-    return .{
-        .mod = mod,
-        .found_existing = false,
-    };
+    if (gop.found_existing and gop.value_ptr.* != null) {
+        return gop.value_ptr.*.?;
+    } else {
+        const module = try create(gpa, package_location.dir_path, build_zig_basename);
+        gop.value_ptr.* = module;
+        return module;
+    }
 }
 
 fn unpackTarball(
     gpa: Allocator,
-    req_reader: anytype,
+    reader: anytype,
     out_dir: fs.Dir,
     comptime compression: type,
 ) !void {
-    var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, req_reader);
+    var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader);
 
     var decompress = try compression.decompress(gpa, br.reader());
     defer decompress.deinit();
build.zig
@@ -336,6 +336,7 @@ pub fn build(b: *std.Build) !void {
                 artifact.linkSystemLibrary("version");
                 artifact.linkSystemLibrary("uuid");
                 artifact.linkSystemLibrary("ole32");
+                artifact.linkSystemLibrary("shlwapi");
             }
         }
     }
@@ -712,6 +713,7 @@ fn addStaticLlvmOptionsToExe(exe: *std.Build.Step.Compile) !void {
         exe.linkSystemLibrary("version");
         exe.linkSystemLibrary("uuid");
         exe.linkSystemLibrary("ole32");
+        exe.linkSystemLibrary("shlwapi");
     }
 }