zig/src/Package/Fetch/git.zig at master

   1//! Git support for package fetching.
   2//!
   3//! This is not intended to support all features of Git: it is limited to the
   4//! basic functionality needed to clone a repository for the purpose of fetching
   5//! a package.
   6
   7const std = @import("std");
   8const Io = std.Io;
   9const mem = std.mem;
  10const testing = std.testing;
  11const Allocator = mem.Allocator;
  12const Sha1 = std.crypto.hash.Sha1;
  13const Sha256 = std.crypto.hash.sha2.Sha256;
  14const assert = std.debug.assert;
  15
  16/// The ID of a Git object.
  17pub const Oid = union(Format) {
  18    sha1: [Sha1.digest_length]u8,
  19    sha256: [Sha256.digest_length]u8,
  20
  21    pub const max_formatted_length = len: {
  22        var max: usize = 0;
  23        for (std.enums.values(Format)) |f| {
  24            max = @max(max, f.formattedLength());
  25        }
  26        break :len max;
  27    };
  28
  29    pub const Format = enum {
  30        sha1,
  31        sha256,
  32
  33        pub fn byteLength(f: Format) usize {
  34            return switch (f) {
  35                .sha1 => Sha1.digest_length,
  36                .sha256 => Sha256.digest_length,
  37            };
  38        }
  39
  40        pub fn formattedLength(f: Format) usize {
  41            return 2 * f.byteLength();
  42        }
  43    };
  44
  45    const Hasher = union(Format) {
  46        sha1: Sha1,
  47        sha256: Sha256,
  48
  49        fn init(oid_format: Format) Hasher {
  50            return switch (oid_format) {
  51                .sha1 => .{ .sha1 = Sha1.init(.{}) },
  52                .sha256 => .{ .sha256 = Sha256.init(.{}) },
  53            };
  54        }
  55
  56        // Must be public for use from HashedReader and HashedWriter.
  57        pub fn update(hasher: *Hasher, b: []const u8) void {
  58            switch (hasher.*) {
  59                inline else => |*inner| inner.update(b),
  60            }
  61        }
  62
  63        fn finalResult(hasher: *Hasher) Oid {
  64            return switch (hasher.*) {
  65                inline else => |*inner, tag| @unionInit(Oid, @tagName(tag), inner.finalResult()),
  66            };
  67        }
  68    };
  69
  70    const Hashing = union(Format) {
  71        sha1: Io.Writer.Hashing(Sha1),
  72        sha256: Io.Writer.Hashing(Sha256),
  73
  74        fn init(oid_format: Format, buffer: []u8) Hashing {
  75            return switch (oid_format) {
  76                .sha1 => .{ .sha1 = .init(buffer) },
  77                .sha256 => .{ .sha256 = .init(buffer) },
  78            };
  79        }
  80
  81        fn writer(h: *@This()) *Io.Writer {
  82            return switch (h.*) {
  83                inline else => |*inner| &inner.writer,
  84            };
  85        }
  86
  87        fn final(h: *@This()) Oid {
  88            switch (h.*) {
  89                inline else => |*inner, tag| {
  90                    inner.writer.flush() catch unreachable; // hashers cannot fail
  91                    return @unionInit(Oid, @tagName(tag), inner.hasher.finalResult());
  92                },
  93            }
  94        }
  95    };
  96
  97    pub fn fromBytes(oid_format: Format, bytes: []const u8) Oid {
  98        assert(bytes.len == oid_format.byteLength());
  99        return switch (oid_format) {
 100            inline else => |tag| @unionInit(Oid, @tagName(tag), bytes[0..comptime tag.byteLength()].*),
 101        };
 102    }
 103
 104    pub fn readBytes(oid_format: Format, reader: *Io.Reader) !Oid {
 105        return switch (oid_format) {
 106            inline else => |tag| @unionInit(Oid, @tagName(tag), (try reader.takeArray(tag.byteLength())).*),
 107        };
 108    }
 109
 110    pub fn parse(oid_format: Format, s: []const u8) error{InvalidOid}!Oid {
 111        switch (oid_format) {
 112            inline else => |tag| {
 113                if (s.len != tag.formattedLength()) return error.InvalidOid;
 114                var bytes: [tag.byteLength()]u8 = undefined;
 115                for (&bytes, 0..) |*b, i| {
 116                    b.* = std.fmt.parseUnsigned(u8, s[2 * i ..][0..2], 16) catch return error.InvalidOid;
 117                }
 118                return @unionInit(Oid, @tagName(tag), bytes);
 119            },
 120        }
 121    }
 122
 123    test parse {
 124        try testing.expectEqualSlices(
 125            u8,
 126            &.{ 0xCE, 0x91, 0x9C, 0xCF, 0x45, 0x95, 0x18, 0x56, 0xA7, 0x62, 0xFF, 0xDB, 0x8E, 0xF8, 0x50, 0x30, 0x1C, 0xD8, 0xC5, 0x88 },
 127            &(try parse(.sha1, "ce919ccf45951856a762ffdb8ef850301cd8c588")).sha1,
 128        );
 129        try testing.expectError(error.InvalidOid, parse(.sha256, "ce919ccf45951856a762ffdb8ef850301cd8c588"));
 130        try testing.expectError(error.InvalidOid, parse(.sha1, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a"));
 131        try testing.expectEqualSlices(
 132            u8,
 133            &.{ 0x7F, 0x44, 0x4A, 0x92, 0xBD, 0x45, 0x72, 0xEE, 0x4A, 0x28, 0xB2, 0xC6, 0x30, 0x59, 0x92, 0x4A, 0x9C, 0xA1, 0x82, 0x91, 0x38, 0x55, 0x3E, 0xF3, 0xE7, 0xC4, 0x1E, 0xE1, 0x59, 0xAF, 0xAE, 0x7A },
 134            &(try parse(.sha256, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a")).sha256,
 135        );
 136        try testing.expectError(error.InvalidOid, parse(.sha1, "ce919ccf"));
 137        try testing.expectError(error.InvalidOid, parse(.sha256, "ce919ccf"));
 138        try testing.expectError(error.InvalidOid, parse(.sha1, "master"));
 139        try testing.expectError(error.InvalidOid, parse(.sha256, "master"));
 140        try testing.expectError(error.InvalidOid, parse(.sha1, "HEAD"));
 141        try testing.expectError(error.InvalidOid, parse(.sha256, "HEAD"));
 142    }
 143
 144    pub fn parseAny(s: []const u8) error{InvalidOid}!Oid {
 145        return for (std.enums.values(Format)) |f| {
 146            if (s.len == f.formattedLength()) break parse(f, s);
 147        } else error.InvalidOid;
 148    }
 149
 150    pub fn format(oid: Oid, writer: *Io.Writer) Io.Writer.Error!void {
 151        try writer.print("{x}", .{oid.slice()});
 152    }
 153
 154    pub fn slice(oid: *const Oid) []const u8 {
 155        return switch (oid.*) {
 156            inline else => |*bytes| bytes,
 157        };
 158    }
 159};
 160
 161pub const Diagnostics = struct {
 162    allocator: Allocator,
 163    errors: std.ArrayList(Error) = .empty,
 164
 165    pub const Error = union(enum) {
 166        unable_to_create_sym_link: struct {
 167            code: anyerror,
 168            file_name: []const u8,
 169            link_name: []const u8,
 170        },
 171        unable_to_create_file: struct {
 172            code: anyerror,
 173            file_name: []const u8,
 174        },
 175    };
 176
 177    pub fn deinit(d: *Diagnostics) void {
 178        for (d.errors.items) |item| {
 179            switch (item) {
 180                .unable_to_create_sym_link => |info| {
 181                    d.allocator.free(info.file_name);
 182                    d.allocator.free(info.link_name);
 183                },
 184                .unable_to_create_file => |info| {
 185                    d.allocator.free(info.file_name);
 186                },
 187            }
 188        }
 189        d.errors.deinit(d.allocator);
 190        d.* = undefined;
 191    }
 192};
 193
 194pub const Repository = struct {
 195    odb: Odb,
 196
 197    pub fn init(
 198        repo: *Repository,
 199        allocator: Allocator,
 200        format: Oid.Format,
 201        pack_file: *std.fs.File.Reader,
 202        index_file: *std.fs.File.Reader,
 203    ) !void {
 204        repo.* = .{ .odb = undefined };
 205        try repo.odb.init(allocator, format, pack_file, index_file);
 206    }
 207
 208    pub fn deinit(repository: *Repository) void {
 209        repository.odb.deinit();
 210        repository.* = undefined;
 211    }
 212
 213    /// Checks out the repository at `commit_oid` to `worktree`.
 214    pub fn checkout(
 215        repository: *Repository,
 216        worktree: std.fs.Dir,
 217        commit_oid: Oid,
 218        diagnostics: *Diagnostics,
 219    ) !void {
 220        try repository.odb.seekOid(commit_oid);
 221        const tree_oid = tree_oid: {
 222            const commit_object = try repository.odb.readObject();
 223            if (commit_object.type != .commit) return error.NotACommit;
 224            break :tree_oid try getCommitTree(repository.odb.format, commit_object.data);
 225        };
 226        try repository.checkoutTree(worktree, tree_oid, "", diagnostics);
 227    }
 228
 229    /// Checks out the tree at `tree_oid` to `worktree`.
 230    fn checkoutTree(
 231        repository: *Repository,
 232        dir: std.fs.Dir,
 233        tree_oid: Oid,
 234        current_path: []const u8,
 235        diagnostics: *Diagnostics,
 236    ) !void {
 237        try repository.odb.seekOid(tree_oid);
 238        const tree_object = try repository.odb.readObject();
 239        if (tree_object.type != .tree) return error.NotATree;
 240        // The tree object may be evicted from the object cache while we're
 241        // iterating over it, so we can make a defensive copy here to make sure
 242        // it remains valid until we're done with it
 243        const tree_data = try repository.odb.allocator.dupe(u8, tree_object.data);
 244        defer repository.odb.allocator.free(tree_data);
 245
 246        var tree_iter: TreeIterator = .{
 247            .format = repository.odb.format,
 248            .data = tree_data,
 249            .pos = 0,
 250        };
 251        while (try tree_iter.next()) |entry| {
 252            switch (entry.type) {
 253                .directory => {
 254                    try dir.makeDir(entry.name);
 255                    var subdir = try dir.openDir(entry.name, .{});
 256                    defer subdir.close();
 257                    const sub_path = try std.fs.path.join(repository.odb.allocator, &.{ current_path, entry.name });
 258                    defer repository.odb.allocator.free(sub_path);
 259                    try repository.checkoutTree(subdir, entry.oid, sub_path, diagnostics);
 260                },
 261                .file => {
 262                    try repository.odb.seekOid(entry.oid);
 263                    const file_object = try repository.odb.readObject();
 264                    if (file_object.type != .blob) return error.InvalidFile;
 265                    var file = dir.createFile(entry.name, .{ .exclusive = true }) catch |e| {
 266                        const file_name = try std.fs.path.join(diagnostics.allocator, &.{ current_path, entry.name });
 267                        errdefer diagnostics.allocator.free(file_name);
 268                        try diagnostics.errors.append(diagnostics.allocator, .{ .unable_to_create_file = .{
 269                            .code = e,
 270                            .file_name = file_name,
 271                        } });
 272                        continue;
 273                    };
 274                    defer file.close();
 275                    try file.writeAll(file_object.data);
 276                },
 277                .symlink => {
 278                    try repository.odb.seekOid(entry.oid);
 279                    const symlink_object = try repository.odb.readObject();
 280                    if (symlink_object.type != .blob) return error.InvalidFile;
 281                    const link_name = symlink_object.data;
 282                    dir.symLink(link_name, entry.name, .{}) catch |e| {
 283                        const file_name = try std.fs.path.join(diagnostics.allocator, &.{ current_path, entry.name });
 284                        errdefer diagnostics.allocator.free(file_name);
 285                        const link_name_dup = try diagnostics.allocator.dupe(u8, link_name);
 286                        errdefer diagnostics.allocator.free(link_name_dup);
 287                        try diagnostics.errors.append(diagnostics.allocator, .{ .unable_to_create_sym_link = .{
 288                            .code = e,
 289                            .file_name = file_name,
 290                            .link_name = link_name_dup,
 291                        } });
 292                    };
 293                },
 294                .gitlink => {
 295                    // Consistent with git archive behavior, create the directory but
 296                    // do nothing else
 297                    try dir.makeDir(entry.name);
 298                },
 299            }
 300        }
 301    }
 302
 303    /// Returns the ID of the tree associated with the given commit (provided as
 304    /// raw object data).
 305    fn getCommitTree(format: Oid.Format, commit_data: []const u8) !Oid {
 306        if (!mem.startsWith(u8, commit_data, "tree ") or
 307            commit_data.len < "tree ".len + format.formattedLength() + "\n".len or
 308            commit_data["tree ".len + format.formattedLength()] != '\n')
 309        {
 310            return error.InvalidCommit;
 311        }
 312        return try .parse(format, commit_data["tree ".len..][0..format.formattedLength()]);
 313    }
 314
 315    const TreeIterator = struct {
 316        format: Oid.Format,
 317        data: []const u8,
 318        pos: usize,
 319
 320        const Entry = struct {
 321            type: Type,
 322            executable: bool,
 323            name: [:0]const u8,
 324            oid: Oid,
 325
 326            const Type = enum(u4) {
 327                directory = 0o4,
 328                file = 0o10,
 329                symlink = 0o12,
 330                gitlink = 0o16,
 331            };
 332        };
 333
 334        fn next(iterator: *TreeIterator) !?Entry {
 335            if (iterator.pos == iterator.data.len) return null;
 336
 337            const mode_end = mem.indexOfScalarPos(u8, iterator.data, iterator.pos, ' ') orelse return error.InvalidTree;
 338            const mode: packed struct {
 339                permission: u9,
 340                unused: u3,
 341                type: u4,
 342            } = @bitCast(std.fmt.parseUnsigned(u16, iterator.data[iterator.pos..mode_end], 8) catch return error.InvalidTree);
 343            const @"type" = std.enums.fromInt(Entry.Type, mode.type) orelse return error.InvalidTree;
 344            const executable = switch (mode.permission) {
 345                0 => if (@"type" == .file) return error.InvalidTree else false,
 346                0o644 => if (@"type" != .file) return error.InvalidTree else false,
 347                0o755 => if (@"type" != .file) return error.InvalidTree else true,
 348                else => return error.InvalidTree,
 349            };
 350            iterator.pos = mode_end + 1;
 351
 352            const name_end = mem.indexOfScalarPos(u8, iterator.data, iterator.pos, 0) orelse return error.InvalidTree;
 353            const name = iterator.data[iterator.pos..name_end :0];
 354            iterator.pos = name_end + 1;
 355
 356            const oid_length = iterator.format.byteLength();
 357            if (iterator.pos + oid_length > iterator.data.len) return error.InvalidTree;
 358            const oid: Oid = .fromBytes(iterator.format, iterator.data[iterator.pos..][0..oid_length]);
 359            iterator.pos += oid_length;
 360
 361            return .{ .type = @"type", .executable = executable, .name = name, .oid = oid };
 362        }
 363    };
 364};
 365
 366/// A Git object database backed by a packfile. A packfile index is also used
 367/// for efficient access to objects in the packfile.
 368///
 369/// The format of the packfile and its associated index are documented in
 370/// [pack-format](https://git-scm.com/docs/pack-format).
 371const Odb = struct {
 372    format: Oid.Format,
 373    pack_file: *std.fs.File.Reader,
 374    index_header: IndexHeader,
 375    index_file: *std.fs.File.Reader,
 376    cache: ObjectCache = .{},
 377    allocator: Allocator,
 378
 379    /// Initializes the database from open pack and index files.
 380    fn init(
 381        odb: *Odb,
 382        allocator: Allocator,
 383        format: Oid.Format,
 384        pack_file: *std.fs.File.Reader,
 385        index_file: *std.fs.File.Reader,
 386    ) !void {
 387        try pack_file.seekTo(0);
 388        try index_file.seekTo(0);
 389        odb.* = .{
 390            .format = format,
 391            .pack_file = pack_file,
 392            .index_header = undefined,
 393            .index_file = index_file,
 394            .allocator = allocator,
 395        };
 396        try odb.index_header.read(&index_file.interface);
 397    }
 398
 399    fn deinit(odb: *Odb) void {
 400        odb.cache.deinit(odb.allocator);
 401        odb.* = undefined;
 402    }
 403
 404    /// Reads the object at the current position in the database.
 405    fn readObject(odb: *Odb) !Object {
 406        var base_offset = odb.pack_file.logicalPos();
 407        var base_header: EntryHeader = undefined;
 408        var delta_offsets: std.ArrayList(u64) = .empty;
 409        defer delta_offsets.deinit(odb.allocator);
 410        const base_object = while (true) {
 411            if (odb.cache.get(base_offset)) |base_object| break base_object;
 412
 413            base_header = try EntryHeader.read(odb.format, &odb.pack_file.interface);
 414            switch (base_header) {
 415                .ofs_delta => |ofs_delta| {
 416                    try delta_offsets.append(odb.allocator, base_offset);
 417                    base_offset = std.math.sub(u64, base_offset, ofs_delta.offset) catch return error.InvalidFormat;
 418                    try odb.pack_file.seekTo(base_offset);
 419                },
 420                .ref_delta => |ref_delta| {
 421                    try delta_offsets.append(odb.allocator, base_offset);
 422                    try odb.seekOid(ref_delta.base_object);
 423                    base_offset = odb.pack_file.logicalPos();
 424                },
 425                else => {
 426                    const base_data = try readObjectRaw(odb.allocator, &odb.pack_file.interface, base_header.uncompressedLength());
 427                    errdefer odb.allocator.free(base_data);
 428                    const base_object: Object = .{ .type = base_header.objectType(), .data = base_data };
 429                    try odb.cache.put(odb.allocator, base_offset, base_object);
 430                    break base_object;
 431                },
 432            }
 433        };
 434
 435        const base_data = try resolveDeltaChain(
 436            odb.allocator,
 437            odb.format,
 438            odb.pack_file,
 439            base_object,
 440            delta_offsets.items,
 441            &odb.cache,
 442        );
 443
 444        return .{ .type = base_object.type, .data = base_data };
 445    }
 446
 447    /// Seeks to the beginning of the object with the given ID.
 448    fn seekOid(odb: *Odb, oid: Oid) !void {
 449        const oid_length = odb.format.byteLength();
 450        const key = oid.slice()[0];
 451        var start_index = if (key > 0) odb.index_header.fan_out_table[key - 1] else 0;
 452        var end_index = odb.index_header.fan_out_table[key];
 453        const found_index = while (start_index < end_index) {
 454            const mid_index = start_index + (end_index - start_index) / 2;
 455            try odb.index_file.seekTo(IndexHeader.size + mid_index * oid_length);
 456            const mid_oid = try Oid.readBytes(odb.format, &odb.index_file.interface);
 457            switch (mem.order(u8, mid_oid.slice(), oid.slice())) {
 458                .lt => start_index = mid_index + 1,
 459                .gt => end_index = mid_index,
 460                .eq => break mid_index,
 461            }
 462        } else return error.ObjectNotFound;
 463
 464        const n_objects = odb.index_header.fan_out_table[255];
 465        const offset_values_start = IndexHeader.size + n_objects * (oid_length + 4);
 466        try odb.index_file.seekTo(offset_values_start + found_index * 4);
 467        const l1_offset: packed struct { value: u31, big: bool } = @bitCast(try odb.index_file.interface.takeInt(u32, .big));
 468        const pack_offset = pack_offset: {
 469            if (l1_offset.big) {
 470                const l2_offset_values_start = offset_values_start + n_objects * 4;
 471                try odb.index_file.seekTo(l2_offset_values_start + l1_offset.value * 4);
 472                break :pack_offset try odb.index_file.interface.takeInt(u64, .big);
 473            } else {
 474                break :pack_offset l1_offset.value;
 475            }
 476        };
 477
 478        try odb.pack_file.seekTo(pack_offset);
 479    }
 480};
 481
 482const Object = struct {
 483    type: Type,
 484    data: []const u8,
 485
 486    const Type = enum {
 487        commit,
 488        tree,
 489        blob,
 490        tag,
 491    };
 492};
 493
 494/// A cache for object data.
 495///
 496/// The purpose of this cache is to speed up resolution of deltas by caching the
 497/// results of resolving delta objects, while maintaining a maximum cache size
 498/// to avoid excessive memory usage. If the total size of the objects in the
 499/// cache exceeds the maximum, the cache will begin evicting the least recently
 500/// used objects: when resolving delta chains, the most recently used objects
 501/// will likely be more helpful as they will be further along in the chain
 502/// (skipping earlier reconstruction steps).
 503///
 504/// Object data stored in the cache is managed by the cache. It should not be
 505/// freed by the caller at any point after inserting it into the cache. Any
 506/// objects remaining in the cache will be freed when the cache itself is freed.
 507const ObjectCache = struct {
 508    objects: std.AutoHashMapUnmanaged(u64, CacheEntry) = .empty,
 509    lru_nodes: std.DoublyLinkedList = .{},
 510    lru_nodes_len: usize = 0,
 511    byte_size: usize = 0,
 512
 513    const max_byte_size = 128 * 1024 * 1024; // 128MiB
 514    /// A list of offsets stored in the cache, with the most recently used
 515    /// entries at the end.
 516    const LruListNode = struct {
 517        data: u64,
 518        node: std.DoublyLinkedList.Node,
 519    };
 520    const CacheEntry = struct { object: Object, lru_node: *LruListNode };
 521
 522    fn deinit(cache: *ObjectCache, allocator: Allocator) void {
 523        var object_iterator = cache.objects.iterator();
 524        while (object_iterator.next()) |object| {
 525            allocator.free(object.value_ptr.object.data);
 526            allocator.destroy(object.value_ptr.lru_node);
 527        }
 528        cache.objects.deinit(allocator);
 529        cache.* = undefined;
 530    }
 531
 532    /// Gets an object from the cache, moving it to the most recently used
 533    /// position if it is present.
 534    fn get(cache: *ObjectCache, offset: u64) ?Object {
 535        if (cache.objects.get(offset)) |entry| {
 536            cache.lru_nodes.remove(&entry.lru_node.node);
 537            cache.lru_nodes.append(&entry.lru_node.node);
 538            return entry.object;
 539        } else {
 540            return null;
 541        }
 542    }
 543
 544    /// Puts an object in the cache, possibly evicting older entries if the
 545    /// cache exceeds its maximum size. Note that, although old objects may
 546    /// be evicted, the object just added to the cache with this function
 547    /// will not be evicted before the next call to `put` or `deinit` even if
 548    /// it exceeds the maximum cache size.
 549    fn put(cache: *ObjectCache, allocator: Allocator, offset: u64, object: Object) !void {
 550        const lru_node = try allocator.create(LruListNode);
 551        errdefer allocator.destroy(lru_node);
 552        lru_node.data = offset;
 553
 554        const gop = try cache.objects.getOrPut(allocator, offset);
 555        if (gop.found_existing) {
 556            cache.byte_size -= gop.value_ptr.object.data.len;
 557            cache.lru_nodes.remove(&gop.value_ptr.lru_node.node);
 558            cache.lru_nodes_len -= 1;
 559            allocator.destroy(gop.value_ptr.lru_node);
 560            allocator.free(gop.value_ptr.object.data);
 561        }
 562        gop.value_ptr.* = .{ .object = object, .lru_node = lru_node };
 563        cache.byte_size += object.data.len;
 564        cache.lru_nodes.append(&lru_node.node);
 565        cache.lru_nodes_len += 1;
 566
 567        while (cache.byte_size > max_byte_size and cache.lru_nodes_len > 1) {
 568            // The > 1 check is to make sure that we don't evict the most
 569            // recently added node, even if it by itself happens to exceed the
 570            // maximum size of the cache.
 571            const evict_node: *LruListNode = @alignCast(@fieldParentPtr("node", cache.lru_nodes.popFirst().?));
 572            cache.lru_nodes_len -= 1;
 573            const evict_offset = evict_node.data;
 574            allocator.destroy(evict_node);
 575            const evict_object = cache.objects.get(evict_offset).?.object;
 576            cache.byte_size -= evict_object.data.len;
 577            allocator.free(evict_object.data);
 578            _ = cache.objects.remove(evict_offset);
 579        }
 580    }
 581};
 582
 583/// A single pkt-line in the Git protocol.
 584///
 585/// The format of a pkt-line is documented in
 586/// [protocol-common](https://git-scm.com/docs/protocol-common). The special
 587/// meanings of the delimiter and response-end packets are documented in
 588/// [protocol-v2](https://git-scm.com/docs/protocol-v2).
 589pub const Packet = union(enum) {
 590    flush,
 591    delimiter,
 592    response_end,
 593    data: []const u8,
 594
 595    pub const max_data_length = 65516;
 596
 597    /// Reads a packet in pkt-line format.
 598    fn read(reader: *Io.Reader) !Packet {
 599        const packet: Packet = try .peek(reader);
 600        switch (packet) {
 601            .data => |data| reader.toss(data.len),
 602            else => {},
 603        }
 604        return packet;
 605    }
 606
 607    /// Consumes the header of a pkt-line packet and reads any associated data
 608    /// into the reader's buffer, but does not consume the data.
 609    fn peek(reader: *Io.Reader) !Packet {
 610        const length = std.fmt.parseUnsigned(u16, try reader.take(4), 16) catch return error.InvalidPacket;
 611        switch (length) {
 612            0 => return .flush,
 613            1 => return .delimiter,
 614            2 => return .response_end,
 615            3 => return error.InvalidPacket,
 616            else => if (length - 4 > max_data_length) return error.InvalidPacket,
 617        }
 618        return .{ .data = try reader.peek(length - 4) };
 619    }
 620
 621    /// Writes a packet in pkt-line format.
 622    fn write(packet: Packet, writer: *Io.Writer) !void {
 623        switch (packet) {
 624            .flush => try writer.writeAll("0000"),
 625            .delimiter => try writer.writeAll("0001"),
 626            .response_end => try writer.writeAll("0002"),
 627            .data => |data| {
 628                assert(data.len <= max_data_length);
 629                try writer.print("{x:0>4}", .{data.len + 4});
 630                try writer.writeAll(data);
 631            },
 632        }
 633    }
 634
 635    /// Returns the normalized form of textual packet data, stripping any
 636    /// trailing '\n'.
 637    ///
 638    /// As documented in
 639    /// [protocol-common](https://git-scm.com/docs/protocol-common#_pkt_line_format),
 640    /// non-binary (textual) pkt-line data should contain a trailing '\n', but
 641    /// is not required to do so (implementations must support both forms).
 642    fn normalizeText(data: []const u8) []const u8 {
 643        return if (mem.endsWith(u8, data, "\n"))
 644            data[0 .. data.len - 1]
 645        else
 646            data;
 647    }
 648};
 649
 650/// A client session for the Git protocol, currently limited to an HTTP(S)
 651/// transport. Only protocol version 2 is supported, as documented in
 652/// [protocol-v2](https://git-scm.com/docs/protocol-v2).
 653pub const Session = struct {
 654    transport: *std.http.Client,
 655    location: Location,
 656    supports_agent: bool,
 657    supports_shallow: bool,
 658    object_format: Oid.Format,
 659    arena: Allocator,
 660
 661    const agent = "zig/" ++ @import("builtin").zig_version_string;
 662    const agent_capability = std.fmt.comptimePrint("agent={s}\n", .{agent});
 663
 664    /// Initializes a client session and discovers the capabilities of the
 665    /// server for optimal transport.
 666    pub fn init(
 667        arena: Allocator,
 668        transport: *std.http.Client,
 669        uri: std.Uri,
 670        /// Asserted to be at least `Packet.max_data_length`
 671        response_buffer: []u8,
 672    ) !Session {
 673        assert(response_buffer.len >= Packet.max_data_length);
 674        var session: Session = .{
 675            .transport = transport,
 676            .location = try .init(arena, uri),
 677            .supports_agent = false,
 678            .supports_shallow = false,
 679            .object_format = .sha1,
 680            .arena = arena,
 681        };
 682        var capability_iterator: CapabilityIterator = undefined;
 683        try session.getCapabilities(&capability_iterator, response_buffer);
 684        defer capability_iterator.deinit();
 685        while (try capability_iterator.next()) |capability| {
 686            if (mem.eql(u8, capability.key, "agent")) {
 687                session.supports_agent = true;
 688            } else if (mem.eql(u8, capability.key, "fetch")) {
 689                var feature_iterator = mem.splitScalar(u8, capability.value orelse continue, ' ');
 690                while (feature_iterator.next()) |feature| {
 691                    if (mem.eql(u8, feature, "shallow")) {
 692                        session.supports_shallow = true;
 693                    }
 694                }
 695            } else if (mem.eql(u8, capability.key, "object-format")) {
 696                if (std.meta.stringToEnum(Oid.Format, capability.value orelse continue)) |format| {
 697                    session.object_format = format;
 698                }
 699            }
 700        }
 701        return session;
 702    }
 703
 704    /// An owned `std.Uri` representing the location of the server (base URI).
 705    const Location = struct {
 706        uri: std.Uri,
 707
 708        fn init(arena: Allocator, uri: std.Uri) !Location {
 709            const scheme = try arena.dupe(u8, uri.scheme);
 710            const user = if (uri.user) |user| try std.fmt.allocPrint(arena, "{f}", .{
 711                std.fmt.alt(user, .formatUser),
 712            }) else null;
 713            const password = if (uri.password) |password| try std.fmt.allocPrint(arena, "{f}", .{
 714                std.fmt.alt(password, .formatPassword),
 715            }) else null;
 716            const host = if (uri.host) |host| try std.fmt.allocPrint(arena, "{f}", .{
 717                std.fmt.alt(host, .formatHost),
 718            }) else null;
 719            const path = try std.fmt.allocPrint(arena, "{f}", .{
 720                std.fmt.alt(uri.path, .formatPath),
 721            });
 722            // The query and fragment are not used as part of the base server URI.
 723            return .{
 724                .uri = .{
 725                    .scheme = scheme,
 726                    .user = if (user) |s| .{ .percent_encoded = s } else null,
 727                    .password = if (password) |s| .{ .percent_encoded = s } else null,
 728                    .host = if (host) |s| .{ .percent_encoded = s } else null,
 729                    .port = uri.port,
 730                    .path = .{ .percent_encoded = path },
 731                },
 732            };
 733        }
 734    };
 735
 736    /// Returns an iterator over capabilities supported by the server.
 737    ///
 738    /// The `session.location` is updated if the server returns a redirect, so
 739    /// that subsequent session functions do not need to handle redirects.
 740    fn getCapabilities(session: *Session, it: *CapabilityIterator, response_buffer: []u8) !void {
 741        const arena = session.arena;
 742        assert(response_buffer.len >= Packet.max_data_length);
 743        var info_refs_uri = session.location.uri;
 744        {
 745            const session_uri_path = try std.fmt.allocPrint(arena, "{f}", .{
 746                std.fmt.alt(session.location.uri.path, .formatPath),
 747            });
 748            info_refs_uri.path = .{ .percent_encoded = try std.fs.path.resolvePosix(arena, &.{
 749                "/", session_uri_path, "info/refs",
 750            }) };
 751        }
 752        info_refs_uri.query = .{ .percent_encoded = "service=git-upload-pack" };
 753        info_refs_uri.fragment = null;
 754
 755        const max_redirects = 3;
 756        it.* = .{
 757            .request = try session.transport.request(.GET, info_refs_uri, .{
 758                .redirect_behavior = .init(max_redirects),
 759                .extra_headers = &.{
 760                    .{ .name = "Git-Protocol", .value = "version=2" },
 761                },
 762            }),
 763            .reader = undefined,
 764            .decompress = undefined,
 765        };
 766        errdefer it.deinit();
 767        const request = &it.request;
 768        try request.sendBodiless();
 769
 770        var redirect_buffer: [1024]u8 = undefined;
 771        var response = try request.receiveHead(&redirect_buffer);
 772        if (response.head.status != .ok) return error.ProtocolError;
 773        const any_redirects_occurred = request.redirect_behavior.remaining() < max_redirects;
 774        if (any_redirects_occurred) {
 775            const request_uri_path = try std.fmt.allocPrint(arena, "{f}", .{
 776                std.fmt.alt(request.uri.path, .formatPath),
 777            });
 778            if (!mem.endsWith(u8, request_uri_path, "/info/refs")) return error.UnparseableRedirect;
 779            var new_uri = request.uri;
 780            new_uri.path = .{ .percent_encoded = request_uri_path[0 .. request_uri_path.len - "/info/refs".len] };
 781            session.location = try .init(arena, new_uri);
 782        }
 783
 784        const decompress_buffer = try arena.alloc(u8, response.head.content_encoding.minBufferCapacity());
 785        it.reader = response.readerDecompressing(response_buffer, &it.decompress, decompress_buffer);
 786        var state: enum { response_start, response_content } = .response_start;
 787        while (true) {
 788            // Some Git servers (at least GitHub) include an additional
 789            // '# service=git-upload-pack' informative response before sending
 790            // the expected 'version 2' packet and capability information.
 791            // This is not universal: SourceHut, for example, does not do this.
 792            // Thus, we need to skip any such useless additional responses
 793            // before we get the one we're actually looking for. The responses
 794            // will be delimited by flush packets.
 795            const packet = Packet.read(it.reader) catch |err| switch (err) {
 796                error.EndOfStream => return error.UnsupportedProtocol, // 'version 2' packet not found
 797                else => |e| return e,
 798            };
 799            switch (packet) {
 800                .flush => state = .response_start,
 801                .data => |data| switch (state) {
 802                    .response_start => if (mem.eql(u8, Packet.normalizeText(data), "version 2")) {
 803                        return;
 804                    } else {
 805                        state = .response_content;
 806                    },
 807                    else => {},
 808                },
 809                else => return error.UnexpectedPacket,
 810            }
 811        }
 812    }
 813
 814    const CapabilityIterator = struct {
 815        request: std.http.Client.Request,
 816        reader: *Io.Reader,
 817        decompress: std.http.Decompress,
 818
 819        const Capability = struct {
 820            key: []const u8,
 821            value: ?[]const u8 = null,
 822
 823            fn parse(data: []const u8) Capability {
 824                return if (mem.indexOfScalar(u8, data, '=')) |separator_pos|
 825                    .{ .key = data[0..separator_pos], .value = data[separator_pos + 1 ..] }
 826                else
 827                    .{ .key = data };
 828            }
 829        };
 830
 831        fn deinit(it: *CapabilityIterator) void {
 832            it.request.deinit();
 833            it.* = undefined;
 834        }
 835
 836        fn next(it: *CapabilityIterator) !?Capability {
 837            switch (try Packet.read(it.reader)) {
 838                .flush => return null,
 839                .data => |data| return Capability.parse(Packet.normalizeText(data)),
 840                else => return error.UnexpectedPacket,
 841            }
 842        }
 843    };
 844
 845    const ListRefsOptions = struct {
 846        /// The ref prefixes (if any) to use to filter the refs available on the
 847        /// server. Note that the client must still check the returned refs
 848        /// against its desired filters itself: the server is not required to
 849        /// respect these prefix filters and may return other refs as well.
 850        ref_prefixes: []const []const u8 = &.{},
 851        /// Whether to include symref targets for returned symbolic refs.
 852        include_symrefs: bool = false,
 853        /// Whether to include the peeled object ID for returned tag refs.
 854        include_peeled: bool = false,
 855        /// Asserted to be at least `Packet.max_data_length`.
 856        buffer: []u8,
 857    };
 858
 859    /// Returns an iterator over refs known to the server.
 860    pub fn listRefs(session: Session, it: *RefIterator, options: ListRefsOptions) !void {
 861        const arena = session.arena;
 862        assert(options.buffer.len >= Packet.max_data_length);
 863        var upload_pack_uri = session.location.uri;
 864        {
 865            const session_uri_path = try std.fmt.allocPrint(arena, "{f}", .{
 866                std.fmt.alt(session.location.uri.path, .formatPath),
 867            });
 868            upload_pack_uri.path = .{ .percent_encoded = try std.fs.path.resolvePosix(arena, &.{ "/", session_uri_path, "git-upload-pack" }) };
 869        }
 870        upload_pack_uri.query = null;
 871        upload_pack_uri.fragment = null;
 872
 873        var body: Io.Writer = .fixed(options.buffer);
 874        try Packet.write(.{ .data = "command=ls-refs\n" }, &body);
 875        if (session.supports_agent) {
 876            try Packet.write(.{ .data = agent_capability }, &body);
 877        }
 878        {
 879            const object_format_packet = try std.fmt.allocPrint(arena, "object-format={t}\n", .{
 880                session.object_format,
 881            });
 882            try Packet.write(.{ .data = object_format_packet }, &body);
 883        }
 884        try Packet.write(.delimiter, &body);
 885        for (options.ref_prefixes) |ref_prefix| {
 886            const ref_prefix_packet = try std.fmt.allocPrint(arena, "ref-prefix {s}\n", .{ref_prefix});
 887            try Packet.write(.{ .data = ref_prefix_packet }, &body);
 888        }
 889        if (options.include_symrefs) {
 890            try Packet.write(.{ .data = "symrefs\n" }, &body);
 891        }
 892        if (options.include_peeled) {
 893            try Packet.write(.{ .data = "peel\n" }, &body);
 894        }
 895        try Packet.write(.flush, &body);
 896
 897        it.* = .{
 898            .request = try session.transport.request(.POST, upload_pack_uri, .{
 899                .redirect_behavior = .unhandled,
 900                .extra_headers = &.{
 901                    .{ .name = "Content-Type", .value = "application/x-git-upload-pack-request" },
 902                    .{ .name = "Git-Protocol", .value = "version=2" },
 903                },
 904            }),
 905            .reader = undefined,
 906            .format = session.object_format,
 907            .decompress = undefined,
 908        };
 909        const request = &it.request;
 910        errdefer request.deinit();
 911        try request.sendBodyComplete(body.buffered());
 912
 913        var response = try request.receiveHead(options.buffer);
 914        if (response.head.status != .ok) return error.ProtocolError;
 915        const decompress_buffer = try arena.alloc(u8, response.head.content_encoding.minBufferCapacity());
 916        it.reader = response.readerDecompressing(options.buffer, &it.decompress, decompress_buffer);
 917    }
 918
 919    pub const RefIterator = struct {
 920        format: Oid.Format,
 921        request: std.http.Client.Request,
 922        reader: *Io.Reader,
 923        decompress: std.http.Decompress,
 924
 925        pub const Ref = struct {
 926            oid: Oid,
 927            name: []const u8,
 928            symref_target: ?[]const u8,
 929            peeled: ?Oid,
 930        };
 931
 932        pub fn deinit(iterator: *RefIterator) void {
 933            iterator.request.deinit();
 934            iterator.* = undefined;
 935        }
 936
 937        pub fn next(it: *RefIterator) !?Ref {
 938            switch (try Packet.read(it.reader)) {
 939                .flush => return null,
 940                .data => |data| {
 941                    const ref_data = Packet.normalizeText(data);
 942                    const oid_sep_pos = mem.indexOfScalar(u8, ref_data, ' ') orelse return error.InvalidRefPacket;
 943                    const oid = Oid.parse(it.format, data[0..oid_sep_pos]) catch return error.InvalidRefPacket;
 944
 945                    const name_sep_pos = mem.indexOfScalarPos(u8, ref_data, oid_sep_pos + 1, ' ') orelse ref_data.len;
 946                    const name = ref_data[oid_sep_pos + 1 .. name_sep_pos];
 947
 948                    var symref_target: ?[]const u8 = null;
 949                    var peeled: ?Oid = null;
 950                    var last_sep_pos = name_sep_pos;
 951                    while (last_sep_pos < ref_data.len) {
 952                        const next_sep_pos = mem.indexOfScalarPos(u8, ref_data, last_sep_pos + 1, ' ') orelse ref_data.len;
 953                        const attribute = ref_data[last_sep_pos + 1 .. next_sep_pos];
 954                        if (mem.startsWith(u8, attribute, "symref-target:")) {
 955                            symref_target = attribute["symref-target:".len..];
 956                        } else if (mem.startsWith(u8, attribute, "peeled:")) {
 957                            peeled = Oid.parse(it.format, attribute["peeled:".len..]) catch return error.InvalidRefPacket;
 958                        }
 959                        last_sep_pos = next_sep_pos;
 960                    }
 961
 962                    return .{ .oid = oid, .name = name, .symref_target = symref_target, .peeled = peeled };
 963                },
 964                else => return error.UnexpectedPacket,
 965            }
 966        }
 967    };
 968
 969    /// Fetches the given refs from the server. A shallow fetch (depth 1) is
 970    /// performed if the server supports it.
 971    pub fn fetch(
 972        session: Session,
 973        fs: *FetchStream,
 974        wants: []const []const u8,
 975        /// Asserted to be at least `Packet.max_data_length`.
 976        response_buffer: []u8,
 977    ) !void {
 978        const arena = session.arena;
 979        assert(response_buffer.len >= Packet.max_data_length);
 980        var upload_pack_uri = session.location.uri;
 981        {
 982            const session_uri_path = try std.fmt.allocPrint(arena, "{f}", .{
 983                std.fmt.alt(session.location.uri.path, .formatPath),
 984            });
 985            upload_pack_uri.path = .{ .percent_encoded = try std.fs.path.resolvePosix(arena, &.{ "/", session_uri_path, "git-upload-pack" }) };
 986        }
 987        upload_pack_uri.query = null;
 988        upload_pack_uri.fragment = null;
 989
 990        var body: Io.Writer = .fixed(response_buffer);
 991        try Packet.write(.{ .data = "command=fetch\n" }, &body);
 992        if (session.supports_agent) {
 993            try Packet.write(.{ .data = agent_capability }, &body);
 994        }
 995        {
 996            const object_format_packet = try std.fmt.allocPrint(arena, "object-format={s}\n", .{@tagName(session.object_format)});
 997            try Packet.write(.{ .data = object_format_packet }, &body);
 998        }
 999        try Packet.write(.delimiter, &body);
1000        // Our packfile parser supports the OFS_DELTA object type
1001        try Packet.write(.{ .data = "ofs-delta\n" }, &body);
1002        // We do not currently convey server progress information to the user
1003        try Packet.write(.{ .data = "no-progress\n" }, &body);
1004        if (session.supports_shallow) {
1005            try Packet.write(.{ .data = "deepen 1\n" }, &body);
1006        }
1007        for (wants) |want| {
1008            var buf: [Packet.max_data_length]u8 = undefined;
1009            const arg = std.fmt.bufPrint(&buf, "want {s}\n", .{want}) catch unreachable;
1010            try Packet.write(.{ .data = arg }, &body);
1011        }
1012        try Packet.write(.{ .data = "done\n" }, &body);
1013        try Packet.write(.flush, &body);
1014
1015        fs.* = .{
1016            .request = try session.transport.request(.POST, upload_pack_uri, .{
1017                .redirect_behavior = .not_allowed,
1018                .extra_headers = &.{
1019                    .{ .name = "Content-Type", .value = "application/x-git-upload-pack-request" },
1020                    .{ .name = "Git-Protocol", .value = "version=2" },
1021                },
1022            }),
1023            .input = undefined,
1024            .reader = undefined,
1025            .remaining_len = undefined,
1026            .decompress = undefined,
1027        };
1028        const request = &fs.request;
1029        errdefer request.deinit();
1030
1031        try request.sendBodyComplete(body.buffered());
1032
1033        var response = try request.receiveHead(&.{});
1034        if (response.head.status != .ok) return error.ProtocolError;
1035
1036        const decompress_buffer = try arena.alloc(u8, response.head.content_encoding.minBufferCapacity());
1037        const reader = response.readerDecompressing(response_buffer, &fs.decompress, decompress_buffer);
1038        // We are not interested in any of the sections of the returned fetch
1039        // data other than the packfile section, since we aren't doing anything
1040        // complex like ref negotiation (this is a fresh clone).
1041        var state: enum { section_start, section_content } = .section_start;
1042        while (true) {
1043            const packet = try Packet.read(reader);
1044            switch (state) {
1045                .section_start => switch (packet) {
1046                    .data => |data| if (mem.eql(u8, Packet.normalizeText(data), "packfile")) {
1047                        fs.input = reader;
1048                        fs.reader = .{
1049                            .buffer = &.{},
1050                            .vtable = &.{ .stream = FetchStream.stream },
1051                            .seek = 0,
1052                            .end = 0,
1053                        };
1054                        fs.remaining_len = 0;
1055                        return;
1056                    } else {
1057                        state = .section_content;
1058                    },
1059                    else => return error.UnexpectedPacket,
1060                },
1061                .section_content => switch (packet) {
1062                    .delimiter => state = .section_start,
1063                    .data => {},
1064                    else => return error.UnexpectedPacket,
1065                },
1066            }
1067        }
1068    }
1069
1070    pub const FetchStream = struct {
1071        request: std.http.Client.Request,
1072        input: *Io.Reader,
1073        reader: Io.Reader,
1074        err: ?Error = null,
1075        remaining_len: usize,
1076        decompress: std.http.Decompress,
1077
1078        pub fn deinit(fs: *FetchStream) void {
1079            fs.request.deinit();
1080        }
1081
1082        pub const Error = error{
1083            InvalidPacket,
1084            ProtocolError,
1085            UnexpectedPacket,
1086            WriteFailed,
1087            ReadFailed,
1088            EndOfStream,
1089        };
1090
1091        const StreamCode = enum(u8) {
1092            pack_data = 1,
1093            progress = 2,
1094            fatal_error = 3,
1095            _,
1096        };
1097
1098        pub fn stream(r: *Io.Reader, w: *Io.Writer, limit: Io.Limit) Io.Reader.StreamError!usize {
1099            const fs: *FetchStream = @alignCast(@fieldParentPtr("reader", r));
1100            const input = fs.input;
1101            if (fs.remaining_len == 0) {
1102                while (true) {
1103                    switch (Packet.peek(input) catch |err| {
1104                        fs.err = err;
1105                        return error.ReadFailed;
1106                    }) {
1107                        .flush => return error.EndOfStream,
1108                        .data => |data| if (data.len > 1) switch (@as(StreamCode, @enumFromInt(data[0]))) {
1109                            .pack_data => {
1110                                input.toss(1);
1111                                fs.remaining_len = data.len - 1;
1112                                break;
1113                            },
1114                            .fatal_error => {
1115                                fs.err = error.ProtocolError;
1116                                return error.ReadFailed;
1117                            },
1118                            else => {},
1119                        },
1120                        else => {
1121                            fs.err = error.UnexpectedPacket;
1122                            return error.ReadFailed;
1123                        },
1124                    }
1125                }
1126            }
1127            const buf = limit.slice(try w.writableSliceGreedy(1));
1128            const n = @min(buf.len, fs.remaining_len);
1129            try input.readSliceAll(buf[0..n]);
1130            w.advance(n);
1131            fs.remaining_len -= n;
1132            return n;
1133        }
1134    };
1135};
1136
1137const PackHeader = struct {
1138    total_objects: u32,
1139
1140    const signature = "PACK";
1141    const supported_version = 2;
1142
1143    fn read(reader: *Io.Reader) !PackHeader {
1144        const actual_signature = reader.take(4) catch |e| switch (e) {
1145            error.EndOfStream => return error.InvalidHeader,
1146            else => |other| return other,
1147        };
1148        if (!mem.eql(u8, actual_signature, signature)) return error.InvalidHeader;
1149        const version = reader.takeInt(u32, .big) catch |e| switch (e) {
1150            error.EndOfStream => return error.InvalidHeader,
1151            else => |other| return other,
1152        };
1153        if (version != supported_version) return error.UnsupportedVersion;
1154        const total_objects = reader.takeInt(u32, .big) catch |e| switch (e) {
1155            error.EndOfStream => return error.InvalidHeader,
1156            else => |other| return other,
1157        };
1158        return .{ .total_objects = total_objects };
1159    }
1160};
1161
1162const EntryHeader = union(Type) {
1163    commit: Undeltified,
1164    tree: Undeltified,
1165    blob: Undeltified,
1166    tag: Undeltified,
1167    ofs_delta: OfsDelta,
1168    ref_delta: RefDelta,
1169
1170    const Type = enum(u3) {
1171        commit = 1,
1172        tree = 2,
1173        blob = 3,
1174        tag = 4,
1175        ofs_delta = 6,
1176        ref_delta = 7,
1177    };
1178
1179    const Undeltified = struct {
1180        uncompressed_length: u64,
1181    };
1182
1183    const OfsDelta = struct {
1184        offset: u64,
1185        uncompressed_length: u64,
1186    };
1187
1188    const RefDelta = struct {
1189        base_object: Oid,
1190        uncompressed_length: u64,
1191    };
1192
1193    fn objectType(header: EntryHeader) Object.Type {
1194        return switch (header) {
1195            inline .commit, .tree, .blob, .tag => |_, tag| @field(Object.Type, @tagName(tag)),
1196            else => unreachable,
1197        };
1198    }
1199
1200    fn uncompressedLength(header: EntryHeader) u64 {
1201        return switch (header) {
1202            inline else => |entry| entry.uncompressed_length,
1203        };
1204    }
1205
1206    fn read(format: Oid.Format, reader: *Io.Reader) !EntryHeader {
1207        const InitialByte = packed struct { len: u4, type: u3, has_next: bool };
1208        const initial: InitialByte = @bitCast(reader.takeByte() catch |e| switch (e) {
1209            error.EndOfStream => return error.InvalidFormat,
1210            else => |other| return other,
1211        });
1212        const rest_len = if (initial.has_next) try reader.takeLeb128(u64) else 0;
1213        var uncompressed_length: u64 = initial.len;
1214        uncompressed_length |= std.math.shlExact(u64, rest_len, 4) catch return error.InvalidFormat;
1215        const @"type" = std.enums.fromInt(EntryHeader.Type, initial.type) orelse return error.InvalidFormat;
1216        return switch (@"type") {
1217            inline .commit, .tree, .blob, .tag => |tag| @unionInit(EntryHeader, @tagName(tag), .{
1218                .uncompressed_length = uncompressed_length,
1219            }),
1220            .ofs_delta => .{ .ofs_delta = .{
1221                .offset = try readOffsetVarInt(reader),
1222                .uncompressed_length = uncompressed_length,
1223            } },
1224            .ref_delta => .{ .ref_delta = .{
1225                .base_object = Oid.readBytes(format, reader) catch |e| switch (e) {
1226                    error.EndOfStream => return error.InvalidFormat,
1227                    else => |other| return other,
1228                },
1229                .uncompressed_length = uncompressed_length,
1230            } },
1231        };
1232    }
1233};
1234
1235fn readOffsetVarInt(r: *Io.Reader) !u64 {
1236    const Byte = packed struct { value: u7, has_next: bool };
1237    var b: Byte = @bitCast(try r.takeByte());
1238    var value: u64 = b.value;
1239    while (b.has_next) {
1240        b = @bitCast(try r.takeByte());
1241        value = std.math.shlExact(u64, value + 1, 7) catch return error.InvalidFormat;
1242        value |= b.value;
1243    }
1244    return value;
1245}
1246
1247const IndexHeader = struct {
1248    fan_out_table: [256]u32,
1249
1250    const signature = "\xFFtOc";
1251    const supported_version = 2;
1252    const size = 4 + 4 + @sizeOf([256]u32);
1253
1254    fn read(index_header: *IndexHeader, reader: *Io.Reader) !void {
1255        const sig = try reader.take(4);
1256        if (!mem.eql(u8, sig, signature)) return error.InvalidHeader;
1257        const version = try reader.takeInt(u32, .big);
1258        if (version != supported_version) return error.UnsupportedVersion;
1259        try reader.readSliceEndian(u32, &index_header.fan_out_table, .big);
1260    }
1261};
1262
1263const IndexEntry = struct {
1264    offset: u64,
1265    crc32: u32,
1266};
1267
1268/// Writes out a version 2 index for the given packfile, as documented in
1269/// [pack-format](https://git-scm.com/docs/pack-format).
1270pub fn indexPack(
1271    allocator: Allocator,
1272    format: Oid.Format,
1273    pack: *std.fs.File.Reader,
1274    index_writer: *std.fs.File.Writer,
1275) !void {
1276    try pack.seekTo(0);
1277
1278    var index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry) = .empty;
1279    defer index_entries.deinit(allocator);
1280    var pending_deltas: std.ArrayList(IndexEntry) = .empty;
1281    defer pending_deltas.deinit(allocator);
1282
1283    const pack_checksum = try indexPackFirstPass(allocator, format, pack, &index_entries, &pending_deltas);
1284
1285    var cache: ObjectCache = .{};
1286    defer cache.deinit(allocator);
1287    var remaining_deltas = pending_deltas.items.len;
1288    while (remaining_deltas > 0) {
1289        var i: usize = remaining_deltas;
1290        while (i > 0) {
1291            i -= 1;
1292            const delta = pending_deltas.items[i];
1293            if (try indexPackHashDelta(allocator, format, pack, delta, index_entries, &cache)) |oid| {
1294                try index_entries.put(allocator, oid, delta);
1295                _ = pending_deltas.swapRemove(i);
1296            }
1297        }
1298        if (pending_deltas.items.len == remaining_deltas) return error.IncompletePack;
1299        remaining_deltas = pending_deltas.items.len;
1300    }
1301
1302    var oids: std.ArrayList(Oid) = .empty;
1303    defer oids.deinit(allocator);
1304    try oids.ensureTotalCapacityPrecise(allocator, index_entries.count());
1305    var index_entries_iter = index_entries.iterator();
1306    while (index_entries_iter.next()) |entry| {
1307        oids.appendAssumeCapacity(entry.key_ptr.*);
1308    }
1309    mem.sortUnstable(Oid, oids.items, {}, struct {
1310        fn lessThan(_: void, o1: Oid, o2: Oid) bool {
1311            return mem.lessThan(u8, o1.slice(), o2.slice());
1312        }
1313    }.lessThan);
1314
1315    var fan_out_table: [256]u32 = undefined;
1316    var count: u32 = 0;
1317    var fan_out_index: u8 = 0;
1318    for (oids.items) |oid| {
1319        const key = oid.slice()[0];
1320        if (key > fan_out_index) {
1321            @memset(fan_out_table[fan_out_index..key], count);
1322            fan_out_index = key;
1323        }
1324        count += 1;
1325    }
1326    @memset(fan_out_table[fan_out_index..], count);
1327
1328    var index_hashed_writer = Io.Writer.hashed(&index_writer.interface, Oid.Hasher.init(format), &.{});
1329    const writer = &index_hashed_writer.writer;
1330    try writer.writeAll(IndexHeader.signature);
1331    try writer.writeInt(u32, IndexHeader.supported_version, .big);
1332    for (fan_out_table) |fan_out_entry| {
1333        try writer.writeInt(u32, fan_out_entry, .big);
1334    }
1335
1336    for (oids.items) |oid| {
1337        try writer.writeAll(oid.slice());
1338    }
1339
1340    for (oids.items) |oid| {
1341        try writer.writeInt(u32, index_entries.get(oid).?.crc32, .big);
1342    }
1343
1344    var big_offsets: std.ArrayList(u64) = .empty;
1345    defer big_offsets.deinit(allocator);
1346    for (oids.items) |oid| {
1347        const offset = index_entries.get(oid).?.offset;
1348        if (offset <= std.math.maxInt(u31)) {
1349            try writer.writeInt(u32, @intCast(offset), .big);
1350        } else {
1351            const index = big_offsets.items.len;
1352            try big_offsets.append(allocator, offset);
1353            try writer.writeInt(u32, @as(u32, @intCast(index)) | (1 << 31), .big);
1354        }
1355    }
1356    for (big_offsets.items) |offset| {
1357        try writer.writeInt(u64, offset, .big);
1358    }
1359
1360    try writer.writeAll(pack_checksum.slice());
1361    const index_checksum = index_hashed_writer.hasher.finalResult();
1362    try index_writer.interface.writeAll(index_checksum.slice());
1363    try index_writer.end();
1364}
1365
1366/// Performs the first pass over the packfile data for index construction.
1367/// This will index all non-delta objects, queue delta objects for further
1368/// processing, and return the pack checksum (which is part of the index
1369/// format).
1370fn indexPackFirstPass(
1371    allocator: Allocator,
1372    format: Oid.Format,
1373    pack: *std.fs.File.Reader,
1374    index_entries: *std.AutoHashMapUnmanaged(Oid, IndexEntry),
1375    pending_deltas: *std.ArrayList(IndexEntry),
1376) !Oid {
1377    var flate_buffer: [std.compress.flate.max_window_len]u8 = undefined;
1378    var pack_buffer: [2048]u8 = undefined; // Reasonably large buffer for file system.
1379    var pack_hashed = pack.interface.hashed(Oid.Hasher.init(format), &pack_buffer);
1380
1381    const pack_header = try PackHeader.read(&pack_hashed.reader);
1382
1383    for (0..pack_header.total_objects) |_| {
1384        const entry_offset = pack.logicalPos() - pack_hashed.reader.bufferedLen();
1385        const entry_header = try EntryHeader.read(format, &pack_hashed.reader);
1386        switch (entry_header) {
1387            .commit, .tree, .blob, .tag => |object| {
1388                var entry_decompress: std.compress.flate.Decompress = .init(&pack_hashed.reader, .zlib, &.{});
1389                var oid_hasher: Oid.Hashing = .init(format, &flate_buffer);
1390                const oid_hasher_w = oid_hasher.writer();
1391                // The object header is not included in the pack data but is
1392                // part of the object's ID
1393                try oid_hasher_w.print("{t} {d}\x00", .{ entry_header, object.uncompressed_length });
1394                const n = try entry_decompress.reader.streamRemaining(oid_hasher_w);
1395                if (n != object.uncompressed_length) return error.InvalidObject;
1396                const oid = oid_hasher.final();
1397                if (!skip_checksums) @compileError("TODO");
1398                try index_entries.put(allocator, oid, .{
1399                    .offset = entry_offset,
1400                    .crc32 = 0,
1401                });
1402            },
1403            inline .ofs_delta, .ref_delta => |delta| {
1404                var entry_decompress: std.compress.flate.Decompress = .init(&pack_hashed.reader, .zlib, &flate_buffer);
1405                const n = try entry_decompress.reader.discardRemaining();
1406                if (n != delta.uncompressed_length) return error.InvalidObject;
1407                if (!skip_checksums) @compileError("TODO");
1408                try pending_deltas.append(allocator, .{
1409                    .offset = entry_offset,
1410                    .crc32 = 0,
1411                });
1412            },
1413        }
1414    }
1415
1416    if (!skip_checksums) @compileError("TODO");
1417    return pack_hashed.hasher.finalResult();
1418}
1419
1420/// Attempts to determine the final object ID of the given deltified object.
1421/// May return null if this is not yet possible (if the delta is a ref-based
1422/// delta and we do not yet know the offset of the base object).
1423fn indexPackHashDelta(
1424    allocator: Allocator,
1425    format: Oid.Format,
1426    pack: *std.fs.File.Reader,
1427    delta: IndexEntry,
1428    index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry),
1429    cache: *ObjectCache,
1430) !?Oid {
1431    // Figure out the chain of deltas to resolve
1432    var base_offset = delta.offset;
1433    var base_header: EntryHeader = undefined;
1434    var delta_offsets: std.ArrayList(u64) = .empty;
1435    defer delta_offsets.deinit(allocator);
1436    const base_object = while (true) {
1437        if (cache.get(base_offset)) |base_object| break base_object;
1438
1439        try pack.seekTo(base_offset);
1440        base_header = try EntryHeader.read(format, &pack.interface);
1441        switch (base_header) {
1442            .ofs_delta => |ofs_delta| {
1443                try delta_offsets.append(allocator, base_offset);
1444                base_offset = std.math.sub(u64, base_offset, ofs_delta.offset) catch return error.InvalidObject;
1445            },
1446            .ref_delta => |ref_delta| {
1447                try delta_offsets.append(allocator, base_offset);
1448                base_offset = (index_entries.get(ref_delta.base_object) orelse return null).offset;
1449            },
1450            else => {
1451                const base_data = try readObjectRaw(allocator, &pack.interface, base_header.uncompressedLength());
1452                errdefer allocator.free(base_data);
1453                const base_object: Object = .{ .type = base_header.objectType(), .data = base_data };
1454                try cache.put(allocator, base_offset, base_object);
1455                break base_object;
1456            },
1457        }
1458    };
1459
1460    const base_data = try resolveDeltaChain(allocator, format, pack, base_object, delta_offsets.items, cache);
1461
1462    var entry_hasher_buffer: [64]u8 = undefined;
1463    var entry_hasher: Oid.Hashing = .init(format, &entry_hasher_buffer);
1464    const entry_hasher_w = entry_hasher.writer();
1465    // Writes to hashers cannot fail.
1466    entry_hasher_w.print("{t} {d}\x00", .{ base_object.type, base_data.len }) catch unreachable;
1467    entry_hasher_w.writeAll(base_data) catch unreachable;
1468    return entry_hasher.final();
1469}
1470
1471/// Resolves a chain of deltas, returning the final base object data. `pack` is
1472/// assumed to be looking at the start of the object data for the base object of
1473/// the chain, and will then apply the deltas in `delta_offsets` in reverse order
1474/// to obtain the final object.
1475fn resolveDeltaChain(
1476    allocator: Allocator,
1477    format: Oid.Format,
1478    pack: *std.fs.File.Reader,
1479    base_object: Object,
1480    delta_offsets: []const u64,
1481    cache: *ObjectCache,
1482) ![]const u8 {
1483    var base_data = base_object.data;
1484    var i: usize = delta_offsets.len;
1485    while (i > 0) {
1486        i -= 1;
1487
1488        const delta_offset = delta_offsets[i];
1489        try pack.seekTo(delta_offset);
1490        const delta_header = try EntryHeader.read(format, &pack.interface);
1491        const delta_data = try readObjectRaw(allocator, &pack.interface, delta_header.uncompressedLength());
1492        defer allocator.free(delta_data);
1493        var delta_reader: Io.Reader = .fixed(delta_data);
1494        _ = try delta_reader.takeLeb128(u64); // base object size
1495        const expanded_size = try delta_reader.takeLeb128(u64);
1496
1497        const expanded_alloc_size = std.math.cast(usize, expanded_size) orelse return error.ObjectTooLarge;
1498        const expanded_data = try allocator.alloc(u8, expanded_alloc_size);
1499        errdefer allocator.free(expanded_data);
1500        var expanded_delta_stream: Io.Writer = .fixed(expanded_data);
1501        try expandDelta(base_data, &delta_reader, &expanded_delta_stream);
1502        if (expanded_delta_stream.end != expanded_size) return error.InvalidObject;
1503
1504        try cache.put(allocator, delta_offset, .{ .type = base_object.type, .data = expanded_data });
1505        base_data = expanded_data;
1506    }
1507    return base_data;
1508}
1509
1510/// Reads the complete contents of an object from `reader`. This function may
1511/// read more bytes than required from `reader`, so the reader position after
1512/// returning is not reliable.
1513fn readObjectRaw(allocator: Allocator, reader: *Io.Reader, size: u64) ![]u8 {
1514    const alloc_size = std.math.cast(usize, size) orelse return error.ObjectTooLarge;
1515    var aw: Io.Writer.Allocating = .init(allocator);
1516    try aw.ensureTotalCapacity(alloc_size + std.compress.flate.max_window_len);
1517    defer aw.deinit();
1518    var decompress: std.compress.flate.Decompress = .init(reader, .zlib, &.{});
1519    try decompress.reader.streamExact(&aw.writer, alloc_size);
1520    return aw.toOwnedSlice();
1521}
1522
1523/// Expands delta data from `delta_reader` to `writer`.
1524///
1525/// The format of the delta data is documented in
1526/// [pack-format](https://git-scm.com/docs/pack-format).
1527fn expandDelta(base_object: []const u8, delta_reader: *Io.Reader, writer: *Io.Writer) !void {
1528    while (true) {
1529        const inst: packed struct { value: u7, copy: bool } = @bitCast(delta_reader.takeByte() catch |e| switch (e) {
1530            error.EndOfStream => return,
1531            else => |other| return other,
1532        });
1533        if (inst.copy) {
1534            const available: packed struct {
1535                offset1: bool,
1536                offset2: bool,
1537                offset3: bool,
1538                offset4: bool,
1539                size1: bool,
1540                size2: bool,
1541                size3: bool,
1542            } = @bitCast(inst.value);
1543            const offset_parts: packed struct { offset1: u8, offset2: u8, offset3: u8, offset4: u8 } = .{
1544                .offset1 = if (available.offset1) try delta_reader.takeByte() else 0,
1545                .offset2 = if (available.offset2) try delta_reader.takeByte() else 0,
1546                .offset3 = if (available.offset3) try delta_reader.takeByte() else 0,
1547                .offset4 = if (available.offset4) try delta_reader.takeByte() else 0,
1548            };
1549            const base_offset: u32 = @bitCast(offset_parts);
1550            const size_parts: packed struct { size1: u8, size2: u8, size3: u8 } = .{
1551                .size1 = if (available.size1) try delta_reader.takeByte() else 0,
1552                .size2 = if (available.size2) try delta_reader.takeByte() else 0,
1553                .size3 = if (available.size3) try delta_reader.takeByte() else 0,
1554            };
1555            var size: u24 = @bitCast(size_parts);
1556            if (size == 0) size = 0x10000;
1557            try writer.writeAll(base_object[base_offset..][0..size]);
1558        } else if (inst.value != 0) {
1559            try delta_reader.streamExact(writer, inst.value);
1560        } else {
1561            return error.InvalidDeltaInstruction;
1562        }
1563    }
1564}
1565
1566/// Runs the packfile indexing and checkout test.
1567///
1568/// The two testrepo repositories under testdata contain identical commit
1569/// histories and contents.
1570///
1571/// To verify the contents of the packfiles using Git alone, run the
1572/// following commands in an empty directory:
1573///
1574/// 1. `git init --object-format=(sha1|sha256)`
1575/// 2. `git unpack-objects <path/to/testrepo.pack`
1576/// 3. `git fsck` - will print one "dangling commit":
1577///    - SHA-1: `dd582c0720819ab7130b103635bd7271b9fd4feb`
1578///    - SHA-256: `7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a`
1579/// 4. `git checkout $commit`
1580fn runRepositoryTest(io: Io, comptime format: Oid.Format, head_commit: []const u8) !void {
1581    const testrepo_pack = @embedFile("git/testdata/testrepo-" ++ @tagName(format) ++ ".pack");
1582
1583    var git_dir = testing.tmpDir(.{});
1584    defer git_dir.cleanup();
1585    var pack_file = try git_dir.dir.createFile("testrepo.pack", .{ .read = true });
1586    defer pack_file.close();
1587    try pack_file.writeAll(testrepo_pack);
1588
1589    var pack_file_buffer: [2000]u8 = undefined;
1590    var pack_file_reader = pack_file.reader(io, &pack_file_buffer);
1591
1592    var index_file = try git_dir.dir.createFile("testrepo.idx", .{ .read = true });
1593    defer index_file.close();
1594    var index_file_buffer: [2000]u8 = undefined;
1595    var index_file_writer = index_file.writer(&index_file_buffer);
1596    try indexPack(testing.allocator, format, &pack_file_reader, &index_file_writer);
1597
1598    // Arbitrary size limit on files read while checking the repository contents
1599    // (all files in the test repo are known to be smaller than this)
1600    const max_file_size = 8192;
1601
1602    if (!skip_checksums) {
1603        const index_file_data = try git_dir.dir.readFileAlloc("testrepo.idx", testing.allocator, .limited(max_file_size));
1604        defer testing.allocator.free(index_file_data);
1605        // testrepo.idx is generated by Git. The index created by this file should
1606        // match it exactly. Running `git verify-pack -v testrepo.pack` can verify
1607        // this.
1608        const testrepo_idx = @embedFile("git/testdata/testrepo-" ++ @tagName(format) ++ ".idx");
1609        try testing.expectEqualSlices(u8, testrepo_idx, index_file_data);
1610    }
1611
1612    var index_file_reader = index_file.reader(io, &index_file_buffer);
1613    var repository: Repository = undefined;
1614    try repository.init(testing.allocator, format, &pack_file_reader, &index_file_reader);
1615    defer repository.deinit();
1616
1617    var worktree = testing.tmpDir(.{ .iterate = true });
1618    defer worktree.cleanup();
1619
1620    const commit_id = try Oid.parse(format, head_commit);
1621
1622    var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
1623    defer diagnostics.deinit();
1624    try repository.checkout(worktree.dir, commit_id, &diagnostics);
1625    try testing.expect(diagnostics.errors.items.len == 0);
1626
1627    const expected_files: []const []const u8 = &.{
1628        "dir/file",
1629        "dir/subdir/file",
1630        "dir/subdir/file2",
1631        "dir2/file",
1632        "dir3/file",
1633        "dir3/file2",
1634        "file",
1635        "file2",
1636        "file3",
1637        "file4",
1638        "file5",
1639        "file6",
1640        "file7",
1641        "file8",
1642        "file9",
1643    };
1644    var actual_files: std.ArrayList([]u8) = .empty;
1645    defer actual_files.deinit(testing.allocator);
1646    defer for (actual_files.items) |file| testing.allocator.free(file);
1647    var walker = try worktree.dir.walk(testing.allocator);
1648    defer walker.deinit();
1649    while (try walker.next()) |entry| {
1650        if (entry.kind != .file) continue;
1651        const path = try testing.allocator.dupe(u8, entry.path);
1652        errdefer testing.allocator.free(path);
1653        mem.replaceScalar(u8, path, std.fs.path.sep, '/');
1654        try actual_files.append(testing.allocator, path);
1655    }
1656    mem.sortUnstable([]u8, actual_files.items, {}, struct {
1657        fn lessThan(_: void, a: []u8, b: []u8) bool {
1658            return mem.lessThan(u8, a, b);
1659        }
1660    }.lessThan);
1661    try testing.expectEqualDeep(expected_files, actual_files.items);
1662
1663    const expected_file_contents =
1664        \\revision 1
1665        \\revision 2
1666        \\revision 4
1667        \\revision 5
1668        \\revision 7
1669        \\revision 8
1670        \\revision 9
1671        \\revision 10
1672        \\revision 12
1673        \\revision 13
1674        \\revision 14
1675        \\revision 18
1676        \\revision 19
1677        \\
1678    ;
1679    const actual_file_contents = try worktree.dir.readFileAlloc("file", testing.allocator, .limited(max_file_size));
1680    defer testing.allocator.free(actual_file_contents);
1681    try testing.expectEqualStrings(expected_file_contents, actual_file_contents);
1682}
1683
1684/// Checksum calculation is useful for troubleshooting and debugging, but it's
1685/// redundant since the package manager already does content hashing at the
1686/// end. Let's save time by not doing that work, but, I left a cookie crumb
1687/// trail here if you want to restore the functionality for tinkering purposes.
1688const skip_checksums = true;
1689
1690test "SHA-1 packfile indexing and checkout" {
1691    try runRepositoryTest(std.testing.io, .sha1, "dd582c0720819ab7130b103635bd7271b9fd4feb");
1692}
1693
1694test "SHA-256 packfile indexing and checkout" {
1695    try runRepositoryTest(std.testing.io, .sha256, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a");
1696}
1697
1698/// Checks out a commit of a packfile. Intended for experimenting with and
1699/// benchmarking possible optimizations to the indexing and checkout behavior.
1700pub fn main() !void {
1701    const allocator = std.heap.smp_allocator;
1702
1703    var threaded: Io.Threaded = .init(allocator);
1704    defer threaded.deinit();
1705    const io = threaded.io();
1706
1707    const args = try std.process.argsAlloc(allocator);
1708    defer std.process.argsFree(allocator, args);
1709    if (args.len != 5) {
1710        return error.InvalidArguments; // Arguments: format packfile commit worktree
1711    }
1712
1713    const format = std.meta.stringToEnum(Oid.Format, args[1]) orelse return error.InvalidFormat;
1714
1715    var pack_file = try std.fs.cwd().openFile(args[2], .{});
1716    defer pack_file.close();
1717    var pack_file_buffer: [4096]u8 = undefined;
1718    var pack_file_reader = pack_file.reader(io, &pack_file_buffer);
1719
1720    const commit = try Oid.parse(format, args[3]);
1721    var worktree = try std.fs.cwd().makeOpenPath(args[4], .{});
1722    defer worktree.close();
1723
1724    var git_dir = try worktree.makeOpenPath(".git", .{});
1725    defer git_dir.close();
1726
1727    std.debug.print("Starting index...\n", .{});
1728    var index_file = try git_dir.createFile("idx", .{ .read = true });
1729    defer index_file.close();
1730    var index_file_buffer: [4096]u8 = undefined;
1731    var index_file_writer = index_file.writer(&index_file_buffer);
1732    try indexPack(allocator, format, &pack_file_reader, &index_file_writer);
1733
1734    std.debug.print("Starting checkout...\n", .{});
1735    var index_file_reader = index_file.reader(io, &index_file_buffer);
1736    var repository: Repository = undefined;
1737    try repository.init(allocator, format, &pack_file_reader, &index_file_reader);
1738    defer repository.deinit();
1739    var diagnostics: Diagnostics = .{ .allocator = allocator };
1740    defer diagnostics.deinit();
1741    try repository.checkout(worktree, commit, &diagnostics);
1742
1743    for (diagnostics.errors.items) |err| {
1744        std.debug.print("Diagnostic: {}\n", .{err});
1745    }
1746}