master
1//! Git support for package fetching.
2//!
3//! This is not intended to support all features of Git: it is limited to the
4//! basic functionality needed to clone a repository for the purpose of fetching
5//! a package.
6
7const std = @import("std");
8const Io = std.Io;
9const mem = std.mem;
10const testing = std.testing;
11const Allocator = mem.Allocator;
12const Sha1 = std.crypto.hash.Sha1;
13const Sha256 = std.crypto.hash.sha2.Sha256;
14const assert = std.debug.assert;
15
/// The ID of a Git object.
///
/// The active tag selects the hash function used by the repository; the
/// payload is the raw (binary, not hex) digest.
pub const Oid = union(Format) {
    sha1: [Sha1.digest_length]u8,
    sha256: [Sha256.digest_length]u8,

    /// The maximum length, in bytes, of an OID rendered as hex across all
    /// supported formats (computed at comptime).
    pub const max_formatted_length = len: {
        var max: usize = 0;
        for (std.enums.values(Format)) |f| {
            max = @max(max, f.formattedLength());
        }
        break :len max;
    };

    /// The object ID (hash) formats supported by this implementation.
    pub const Format = enum {
        sha1,
        sha256,

        /// Returns the length of the raw (binary) digest for this format.
        pub fn byteLength(f: Format) usize {
            return switch (f) {
                .sha1 => Sha1.digest_length,
                .sha256 => Sha256.digest_length,
            };
        }

        /// Returns the length of the hex-encoded digest: two characters per
        /// digest byte.
        pub fn formattedLength(f: Format) usize {
            return 2 * f.byteLength();
        }
    };

    /// An incremental hasher whose variant matches the OID format.
    const Hasher = union(Format) {
        sha1: Sha1,
        sha256: Sha256,

        fn init(oid_format: Format) Hasher {
            return switch (oid_format) {
                .sha1 => .{ .sha1 = Sha1.init(.{}) },
                .sha256 => .{ .sha256 = Sha256.init(.{}) },
            };
        }

        // Must be public for use from HashedReader and HashedWriter.
        pub fn update(hasher: *Hasher, b: []const u8) void {
            switch (hasher.*) {
                inline else => |*inner| inner.update(b),
            }
        }

        /// Finalizes the hash and returns it as an `Oid` of the matching
        /// format.
        fn finalResult(hasher: *Hasher) Oid {
            return switch (hasher.*) {
                inline else => |*inner, tag| @unionInit(Oid, @tagName(tag), inner.finalResult()),
            };
        }
    };

    /// A hashing `Io.Writer` whose variant matches the OID format: all bytes
    /// written through `writer()` are folded into the digest.
    const Hashing = union(Format) {
        sha1: Io.Writer.Hashing(Sha1),
        sha256: Io.Writer.Hashing(Sha256),

        fn init(oid_format: Format, buffer: []u8) Hashing {
            return switch (oid_format) {
                .sha1 => .{ .sha1 = .init(buffer) },
                .sha256 => .{ .sha256 = .init(buffer) },
            };
        }

        fn writer(h: *@This()) *Io.Writer {
            return switch (h.*) {
                inline else => |*inner| &inner.writer,
            };
        }

        /// Flushes any buffered bytes into the hash state and returns the
        /// final digest as an `Oid`.
        fn final(h: *@This()) Oid {
            switch (h.*) {
                inline else => |*inner, tag| {
                    inner.writer.flush() catch unreachable; // hashers cannot fail
                    return @unionInit(Oid, @tagName(tag), inner.hasher.finalResult());
                },
            }
        }
    };

    /// Constructs an `Oid` from a raw (binary) digest.
    /// Asserts `bytes.len == oid_format.byteLength()`.
    pub fn fromBytes(oid_format: Format, bytes: []const u8) Oid {
        assert(bytes.len == oid_format.byteLength());
        return switch (oid_format) {
            inline else => |tag| @unionInit(Oid, @tagName(tag), bytes[0..comptime tag.byteLength()].*),
        };
    }

    /// Reads a raw (binary) digest from `reader` and returns it as an `Oid`.
    pub fn readBytes(oid_format: Format, reader: *Io.Reader) !Oid {
        return switch (oid_format) {
            inline else => |tag| @unionInit(Oid, @tagName(tag), (try reader.takeArray(tag.byteLength())).*),
        };
    }

    /// Parses a hex-encoded object ID. `s` must be exactly
    /// `oid_format.formattedLength()` characters long.
    pub fn parse(oid_format: Format, s: []const u8) error{InvalidOid}!Oid {
        switch (oid_format) {
            inline else => |tag| {
                if (s.len != tag.formattedLength()) return error.InvalidOid;
                var bytes: [tag.byteLength()]u8 = undefined;
                for (&bytes, 0..) |*b, i| {
                    // Each digest byte is two hex characters.
                    b.* = std.fmt.parseUnsigned(u8, s[2 * i ..][0..2], 16) catch return error.InvalidOid;
                }
                return @unionInit(Oid, @tagName(tag), bytes);
            },
        }
    }

    test parse {
        try testing.expectEqualSlices(
            u8,
            &.{ 0xCE, 0x91, 0x9C, 0xCF, 0x45, 0x95, 0x18, 0x56, 0xA7, 0x62, 0xFF, 0xDB, 0x8E, 0xF8, 0x50, 0x30, 0x1C, 0xD8, 0xC5, 0x88 },
            &(try parse(.sha1, "ce919ccf45951856a762ffdb8ef850301cd8c588")).sha1,
        );
        try testing.expectError(error.InvalidOid, parse(.sha256, "ce919ccf45951856a762ffdb8ef850301cd8c588"));
        try testing.expectError(error.InvalidOid, parse(.sha1, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a"));
        try testing.expectEqualSlices(
            u8,
            &.{ 0x7F, 0x44, 0x4A, 0x92, 0xBD, 0x45, 0x72, 0xEE, 0x4A, 0x28, 0xB2, 0xC6, 0x30, 0x59, 0x92, 0x4A, 0x9C, 0xA1, 0x82, 0x91, 0x38, 0x55, 0x3E, 0xF3, 0xE7, 0xC4, 0x1E, 0xE1, 0x59, 0xAF, 0xAE, 0x7A },
            &(try parse(.sha256, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a")).sha256,
        );
        try testing.expectError(error.InvalidOid, parse(.sha1, "ce919ccf"));
        try testing.expectError(error.InvalidOid, parse(.sha256, "ce919ccf"));
        try testing.expectError(error.InvalidOid, parse(.sha1, "master"));
        try testing.expectError(error.InvalidOid, parse(.sha256, "master"));
        try testing.expectError(error.InvalidOid, parse(.sha1, "HEAD"));
        try testing.expectError(error.InvalidOid, parse(.sha256, "HEAD"));
    }

    /// Parses a hex-encoded object ID, inferring the format from the length
    /// of `s`.
    pub fn parseAny(s: []const u8) error{InvalidOid}!Oid {
        return for (std.enums.values(Format)) |f| {
            if (s.len == f.formattedLength()) break parse(f, s);
        } else error.InvalidOid;
    }

    /// Formats the OID as hex.
    pub fn format(oid: Oid, writer: *Io.Writer) Io.Writer.Error!void {
        try writer.print("{x}", .{oid.slice()});
    }

    /// Returns the raw (binary) digest bytes of the OID.
    pub fn slice(oid: *const Oid) []const u8 {
        return switch (oid.*) {
            inline else => |*bytes| bytes,
        };
    }
};
160
/// Collects non-fatal errors encountered during checkout so a single bad
/// entry does not abort the entire operation. Recorded path strings are
/// owned by `allocator` and freed in `deinit`.
pub const Diagnostics = struct {
    allocator: Allocator,
    errors: std.ArrayList(Error) = .empty,

    pub const Error = union(enum) {
        unable_to_create_sym_link: struct {
            code: anyerror,
            file_name: []const u8,
            link_name: []const u8,
        },
        unable_to_create_file: struct {
            code: anyerror,
            file_name: []const u8,
        },
    };

    /// Frees all recorded errors (including their owned strings) and
    /// invalidates the diagnostics.
    pub fn deinit(d: *Diagnostics) void {
        const gpa = d.allocator;
        for (d.errors.items) |recorded| switch (recorded) {
            .unable_to_create_sym_link => |info| {
                gpa.free(info.file_name);
                gpa.free(info.link_name);
            },
            .unable_to_create_file => |info| gpa.free(info.file_name),
        };
        d.errors.deinit(gpa);
        d.* = undefined;
    }
};
193
/// A Git repository backed by a single packfile object database.
pub const Repository = struct {
    odb: Odb,

    /// Initializes the repository from open pack and index files.
    /// `pack_file` and `index_file` must remain valid while the repository
    /// is in use.
    pub fn init(
        repo: *Repository,
        allocator: Allocator,
        format: Oid.Format,
        pack_file: *std.fs.File.Reader,
        index_file: *std.fs.File.Reader,
    ) !void {
        repo.* = .{ .odb = undefined };
        try repo.odb.init(allocator, format, pack_file, index_file);
    }

    pub fn deinit(repository: *Repository) void {
        repository.odb.deinit();
        repository.* = undefined;
    }

    /// Checks out the repository at `commit_oid` to `worktree`.
    ///
    /// Failures to create individual files or symlinks are recorded in
    /// `diagnostics` rather than aborting the checkout.
    pub fn checkout(
        repository: *Repository,
        worktree: std.fs.Dir,
        commit_oid: Oid,
        diagnostics: *Diagnostics,
    ) !void {
        try repository.odb.seekOid(commit_oid);
        const tree_oid = tree_oid: {
            const commit_object = try repository.odb.readObject();
            if (commit_object.type != .commit) return error.NotACommit;
            break :tree_oid try getCommitTree(repository.odb.format, commit_object.data);
        };
        try repository.checkoutTree(worktree, tree_oid, "", diagnostics);
    }

    /// Checks out the tree at `tree_oid` to `dir`, recursing into subtrees.
    /// `current_path` is the worktree-relative path of `dir`, used only to
    /// build file names for diagnostics.
    fn checkoutTree(
        repository: *Repository,
        dir: std.fs.Dir,
        tree_oid: Oid,
        current_path: []const u8,
        diagnostics: *Diagnostics,
    ) !void {
        try repository.odb.seekOid(tree_oid);
        const tree_object = try repository.odb.readObject();
        if (tree_object.type != .tree) return error.NotATree;
        // The tree object may be evicted from the object cache while we're
        // iterating over it, so we can make a defensive copy here to make sure
        // it remains valid until we're done with it
        const tree_data = try repository.odb.allocator.dupe(u8, tree_object.data);
        defer repository.odb.allocator.free(tree_data);

        var tree_iter: TreeIterator = .{
            .format = repository.odb.format,
            .data = tree_data,
            .pos = 0,
        };
        while (try tree_iter.next()) |entry| {
            switch (entry.type) {
                .directory => {
                    try dir.makeDir(entry.name);
                    var subdir = try dir.openDir(entry.name, .{});
                    defer subdir.close();
                    const sub_path = try std.fs.path.join(repository.odb.allocator, &.{ current_path, entry.name });
                    defer repository.odb.allocator.free(sub_path);
                    try repository.checkoutTree(subdir, entry.oid, sub_path, diagnostics);
                },
                .file => {
                    try repository.odb.seekOid(entry.oid);
                    const file_object = try repository.odb.readObject();
                    if (file_object.type != .blob) return error.InvalidFile;
                    // Creation failures are recorded as diagnostics rather
                    // than aborting the whole checkout.
                    var file = dir.createFile(entry.name, .{ .exclusive = true }) catch |e| {
                        const file_name = try std.fs.path.join(diagnostics.allocator, &.{ current_path, entry.name });
                        errdefer diagnostics.allocator.free(file_name);
                        try diagnostics.errors.append(diagnostics.allocator, .{ .unable_to_create_file = .{
                            .code = e,
                            .file_name = file_name,
                        } });
                        continue;
                    };
                    defer file.close();
                    try file.writeAll(file_object.data);
                },
                .symlink => {
                    try repository.odb.seekOid(entry.oid);
                    const symlink_object = try repository.odb.readObject();
                    if (symlink_object.type != .blob) return error.InvalidFile;
                    // The blob content of a symlink entry is the link target.
                    const link_name = symlink_object.data;
                    dir.symLink(link_name, entry.name, .{}) catch |e| {
                        const file_name = try std.fs.path.join(diagnostics.allocator, &.{ current_path, entry.name });
                        errdefer diagnostics.allocator.free(file_name);
                        const link_name_dup = try diagnostics.allocator.dupe(u8, link_name);
                        errdefer diagnostics.allocator.free(link_name_dup);
                        try diagnostics.errors.append(diagnostics.allocator, .{ .unable_to_create_sym_link = .{
                            .code = e,
                            .file_name = file_name,
                            .link_name = link_name_dup,
                        } });
                    };
                },
                .gitlink => {
                    // Consistent with git archive behavior, create the directory but
                    // do nothing else
                    try dir.makeDir(entry.name);
                },
            }
        }
    }

    /// Returns the ID of the tree associated with the given commit (provided as
    /// raw object data).
    ///
    /// The commit object data is expected to begin with a
    /// "tree <hex-oid>\n" line.
    fn getCommitTree(format: Oid.Format, commit_data: []const u8) !Oid {
        if (!mem.startsWith(u8, commit_data, "tree ") or
            commit_data.len < "tree ".len + format.formattedLength() + "\n".len or
            commit_data["tree ".len + format.formattedLength()] != '\n')
        {
            return error.InvalidCommit;
        }
        return try .parse(format, commit_data["tree ".len..][0..format.formattedLength()]);
    }

    /// Iterates over the entries of a raw tree object.
    const TreeIterator = struct {
        format: Oid.Format,
        data: []const u8,
        pos: usize,

        const Entry = struct {
            type: Type,
            executable: bool,
            /// Borrowed from `TreeIterator.data`; valid only while the
            /// iterator's data is.
            name: [:0]const u8,
            oid: Oid,

            /// The entry type, stored in the high 4 bits of the octal mode.
            const Type = enum(u4) {
                directory = 0o4,
                file = 0o10,
                symlink = 0o12,
                gitlink = 0o16,
            };
        };

        /// Returns the next tree entry, or null at the end of the data.
        fn next(iterator: *TreeIterator) !?Entry {
            if (iterator.pos == iterator.data.len) return null;

            // Each entry starts with an ASCII octal mode terminated by a
            // space, e.g. "100644" for a regular file.
            const mode_end = mem.indexOfScalarPos(u8, iterator.data, iterator.pos, ' ') orelse return error.InvalidTree;
            const mode: packed struct {
                permission: u9,
                unused: u3,
                type: u4,
            } = @bitCast(std.fmt.parseUnsigned(u16, iterator.data[iterator.pos..mode_end], 8) catch return error.InvalidTree);
            const @"type" = std.enums.fromInt(Entry.Type, mode.type) orelse return error.InvalidTree;
            // Only files carry permission bits, and only 644 or 755 are
            // accepted; any other combination is rejected as invalid.
            const executable = switch (mode.permission) {
                0 => if (@"type" == .file) return error.InvalidTree else false,
                0o644 => if (@"type" != .file) return error.InvalidTree else false,
                0o755 => if (@"type" != .file) return error.InvalidTree else true,
                else => return error.InvalidTree,
            };
            iterator.pos = mode_end + 1;

            // The mode is followed by a NUL-terminated entry name.
            const name_end = mem.indexOfScalarPos(u8, iterator.data, iterator.pos, 0) orelse return error.InvalidTree;
            const name = iterator.data[iterator.pos..name_end :0];
            iterator.pos = name_end + 1;

            // The name is followed immediately by the raw (binary) object ID.
            const oid_length = iterator.format.byteLength();
            if (iterator.pos + oid_length > iterator.data.len) return error.InvalidTree;
            const oid: Oid = .fromBytes(iterator.format, iterator.data[iterator.pos..][0..oid_length]);
            iterator.pos += oid_length;

            return .{ .type = @"type", .executable = executable, .name = name, .oid = oid };
        }
    };
};
365
366/// A Git object database backed by a packfile. A packfile index is also used
367/// for efficient access to objects in the packfile.
368///
369/// The format of the packfile and its associated index are documented in
370/// [pack-format](https://git-scm.com/docs/pack-format).
const Odb = struct {
    format: Oid.Format,
    pack_file: *std.fs.File.Reader,
    index_header: IndexHeader,
    index_file: *std.fs.File.Reader,
    cache: ObjectCache = .{},
    allocator: Allocator,

    /// Initializes the database from open pack and index files.
    fn init(
        odb: *Odb,
        allocator: Allocator,
        format: Oid.Format,
        pack_file: *std.fs.File.Reader,
        index_file: *std.fs.File.Reader,
    ) !void {
        try pack_file.seekTo(0);
        try index_file.seekTo(0);
        odb.* = .{
            .format = format,
            .pack_file = pack_file,
            .index_header = undefined,
            .index_file = index_file,
            .allocator = allocator,
        };
        try odb.index_header.read(&index_file.interface);
    }

    fn deinit(odb: *Odb) void {
        odb.cache.deinit(odb.allocator);
        odb.* = undefined;
    }

    /// Reads the object at the current position in the database.
    ///
    /// Delta entries (both offset and reference deltas) are walked back to a
    /// non-delta base object, then the recorded chain of deltas is applied to
    /// reconstruct the final data. Results are stored in `odb.cache`; the
    /// returned data is owned by the cache and must not be freed by the
    /// caller.
    fn readObject(odb: *Odb) !Object {
        var base_offset = odb.pack_file.logicalPos();
        var base_header: EntryHeader = undefined;
        // Pack offsets of the delta entries encountered while walking back
        // to the base object, in encounter order.
        var delta_offsets: std.ArrayList(u64) = .empty;
        defer delta_offsets.deinit(odb.allocator);
        const base_object = while (true) {
            if (odb.cache.get(base_offset)) |base_object| break base_object;

            base_header = try EntryHeader.read(odb.format, &odb.pack_file.interface);
            switch (base_header) {
                .ofs_delta => |ofs_delta| {
                    try delta_offsets.append(odb.allocator, base_offset);
                    // ofs_delta.offset is a backwards distance from this
                    // entry's own offset.
                    base_offset = std.math.sub(u64, base_offset, ofs_delta.offset) catch return error.InvalidFormat;
                    try odb.pack_file.seekTo(base_offset);
                },
                .ref_delta => |ref_delta| {
                    try delta_offsets.append(odb.allocator, base_offset);
                    try odb.seekOid(ref_delta.base_object);
                    base_offset = odb.pack_file.logicalPos();
                },
                else => {
                    // A non-delta object: this is the base of the chain.
                    const base_data = try readObjectRaw(odb.allocator, &odb.pack_file.interface, base_header.uncompressedLength());
                    errdefer odb.allocator.free(base_data);
                    const base_object: Object = .{ .type = base_header.objectType(), .data = base_data };
                    try odb.cache.put(odb.allocator, base_offset, base_object);
                    break base_object;
                },
            }
        };

        const base_data = try resolveDeltaChain(
            odb.allocator,
            odb.format,
            odb.pack_file,
            base_object,
            delta_offsets.items,
            &odb.cache,
        );

        return .{ .type = base_object.type, .data = base_data };
    }

    /// Seeks the pack file to the beginning of the object with the given ID,
    /// looking it up via the pack index.
    fn seekOid(odb: *Odb, oid: Oid) !void {
        const oid_length = odb.format.byteLength();
        // The fan-out table narrows the search to OIDs sharing the first
        // byte: entry N holds the number of objects whose first byte is <= N.
        const key = oid.slice()[0];
        var start_index = if (key > 0) odb.index_header.fan_out_table[key - 1] else 0;
        var end_index = odb.index_header.fan_out_table[key];
        // Binary search within the sorted OID table.
        const found_index = while (start_index < end_index) {
            const mid_index = start_index + (end_index - start_index) / 2;
            try odb.index_file.seekTo(IndexHeader.size + mid_index * oid_length);
            const mid_oid = try Oid.readBytes(odb.format, &odb.index_file.interface);
            switch (mem.order(u8, mid_oid.slice(), oid.slice())) {
                .lt => start_index = mid_index + 1,
                .gt => end_index = mid_index,
                .eq => break mid_index,
            }
        } else return error.ObjectNotFound;

        const n_objects = odb.index_header.fan_out_table[255];
        // Skip past the OID table and the 4-byte-per-object CRC32 table to
        // reach the table of 4-byte (level-1) pack offsets.
        const offset_values_start = IndexHeader.size + n_objects * (oid_length + 4);
        try odb.index_file.seekTo(offset_values_start + found_index * 4);
        // If the MSB of the level-1 offset is set, the remaining 31 bits are
        // an index into a second table of 8-byte offsets (used for packs
        // larger than 2 GiB).
        const l1_offset: packed struct { value: u31, big: bool } = @bitCast(try odb.index_file.interface.takeInt(u32, .big));
        const pack_offset = pack_offset: {
            if (l1_offset.big) {
                const l2_offset_values_start = offset_values_start + n_objects * 4;
                // Level-2 entries are 8 bytes each, so the byte stride here
                // must be 8 (a stride of 4 would land mid-entry and read an
                // overlapping, garbage offset for any index > 0).
                try odb.index_file.seekTo(l2_offset_values_start + l1_offset.value * 8);
                break :pack_offset try odb.index_file.interface.takeInt(u64, .big);
            } else {
                break :pack_offset l1_offset.value;
            }
        };

        try odb.pack_file.seekTo(pack_offset);
    }
};
481
/// A Git object: its type and its raw (uncompressed, delta-resolved) content.
const Object = struct {
    type: Type,
    data: []const u8,

    /// The non-delta object types; delta entries are resolved before an
    /// `Object` is produced (see `Odb.readObject`).
    const Type = enum {
        commit,
        tree,
        blob,
        tag,
    };
};
493
494/// A cache for object data.
495///
496/// The purpose of this cache is to speed up resolution of deltas by caching the
497/// results of resolving delta objects, while maintaining a maximum cache size
498/// to avoid excessive memory usage. If the total size of the objects in the
499/// cache exceeds the maximum, the cache will begin evicting the least recently
500/// used objects: when resolving delta chains, the most recently used objects
501/// will likely be more helpful as they will be further along in the chain
502/// (skipping earlier reconstruction steps).
503///
504/// Object data stored in the cache is managed by the cache. It should not be
505/// freed by the caller at any point after inserting it into the cache. Any
506/// objects remaining in the cache will be freed when the cache itself is freed.
const ObjectCache = struct {
    /// Maps pack file offset -> cached object plus its LRU list node.
    objects: std.AutoHashMapUnmanaged(u64, CacheEntry) = .empty,
    /// A list of offsets stored in the cache, with the most recently used
    /// entries at the end.
    lru_nodes: std.DoublyLinkedList = .{},
    lru_nodes_len: usize = 0,
    /// Total size of all cached object data, used to enforce `max_byte_size`.
    byte_size: usize = 0,

    const max_byte_size = 128 * 1024 * 1024; // 128MiB
    /// An intrusive LRU list node; `data` is the pack offset of the entry.
    const LruListNode = struct {
        data: u64,
        node: std.DoublyLinkedList.Node,
    };
    const CacheEntry = struct { object: Object, lru_node: *LruListNode };

    /// Frees all cached object data and LRU nodes.
    fn deinit(cache: *ObjectCache, allocator: Allocator) void {
        var object_iterator = cache.objects.iterator();
        while (object_iterator.next()) |object| {
            allocator.free(object.value_ptr.object.data);
            allocator.destroy(object.value_ptr.lru_node);
        }
        cache.objects.deinit(allocator);
        cache.* = undefined;
    }

    /// Gets an object from the cache, moving it to the most recently used
    /// position if it is present.
    fn get(cache: *ObjectCache, offset: u64) ?Object {
        if (cache.objects.get(offset)) |entry| {
            // Move the entry's node to the tail (most recently used end).
            cache.lru_nodes.remove(&entry.lru_node.node);
            cache.lru_nodes.append(&entry.lru_node.node);
            return entry.object;
        } else {
            return null;
        }
    }

    /// Puts an object in the cache, possibly evicting older entries if the
    /// cache exceeds its maximum size. Note that, although old objects may
    /// be evicted, the object just added to the cache with this function
    /// will not be evicted before the next call to `put` or `deinit` even if
    /// it exceeds the maximum cache size.
    ///
    /// Takes ownership of `object.data` (freed on eviction or `deinit`).
    fn put(cache: *ObjectCache, allocator: Allocator, offset: u64, object: Object) !void {
        const lru_node = try allocator.create(LruListNode);
        errdefer allocator.destroy(lru_node);
        lru_node.data = offset;

        const gop = try cache.objects.getOrPut(allocator, offset);
        if (gop.found_existing) {
            // Replacing an existing entry: release its data and LRU node.
            cache.byte_size -= gop.value_ptr.object.data.len;
            cache.lru_nodes.remove(&gop.value_ptr.lru_node.node);
            cache.lru_nodes_len -= 1;
            allocator.destroy(gop.value_ptr.lru_node);
            allocator.free(gop.value_ptr.object.data);
        }
        gop.value_ptr.* = .{ .object = object, .lru_node = lru_node };
        cache.byte_size += object.data.len;
        cache.lru_nodes.append(&lru_node.node);
        cache.lru_nodes_len += 1;

        while (cache.byte_size > max_byte_size and cache.lru_nodes_len > 1) {
            // The > 1 check is to make sure that we don't evict the most
            // recently added node, even if it by itself happens to exceed the
            // maximum size of the cache.
            const evict_node: *LruListNode = @alignCast(@fieldParentPtr("node", cache.lru_nodes.popFirst().?));
            cache.lru_nodes_len -= 1;
            const evict_offset = evict_node.data;
            allocator.destroy(evict_node);
            const evict_object = cache.objects.get(evict_offset).?.object;
            cache.byte_size -= evict_object.data.len;
            allocator.free(evict_object.data);
            _ = cache.objects.remove(evict_offset);
        }
    }
};
582
583/// A single pkt-line in the Git protocol.
584///
585/// The format of a pkt-line is documented in
586/// [protocol-common](https://git-scm.com/docs/protocol-common). The special
587/// meanings of the delimiter and response-end packets are documented in
588/// [protocol-v2](https://git-scm.com/docs/protocol-v2).
pub const Packet = union(enum) {
    flush,
    delimiter,
    response_end,
    data: []const u8,

    /// 65520 (the maximum pkt-line length) minus the 4-byte length header.
    pub const max_data_length = 65516;

    /// Reads a packet in pkt-line format, consuming it from the reader.
    fn read(reader: *Io.Reader) !Packet {
        const packet = try peek(reader);
        if (packet == .data) reader.toss(packet.data.len);
        return packet;
    }

    /// Consumes the header of a pkt-line packet and reads any associated data
    /// into the reader's buffer, but does not consume the data.
    fn peek(reader: *Io.Reader) !Packet {
        // The header is a 4-character hex length which includes itself.
        const length = std.fmt.parseUnsigned(u16, try reader.take(4), 16) catch return error.InvalidPacket;
        if (length == 0) return .flush;
        if (length == 1) return .delimiter;
        if (length == 2) return .response_end;
        // Length 3 is invalid (the header alone is 4 bytes), as is anything
        // whose payload exceeds the maximum data length.
        if (length == 3 or length - 4 > max_data_length) return error.InvalidPacket;
        return .{ .data = try reader.peek(length - 4) };
    }

    /// Writes a packet in pkt-line format.
    fn write(packet: Packet, writer: *Io.Writer) !void {
        switch (packet) {
            .flush => try writer.writeAll("0000"),
            .delimiter => try writer.writeAll("0001"),
            .response_end => try writer.writeAll("0002"),
            .data => |data| {
                assert(data.len <= max_data_length);
                // The length field counts its own 4 bytes.
                try writer.print("{x:0>4}", .{data.len + 4});
                try writer.writeAll(data);
            },
        }
    }

    /// Returns the normalized form of textual packet data, stripping any
    /// trailing '\n'.
    ///
    /// As documented in
    /// [protocol-common](https://git-scm.com/docs/protocol-common#_pkt_line_format),
    /// non-binary (textual) pkt-line data should contain a trailing '\n', but
    /// is not required to do so (implementations must support both forms).
    fn normalizeText(data: []const u8) []const u8 {
        if (!mem.endsWith(u8, data, "\n")) return data;
        return data[0 .. data.len - 1];
    }
};
649
650/// A client session for the Git protocol, currently limited to an HTTP(S)
651/// transport. Only protocol version 2 is supported, as documented in
652/// [protocol-v2](https://git-scm.com/docs/protocol-v2).
653pub const Session = struct {
654 transport: *std.http.Client,
655 location: Location,
656 supports_agent: bool,
657 supports_shallow: bool,
658 object_format: Oid.Format,
659 arena: Allocator,
660
    /// User agent string advertised to the server, e.g. "zig/0.15.0".
    const agent = "zig/" ++ @import("builtin").zig_version_string;
    /// Pre-rendered "agent=..." capability line for protocol-v2 requests.
    const agent_capability = std.fmt.comptimePrint("agent={s}\n", .{agent});
663
664 /// Initializes a client session and discovers the capabilities of the
665 /// server for optimal transport.
    pub fn init(
        arena: Allocator,
        transport: *std.http.Client,
        uri: std.Uri,
        /// Asserted to be at least `Packet.max_data_length`
        response_buffer: []u8,
    ) !Session {
        assert(response_buffer.len >= Packet.max_data_length);
        // Start from the least capable defaults; upgrade below based on the
        // capabilities the server advertises.
        var session: Session = .{
            .transport = transport,
            .location = try .init(arena, uri),
            .supports_agent = false,
            .supports_shallow = false,
            .object_format = .sha1,
            .arena = arena,
        };
        var capability_iterator: CapabilityIterator = undefined;
        try session.getCapabilities(&capability_iterator, response_buffer);
        defer capability_iterator.deinit();
        while (try capability_iterator.next()) |capability| {
            if (mem.eql(u8, capability.key, "agent")) {
                session.supports_agent = true;
            } else if (mem.eql(u8, capability.key, "fetch")) {
                // The "fetch" capability value is a space-separated list of
                // supported fetch features; we only care about "shallow".
                var feature_iterator = mem.splitScalar(u8, capability.value orelse continue, ' ');
                while (feature_iterator.next()) |feature| {
                    if (mem.eql(u8, feature, "shallow")) {
                        session.supports_shallow = true;
                    }
                }
            } else if (mem.eql(u8, capability.key, "object-format")) {
                // Unrecognized object formats are ignored, leaving the sha1
                // default in place.
                if (std.meta.stringToEnum(Oid.Format, capability.value orelse continue)) |format| {
                    session.object_format = format;
                }
            }
        }
        return session;
    }
703
704 /// An owned `std.Uri` representing the location of the server (base URI).
    const Location = struct {
        uri: std.Uri,

        /// Duplicates all components of `uri` into `arena` so the location
        /// does not depend on the lifetime of the original URI's backing
        /// memory.
        fn init(arena: Allocator, uri: std.Uri) !Location {
            const scheme = try arena.dupe(u8, uri.scheme);
            const user = if (uri.user) |user| try std.fmt.allocPrint(arena, "{f}", .{
                std.fmt.alt(user, .formatUser),
            }) else null;
            const password = if (uri.password) |password| try std.fmt.allocPrint(arena, "{f}", .{
                std.fmt.alt(password, .formatPassword),
            }) else null;
            const host = if (uri.host) |host| try std.fmt.allocPrint(arena, "{f}", .{
                std.fmt.alt(host, .formatHost),
            }) else null;
            const path = try std.fmt.allocPrint(arena, "{f}", .{
                std.fmt.alt(uri.path, .formatPath),
            });
            // The query and fragment are not used as part of the base server URI.
            return .{
                .uri = .{
                    .scheme = scheme,
                    .user = if (user) |s| .{ .percent_encoded = s } else null,
                    .password = if (password) |s| .{ .percent_encoded = s } else null,
                    .host = if (host) |s| .{ .percent_encoded = s } else null,
                    .port = uri.port,
                    .path = .{ .percent_encoded = path },
                },
            };
        }
    };
735
736 /// Returns an iterator over capabilities supported by the server.
737 ///
738 /// The `session.location` is updated if the server returns a redirect, so
739 /// that subsequent session functions do not need to handle redirects.
    fn getCapabilities(session: *Session, it: *CapabilityIterator, response_buffer: []u8) !void {
        const arena = session.arena;
        assert(response_buffer.len >= Packet.max_data_length);
        // Build the capability advertisement URL:
        // <base>/info/refs?service=git-upload-pack
        var info_refs_uri = session.location.uri;
        {
            const session_uri_path = try std.fmt.allocPrint(arena, "{f}", .{
                std.fmt.alt(session.location.uri.path, .formatPath),
            });
            info_refs_uri.path = .{ .percent_encoded = try std.fs.path.resolvePosix(arena, &.{
                "/", session_uri_path, "info/refs",
            }) };
        }
        info_refs_uri.query = .{ .percent_encoded = "service=git-upload-pack" };
        info_refs_uri.fragment = null;

        const max_redirects = 3;
        it.* = .{
            .request = try session.transport.request(.GET, info_refs_uri, .{
                .redirect_behavior = .init(max_redirects),
                .extra_headers = &.{
                    .{ .name = "Git-Protocol", .value = "version=2" },
                },
            }),
            .reader = undefined,
            .decompress = undefined,
        };
        errdefer it.deinit();
        const request = &it.request;
        try request.sendBodiless();

        var redirect_buffer: [1024]u8 = undefined;
        var response = try request.receiveHead(&redirect_buffer);
        if (response.head.status != .ok) return error.ProtocolError;
        // If any redirect was followed, derive the new base location from the
        // final request URI (stripping the "/info/refs" suffix) so subsequent
        // requests go directly to the redirected server.
        const any_redirects_occurred = request.redirect_behavior.remaining() < max_redirects;
        if (any_redirects_occurred) {
            const request_uri_path = try std.fmt.allocPrint(arena, "{f}", .{
                std.fmt.alt(request.uri.path, .formatPath),
            });
            if (!mem.endsWith(u8, request_uri_path, "/info/refs")) return error.UnparseableRedirect;
            var new_uri = request.uri;
            new_uri.path = .{ .percent_encoded = request_uri_path[0 .. request_uri_path.len - "/info/refs".len] };
            session.location = try .init(arena, new_uri);
        }

        const decompress_buffer = try arena.alloc(u8, response.head.content_encoding.minBufferCapacity());
        it.reader = response.readerDecompressing(response_buffer, &it.decompress, decompress_buffer);
        var state: enum { response_start, response_content } = .response_start;
        while (true) {
            // Some Git servers (at least GitHub) include an additional
            // '# service=git-upload-pack' informative response before sending
            // the expected 'version 2' packet and capability information.
            // This is not universal: SourceHut, for example, does not do this.
            // Thus, we need to skip any such useless additional responses
            // before we get the one we're actually looking for. The responses
            // will be delimited by flush packets.
            const packet = Packet.read(it.reader) catch |err| switch (err) {
                error.EndOfStream => return error.UnsupportedProtocol, // 'version 2' packet not found
                else => |e| return e,
            };
            switch (packet) {
                .flush => state = .response_start,
                .data => |data| switch (state) {
                    // Success: the iterator is positioned just after the
                    // 'version 2' packet, at the capability list.
                    .response_start => if (mem.eql(u8, Packet.normalizeText(data), "version 2")) {
                        return;
                    } else {
                        state = .response_content;
                    },
                    else => {},
                },
                else => return error.UnexpectedPacket,
            }
        }
    }
813
814 const CapabilityIterator = struct {
815 request: std.http.Client.Request,
816 reader: *Io.Reader,
817 decompress: std.http.Decompress,
818
819 const Capability = struct {
820 key: []const u8,
821 value: ?[]const u8 = null,
822
823 fn parse(data: []const u8) Capability {
824 return if (mem.indexOfScalar(u8, data, '=')) |separator_pos|
825 .{ .key = data[0..separator_pos], .value = data[separator_pos + 1 ..] }
826 else
827 .{ .key = data };
828 }
829 };
830
831 fn deinit(it: *CapabilityIterator) void {
832 it.request.deinit();
833 it.* = undefined;
834 }
835
836 fn next(it: *CapabilityIterator) !?Capability {
837 switch (try Packet.read(it.reader)) {
838 .flush => return null,
839 .data => |data| return Capability.parse(Packet.normalizeText(data)),
840 else => return error.UnexpectedPacket,
841 }
842 }
843 };
844
    /// Options for `listRefs`.
    const ListRefsOptions = struct {
        /// The ref prefixes (if any) to use to filter the refs available on the
        /// server. Note that the client must still check the returned refs
        /// against its desired filters itself: the server is not required to
        /// respect these prefix filters and may return other refs as well.
        ref_prefixes: []const []const u8 = &.{},
        /// Whether to include symref targets for returned symbolic refs.
        include_symrefs: bool = false,
        /// Whether to include the peeled object ID for returned tag refs.
        include_peeled: bool = false,
        /// Scratch buffer used for both the request body and the response.
        /// Asserted to be at least `Packet.max_data_length`.
        buffer: []u8,
    };
858
859 /// Returns an iterator over refs known to the server.
    pub fn listRefs(session: Session, it: *RefIterator, options: ListRefsOptions) !void {
        const arena = session.arena;
        assert(options.buffer.len >= Packet.max_data_length);
        // Build the command URL: <base>/git-upload-pack
        var upload_pack_uri = session.location.uri;
        {
            const session_uri_path = try std.fmt.allocPrint(arena, "{f}", .{
                std.fmt.alt(session.location.uri.path, .formatPath),
            });
            upload_pack_uri.path = .{ .percent_encoded = try std.fs.path.resolvePosix(arena, &.{ "/", session_uri_path, "git-upload-pack" }) };
        }
        upload_pack_uri.query = null;
        upload_pack_uri.fragment = null;

        // Assemble the ls-refs request in pkt-line format: capabilities,
        // a delimiter packet, command arguments, then a flush packet.
        var body: Io.Writer = .fixed(options.buffer);
        try Packet.write(.{ .data = "command=ls-refs\n" }, &body);
        if (session.supports_agent) {
            try Packet.write(.{ .data = agent_capability }, &body);
        }
        {
            const object_format_packet = try std.fmt.allocPrint(arena, "object-format={t}\n", .{
                session.object_format,
            });
            try Packet.write(.{ .data = object_format_packet }, &body);
        }
        try Packet.write(.delimiter, &body);
        for (options.ref_prefixes) |ref_prefix| {
            const ref_prefix_packet = try std.fmt.allocPrint(arena, "ref-prefix {s}\n", .{ref_prefix});
            try Packet.write(.{ .data = ref_prefix_packet }, &body);
        }
        if (options.include_symrefs) {
            try Packet.write(.{ .data = "symrefs\n" }, &body);
        }
        if (options.include_peeled) {
            try Packet.write(.{ .data = "peel\n" }, &body);
        }
        try Packet.write(.flush, &body);

        it.* = .{
            .request = try session.transport.request(.POST, upload_pack_uri, .{
                // Redirects were already resolved during capability discovery
                // (see `getCapabilities`), so none are expected here.
                .redirect_behavior = .unhandled,
                .extra_headers = &.{
                    .{ .name = "Content-Type", .value = "application/x-git-upload-pack-request" },
                    .{ .name = "Git-Protocol", .value = "version=2" },
                },
            }),
            .reader = undefined,
            .format = session.object_format,
            .decompress = undefined,
        };
        const request = &it.request;
        errdefer request.deinit();
        try request.sendBodyComplete(body.buffered());

        var response = try request.receiveHead(options.buffer);
        if (response.head.status != .ok) return error.ProtocolError;
        const decompress_buffer = try arena.alloc(u8, response.head.content_encoding.minBufferCapacity());
        it.reader = response.readerDecompressing(options.buffer, &it.decompress, decompress_buffer);
    }
918
    /// Iterates over the pkt-line ref advertisement produced by `listRefs`.
    /// Owns the underlying HTTP request; call `deinit` when done.
    pub const RefIterator = struct {
        /// Object ID format used to parse OIDs in the ref data.
        format: Oid.Format,
        request: std.http.Client.Request,
        reader: *Io.Reader,
        decompress: std.http.Decompress,

        pub const Ref = struct {
            oid: Oid,
            name: []const u8,
            /// Populated from the "symref-target:" ref attribute, if present.
            symref_target: ?[]const u8,
            /// Populated from the "peeled:" ref attribute, if present.
            peeled: ?Oid,
        };

        pub fn deinit(iterator: *RefIterator) void {
            iterator.request.deinit();
            iterator.* = undefined;
        }

        /// Returns the next advertised ref, or null at the flush packet that
        /// terminates the listing. Each data packet has the form
        /// "oid SP name (SP attribute)*".
        ///
        /// NOTE(review): `name` and `symref_target` slice into the packet
        /// data read from `it.reader` — presumably only valid until the next
        /// call to `next`; confirm before retaining them.
        pub fn next(it: *RefIterator) !?Ref {
            switch (try Packet.read(it.reader)) {
                .flush => return null,
                .data => |data| {
                    const ref_data = Packet.normalizeText(data);
                    const oid_sep_pos = mem.indexOfScalar(u8, ref_data, ' ') orelse return error.InvalidRefPacket;
                    const oid = Oid.parse(it.format, data[0..oid_sep_pos]) catch return error.InvalidRefPacket;

                    const name_sep_pos = mem.indexOfScalarPos(u8, ref_data, oid_sep_pos + 1, ' ') orelse ref_data.len;
                    const name = ref_data[oid_sep_pos + 1 .. name_sep_pos];

                    // Scan any remaining space-separated attributes;
                    // unrecognized attributes are ignored.
                    var symref_target: ?[]const u8 = null;
                    var peeled: ?Oid = null;
                    var last_sep_pos = name_sep_pos;
                    while (last_sep_pos < ref_data.len) {
                        const next_sep_pos = mem.indexOfScalarPos(u8, ref_data, last_sep_pos + 1, ' ') orelse ref_data.len;
                        const attribute = ref_data[last_sep_pos + 1 .. next_sep_pos];
                        if (mem.startsWith(u8, attribute, "symref-target:")) {
                            symref_target = attribute["symref-target:".len..];
                        } else if (mem.startsWith(u8, attribute, "peeled:")) {
                            peeled = Oid.parse(it.format, attribute["peeled:".len..]) catch return error.InvalidRefPacket;
                        }
                        last_sep_pos = next_sep_pos;
                    }

                    return .{ .oid = oid, .name = name, .symref_target = symref_target, .peeled = peeled };
                },
                else => return error.UnexpectedPacket,
            }
        }
    };
968
969 /// Fetches the given refs from the server. A shallow fetch (depth 1) is
970 /// performed if the server supports it.
971 pub fn fetch(
972 session: Session,
973 fs: *FetchStream,
974 wants: []const []const u8,
975 /// Asserted to be at least `Packet.max_data_length`.
976 response_buffer: []u8,
977 ) !void {
978 const arena = session.arena;
979 assert(response_buffer.len >= Packet.max_data_length);
980 var upload_pack_uri = session.location.uri;
981 {
982 const session_uri_path = try std.fmt.allocPrint(arena, "{f}", .{
983 std.fmt.alt(session.location.uri.path, .formatPath),
984 });
985 upload_pack_uri.path = .{ .percent_encoded = try std.fs.path.resolvePosix(arena, &.{ "/", session_uri_path, "git-upload-pack" }) };
986 }
987 upload_pack_uri.query = null;
988 upload_pack_uri.fragment = null;
989
990 var body: Io.Writer = .fixed(response_buffer);
991 try Packet.write(.{ .data = "command=fetch\n" }, &body);
992 if (session.supports_agent) {
993 try Packet.write(.{ .data = agent_capability }, &body);
994 }
995 {
996 const object_format_packet = try std.fmt.allocPrint(arena, "object-format={s}\n", .{@tagName(session.object_format)});
997 try Packet.write(.{ .data = object_format_packet }, &body);
998 }
999 try Packet.write(.delimiter, &body);
1000 // Our packfile parser supports the OFS_DELTA object type
1001 try Packet.write(.{ .data = "ofs-delta\n" }, &body);
1002 // We do not currently convey server progress information to the user
1003 try Packet.write(.{ .data = "no-progress\n" }, &body);
1004 if (session.supports_shallow) {
1005 try Packet.write(.{ .data = "deepen 1\n" }, &body);
1006 }
1007 for (wants) |want| {
1008 var buf: [Packet.max_data_length]u8 = undefined;
1009 const arg = std.fmt.bufPrint(&buf, "want {s}\n", .{want}) catch unreachable;
1010 try Packet.write(.{ .data = arg }, &body);
1011 }
1012 try Packet.write(.{ .data = "done\n" }, &body);
1013 try Packet.write(.flush, &body);
1014
1015 fs.* = .{
1016 .request = try session.transport.request(.POST, upload_pack_uri, .{
1017 .redirect_behavior = .not_allowed,
1018 .extra_headers = &.{
1019 .{ .name = "Content-Type", .value = "application/x-git-upload-pack-request" },
1020 .{ .name = "Git-Protocol", .value = "version=2" },
1021 },
1022 }),
1023 .input = undefined,
1024 .reader = undefined,
1025 .remaining_len = undefined,
1026 .decompress = undefined,
1027 };
1028 const request = &fs.request;
1029 errdefer request.deinit();
1030
1031 try request.sendBodyComplete(body.buffered());
1032
1033 var response = try request.receiveHead(&.{});
1034 if (response.head.status != .ok) return error.ProtocolError;
1035
1036 const decompress_buffer = try arena.alloc(u8, response.head.content_encoding.minBufferCapacity());
1037 const reader = response.readerDecompressing(response_buffer, &fs.decompress, decompress_buffer);
1038 // We are not interested in any of the sections of the returned fetch
1039 // data other than the packfile section, since we aren't doing anything
1040 // complex like ref negotiation (this is a fresh clone).
1041 var state: enum { section_start, section_content } = .section_start;
1042 while (true) {
1043 const packet = try Packet.read(reader);
1044 switch (state) {
1045 .section_start => switch (packet) {
1046 .data => |data| if (mem.eql(u8, Packet.normalizeText(data), "packfile")) {
1047 fs.input = reader;
1048 fs.reader = .{
1049 .buffer = &.{},
1050 .vtable = &.{ .stream = FetchStream.stream },
1051 .seek = 0,
1052 .end = 0,
1053 };
1054 fs.remaining_len = 0;
1055 return;
1056 } else {
1057 state = .section_content;
1058 },
1059 else => return error.UnexpectedPacket,
1060 },
1061 .section_content => switch (packet) {
1062 .delimiter => state = .section_start,
1063 .data => {},
1064 else => return error.UnexpectedPacket,
1065 },
1066 }
1067 }
1068 }
1069
    /// Streams the packfile section of a fetch response, stripping the
    /// side-band framing so `reader` yields raw packfile bytes.
    pub const FetchStream = struct {
        request: std.http.Client.Request,
        /// The pkt-line stream from the server (after HTTP decompression).
        input: *Io.Reader,
        /// Yields raw packfile bytes, demultiplexed by `stream`.
        reader: Io.Reader,
        /// Holds the detailed failure cause when `stream` returns
        /// `error.ReadFailed`.
        err: ?Error = null,
        /// Number of pack-data bytes remaining in the current packet.
        remaining_len: usize,
        decompress: std.http.Decompress,

        pub fn deinit(fs: *FetchStream) void {
            fs.request.deinit();
        }

        pub const Error = error{
            InvalidPacket,
            ProtocolError,
            UnexpectedPacket,
            WriteFailed,
            ReadFailed,
            EndOfStream,
        };

        /// Side-band stream codes: the first byte of each data packet.
        const StreamCode = enum(u8) {
            pack_data = 1,
            progress = 2,
            fatal_error = 3,
            _,
        };

        /// `Io.Reader` vtable implementation: finds the next pack-data
        /// packet, then copies its payload through to `w`.
        pub fn stream(r: *Io.Reader, w: *Io.Writer, limit: Io.Limit) Io.Reader.StreamError!usize {
            const fs: *FetchStream = @alignCast(@fieldParentPtr("reader", r));
            const input = fs.input;
            if (fs.remaining_len == 0) {
                while (true) {
                    switch (Packet.peek(input) catch |err| {
                        fs.err = err;
                        return error.ReadFailed;
                    }) {
                        // A flush packet ends the packfile section.
                        .flush => return error.EndOfStream,
                        .data => |data| if (data.len > 1) switch (@as(StreamCode, @enumFromInt(data[0]))) {
                            .pack_data => {
                                // Skip the stream-code byte; the rest of the
                                // packet is raw packfile data, assumed to
                                // remain readable on `input` after
                                // `Packet.peek`.
                                input.toss(1);
                                fs.remaining_len = data.len - 1;
                                break;
                            },
                            .fatal_error => {
                                fs.err = error.ProtocolError;
                                return error.ReadFailed;
                            },
                            // NOTE(review): progress/unknown codes fall
                            // through without consuming the packet;
                            // presumably unreachable because "no-progress"
                            // was requested — confirm `Packet.peek`'s
                            // behavior on repeated calls.
                            else => {},
                        },
                        else => {
                            fs.err = error.UnexpectedPacket;
                            return error.ReadFailed;
                        },
                    }
                }
            }
            // Copy as much of the current packet as the writer and `limit`
            // allow, directly from the underlying reader.
            const buf = limit.slice(try w.writableSliceGreedy(1));
            const n = @min(buf.len, fs.remaining_len);
            try input.readSliceAll(buf[0..n]);
            w.advance(n);
            fs.remaining_len -= n;
            return n;
        }
    };
1135};
1136
/// The fixed-size header at the start of every packfile.
const PackHeader = struct {
    total_objects: u32,

    const signature = "PACK";
    const supported_version = 2;

    /// Parses the 12-byte packfile header: the "PACK" magic, a version
    /// (only version 2 is supported), and the total object count.
    /// A truncated header is reported as `error.InvalidHeader`.
    fn read(reader: *Io.Reader) !PackHeader {
        const magic = reader.take(signature.len) catch |err| switch (err) {
            error.EndOfStream => return error.InvalidHeader,
            else => |other| return other,
        };
        if (!mem.eql(u8, signature, magic)) return error.InvalidHeader;

        const version = reader.takeInt(u32, .big) catch |err| switch (err) {
            error.EndOfStream => return error.InvalidHeader,
            else => |other| return other,
        };
        if (version != supported_version) return error.UnsupportedVersion;

        const object_count = reader.takeInt(u32, .big) catch |err| switch (err) {
            error.EndOfStream => return error.InvalidHeader,
            else => |other| return other,
        };
        return .{ .total_objects = object_count };
    }
};
1161
/// The header of a single object entry within a packfile, as documented in
/// [pack-format](https://git-scm.com/docs/pack-format).
const EntryHeader = union(Type) {
    commit: Undeltified,
    tree: Undeltified,
    blob: Undeltified,
    tag: Undeltified,
    ofs_delta: OfsDelta,
    ref_delta: RefDelta,

    /// On-disk object type numbers (value 5 is not used).
    const Type = enum(u3) {
        commit = 1,
        tree = 2,
        blob = 3,
        tag = 4,
        ofs_delta = 6,
        ref_delta = 7,
    };

    const Undeltified = struct {
        uncompressed_length: u64,
    };

    const OfsDelta = struct {
        /// Distance in bytes back from this entry to its base object.
        offset: u64,
        uncompressed_length: u64,
    };

    const RefDelta = struct {
        /// Object ID of the base object.
        base_object: Oid,
        uncompressed_length: u64,
    };

    /// Returns the object type of a non-delta entry.
    /// Asserts that `header` is not a delta.
    fn objectType(header: EntryHeader) Object.Type {
        return switch (header) {
            inline .commit, .tree, .blob, .tag => |_, tag| @field(Object.Type, @tagName(tag)),
            else => unreachable,
        };
    }

    fn uncompressedLength(header: EntryHeader) u64 {
        return switch (header) {
            inline else => |entry| entry.uncompressed_length,
        };
    }

    /// Reads an entry header.
    ///
    /// The first byte packs the low 4 bits of the uncompressed length, a
    /// 3-bit object type, and a continuation flag; when the flag is set, the
    /// remaining length bits follow as LEB128 and are shifted in above the
    /// initial 4 bits. Delta entries are then followed by their base
    /// reference: an offset varint for OFS_DELTA, or a raw object ID for
    /// REF_DELTA.
    fn read(format: Oid.Format, reader: *Io.Reader) !EntryHeader {
        const InitialByte = packed struct { len: u4, type: u3, has_next: bool };
        const initial: InitialByte = @bitCast(reader.takeByte() catch |e| switch (e) {
            error.EndOfStream => return error.InvalidFormat,
            else => |other| return other,
        });
        const rest_len = if (initial.has_next) try reader.takeLeb128(u64) else 0;
        var uncompressed_length: u64 = initial.len;
        // Overflow here means the encoded length cannot fit in u64, which
        // only a malformed pack can produce.
        uncompressed_length |= std.math.shlExact(u64, rest_len, 4) catch return error.InvalidFormat;
        const @"type" = std.enums.fromInt(EntryHeader.Type, initial.type) orelse return error.InvalidFormat;
        return switch (@"type") {
            inline .commit, .tree, .blob, .tag => |tag| @unionInit(EntryHeader, @tagName(tag), .{
                .uncompressed_length = uncompressed_length,
            }),
            .ofs_delta => .{ .ofs_delta = .{
                .offset = try readOffsetVarInt(reader),
                .uncompressed_length = uncompressed_length,
            } },
            .ref_delta => .{ .ref_delta = .{
                .base_object = Oid.readBytes(format, reader) catch |e| switch (e) {
                    error.EndOfStream => return error.InvalidFormat,
                    else => |other| return other,
                },
                .uncompressed_length = uncompressed_length,
            } },
        };
    }
};
1234
/// Reads the variable-length offset encoding used by OFS_DELTA entries (see
/// pack-format). Each byte contributes 7 payload bits, with the high bit
/// marking continuation. Unlike plain LEB128, the accumulated value is
/// incremented by one before each shift, which eliminates redundant
/// multi-byte encodings of small values.
fn readOffsetVarInt(r: *Io.Reader) !u64 {
    const Piece = packed struct { bits: u7, more: bool };
    var piece: Piece = @bitCast(try r.takeByte());
    var result: u64 = piece.bits;
    while (piece.more) {
        piece = @bitCast(try r.takeByte());
        // The "+ 1" implements the encoding's bias; shift overflow means the
        // input is malformed rather than a representable offset.
        result = std.math.shlExact(u64, result + 1, 7) catch return error.InvalidFormat;
        result |= piece.bits;
    }
    return result;
}
1246
/// The header of a version 2 pack index file.
const IndexHeader = struct {
    fan_out_table: [256]u32,

    const signature = "\xFFtOc";
    const supported_version = 2;
    const size = 4 + 4 + @sizeOf([256]u32);

    /// Reads and validates the index header, filling in the fan-out table
    /// (stored as 256 big-endian u32 values).
    fn read(index_header: *IndexHeader, reader: *Io.Reader) !void {
        const magic = try reader.take(4);
        if (!mem.eql(u8, signature, magic)) return error.InvalidHeader;
        const version = try reader.takeInt(u32, .big);
        if (version != supported_version) return error.UnsupportedVersion;
        try reader.readSliceEndian(u32, &index_header.fan_out_table, .big);
    }
};
1262
/// A single object's entry in the pack index: where the object lives in the
/// packfile and the CRC-32 of its packed representation (always written as 0
/// here while `skip_checksums` is enabled).
const IndexEntry = struct {
    /// Byte offset of the object's entry header within the packfile.
    offset: u64,
    crc32: u32,
};
1267
/// Writes out a version 2 index for the given packfile, as documented in
/// [pack-format](https://git-scm.com/docs/pack-format).
pub fn indexPack(
    allocator: Allocator,
    format: Oid.Format,
    pack: *std.fs.File.Reader,
    index_writer: *std.fs.File.Writer,
) !void {
    try pack.seekTo(0);

    var index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry) = .empty;
    defer index_entries.deinit(allocator);
    var pending_deltas: std.ArrayList(IndexEntry) = .empty;
    defer pending_deltas.deinit(allocator);

    // First pass: index all non-delta objects and queue delta objects.
    const pack_checksum = try indexPackFirstPass(allocator, format, pack, &index_entries, &pending_deltas);

    // Repeatedly attempt to resolve the queued deltas. Each outer iteration
    // must resolve at least one delta; otherwise some delta's base is
    // missing and the pack is incomplete.
    var cache: ObjectCache = .{};
    defer cache.deinit(allocator);
    var remaining_deltas = pending_deltas.items.len;
    while (remaining_deltas > 0) {
        var i: usize = remaining_deltas;
        while (i > 0) {
            i -= 1;
            const delta = pending_deltas.items[i];
            if (try indexPackHashDelta(allocator, format, pack, delta, index_entries, &cache)) |oid| {
                try index_entries.put(allocator, oid, delta);
                _ = pending_deltas.swapRemove(i);
            }
        }
        if (pending_deltas.items.len == remaining_deltas) return error.IncompletePack;
        remaining_deltas = pending_deltas.items.len;
    }

    // The index stores object IDs sorted lexicographically.
    var oids: std.ArrayList(Oid) = .empty;
    defer oids.deinit(allocator);
    try oids.ensureTotalCapacityPrecise(allocator, index_entries.count());
    var index_entries_iter = index_entries.iterator();
    while (index_entries_iter.next()) |entry| {
        oids.appendAssumeCapacity(entry.key_ptr.*);
    }
    mem.sortUnstable(Oid, oids.items, {}, struct {
        fn lessThan(_: void, o1: Oid, o2: Oid) bool {
            return mem.lessThan(u8, o1.slice(), o2.slice());
        }
    }.lessThan);

    // fan_out_table[b] = number of objects whose first OID byte is <= b
    // (cumulative counts over the sorted OID list).
    var fan_out_table: [256]u32 = undefined;
    var count: u32 = 0;
    var fan_out_index: u8 = 0;
    for (oids.items) |oid| {
        const key = oid.slice()[0];
        if (key > fan_out_index) {
            @memset(fan_out_table[fan_out_index..key], count);
            fan_out_index = key;
        }
        count += 1;
    }
    @memset(fan_out_table[fan_out_index..], count);

    // Everything written before the trailing index checksum is fed through
    // a hasher so the checksum can be appended at the end.
    var index_hashed_writer = Io.Writer.hashed(&index_writer.interface, Oid.Hasher.init(format), &.{});
    const writer = &index_hashed_writer.writer;
    try writer.writeAll(IndexHeader.signature);
    try writer.writeInt(u32, IndexHeader.supported_version, .big);
    for (fan_out_table) |fan_out_entry| {
        try writer.writeInt(u32, fan_out_entry, .big);
    }

    // Sorted object IDs.
    for (oids.items) |oid| {
        try writer.writeAll(oid.slice());
    }

    // CRC-32 section (always 0 here; see `skip_checksums`).
    for (oids.items) |oid| {
        try writer.writeInt(u32, index_entries.get(oid).?.crc32, .big);
    }

    // Offsets that fit in 31 bits are stored directly; larger offsets store
    // an index into a supplementary 64-bit offset table, flagged by the
    // high bit of the 32-bit entry.
    var big_offsets: std.ArrayList(u64) = .empty;
    defer big_offsets.deinit(allocator);
    for (oids.items) |oid| {
        const offset = index_entries.get(oid).?.offset;
        if (offset <= std.math.maxInt(u31)) {
            try writer.writeInt(u32, @intCast(offset), .big);
        } else {
            const index = big_offsets.items.len;
            try big_offsets.append(allocator, offset);
            try writer.writeInt(u32, @as(u32, @intCast(index)) | (1 << 31), .big);
        }
    }
    for (big_offsets.items) |offset| {
        try writer.writeInt(u64, offset, .big);
    }

    // Trailer: the pack checksum, then the checksum of the index itself
    // (written directly, bypassing the hasher).
    try writer.writeAll(pack_checksum.slice());
    const index_checksum = index_hashed_writer.hasher.finalResult();
    try index_writer.interface.writeAll(index_checksum.slice());
    try index_writer.end();
}
1365
/// Performs the first pass over the packfile data for index construction.
/// This will index all non-delta objects, queue delta objects for further
/// processing, and return the pack checksum (which is part of the index
/// format).
fn indexPackFirstPass(
    allocator: Allocator,
    format: Oid.Format,
    pack: *std.fs.File.Reader,
    index_entries: *std.AutoHashMapUnmanaged(Oid, IndexEntry),
    pending_deltas: *std.ArrayList(IndexEntry),
) !Oid {
    var flate_buffer: [std.compress.flate.max_window_len]u8 = undefined;
    var pack_buffer: [2048]u8 = undefined; // Reasonably large buffer for file system.
    // All pack bytes are read through a hasher so the pack checksum can be
    // returned at the end.
    var pack_hashed = pack.interface.hashed(Oid.Hasher.init(format), &pack_buffer);

    const pack_header = try PackHeader.read(&pack_hashed.reader);

    for (0..pack_header.total_objects) |_| {
        // The entry's offset in the file is the current file position minus
        // whatever is buffered but not yet consumed.
        const entry_offset = pack.logicalPos() - pack_hashed.reader.bufferedLen();
        const entry_header = try EntryHeader.read(format, &pack_hashed.reader);
        switch (entry_header) {
            .commit, .tree, .blob, .tag => |object| {
                // Decompress the object and hash it to obtain its ID.
                var entry_decompress: std.compress.flate.Decompress = .init(&pack_hashed.reader, .zlib, &.{});
                var oid_hasher: Oid.Hashing = .init(format, &flate_buffer);
                const oid_hasher_w = oid_hasher.writer();
                // The object header is not included in the pack data but is
                // part of the object's ID
                try oid_hasher_w.print("{t} {d}\x00", .{ entry_header, object.uncompressed_length });
                const n = try entry_decompress.reader.streamRemaining(oid_hasher_w);
                if (n != object.uncompressed_length) return error.InvalidObject;
                const oid = oid_hasher.final();
                if (!skip_checksums) @compileError("TODO");
                try index_entries.put(allocator, oid, .{
                    .offset = entry_offset,
                    .crc32 = 0,
                });
            },
            inline .ofs_delta, .ref_delta => |delta| {
                // Deltas cannot be hashed yet (their base may not have been
                // seen); validate the length and queue them for later.
                var entry_decompress: std.compress.flate.Decompress = .init(&pack_hashed.reader, .zlib, &flate_buffer);
                const n = try entry_decompress.reader.discardRemaining();
                if (n != delta.uncompressed_length) return error.InvalidObject;
                if (!skip_checksums) @compileError("TODO");
                try pending_deltas.append(allocator, .{
                    .offset = entry_offset,
                    .crc32 = 0,
                });
            },
        }
    }

    if (!skip_checksums) @compileError("TODO");
    return pack_hashed.hasher.finalResult();
}
1419
/// Attempts to determine the final object ID of the given deltified object.
/// May return null if this is not yet possible (if the delta is a ref-based
/// delta and we do not yet know the offset of the base object).
fn indexPackHashDelta(
    allocator: Allocator,
    format: Oid.Format,
    pack: *std.fs.File.Reader,
    delta: IndexEntry,
    index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry),
    cache: *ObjectCache,
) !?Oid {
    // Figure out the chain of deltas to resolve
    var base_offset = delta.offset;
    var base_header: EntryHeader = undefined;
    var delta_offsets: std.ArrayList(u64) = .empty;
    defer delta_offsets.deinit(allocator);
    // Walk from the target delta towards its ultimate non-delta base,
    // recording the offset of each delta along the way. Stops early when a
    // previously resolved object is found in the cache.
    const base_object = while (true) {
        if (cache.get(base_offset)) |base_object| break base_object;

        try pack.seekTo(base_offset);
        base_header = try EntryHeader.read(format, &pack.interface);
        switch (base_header) {
            .ofs_delta => |ofs_delta| {
                // The base lies `ofs_delta.offset` bytes before this entry;
                // underflow means the encoded offset is corrupt.
                try delta_offsets.append(allocator, base_offset);
                base_offset = std.math.sub(u64, base_offset, ofs_delta.offset) catch return error.InvalidObject;
            },
            .ref_delta => |ref_delta| {
                // The base is referenced by object ID; if it hasn't been
                // indexed yet, this chain can't be resolved this round.
                try delta_offsets.append(allocator, base_offset);
                base_offset = (index_entries.get(ref_delta.base_object) orelse return null).offset;
            },
            else => {
                // Reached the non-delta base: read and cache it. On success
                // the cache owns `base_data`.
                const base_data = try readObjectRaw(allocator, &pack.interface, base_header.uncompressedLength());
                errdefer allocator.free(base_data);
                const base_object: Object = .{ .type = base_header.objectType(), .data = base_data };
                try cache.put(allocator, base_offset, base_object);
                break base_object;
            },
        }
    };

    const base_data = try resolveDeltaChain(allocator, format, pack, base_object, delta_offsets.items, cache);

    // Hash "<type> <len>\x00" followed by the object contents to obtain the
    // object ID (the same header used by `indexPackFirstPass`).
    var entry_hasher_buffer: [64]u8 = undefined;
    var entry_hasher: Oid.Hashing = .init(format, &entry_hasher_buffer);
    const entry_hasher_w = entry_hasher.writer();
    // Writes to hashers cannot fail.
    entry_hasher_w.print("{t} {d}\x00", .{ base_object.type, base_data.len }) catch unreachable;
    entry_hasher_w.writeAll(base_data) catch unreachable;
    return entry_hasher.final();
}
1470
/// Resolves a chain of deltas, returning the final base object data. `pack` is
/// assumed to be looking at the start of the object data for the base object of
/// the chain, and will then apply the deltas in `delta_offsets` in reverse order
/// to obtain the final object.
///
/// The returned slice is owned by `cache` (or is `base_object.data` when
/// `delta_offsets` is empty), so the caller must not free it.
fn resolveDeltaChain(
    allocator: Allocator,
    format: Oid.Format,
    pack: *std.fs.File.Reader,
    base_object: Object,
    delta_offsets: []const u64,
    cache: *ObjectCache,
) ![]const u8 {
    var base_data = base_object.data;
    // `delta_offsets` was recorded target-first, so iterate from the end
    // (closest to the base) back towards the target object.
    var i: usize = delta_offsets.len;
    while (i > 0) {
        i -= 1;

        const delta_offset = delta_offsets[i];
        try pack.seekTo(delta_offset);
        const delta_header = try EntryHeader.read(format, &pack.interface);
        const delta_data = try readObjectRaw(allocator, &pack.interface, delta_header.uncompressedLength());
        defer allocator.free(delta_data);
        // Delta data begins with two LEB128 sizes: the base object size
        // (unused here) and the size of the expanded result.
        var delta_reader: Io.Reader = .fixed(delta_data);
        _ = try delta_reader.takeLeb128(u64); // base object size
        const expanded_size = try delta_reader.takeLeb128(u64);

        const expanded_alloc_size = std.math.cast(usize, expanded_size) orelse return error.ObjectTooLarge;
        const expanded_data = try allocator.alloc(u8, expanded_alloc_size);
        errdefer allocator.free(expanded_data);
        var expanded_delta_stream: Io.Writer = .fixed(expanded_data);
        try expandDelta(base_data, &delta_reader, &expanded_delta_stream);
        // The delta must fill exactly the size its header promised.
        if (expanded_delta_stream.end != expanded_size) return error.InvalidObject;

        // Cache each intermediate result; the cache takes ownership of
        // `expanded_data`.
        try cache.put(allocator, delta_offset, .{ .type = base_object.type, .data = expanded_data });
        base_data = expanded_data;
    }
    return base_data;
}
1509
/// Reads the complete contents of an object from `reader`. This function may
/// read more bytes than required from `reader`, so the reader position after
/// returning is not reliable.
///
/// Caller owns the returned memory.
fn readObjectRaw(allocator: Allocator, reader: *Io.Reader, size: u64) ![]u8 {
    const alloc_size = std.math.cast(usize, size) orelse return error.ObjectTooLarge;
    var aw: Io.Writer.Allocating = .init(allocator);
    // Register cleanup before any fallible call so no error path can leak.
    defer aw.deinit();
    // Reserve room for the object plus the flate window up front; `size`
    // comes from untrusted pack data, so reject values whose capacity
    // computation would overflow instead of tripping a safety check.
    const capacity = std.math.add(usize, alloc_size, std.compress.flate.max_window_len) catch
        return error.ObjectTooLarge;
    try aw.ensureTotalCapacity(capacity);
    var decompress: std.compress.flate.Decompress = .init(reader, .zlib, &.{});
    try decompress.reader.streamExact(&aw.writer, alloc_size);
    return aw.toOwnedSlice();
}
1522
/// Expands delta data from `delta_reader` to `writer`.
///
/// The format of the delta data is documented in
/// [pack-format](https://git-scm.com/docs/pack-format): each instruction
/// either copies a range from `base_object` or inserts literal bytes taken
/// from the delta stream itself. Expansion stops at end of stream.
fn expandDelta(base_object: []const u8, delta_reader: *Io.Reader, writer: *Io.Writer) !void {
    while (true) {
        // Instruction byte: high bit selects copy (1) vs insert (0); the
        // low 7 bits are a presence bitmap (copy) or a literal length.
        const inst: packed struct { value: u7, copy: bool } = @bitCast(delta_reader.takeByte() catch |e| switch (e) {
            error.EndOfStream => return,
            else => |other| return other,
        });
        if (inst.copy) {
            // Each set bit indicates that the corresponding offset/size byte
            // is present in the stream; absent bytes default to zero.
            const available: packed struct {
                offset1: bool,
                offset2: bool,
                offset3: bool,
                offset4: bool,
                size1: bool,
                size2: bool,
                size3: bool,
            } = @bitCast(inst.value);
            const offset_parts: packed struct { offset1: u8, offset2: u8, offset3: u8, offset4: u8 } = .{
                .offset1 = if (available.offset1) try delta_reader.takeByte() else 0,
                .offset2 = if (available.offset2) try delta_reader.takeByte() else 0,
                .offset3 = if (available.offset3) try delta_reader.takeByte() else 0,
                .offset4 = if (available.offset4) try delta_reader.takeByte() else 0,
            };
            const base_offset: u32 = @bitCast(offset_parts);
            const size_parts: packed struct { size1: u8, size2: u8, size3: u8 } = .{
                .size1 = if (available.size1) try delta_reader.takeByte() else 0,
                .size2 = if (available.size2) try delta_reader.takeByte() else 0,
                .size3 = if (available.size3) try delta_reader.takeByte() else 0,
            };
            var size: u24 = @bitCast(size_parts);
            // A size of 0 encodes 0x10000, per the delta format.
            if (size == 0) size = 0x10000;
            // The pack is untrusted input: a copy range outside the base
            // object must be reported as an error rather than performing an
            // out-of-bounds slice (safety panic, or UB in ReleaseFast).
            if (base_offset > base_object.len or base_object.len - base_offset < size)
                return error.InvalidDeltaInstruction;
            try writer.writeAll(base_object[base_offset..][0..size]);
        } else if (inst.value != 0) {
            // Insert: copy `inst.value` literal bytes from the delta stream.
            try delta_reader.streamExact(writer, inst.value);
        } else {
            // Instruction byte 0 is reserved by the format.
            return error.InvalidDeltaInstruction;
        }
    }
}
1565
/// Runs the packfile indexing and checkout test.
///
/// The two testrepo repositories under testdata contain identical commit
/// histories and contents.
///
/// To verify the contents of the packfiles using Git alone, run the
/// following commands in an empty directory:
///
/// 1. `git init --object-format=(sha1|sha256)`
/// 2. `git unpack-objects <path/to/testrepo.pack`
/// 3. `git fsck` - will print one "dangling commit":
///    - SHA-1: `dd582c0720819ab7130b103635bd7271b9fd4feb`
///    - SHA-256: `7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a`
/// 4. `git checkout $commit`
fn runRepositoryTest(io: Io, comptime format: Oid.Format, head_commit: []const u8) !void {
    const testrepo_pack = @embedFile("git/testdata/testrepo-" ++ @tagName(format) ++ ".pack");

    // Materialize the embedded packfile in a temporary directory so it can
    // be read back through a std.fs.File.Reader.
    var git_dir = testing.tmpDir(.{});
    defer git_dir.cleanup();
    var pack_file = try git_dir.dir.createFile("testrepo.pack", .{ .read = true });
    defer pack_file.close();
    try pack_file.writeAll(testrepo_pack);

    var pack_file_buffer: [2000]u8 = undefined;
    var pack_file_reader = pack_file.reader(io, &pack_file_buffer);

    // Index the pack into testrepo.idx.
    var index_file = try git_dir.dir.createFile("testrepo.idx", .{ .read = true });
    defer index_file.close();
    var index_file_buffer: [2000]u8 = undefined;
    var index_file_writer = index_file.writer(&index_file_buffer);
    try indexPack(testing.allocator, format, &pack_file_reader, &index_file_writer);

    // Arbitrary size limit on files read while checking the repository contents
    // (all files in the test repo are known to be smaller than this)
    const max_file_size = 8192;

    if (!skip_checksums) {
        const index_file_data = try git_dir.dir.readFileAlloc("testrepo.idx", testing.allocator, .limited(max_file_size));
        defer testing.allocator.free(index_file_data);
        // testrepo.idx is generated by Git. The index created by this file should
        // match it exactly. Running `git verify-pack -v testrepo.pack` can verify
        // this.
        const testrepo_idx = @embedFile("git/testdata/testrepo-" ++ @tagName(format) ++ ".idx");
        try testing.expectEqualSlices(u8, testrepo_idx, index_file_data);
    }

    // Check out the head commit into a second temporary directory,
    // collecting any per-file problems in `diagnostics`.
    var index_file_reader = index_file.reader(io, &index_file_buffer);
    var repository: Repository = undefined;
    try repository.init(testing.allocator, format, &pack_file_reader, &index_file_reader);
    defer repository.deinit();

    var worktree = testing.tmpDir(.{ .iterate = true });
    defer worktree.cleanup();

    const commit_id = try Oid.parse(format, head_commit);

    var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
    defer diagnostics.deinit();
    try repository.checkout(worktree.dir, commit_id, &diagnostics);
    try testing.expect(diagnostics.errors.items.len == 0);

    // The checked-out worktree must contain exactly these files.
    const expected_files: []const []const u8 = &.{
        "dir/file",
        "dir/subdir/file",
        "dir/subdir/file2",
        "dir2/file",
        "dir3/file",
        "dir3/file2",
        "file",
        "file2",
        "file3",
        "file4",
        "file5",
        "file6",
        "file7",
        "file8",
        "file9",
    };
    // Walk the worktree, normalizing path separators so the comparison is
    // platform-independent, then sort for a deterministic ordering.
    var actual_files: std.ArrayList([]u8) = .empty;
    defer actual_files.deinit(testing.allocator);
    defer for (actual_files.items) |file| testing.allocator.free(file);
    var walker = try worktree.dir.walk(testing.allocator);
    defer walker.deinit();
    while (try walker.next()) |entry| {
        if (entry.kind != .file) continue;
        const path = try testing.allocator.dupe(u8, entry.path);
        errdefer testing.allocator.free(path);
        mem.replaceScalar(u8, path, std.fs.path.sep, '/');
        try actual_files.append(testing.allocator, path);
    }
    mem.sortUnstable([]u8, actual_files.items, {}, struct {
        fn lessThan(_: void, a: []u8, b: []u8) bool {
            return mem.lessThan(u8, a, b);
        }
    }.lessThan);
    try testing.expectEqualDeep(expected_files, actual_files.items);

    // Spot-check the contents of one file that was modified across several
    // revisions in the test history.
    const expected_file_contents =
        \\revision 1
        \\revision 2
        \\revision 4
        \\revision 5
        \\revision 7
        \\revision 8
        \\revision 9
        \\revision 10
        \\revision 12
        \\revision 13
        \\revision 14
        \\revision 18
        \\revision 19
        \\
    ;
    const actual_file_contents = try worktree.dir.readFileAlloc("file", testing.allocator, .limited(max_file_size));
    defer testing.allocator.free(actual_file_contents);
    try testing.expectEqualStrings(expected_file_contents, actual_file_contents);
}
1683
/// Checksum calculation is useful for troubleshooting and debugging, but it's
/// redundant since the package manager already does content hashing at the
/// end. Let's save time by not doing that work, but, I left a cookie crumb
/// trail here if you want to restore the functionality for tinkering purposes.
const skip_checksums = true;

// The head commits below are the "dangling commit" IDs listed in
// `runRepositoryTest`'s doc comment.
test "SHA-1 packfile indexing and checkout" {
    try runRepositoryTest(std.testing.io, .sha1, "dd582c0720819ab7130b103635bd7271b9fd4feb");
}

test "SHA-256 packfile indexing and checkout" {
    try runRepositoryTest(std.testing.io, .sha256, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a");
}
1697
/// Checks out a commit of a packfile. Intended for experimenting with and
/// benchmarking possible optimizations to the indexing and checkout behavior.
///
/// Command-line arguments: `format packfile commit worktree`.
pub fn main() !void {
    const allocator = std.heap.smp_allocator;

    var threaded: Io.Threaded = .init(allocator);
    defer threaded.deinit();
    const io = threaded.io();

    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);
    if (args.len != 5) {
        return error.InvalidArguments; // Arguments: format packfile commit worktree
    }

    // args[1]: object ID format name ("sha1" or "sha256").
    const format = std.meta.stringToEnum(Oid.Format, args[1]) orelse return error.InvalidFormat;

    // args[2]: path to the packfile to index and check out.
    var pack_file = try std.fs.cwd().openFile(args[2], .{});
    defer pack_file.close();
    var pack_file_buffer: [4096]u8 = undefined;
    var pack_file_reader = pack_file.reader(io, &pack_file_buffer);

    // args[3]: commit ID to check out; args[4]: worktree output directory.
    const commit = try Oid.parse(format, args[3]);
    var worktree = try std.fs.cwd().makeOpenPath(args[4], .{});
    defer worktree.close();

    // The generated index is written to "$worktree/.git/idx".
    var git_dir = try worktree.makeOpenPath(".git", .{});
    defer git_dir.close();

    std.debug.print("Starting index...\n", .{});
    var index_file = try git_dir.createFile("idx", .{ .read = true });
    defer index_file.close();
    var index_file_buffer: [4096]u8 = undefined;
    var index_file_writer = index_file.writer(&index_file_buffer);
    try indexPack(allocator, format, &pack_file_reader, &index_file_writer);

    std.debug.print("Starting checkout...\n", .{});
    var index_file_reader = index_file.reader(io, &index_file_buffer);
    var repository: Repository = undefined;
    try repository.init(allocator, format, &pack_file_reader, &index_file_reader);
    defer repository.deinit();
    var diagnostics: Diagnostics = .{ .allocator = allocator };
    defer diagnostics.deinit();
    try repository.checkout(worktree, commit, &diagnostics);

    // Per-file checkout problems are reported as diagnostics, not errors.
    for (diagnostics.errors.items) |err| {
        std.debug.print("Diagnostic: {}\n", .{err});
    }
}