const std = @import("std");
const builtin = @import("builtin");
const fmt = std.fmt;
const mem = std.mem;
const Io = std.Io;
const Thread = std.Thread;

const Vec4 = @Vector(4, u32);
const Vec8 = @Vector(8, u32);
const Vec16 = @Vector(16, u32);

const chunk_length = 1024;
const max_depth = 54;

const simd_degree = std.simd.suggestVectorLength(u32) orelse 1;
const max_simd_degree = simd_degree;
const max_simd_degree_or_2 = if (max_simd_degree > 2) max_simd_degree else 2;

/// Threshold for switching to parallel processing.
/// Below this size, sequential hashing is used.
/// Benchmarks generally show significant speedup starting at 3 MiB.
const parallel_threshold = 3 * 1024 * 1024;

const iv: [8]u32 = .{
    0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
    0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
};

const msg_schedule: [7][16]u8 = .{
    .{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
    .{ 2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8 },
    .{ 3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1 },
    .{ 10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6 },
    .{ 12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4 },
    .{ 9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7 },
    .{ 11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13 },
};

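// A quick structural check on the table above (illustrative, not part of the
// reference test suite): every row of the message schedule must be a
// permutation of the word indices 0..15, i.e. each round reads all sixteen
// message words exactly once.
test "msg_schedule rows are permutations" {
    for (msg_schedule) |row| {
        var seen = [_]bool{false} ** 16;
        for (row) |idx| {
            try std.testing.expect(!seen[idx]);
            seen[idx] = true;
        }
    }
}
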
const Flags = packed struct(u8) {
    chunk_start: bool = false,
    chunk_end: bool = false,
    parent: bool = false,
    root: bool = false,
    keyed_hash: bool = false,
    derive_key_context: bool = false,
    derive_key_material: bool = false,
    reserved: bool = false,

    fn toInt(self: Flags) u8 {
        return @bitCast(self);
    }

    fn with(self: Flags, other: Flags) Flags {
        return @bitCast(self.toInt() | other.toInt());
    }
};

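// A small illustrative test of the flag helpers: `with` is a bitwise OR over
// the packed representation, so chunk_start (bit 0) combined with chunk_end
// (bit 1) yields 0b11.
test "Flags compose via bitwise or" {
    const f = (Flags{ .chunk_start = true }).with(.{ .chunk_end = true });
    try std.testing.expectEqual(@as(u8, 0b11), f.toInt());
    try std.testing.expect(f.chunk_start and f.chunk_end and !f.parent);
}
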
const rotr = std.math.rotr;

inline fn rotr32(w: u32, c: u5) u32 {
    return rotr(u32, w, c);
}

inline fn load32(bytes: []const u8) u32 {
    return mem.readInt(u32, bytes[0..4], .little);
}

inline fn store32(bytes: []u8, w: u32) void {
    mem.writeInt(u32, bytes[0..4], w, .little);
}

fn loadKeyWords(key: [Blake3.key_length]u8) [8]u32 {
    var key_words: [8]u32 = undefined;
    for (0..8) |i| {
        key_words[i] = load32(key[i * 4 ..][0..4]);
    }
    return key_words;
}

fn storeCvWords(cv_words: [8]u32) [Blake3.digest_length]u8 {
    var bytes: [Blake3.digest_length]u8 = undefined;
    for (0..8) |i| {
        store32(bytes[i * 4 ..][0..4], cv_words[i]);
    }
    return bytes;
}

fn loadCvWords(bytes: [Blake3.digest_length]u8) [8]u32 {
    var cv_words: [8]u32 = undefined;
    for (0..8) |i| {
        cv_words[i] = load32(bytes[i * 4 ..][0..4]);
    }
    return cv_words;
}

inline fn counterLow(counter: u64) u32 {
    return @truncate(counter);
}

inline fn counterHigh(counter: u64) u32 {
    return @truncate(counter >> 32);
}

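// Worked example for the counter helpers above: the 64-bit block counter is
// split into two u32 state words, low half first.
test "counter words split the 64-bit counter" {
    const counter: u64 = 0x1122334455667788;
    try std.testing.expectEqual(@as(u32, 0x55667788), counterLow(counter));
    try std.testing.expectEqual(@as(u32, 0x11223344), counterHigh(counter));
}
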
fn highestOne(x: u64) u6 {
    if (x == 0) return 0;
    return @intCast(63 - @clz(x));
}

fn roundDownToPowerOf2(x: u64) u64 {
    return @as(u64, 1) << highestOne(x | 1);
}

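// Example values for the bit helpers above: highestOne returns the index of
// the most significant set bit, and roundDownToPowerOf2 keeps only that bit.
test "power-of-two helpers" {
    try std.testing.expectEqual(@as(u6, 0), highestOne(1));
    try std.testing.expectEqual(@as(u6, 10), highestOne(1024));
    try std.testing.expectEqual(@as(u64, 1), roundDownToPowerOf2(1));
    try std.testing.expectEqual(@as(u64, 1024), roundDownToPowerOf2(2047));
    try std.testing.expectEqual(@as(u64, 2048), roundDownToPowerOf2(2048));
}
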
inline fn g(state: *[16]u32, a: usize, b: usize, c: usize, d: usize, x: u32, y: u32) void {
    state[a] +%= state[b] +% x;
    state[d] = rotr32(state[d] ^ state[a], 16);
    state[c] +%= state[d];
    state[b] = rotr32(state[b] ^ state[c], 12);
    state[a] +%= state[b] +% y;
    state[d] = rotr32(state[d] ^ state[a], 8);
    state[c] +%= state[d];
    state[b] = rotr32(state[b] ^ state[c], 7);
}

inline fn roundFn(state: *[16]u32, msg: *const [16]u32, round: usize) void {
    const schedule = &msg_schedule[round];

    g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
    g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
    g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
    g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);

    g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
    g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
    g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
    g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
}

fn compressPre(state: *[16]u32, cv: *const [8]u32, block: []const u8, block_len: u8, counter: u64, flags: Flags) void {
    var block_words: [16]u32 = undefined;
    for (0..16) |i| {
        block_words[i] = load32(block[i * 4 ..][0..4]);
    }

    for (0..8) |i| {
        state[i] = cv[i];
    }
    for (0..4) |i| {
        state[i + 8] = iv[i];
    }
    state[12] = counterLow(counter);
    state[13] = counterHigh(counter);
    state[14] = @as(u32, block_len);
    state[15] = @as(u32, flags.toInt());

    for (0..7) |round| {
        roundFn(state, &block_words, round);
    }
}

fn compressInPlace(cv: *[8]u32, block: []const u8, block_len: u8, counter: u64, flags: Flags) void {
    var state: [16]u32 = undefined;
    compressPre(&state, cv, block, block_len, counter, flags);
    for (0..8) |i| {
        cv[i] = state[i] ^ state[i + 8];
    }
}

fn compressXof(cv: *const [8]u32, block: []const u8, block_len: u8, counter: u64, flags: Flags, out: *[64]u8) void {
    var state: [16]u32 = undefined;
    compressPre(&state, cv, block, block_len, counter, flags);

    for (0..8) |i| {
        store32(out[i * 4 ..][0..4], state[i] ^ state[i + 8]);
    }
    for (0..8) |i| {
        store32(out[(i + 8) * 4 ..][0..4], state[i + 8] ^ cv[i]);
    }
}

fn hashOne(input: []const u8, blocks: usize, key: [8]u32, counter: u64, flags: Flags, flags_start: Flags, flags_end: Flags) [Blake3.digest_length]u8 {
    var cv = key;
    var block_flags = flags.with(flags_start);
    var inp = input;
    var remaining_blocks = blocks;

    while (remaining_blocks > 0) {
        if (remaining_blocks == 1) {
            block_flags = block_flags.with(flags_end);
        }
        compressInPlace(&cv, inp[0..Blake3.block_length], Blake3.block_length, counter, block_flags);
        inp = inp[Blake3.block_length..];
        remaining_blocks -= 1;
        block_flags = flags;
    }

    return storeCvWords(cv);
}

fn hashManyPortable(inputs: [][*]const u8, num_inputs: usize, blocks: usize, key: [8]u32, counter_arg: u64, increment_counter: bool, flags: Flags, flags_start: Flags, flags_end: Flags, out: []u8) void {
    var counter = counter_arg;
    for (0..num_inputs) |i| {
        const input = inputs[i][0 .. blocks * Blake3.block_length];
        const result = hashOne(input, blocks, key, counter, flags, flags_start, flags_end);
        @memcpy(out[i * Blake3.digest_length ..][0..Blake3.digest_length], &result);
        if (increment_counter) {
            counter += 1;
        }
    }
}

fn transposeNxN(comptime Vec: type, comptime n: comptime_int, vecs: *[n]Vec) void {
    const temp: [n]Vec = vecs.*;

    inline for (0..n) |i| {
        inline for (0..n) |j| {
            vecs[i][j] = temp[j][i];
        }
    }
}

fn transposeMsg(comptime Vec: type, comptime n: comptime_int, inputs: [n][*]const u8, block_offset: usize, out: *[16]Vec) void {
    const info = @typeInfo(Vec);
    if (info != .vector) @compileError("transposeMsg requires a vector type");
    if (info.vector.len != n) @compileError("vector width must match N");

    var temp: [n][16]u32 = undefined;

    for (0..n) |i| {
        const block = inputs[i] + block_offset;
        for (0..16) |j| {
            temp[i][j] = load32(block[j * 4 ..][0..4]);
        }
    }

    for (0..16) |j| {
        var result: Vec = undefined;
        inline for (0..n) |i| {
            result[i] = temp[i][j];
        }
        out[j] = result;
    }
}

fn roundFnVec(comptime Vec: type, v: *[16]Vec, m: *const [16]Vec, r: usize) void {
    const schedule = &msg_schedule[r];

    // Column round - first half
    inline for (0..4) |i| {
        v[i] +%= m[schedule[i * 2]];
    }
    inline for (0..4) |i| {
        v[i] +%= v[i + 4];
    }
    inline for (0..4) |i| {
        v[i + 12] ^= v[i];
    }
    inline for (0..4) |i| {
        v[i + 12] = rotr(Vec, v[i + 12], 16);
    }
    inline for (0..4) |i| {
        v[i + 8] +%= v[i + 12];
    }
    inline for (0..4) |i| {
        v[i + 4] ^= v[i + 8];
    }
    inline for (0..4) |i| {
        v[i + 4] = rotr(Vec, v[i + 4], 12);
    }

    // Column round - second half
    inline for (0..4) |i| {
        v[i] +%= m[schedule[i * 2 + 1]];
    }
    inline for (0..4) |i| {
        v[i] +%= v[i + 4];
    }
    inline for (0..4) |i| {
        v[i + 12] ^= v[i];
    }
    inline for (0..4) |i| {
        v[i + 12] = rotr(Vec, v[i + 12], 8);
    }
    inline for (0..4) |i| {
        v[i + 8] +%= v[i + 12];
    }
    inline for (0..4) |i| {
        v[i + 4] ^= v[i + 8];
    }
    inline for (0..4) |i| {
        v[i + 4] = rotr(Vec, v[i + 4], 7);
    }

    // Diagonal round - first half
    inline for (0..4) |i| {
        v[i] +%= m[schedule[i * 2 + 8]];
    }
    const b_indices = [4]u8{ 5, 6, 7, 4 };
    inline for (0..4) |i| {
        v[i] +%= v[b_indices[i]];
    }
    const d_indices = [4]u8{ 15, 12, 13, 14 };
    inline for (0..4) |i| {
        v[d_indices[i]] ^= v[i];
    }
    inline for (0..4) |i| {
        v[d_indices[i]] = rotr(Vec, v[d_indices[i]], 16);
    }
    const c_indices = [4]u8{ 10, 11, 8, 9 };
    inline for (0..4) |i| {
        v[c_indices[i]] +%= v[d_indices[i]];
    }
    inline for (0..4) |i| {
        v[b_indices[i]] ^= v[c_indices[i]];
    }
    inline for (0..4) |i| {
        v[b_indices[i]] = rotr(Vec, v[b_indices[i]], 12);
    }

    // Diagonal round - second half
    inline for (0..4) |i| {
        v[i] +%= m[schedule[i * 2 + 9]];
    }
    inline for (0..4) |i| {
        v[i] +%= v[b_indices[i]];
    }
    inline for (0..4) |i| {
        v[d_indices[i]] ^= v[i];
    }
    inline for (0..4) |i| {
        v[d_indices[i]] = rotr(Vec, v[d_indices[i]], 8);
    }
    inline for (0..4) |i| {
        v[c_indices[i]] +%= v[d_indices[i]];
    }
    inline for (0..4) |i| {
        v[b_indices[i]] ^= v[c_indices[i]];
    }
    inline for (0..4) |i| {
        v[b_indices[i]] = rotr(Vec, v[b_indices[i]], 7);
    }
}

fn hashVec(
    comptime Vec: type,
    comptime n: comptime_int,
    inputs: [n][*]const u8,
    blocks: usize,
    key: [8]u32,
    counter: u64,
    increment_counter: bool,
    flags: Flags,
    flags_start: Flags,
    flags_end: Flags,
    out: *[n * Blake3.digest_length]u8,
) void {
    var h_vecs: [8]Vec = undefined;
    for (0..8) |i| {
        h_vecs[i] = @splat(key[i]);
    }

    const counter_low_vec = if (increment_counter) blk: {
        var result: Vec = undefined;
        inline for (0..n) |i| {
            result[i] = counterLow(counter + i);
        }
        break :blk result;
    } else @as(Vec, @splat(counterLow(counter)));

    const counter_high_vec = if (increment_counter) blk: {
        var result: Vec = undefined;
        inline for (0..n) |i| {
            result[i] = counterHigh(counter + i);
        }
        break :blk result;
    } else @as(Vec, @splat(counterHigh(counter)));

    var block_flags = flags.with(flags_start);

    for (0..blocks) |block| {
        if (block + 1 == blocks) {
            block_flags = block_flags.with(flags_end);
        }

        const block_len_vec: Vec = @splat(Blake3.block_length);
        const block_flags_vec: Vec = @splat(@as(u32, block_flags.toInt()));

        var msg_vecs: [16]Vec = undefined;
        transposeMsg(Vec, n, inputs, block * Blake3.block_length, &msg_vecs);

        var v: [16]Vec = .{
            h_vecs[0],       h_vecs[1],        h_vecs[2],     h_vecs[3],
            h_vecs[4],       h_vecs[5],        h_vecs[6],     h_vecs[7],
            @splat(iv[0]),   @splat(iv[1]),    @splat(iv[2]), @splat(iv[3]),
            counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
        };

        inline for (0..7) |r| {
            roundFnVec(Vec, &v, &msg_vecs, r);
        }

        inline for (0..8) |i| {
            h_vecs[i] = v[i] ^ v[i + 8];
        }

        block_flags = flags;
    }

    // Output serialization - different strategies for different widths
    switch (n) {
        4 => {
            // Special interleaved pattern for Vec4
            var out_vecs = [4]Vec{ h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3] };
            transposeNxN(Vec, 4, &out_vecs);
            inline for (0..4) |i| {
                mem.writeInt(u32, out[0 * 16 + i * 4 ..][0..4], out_vecs[0][i], .little);
            }
            inline for (0..4) |i| {
                mem.writeInt(u32, out[2 * 16 + i * 4 ..][0..4], out_vecs[1][i], .little);
            }
            inline for (0..4) |i| {
                mem.writeInt(u32, out[4 * 16 + i * 4 ..][0..4], out_vecs[2][i], .little);
            }
            inline for (0..4) |i| {
                mem.writeInt(u32, out[6 * 16 + i * 4 ..][0..4], out_vecs[3][i], .little);
            }

            out_vecs = [4]Vec{ h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7] };
            transposeNxN(Vec, 4, &out_vecs);
            inline for (0..4) |i| {
                mem.writeInt(u32, out[1 * 16 + i * 4 ..][0..4], out_vecs[0][i], .little);
            }
            inline for (0..4) |i| {
                mem.writeInt(u32, out[3 * 16 + i * 4 ..][0..4], out_vecs[1][i], .little);
            }
            inline for (0..4) |i| {
                mem.writeInt(u32, out[5 * 16 + i * 4 ..][0..4], out_vecs[2][i], .little);
            }
            inline for (0..4) |i| {
                mem.writeInt(u32, out[7 * 16 + i * 4 ..][0..4], out_vecs[3][i], .little);
            }
        },
        8 => {
            // Linear pattern with transpose for Vec8
            var out_vecs = [8]Vec{ h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3], h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7] };
            transposeNxN(Vec, 8, &out_vecs);
            inline for (0..8) |i| {
                mem.writeInt(u32, out[0 * 32 + i * 4 ..][0..4], out_vecs[0][i], .little);
            }
            inline for (0..8) |i| {
                mem.writeInt(u32, out[1 * 32 + i * 4 ..][0..4], out_vecs[1][i], .little);
            }
            inline for (0..8) |i| {
                mem.writeInt(u32, out[2 * 32 + i * 4 ..][0..4], out_vecs[2][i], .little);
            }
            inline for (0..8) |i| {
                mem.writeInt(u32, out[3 * 32 + i * 4 ..][0..4], out_vecs[3][i], .little);
            }
            inline for (0..8) |i| {
                mem.writeInt(u32, out[4 * 32 + i * 4 ..][0..4], out_vecs[4][i], .little);
            }
            inline for (0..8) |i| {
                mem.writeInt(u32, out[5 * 32 + i * 4 ..][0..4], out_vecs[5][i], .little);
            }
            inline for (0..8) |i| {
                mem.writeInt(u32, out[6 * 32 + i * 4 ..][0..4], out_vecs[6][i], .little);
            }
            inline for (0..8) |i| {
                mem.writeInt(u32, out[7 * 32 + i * 4 ..][0..4], out_vecs[7][i], .little);
            }
        },
        16 => {
            // Direct lane-by-lane output for Vec16 (no transpose)
            inline for (0..16) |lane| {
                const hash_offset = lane * Blake3.digest_length;
                inline for (0..8) |word_idx| {
                    const word = h_vecs[word_idx][lane];
                    out[hash_offset + word_idx * 4 + 0] = @truncate(word);
                    out[hash_offset + word_idx * 4 + 1] = @truncate(word >> 8);
                    out[hash_offset + word_idx * 4 + 2] = @truncate(word >> 16);
                    out[hash_offset + word_idx * 4 + 3] = @truncate(word >> 24);
                }
            }
        },
        else => @compileError("Unsupported SIMD width"),
    }
}

fn hashManySimd(
    inputs: [][*]const u8,
    num_inputs: usize,
    blocks: usize,
    key: [8]u32,
    counter: u64,
    increment_counter: bool,
    flags: Flags,
    flags_start: Flags,
    flags_end: Flags,
    out: []u8,
) void {
    var remaining = num_inputs;
    var inp = inputs.ptr;
    var out_ptr = out.ptr;
    var cnt = counter;

    if (simd_degree >= 16) {
        while (remaining >= 16) {
            const sixteen_inputs = [16][*]const u8{
                inp[0],  inp[1],  inp[2],  inp[3],
                inp[4],  inp[5],  inp[6],  inp[7],
                inp[8],  inp[9],  inp[10], inp[11],
                inp[12], inp[13], inp[14], inp[15],
            };

            var simd_out: [16 * Blake3.digest_length]u8 = undefined;
            hashVec(Vec16, 16, sixteen_inputs, blocks, key, cnt, increment_counter, flags, flags_start, flags_end, &simd_out);

            @memcpy(out_ptr[0 .. 16 * Blake3.digest_length], &simd_out);

            if (increment_counter) cnt += 16;
            inp += 16;
            remaining -= 16;
            out_ptr += 16 * Blake3.digest_length;
        }
    }

    if (simd_degree >= 8) {
        while (remaining >= 8) {
            const eight_inputs = [8][*]const u8{
                inp[0], inp[1], inp[2], inp[3],
                inp[4], inp[5], inp[6], inp[7],
            };

            var simd_out: [8 * Blake3.digest_length]u8 = undefined;
            hashVec(Vec8, 8, eight_inputs, blocks, key, cnt, increment_counter, flags, flags_start, flags_end, &simd_out);

            @memcpy(out_ptr[0 .. 8 * Blake3.digest_length], &simd_out);

            if (increment_counter) cnt += 8;
            inp += 8;
            remaining -= 8;
            out_ptr += 8 * Blake3.digest_length;
        }
    }

    if (simd_degree >= 4) {
        while (remaining >= 4) {
            const four_inputs = [4][*]const u8{
                inp[0],
                inp[1],
                inp[2],
                inp[3],
            };

            var simd_out: [4 * Blake3.digest_length]u8 = undefined;
            hashVec(Vec4, 4, four_inputs, blocks, key, cnt, increment_counter, flags, flags_start, flags_end, &simd_out);

            @memcpy(out_ptr[0 .. 4 * Blake3.digest_length], &simd_out);

            if (increment_counter) cnt += 4;
            inp += 4;
            remaining -= 4;
            out_ptr += 4 * Blake3.digest_length;
        }
    }

    if (remaining > 0) {
        hashManyPortable(inp[0..remaining], remaining, blocks, key, cnt, increment_counter, flags, flags_start, flags_end, out_ptr[0 .. remaining * Blake3.digest_length]);
    }
}

fn hashMany(inputs: [][*]const u8, num_inputs: usize, blocks: usize, key: [8]u32, counter: u64, increment_counter: bool, flags: Flags, flags_start: Flags, flags_end: Flags, out: []u8) void {
    if (max_simd_degree >= 4) {
        hashManySimd(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out);
    } else {
        hashManyPortable(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out);
    }
}

fn compressChunksParallel(input: []const u8, key: [8]u32, chunk_counter: u64, flags: Flags, out: []u8) usize {
    var chunks_array: [max_simd_degree][*]const u8 = undefined;
    var input_position: usize = 0;
    var chunks_array_len: usize = 0;

    while (input.len - input_position >= chunk_length) {
        chunks_array[chunks_array_len] = input[input_position..].ptr;
        input_position += chunk_length;
        chunks_array_len += 1;
    }

    hashMany(chunks_array[0..chunks_array_len], chunks_array_len, chunk_length / Blake3.block_length, key, chunk_counter, true, flags, .{ .chunk_start = true }, .{ .chunk_end = true }, out);

    // Hash the remaining partial chunk, if there is one.
    if (input.len > input_position) {
        const counter = chunk_counter + @as(u64, chunks_array_len);
        var chunk_state = ChunkState.init(key, flags);
        chunk_state.chunk_counter = counter;
        chunk_state.update(input[input_position..]);
        const output = chunk_state.output();
        const cv = output.chainingValue();
        const cv_bytes = storeCvWords(cv);
        @memcpy(out[chunks_array_len * Blake3.digest_length ..][0..Blake3.digest_length], &cv_bytes);
        return chunks_array_len + 1;
    } else {
        return chunks_array_len;
    }
}

fn compressParentsParallel(child_chaining_values: []const u8, num_chaining_values: usize, key: [8]u32, flags: Flags, out: []u8) usize {
    var parents_array: [max_simd_degree_or_2][*]const u8 = undefined;
    var parents_array_len: usize = 0;

    while (num_chaining_values - (2 * parents_array_len) >= 2) {
        parents_array[parents_array_len] = child_chaining_values[2 * parents_array_len * Blake3.digest_length ..].ptr;
        parents_array_len += 1;
    }

    hashMany(parents_array[0..parents_array_len], parents_array_len, 1, key, 0, false, flags.with(.{ .parent = true }), .{}, .{}, out);

    // If there's an odd child CV left over, it becomes an output of this level.
    if (num_chaining_values > 2 * parents_array_len) {
        @memcpy(out[parents_array_len * Blake3.digest_length ..][0..Blake3.digest_length], child_chaining_values[2 * parents_array_len * Blake3.digest_length ..][0..Blake3.digest_length]);
        return parents_array_len + 1;
    } else {
        return parents_array_len;
    }
}

fn compressSubtreeWide(input: []const u8, key: [8]u32, chunk_counter: u64, flags: Flags, out: []u8) usize {
    // Small enough to hash all chunks in a single SIMD pass.
    if (input.len <= max_simd_degree * chunk_length) {
        return compressChunksParallel(input, key, chunk_counter, flags, out);
    }

    const left_input_len = leftSubtreeLen(input.len);
    const right_input = input[left_input_len..];
    const right_chunk_counter = chunk_counter + @as(u64, left_input_len / chunk_length);

    var cv_array: [2 * max_simd_degree_or_2 * Blake3.digest_length]u8 = undefined;
    // With a SIMD degree of 1, the left half still produces two CVs, so leave
    // room for both before the right half's outputs.
    var degree: usize = max_simd_degree;
    if (left_input_len > chunk_length and degree == 1) {
        degree = 2;
    }
    const right_cvs = cv_array[degree * Blake3.digest_length ..];

    const left_n = compressSubtreeWide(input[0..left_input_len], key, chunk_counter, flags, cv_array[0..]);
    const right_n = compressSubtreeWide(right_input, key, right_chunk_counter, flags, right_cvs);

    // When the SIMD degree is 1, left_n and right_n are both 1. Return the two
    // CVs directly rather than merging them, so this function always yields at
    // least two outputs and never compresses the root node.
    if (left_n == 1) {
        @memcpy(out[0 .. 2 * Blake3.digest_length], cv_array[0 .. 2 * Blake3.digest_length]);
        return 2;
    }

    const num_chaining_values = left_n + right_n;
    return compressParentsParallel(&cv_array, num_chaining_values, key, flags, out);
}

fn compressSubtreeToParentNode(input: []const u8, key: [8]u32, chunk_counter: u64, flags: Flags, out: *[2 * Blake3.digest_length]u8) void {
    var cv_array: [max_simd_degree_or_2 * Blake3.digest_length]u8 = undefined;
    var num_cvs = compressSubtreeWide(input, key, chunk_counter, flags, &cv_array);

    // Condense the CVs into further parent nodes until exactly two remain.
    if (max_simd_degree_or_2 > 2) {
        var out_array: [max_simd_degree_or_2 * Blake3.digest_length / 2]u8 = undefined;
        while (num_cvs > 2) {
            num_cvs = compressParentsParallel(&cv_array, num_cvs, key, flags, &out_array);
            @memcpy(cv_array[0 .. num_cvs * Blake3.digest_length], out_array[0 .. num_cvs * Blake3.digest_length]);
        }
    }

    @memcpy(out, cv_array[0 .. 2 * Blake3.digest_length]);
}

fn leftSubtreeLen(input_len: usize) usize {
    // The left subtree gets the largest power-of-two number of full chunks
    // that still leaves at least one byte for the right subtree.
    const full_chunks = (input_len - 1) / chunk_length;
    return @intCast(roundDownToPowerOf2(full_chunks) * chunk_length);
}

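// Worked examples for leftSubtreeLen, matching the rule in the comment above:
// inputs of 1025..2048 bytes give the left subtree one full chunk, 2049 bytes
// give it two, and so on in powers of two.
test "leftSubtreeLen splits on power-of-two chunk boundaries" {
    try std.testing.expectEqual(@as(usize, 1024), leftSubtreeLen(1025));
    try std.testing.expectEqual(@as(usize, 1024), leftSubtreeLen(2048));
    try std.testing.expectEqual(@as(usize, 2048), leftSubtreeLen(2049));
    try std.testing.expectEqual(@as(usize, 4096), leftSubtreeLen(8192));
}
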
const ChunkBatch = struct {
    input: []const u8,
    start_chunk: usize,
    end_chunk: usize,
    cvs: [][8]u32,
    key: [8]u32,
    flags: Flags,

    fn process(ctx: ChunkBatch) void {
        var cv_buffer: [max_simd_degree * Blake3.digest_length]u8 = undefined;
        var chunk_idx = ctx.start_chunk;

        while (chunk_idx < ctx.end_chunk) {
            const remaining = ctx.end_chunk - chunk_idx;
            const batch_size: usize = @min(remaining, max_simd_degree);
            const offset = chunk_idx * chunk_length;
            const batch_len = batch_size * chunk_length;

            const num_cvs = compressChunksParallel(
                ctx.input[offset..][0..batch_len],
                ctx.key,
                chunk_idx,
                ctx.flags,
                &cv_buffer,
            );

            for (0..num_cvs) |i| {
                const cv_bytes = cv_buffer[i * Blake3.digest_length ..][0..Blake3.digest_length];
                ctx.cvs[chunk_idx + i] = loadCvWords(cv_bytes.*);
            }

            chunk_idx += batch_size;
        }
    }
};

const ParentBatchContext = struct {
    input_cvs: [][8]u32,
    output_cvs: [][8]u32,
    start_idx: usize,
    end_idx: usize,
    key: [8]u32,
    flags: Flags,
};

fn processParentBatch(ctx: ParentBatchContext) void {
    for (ctx.start_idx..ctx.end_idx) |i| {
        const output = parentOutputFromCvs(ctx.input_cvs[i * 2], ctx.input_cvs[i * 2 + 1], ctx.key, ctx.flags);
        ctx.output_cvs[i] = output.chainingValue();
    }
}

fn processParentBatchSIMD(ctx: ParentBatchContext) void {
    const num_parents = ctx.end_idx - ctx.start_idx;
    if (num_parents == 0) return;

    // Convert input CVs to bytes for SIMD processing.
    var input_bytes: [max_simd_degree * 2 * Blake3.digest_length]u8 = undefined;
    var output_bytes: [max_simd_degree * Blake3.digest_length]u8 = undefined;
    var parents_array: [max_simd_degree][*]const u8 = undefined;

    var processed: usize = 0;
    while (processed < num_parents) {
        const batch_size: usize = @min(num_parents - processed, max_simd_degree);

        // Convert CV pairs to byte blocks for this batch.
        for (0..batch_size) |i| {
            const pair_idx = ctx.start_idx + processed + i;
            const left_cv = ctx.input_cvs[pair_idx * 2];
            const right_cv = ctx.input_cvs[pair_idx * 2 + 1];

            // Write left CV || right CV to form the 64-byte parent block.
            for (0..8) |j| {
                store32(input_bytes[i * 64 + j * 4 ..][0..4], left_cv[j]);
                store32(input_bytes[i * 64 + 32 + j * 4 ..][0..4], right_cv[j]);
            }
            parents_array[i] = input_bytes[i * 64 ..].ptr;
        }

        hashMany(parents_array[0..batch_size], batch_size, 1, ctx.key, 0, false, ctx.flags.with(.{ .parent = true }), .{}, .{}, output_bytes[0 .. batch_size * Blake3.digest_length]);

        for (0..batch_size) |i| {
            const output_idx = ctx.start_idx + processed + i;
            ctx.output_cvs[output_idx] = loadCvWords(output_bytes[i * Blake3.digest_length ..][0..Blake3.digest_length].*);
        }

        processed += batch_size;
    }
}

fn buildMerkleTreeLayerParallel(
    input_cvs: [][8]u32,
    output_cvs: [][8]u32,
    key: [8]u32,
    flags: Flags,
    io: Io,
) void {
    const num_parents = input_cvs.len / 2;

    // Process smaller tree layers sequentially (still using SIMD) to avoid
    // thread overhead. Tree layers shrink quickly, so only the first few
    // large layers are worth parallelizing.
    if (num_parents <= 1024) {
        processParentBatchSIMD(ParentBatchContext{
            .input_cvs = input_cvs,
            .output_cvs = output_cvs,
            .start_idx = 0,
            .end_idx = num_parents,
            .key = key,
            .flags = flags,
        });
        return;
    }

    const num_workers = Thread.getCpuCount() catch 1;
    const parents_per_worker = (num_parents + num_workers - 1) / num_workers;
    var group: Io.Group = .init;

    for (0..num_workers) |worker_id| {
        const start_idx = worker_id * parents_per_worker;
        if (start_idx >= num_parents) break;

        group.async(io, processParentBatchSIMD, .{ParentBatchContext{
            .input_cvs = input_cvs,
            .output_cvs = output_cvs,
            .start_idx = start_idx,
            .end_idx = @min(start_idx + parents_per_worker, num_parents),
            .key = key,
            .flags = flags,
        }});
    }
    group.wait(io);
}

fn parentOutput(parent_block: []const u8, key: [8]u32, flags: Flags) Output {
    var block: [Blake3.block_length]u8 = undefined;
    @memcpy(&block, parent_block[0..Blake3.block_length]);
    return Output{
        .input_cv = key,
        .block = block,
        .block_len = Blake3.block_length,
        .counter = 0,
        .flags = flags.with(.{ .parent = true }),
    };
}

fn parentOutputFromCvs(left_cv: [8]u32, right_cv: [8]u32, key: [8]u32, flags: Flags) Output {
    var block: [Blake3.block_length]u8 align(16) = undefined;
    for (0..8) |i| {
        store32(block[i * 4 ..][0..4], left_cv[i]);
        store32(block[(i + 8) * 4 ..][0..4], right_cv[i]);
    }
    return Output{
        .input_cv = key,
        .block = block,
        .block_len = Blake3.block_length,
        .counter = 0,
        .flags = flags.with(.{ .parent = true }),
    };
}

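// A consistency check between the two parent-output constructors above:
// building the parent block from raw bytes and from CV words must agree,
// since CVs are serialized little-endian. (This illustrative test uses `iv`
// as a stand-in key and an arbitrary byte pattern as the child CVs.)
test "parent output constructors agree" {
    var block: [Blake3.block_length]u8 = undefined;
    for (&block, 0..) |*byte, i| byte.* = @truncate(i);

    const left_cv = loadCvWords(block[0..Blake3.digest_length].*);
    const right_cv = loadCvWords(block[Blake3.digest_length..].*);

    const from_bytes = parentOutput(&block, iv, .{});
    const from_cvs = parentOutputFromCvs(left_cv, right_cv, iv, .{});
    try std.testing.expectEqual(from_bytes.chainingValue(), from_cvs.chainingValue());
}
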
const ChunkState = struct {
    cv: [8]u32 align(16),
    chunk_counter: u64,
    buf: [Blake3.block_length]u8 align(16),
    buf_len: u8,
    blocks_compressed: u8,
    flags: Flags,

    fn init(key: [8]u32, flags: Flags) ChunkState {
        return ChunkState{
            .cv = key,
            .chunk_counter = 0,
            .buf = @splat(0),
            .buf_len = 0,
            .blocks_compressed = 0,
            .flags = flags,
        };
    }

    fn reset(self: *ChunkState, key: [8]u32, chunk_counter: u64) void {
        self.cv = key;
        self.chunk_counter = chunk_counter;
        self.blocks_compressed = 0;
        self.buf = @splat(0);
        self.buf_len = 0;
    }

    fn len(self: *const ChunkState) usize {
        return (Blake3.block_length * @as(usize, self.blocks_compressed)) + @as(usize, self.buf_len);
    }

    fn fillBuf(self: *ChunkState, input: []const u8) usize {
        const take = @min(Blake3.block_length - @as(usize, self.buf_len), input.len);
        @memcpy(self.buf[self.buf_len..][0..take], input[0..take]);
        self.buf_len += @intCast(take);
        return take;
    }

    fn maybeStartFlag(self: *const ChunkState) Flags {
        return if (self.blocks_compressed == 0) .{ .chunk_start = true } else .{};
    }

    fn update(self: *ChunkState, input: []const u8) void {
        var inp = input;

        while (inp.len > 0) {
            if (self.buf_len == Blake3.block_length) {
                compressInPlace(&self.cv, &self.buf, Blake3.block_length, self.chunk_counter, self.flags.with(self.maybeStartFlag()));
                self.blocks_compressed += 1;
                self.buf = @splat(0);
                self.buf_len = 0;
            }

            const take = self.fillBuf(inp);
            inp = inp[take..];
        }
    }

    fn output(self: *const ChunkState) Output {
        const block_flags = self.flags.with(self.maybeStartFlag()).with(.{ .chunk_end = true });
        return Output{
            .input_cv = self.cv,
            .block = self.buf,
            .block_len = self.buf_len,
            .counter = self.chunk_counter,
            .flags = block_flags,
        };
    }
};

const Output = struct {
    input_cv: [8]u32 align(16),
    block: [Blake3.block_length]u8 align(16),
    block_len: u8,
    counter: u64,
    flags: Flags,

    fn chainingValue(self: *const Output) [8]u32 {
        var cv_words = self.input_cv;
        compressInPlace(&cv_words, &self.block, self.block_len, self.counter, self.flags);
        return cv_words;
    }

    fn rootBytes(self: *const Output, seek: u64, out: []u8) void {
        if (out.len == 0) return;

        var output_block_counter = seek / 64;
        const offset_within_block = @as(usize, @intCast(seek % 64));
        var out_remaining = out;

        if (offset_within_block > 0) {
            var wide_buf: [64]u8 = undefined;
            compressXof(&self.input_cv, &self.block, self.block_len, output_block_counter, self.flags.with(.{ .root = true }), &wide_buf);
            const available_bytes = 64 - offset_within_block;
            const bytes = @min(out_remaining.len, available_bytes);
            @memcpy(out_remaining[0..bytes], wide_buf[offset_within_block..][0..bytes]);
            out_remaining = out_remaining[bytes..];
            output_block_counter += 1;
        }

        while (out_remaining.len >= 64) {
            compressXof(&self.input_cv, &self.block, self.block_len, output_block_counter, self.flags.with(.{ .root = true }), out_remaining[0..64]);
            out_remaining = out_remaining[64..];
            output_block_counter += 1;
        }

        if (out_remaining.len > 0) {
            var wide_buf: [64]u8 = undefined;
            compressXof(&self.input_cv, &self.block, self.block_len, output_block_counter, self.flags.with(.{ .root = true }), &wide_buf);
            @memcpy(out_remaining, wide_buf[0..out_remaining.len]);
        }
    }
};

/// BLAKE3 is a cryptographic hash function that produces a 256-bit digest by
/// default but also supports extendable output.
pub const Blake3 = struct {
    pub const block_length = 64;
    pub const digest_length = 32;
    pub const key_length = 32;

    pub const Options = struct { key: ?[key_length]u8 = null };
    pub const KdfOptions = struct {};

    key: [8]u32,
    chunk: ChunkState,
    cv_stack_len: u8,
    cv_stack: [max_depth + 1][8]u32,

    /// Construct a new `Blake3` for the hash function, with an optional key.
    pub fn init(options: Options) Blake3 {
        if (options.key) |key| {
            const key_words = loadKeyWords(key);
            return init_internal(key_words, .{ .keyed_hash = true });
        } else {
            return init_internal(iv, .{});
        }
    }

    /// Construct a new `Blake3` for the key derivation function. The context
    /// string should be hardcoded, globally unique, and application-specific.
    pub fn initKdf(context: []const u8, options: KdfOptions) Blake3 {
        _ = options;
        var context_hasher = init_internal(iv, .{ .derive_key_context = true });
        context_hasher.update(context);
        var context_key: [key_length]u8 = undefined;
        context_hasher.final(&context_key);
        const context_key_words = loadKeyWords(context_key);
        return init_internal(context_key_words, .{ .derive_key_material = true });
    }

    pub fn hash(b: []const u8, out: []u8, options: Options) void {
        var d = Blake3.init(options);
        d.update(b);
        d.final(out);
    }

    pub fn hashParallel(b: []const u8, out: []u8, options: Options, allocator: std.mem.Allocator, io: Io) !void {
        if (b.len < parallel_threshold) {
            return hash(b, out, options);
        }

        const key_words = if (options.key) |key| loadKeyWords(key) else iv;
        const flags: Flags = if (options.key != null) .{ .keyed_hash = true } else .{};

        const num_full_chunks = b.len / chunk_length;
        const thread_count = Thread.getCpuCount() catch 1;
        if (thread_count <= 1 or num_full_chunks == 0) {
            return hash(b, out, options);
        }

        const cvs = try allocator.alloc([8]u32, num_full_chunks);
        defer allocator.free(cvs);

        // Process chunks in parallel.
        const num_workers = thread_count;
        const chunks_per_worker = (num_full_chunks + num_workers - 1) / num_workers;
        var group: Io.Group = .init;

        for (0..num_workers) |worker_id| {
            const start_chunk = worker_id * chunks_per_worker;
            if (start_chunk >= num_full_chunks) break;

            group.async(io, ChunkBatch.process, .{ChunkBatch{
                .input = b,
                .start_chunk = start_chunk,
                .end_chunk = @min(start_chunk + chunks_per_worker, num_full_chunks),
                .cvs = cvs,
                .key = key_words,
                .flags = flags,
            }});
        }
        group.wait(io);

        // Build the Merkle tree in parallel layers, using ping-pong buffers.
        const max_intermediate_size = (num_full_chunks + 1) / 2;
        const buffer0 = try allocator.alloc([8]u32, max_intermediate_size);
        defer allocator.free(buffer0);
        const buffer1 = try allocator.alloc([8]u32, max_intermediate_size);
        defer allocator.free(buffer1);

        var current_level = cvs;
        var next_level_buf = buffer0;
        var toggle = false;

        while (current_level.len > 8) {
            const num_parents = current_level.len / 2;
            const has_odd = current_level.len % 2 == 1;
            const next_level_size = num_parents + @intFromBool(has_odd);

            buildMerkleTreeLayerParallel(
                current_level[0 .. num_parents * 2],
                next_level_buf[0..num_parents],
                key_words,
                flags,
                io,
            );

            // An odd leftover CV is carried up to the next level unchanged.
            if (has_odd) {
                next_level_buf[num_parents] = current_level[current_level.len - 1];
            }

            current_level = next_level_buf[0..next_level_size];
            next_level_buf = if (toggle) buffer0 else buffer1;
            toggle = !toggle;
        }

        // Finalize the remaining small tree sequentially.
        var hasher = init_internal(key_words, flags);
        for (current_level, 0..) |cv, i| hasher.pushCv(cv, i);

        hasher.chunk.chunk_counter = num_full_chunks;
        const remaining_bytes = b.len % chunk_length;
        if (remaining_bytes > 0) {
            hasher.chunk.update(b[num_full_chunks * chunk_length ..]);
            hasher.mergeCvStack(hasher.chunk.chunk_counter);
        }

        hasher.final(out);
    }

    fn init_internal(key: [8]u32, flags: Flags) Blake3 {
        return Blake3{
            .key = key,
            .chunk = ChunkState.init(key, flags),
            .cv_stack_len = 0,
            .cv_stack = undefined,
        };
    }

    fn mergeCvStack(self: *Blake3, total_len: u64) void {
        const post_merge_stack_len = @as(u8, @intCast(@popCount(total_len)));
        while (self.cv_stack_len > post_merge_stack_len) {
            const left_cv = self.cv_stack[self.cv_stack_len - 2];
            const right_cv = self.cv_stack[self.cv_stack_len - 1];
            const output = parentOutputFromCvs(left_cv, right_cv, self.key, self.chunk.flags);
            const cv = output.chainingValue();
            self.cv_stack[self.cv_stack_len - 2] = cv;
            self.cv_stack_len -= 1;
        }
    }

    fn pushCv(self: *Blake3, new_cv: [8]u32, chunk_counter: u64) void {
        self.mergeCvStack(chunk_counter);
        self.cv_stack[self.cv_stack_len] = new_cv;
        self.cv_stack_len += 1;
    }

    /// Add input to the hash state. This can be called any number of times.
    pub fn update(self: *Blake3, input: []const u8) void {
        if (input.len == 0) return;

        var inp = input;

        // If the current chunk is partially filled, top it up first.
        if (self.chunk.len() > 0) {
            const take = @min(chunk_length - self.chunk.len(), inp.len);
            self.chunk.update(inp[0..take]);
            inp = inp[take..];
            if (inp.len > 0) {
                const output = self.chunk.output();
                const chunk_cv = output.chainingValue();
                self.pushCv(chunk_cv, self.chunk.chunk_counter);
                self.chunk.reset(self.key, self.chunk.chunk_counter + 1);
            } else {
                return;
            }
        }

        // Hash complete subtrees directly while more than one chunk remains.
        while (inp.len > chunk_length) {
            var subtree_len = roundDownToPowerOf2(inp.len);
            const count_so_far = self.chunk.chunk_counter * chunk_length;

            // Shrink the subtree until the bytes hashed so far are aligned to it.
            while ((subtree_len - 1) & count_so_far != 0) {
                subtree_len /= 2;
            }

            const subtree_chunks = subtree_len / chunk_length;
            if (subtree_len <= chunk_length) {
                var chunk_state = ChunkState.init(self.key, self.chunk.flags);
                chunk_state.chunk_counter = self.chunk.chunk_counter;
                chunk_state.update(inp[0..@intCast(subtree_len)]);
                const output = chunk_state.output();
                const cv = output.chainingValue();
                self.pushCv(cv, chunk_state.chunk_counter);
            } else {
                var cv_pair: [2 * digest_length]u8 = undefined;
                compressSubtreeToParentNode(inp[0..@intCast(subtree_len)], self.key, self.chunk.chunk_counter, self.chunk.flags, &cv_pair);
                const left_cv = loadCvWords(cv_pair[0..digest_length].*);
                const right_cv = loadCvWords(cv_pair[digest_length..][0..digest_length].*);
                self.pushCv(left_cv, self.chunk.chunk_counter);
                self.pushCv(right_cv, self.chunk.chunk_counter + (subtree_chunks / 2));
            }
            self.chunk.chunk_counter += subtree_chunks;
            inp = inp[@intCast(subtree_len)..];
        }

        if (inp.len > 0) {
            self.chunk.update(inp);
            self.mergeCvStack(self.chunk.chunk_counter);
        }
    }

    /// Finalize the hash and write any number of output bytes.
    pub fn final(self: *const Blake3, out: []u8) void {
        self.finalizeSeek(0, out);
    }

    /// Finalize the hash and write any number of output bytes, starting at a
    /// given seek position. This is an XOF (extendable-output function) extension.
    pub fn finalizeSeek(self: *const Blake3, seek: u64, out: []u8) void {
        if (out.len == 0) return;

        if (self.cv_stack_len == 0) {
            const output = self.chunk.output();
            output.rootBytes(seek, out);
            return;
        }

        var output: Output = undefined;
        var cvs_remaining: usize = undefined;

        if (self.chunk.len() > 0) {
            cvs_remaining = self.cv_stack_len;
            output = self.chunk.output();
        } else {
            cvs_remaining = self.cv_stack_len - 2;
            const left_cv = self.cv_stack[cvs_remaining];
            const right_cv = self.cv_stack[cvs_remaining + 1];
            output = parentOutputFromCvs(left_cv, right_cv, self.key, self.chunk.flags);
        }

        while (cvs_remaining > 0) {
            cvs_remaining -= 1;
            const left_cv = self.cv_stack[cvs_remaining];
            const right_cv = output.chainingValue();
            output = parentOutputFromCvs(left_cv, right_cv, self.key, self.chunk.flags);
        }

        output.rootBytes(seek, out);
    }

    pub fn reset(self: *Blake3) void {
        self.chunk.reset(self.key, 0);
        self.cv_stack_len = 0;
    }
};

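// A minimal usage sketch for the incremental API: feeding the input in pieces
// must match the one-shot helper, and an extended output must begin with the
// default-length digest.
test "Blake3 incremental usage example" {
    const message = "hello BLAKE3";

    var one_shot: [Blake3.digest_length]u8 = undefined;
    Blake3.hash(message, &one_shot, .{});

    var hasher = Blake3.init(.{});
    hasher.update(message[0..5]);
    hasher.update(message[5..]);
    var incremental: [Blake3.digest_length]u8 = undefined;
    hasher.final(&incremental);
    try std.testing.expectEqualSlices(u8, &one_shot, &incremental);

    var extended: [64]u8 = undefined;
    hasher.final(&extended);
    try std.testing.expectEqualSlices(u8, &one_shot, extended[0..Blake3.digest_length]);
}
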
// Use named type declarations to work around a crash with anonymous structs (issue #4373).
const ReferenceTest = struct {
    key: *const [Blake3.key_length]u8,
    context_string: []const u8,
    cases: []const ReferenceTestCase,
};

const ReferenceTestCase = struct {
    input_len: usize,
    hash: *const [262]u8,
    keyed_hash: *const [262]u8,
    derive_key: *const [262]u8,
};

// Each test is an input length and three outputs, one for each of the `hash`, `keyed_hash`, and
// `derive_key` modes. The input in each case is filled with a 251-byte-long repeating pattern:
// 0, 1, 2, ..., 249, 250, 0, 1, ... The key used with `keyed_hash` is the 32-byte ASCII string
// given in the `key` field below. For `derive_key`, the test input is used as the input key, and
// the context string is 'BLAKE3 2019-12-27 16:29:52 test vectors context'. (As good practice for
// following the security requirements of `derive_key`, test runners should make that context
// string a hardcoded constant, and we do not provide it in machine-readable form.) Outputs are
// encoded as hexadecimal. Each case is an extended output, and implementations should also check
// that the first 32 bytes match their default-length output.
//
// Source: https://github.com/BLAKE3-team/BLAKE3/blob/92d421dea1a89e2f079f4dbd93b0dab41234b279/test_vectors/test_vectors.json
const reference_test = ReferenceTest{
    .key = "whats the Elvish word for friend",
    .context_string = "BLAKE3 2019-12-27 16:29:52 test vectors context",
    .cases = &[_]ReferenceTestCase{
        .{
            .input_len = 0,
            .hash = "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262e00f03e7b69af26b7faaf09fcd333050338ddfe085b8cc869ca98b206c08243a26f5487789e8f660afe6c99ef9e0c52b92e7393024a80459cf91f476f9ffdbda7001c22e159b402631f277ca96f2defdf1078282314e763699a31c5363165421cce14d",
            .keyed_hash = "92b2b75604ed3c761f9d6f62392c8a9227ad0ea3f09573e783f1498a4ed60d26b18171a2f22a4b94822c701f107153dba24918c4bae4d2945c20ece13387627d3b73cbf97b797d5e59948c7ef788f54372df45e45e4293c7dc18c1d41144a9758be58960856be1eabbe22c2653190de560ca3b2ac4aa692a9210694254c371e851bc8f",
            .derive_key = "2cc39783c223154fea8dfb7c1b1660f2ac2dcbd1c1de8277b0b0dd39b7e50d7d905630c8be290dfcf3e6842f13bddd573c098c3f17361f1f206b8cad9d088aa4a3f746752c6b0ce6a83b0da81d59649257cdf8eb3e9f7d4998e41021fac119deefb896224ac99f860011f73609e6e0e4540f93b273e56547dfd3aa1a035ba6689d89a0",
        },
        .{
            .input_len = 1,
            .hash = "2d3adedff11b61f14c886e35afa036736dcd87a74d27b5c1510225d0f592e213c3a6cb8bf623e20cdb535f8d1a5ffb86342d9c0b64aca3bce1d31f60adfa137b358ad4d79f97b47c3d5e79f179df87a3b9776ef8325f8329886ba42f07fb138bb502f4081cbcec3195c5871e6c23e2cc97d3c69a613eba131e5f1351f3f1da786545e5",
            .keyed_hash = "6d7878dfff2f485635d39013278ae14f1454b8c0a3a2d34bc1ab38228a80c95b6568c0490609413006fbd428eb3fd14e7756d90f73a4725fad147f7bf70fd61c4e0cf7074885e92b0e3f125978b4154986d4fb202a3f331a3fb6cf349a3a70e49990f98fe4289761c8602c4e6ab1138d31d3b62218078b2f3ba9a88e1d08d0dd4cea11",
            .derive_key = "b3e2e340a117a499c6cf2398a19ee0d29cca2bb7404c73063382693bf66cb06c5827b91bf889b6b97c5477f535361caefca0b5d8c4746441c57617111933158950670f9aa8a05d791daae10ac683cbef8faf897c84e6114a59d2173c3f417023a35d6983f2c7dfa57e7fc559ad751dbfb9ffab39c2ef8c4aafebc9ae973a64f0c76551",
        },
        .{
            .input_len = 1023,
            .hash = "10108970eeda3eb932baac1428c7a2163b0e924c9a9e25b35bba72b28f70bd11a182d27a591b05592b15607500e1e8dd56bc6c7fc063715b7a1d737df5bad3339c56778957d870eb9717b57ea3d9fb68d1b55127bba6a906a4a24bbd5acb2d123a37b28f9e9a81bbaae360d58f85e5fc9d75f7c370a0cc09b6522d9c8d822f2f28f485",
            .keyed_hash = "c951ecdf03288d0fcc96ee3413563d8a6d3589547f2c2fb36d9786470f1b9d6e890316d2e6d8b8c25b0a5b2180f94fb1a158ef508c3cde45e2966bd796a696d3e13efd86259d756387d9becf5c8bf1ce2192b87025152907b6d8cc33d17826d8b7b9bc97e38c3c85108ef09f013e01c229c20a83d9e8efac5b37470da28575fd755a10",
            .derive_key = "74a16c1c3d44368a86e1ca6df64be6a2f64cce8f09220787450722d85725dea59c413264404661e9e4d955409dfe4ad3aa487871bcd454ed12abfe2c2b1eb7757588cf6cb18d2eccad49e018c0d0fec323bec82bf1644c6325717d13ea712e6840d3e6e730d35553f59eff5377a9c350bcc1556694b924b858f329c44ee64b884ef00d",
        },
        .{
            .input_len = 1024,
            .hash = "42214739f095a406f3fc83deb889744ac00df831c10daa55189b5d121c855af71cf8107265ecdaf8505b95d8fcec83a98a6a96ea5109d2c179c47a387ffbb404756f6eeae7883b446b70ebb144527c2075ab8ab204c0086bb22b7c93d465efc57f8d917f0b385c6df265e77003b85102967486ed57db5c5ca170ba441427ed9afa684e",
            .keyed_hash = "75c46f6f3d9eb4f55ecaaee480db732e6c2105546f1e675003687c31719c7ba4a78bc838c72852d4f49c864acb7adafe2478e824afe51c8919d06168414c265f298a8094b1ad813a9b8614acabac321f24ce61c5a5346eb519520d38ecc43e89b5000236df0597243e4d2493fd626730e2ba17ac4d8824d09d1a4a8f57b8227778e2de",
            .derive_key = "7356cd7720d5b66b6d0697eb3177d9f8d73a4a5c5e968896eb6a6896843027066c23b601d3ddfb391e90d5c8eccdef4ae2a264bce9e612ba15e2bc9d654af1481b2e75dbabe615974f1070bba84d56853265a34330b4766f8e75edd1f4a1650476c10802f22b64bd3919d246ba20a17558bc51c199efdec67e80a227251808d8ce5bad",
        },
        .{
            .input_len = 1025,
            .hash = "d00278ae47eb27b34faecf67b4fe263f82d5412916c1ffd97c8cb7fb814b8444f4c4a22b4b399155358a994e52bf255de60035742ec71bd08ac275a1b51cc6bfe332b0ef84b409108cda080e6269ed4b3e2c3f7d722aa4cdc98d16deb554e5627be8f955c98e1d5f9565a9194cad0c4285f93700062d9595adb992ae68ff12800ab67a",
            .keyed_hash = "357dc55de0c7e382c900fd6e320acc04146be01db6a8ce7210b7189bd664ea69362396b77fdc0d2634a552970843722066c3c15902ae5097e00ff53f1e116f1cd5352720113a837ab2452cafbde4d54085d9cf5d21ca613071551b25d52e69d6c81123872b6f19cd3bc1333edf0c52b94de23ba772cf82636cff4542540a7738d5b930",
            .derive_key = "effaa245f065fbf82ac186839a249707c3bddf6d3fdda22d1b95a3c970379bcb5d31013a167509e9066273ab6e2123bc835b408b067d88f96addb550d96b6852dad38e320b9d940f86db74d398c770f462118b35d2724efa13da97194491d96dd37c3c09cbef665953f2ee85ec83d88b88d11547a6f911c8217cca46defa2751e7f3ad",
        },
        .{
            .input_len = 2048,
            .hash = "e776b6028c7cd22a4d0ba182a8bf62205d2ef576467e838ed6f2529b85fba24a9a60bf80001410ec9eea6698cd537939fad4749edd484cb541aced55cd9bf54764d063f23f6f1e32e12958ba5cfeb1bf618ad094266d4fc3c968c2088f677454c288c67ba0dba337b9d91c7e1ba586dc9a5bc2d5e90c14f53a8863ac75655461cea8f9",
            .keyed_hash = "879cf1fa2ea0e79126cb1063617a05b6ad9d0b696d0d757cf053439f60a99dd10173b961cd574288194b23ece278c330fbb8585485e74967f31352a8183aa782b2b22f26cdcadb61eed1a5bc144b8198fbb0c13abbf8e3192c145d0a5c21633b0ef86054f42809df823389ee40811a5910dcbd1018af31c3b43aa55201ed4edaac74fe",
            .derive_key = "7b2945cb4fef70885cc5d78a87bf6f6207dd901ff239201351ffac04e1088a23e2c11a1ebffcea4d80447867b61badb1383d842d4e79645d48dd82ccba290769caa7af8eaa1bd78a2a5e6e94fbdab78d9c7b74e894879f6a515257ccf6f95056f4e25390f24f6b35ffbb74b766202569b1d797f2d4bd9d17524c720107f985f4ddc583",
        },
        .{
            .input_len = 2049,
            .hash = "5f4d72f40d7a5f82b15ca2b2e44b1de3c2ef86c426c95c1af0b687952256303096de31d71d74103403822a2e0bc1eb193e7aecc9643a76b7bbc0c9f9c52e8783aae98764ca468962b5c2ec92f0c74eb5448d519713e09413719431c802f948dd5d90425a4ecdadece9eb178d80f26efccae630734dff63340285adec2aed3b51073ad3",
            .keyed_hash = "9f29700902f7c86e514ddc4df1e3049f258b2472b6dd5267f61bf13983b78dd5f9a88abfefdfa1e00b418971f2b39c64ca621e8eb37fceac57fd0c8fc8e117d43b81447be22d5d8186f8f5919ba6bcc6846bd7d50726c06d245672c2ad4f61702c646499ee1173daa061ffe15bf45a631e2946d616a4c345822f1151284712f76b2b0e",
            .derive_key = "2ea477c5515cc3dd606512ee72bb3e0e758cfae7232826f35fb98ca1bcbdf27316d8e9e79081a80b046b60f6a263616f33ca464bd78d79fa18200d06c7fc9bffd808cc4755277a7d5e09da0f29ed150f6537ea9bed946227ff184cc66a72a5f8c1e4bd8b04e81cf40fe6dc4427ad5678311a61f4ffc39d195589bdbc670f63ae70f4b6",
        },
        .{
            .input_len = 3072,
            .hash = "b98cb0ff3623be03326b373de6b9095218513e64f1ee2edd2525c7ad1e5cffd29a3f6b0b978d6608335c09dc94ccf682f9951cdfc501bfe47b9c9189a6fc7b404d120258506341a6d802857322fbd20d3e5dae05b95c88793fa83db1cb08e7d8008d1599b6209d78336e24839724c191b2a52a80448306e0daa84a3fdb566661a37e11",
            .keyed_hash = "044a0e7b172a312dc02a4c9a818c036ffa2776368d7f528268d2e6b5df19177022f302d0529e4174cc507c463671217975e81dab02b8fdeb0d7ccc7568dd22574c783a76be215441b32e91b9a904be8ea81f7a0afd14bad8ee7c8efc305ace5d3dd61b996febe8da4f56ca0919359a7533216e2999fc87ff7d8f176fbecb3d6f34278b",
            .derive_key = "050df97f8c2ead654d9bb3ab8c9178edcd902a32f8495949feadcc1e0480c46b3604131bbd6e3ba573b6dd682fa0a63e5b165d39fc43a625d00207607a2bfeb65ff1d29292152e26b298868e3b87be95d6458f6f2ce6118437b632415abe6ad522874bcd79e4030a5e7bad2efa90a7a7c67e93f0a18fb28369d0a9329ab5c24134ccb0",
        },
        .{
            .input_len = 3073,
            .hash = "7124b49501012f81cc7f11ca069ec9226cecb8a2c850cfe644e327d22d3e1cd39a27ae3b79d68d89da9bf25bc27139ae65a324918a5f9b7828181e52cf373c84f35b639b7fccbb985b6f2fa56aea0c18f531203497b8bbd3a07ceb5926f1cab74d14bd66486d9a91eba99059a98bd1cd25876b2af5a76c3e9eed554ed72ea952b603bf",
            .keyed_hash = "68dede9bef00ba89e43f31a6825f4cf433389fedae75c04ee9f0cf16a427c95a96d6da3fe985054d3478865be9a092250839a697bbda74e279e8a9e69f0025e4cfddd6cfb434b1cd9543aaf97c635d1b451a4386041e4bb100f5e45407cbbc24fa53ea2de3536ccb329e4eb9466ec37093a42cf62b82903c696a93a50b702c80f3c3c5",
            .derive_key = "72613c9ec9ff7e40f8f5c173784c532ad852e827dba2bf85b2ab4b76f7079081576288e552647a9d86481c2cae75c2dd4e7c5195fb9ada1ef50e9c5098c249d743929191441301c69e1f48505a4305ec1778450ee48b8e69dc23a25960fe33070ea549119599760a8a2d28aeca06b8c5e9ba58bc19e11fe57b6ee98aa44b2a8e6b14a5",
        },
        .{
            .input_len = 4096,
            .hash = "015094013f57a5277b59d8475c0501042c0b642e531b0a1c8f58d2163229e9690289e9409ddb1b99768eafe1623da896faf7e1114bebeadc1be30829b6f8af707d85c298f4f0ff4d9438aef948335612ae921e76d411c3a9111df62d27eaf871959ae0062b5492a0feb98ef3ed4af277f5395172dbe5c311918ea0074ce0036454f620",
            .keyed_hash = "befc660aea2f1718884cd8deb9902811d332f4fc4a38cf7c7300d597a081bfc0bbb64a36edb564e01e4b4aaf3b060092a6b838bea44afebd2deb8298fa562b7b597c757b9df4c911c3ca462e2ac89e9a787357aaf74c3b56d5c07bc93ce899568a3eb17d9250c20f6c5f6c1e792ec9a2dcb715398d5a6ec6d5c54f586a00403a1af1de",
            .derive_key = "1e0d7f3db8c414c97c6307cbda6cd27ac3b030949da8e23be1a1a924ad2f25b9d78038f7b198596c6cc4a9ccf93223c08722d684f240ff6569075ed81591fd93f9fff1110b3a75bc67e426012e5588959cc5a4c192173a03c00731cf84544f65a2fb9378989f72e9694a6a394a8a30997c2e67f95a504e631cd2c5f55246024761b245",
        },
        .{
            .input_len = 4097,
            .hash = "9b4052b38f1c5fc8b1f9ff7ac7b27cd242487b3d890d15c96a1c25b8aa0fb99505f91b0b5600a11251652eacfa9497b31cd3c409ce2e45cfe6c0a016967316c426bd26f619eab5d70af9a418b845c608840390f361630bd497b1ab44019316357c61dbe091ce72fc16dc340ac3d6e009e050b3adac4b5b2c92e722cffdc46501531956",
            .keyed_hash = "00df940cd36bb9fa7cbbc3556744e0dbc8191401afe70520ba292ee3ca80abbc606db4976cfdd266ae0abf667d9481831ff12e0caa268e7d3e57260c0824115a54ce595ccc897786d9dcbf495599cfd90157186a46ec800a6763f1c59e36197e9939e900809f7077c102f888caaf864b253bc41eea812656d46742e4ea42769f89b83f",
            .derive_key = "aca51029626b55fda7117b42a7c211f8c6e9ba4fe5b7a8ca922f34299500ead8a897f66a400fed9198fd61dd2d58d382458e64e100128075fc54b860934e8de2e84170734b06e1d212a117100820dbc48292d148afa50567b8b84b1ec336ae10d40c8c975a624996e12de31abbe135d9d159375739c333798a80c64ae895e51e22f3ad",
        },
        .{
            .input_len = 5120,
            .hash = "9cadc15fed8b5d854562b26a9536d9707cadeda9b143978f319ab34230535833acc61c8fdc114a2010ce8038c853e121e1544985133fccdd0a2d507e8e615e611e9a0ba4f47915f49e53d721816a9198e8b30f12d20ec3689989175f1bf7a300eee0d9321fad8da232ece6efb8e9fd81b42ad161f6b9550a069e66b11b40487a5f5059",
            .keyed_hash = "2c493e48e9b9bf31e0553a22b23503c0a3388f035cece68eb438d22fa1943e209b4dc9209cd80ce7c1f7c9a744658e7e288465717ae6e56d5463d4f80cdb2ef56495f6a4f5487f69749af0c34c2cdfa857f3056bf8d807336a14d7b89bf62bef2fb54f9af6a546f818dc1e98b9e07f8a5834da50fa28fb5874af91bf06020d1bf0120e",
            .derive_key = "7a7acac8a02adcf3038d74cdd1d34527de8a0fcc0ee3399d1262397ce5817f6055d0cefd84d9d57fe792d65a278fd20384ac6c30fdb340092f1a74a92ace99c482b28f0fc0ef3b923e56ade20c6dba47e49227166251337d80a037e987ad3a7f728b5ab6dfafd6e2ab1bd583a95d9c895ba9c2422c24ea0f62961f0dca45cad47bfa0d",
        },
        .{
            .input_len = 5121,
            .hash = "628bd2cb2004694adaab7bbd778a25df25c47b9d4155a55f8fbd79f2fe154cff96adaab0613a6146cdaabe498c3a94e529d3fc1da2bd08edf54ed64d40dcd6777647eac51d8277d70219a9694334a68bc8f0f23e20b0ff70ada6f844542dfa32cd4204ca1846ef76d811cdb296f65e260227f477aa7aa008bac878f72257484f2b6c95",
            .keyed_hash = "6ccf1c34753e7a044db80798ecd0782a8f76f33563accaddbfbb2e0ea4b2d0240d07e63f13667a8d1490e5e04f13eb617aea16a8c8a5aaed1ef6fbde1b0515e3c81050b361af6ead126032998290b563e3caddeaebfab592e155f2e161fb7cba939092133f23f9e65245e58ec23457b78a2e8a125588aad6e07d7f11a85b88d375b72d",
            .derive_key = "b07f01e518e702f7ccb44a267e9e112d403a7b3f4883a47ffbed4b48339b3c341a0add0ac032ab5aaea1e4e5b004707ec5681ae0fcbe3796974c0b1cf31a194740c14519273eedaabec832e8a784b6e7cfc2c5952677e6c3f2c3914454082d7eb1ce1766ac7d75a4d3001fc89544dd46b5147382240d689bbbaefc359fb6ae30263165",
        },
        .{
            .input_len = 6144,
            .hash = "3e2e5b74e048f3add6d21faab3f83aa44d3b2278afb83b80b3c35164ebeca2054d742022da6fdda444ebc384b04a54c3ac5839b49da7d39f6d8a9db03deab32aade156c1c0311e9b3435cde0ddba0dce7b26a376cad121294b689193508dd63151603c6ddb866ad16c2ee41585d1633a2cea093bea714f4c5d6b903522045b20395c83",
            .keyed_hash = "3d6b6d21281d0ade5b2b016ae4034c5dec10ca7e475f90f76eac7138e9bc8f1dc35754060091dc5caf3efabe0603c60f45e415bb3407db67e6beb3d11cf8e4f7907561f05dace0c15807f4b5f389c841eb114d81a82c02a00b57206b1d11fa6e803486b048a5ce87105a686dee041207e095323dfe172df73deb8c9532066d88f9da7e",
            .derive_key = "2a95beae63ddce523762355cf4b9c1d8f131465780a391286a5d01abb5683a1597099e3c6488aab6c48f3c15dbe1942d21dbcdc12115d19a8b8465fb54e9053323a9178e4275647f1a9927f6439e52b7031a0b465c861a3fc531527f7758b2b888cf2f20582e9e2c593709c0a44f9c6e0f8b963994882ea4168827823eef1f64169fef",
        },
        .{
            .input_len = 6145,
            .hash = "f1323a8631446cc50536a9f705ee5cb619424d46887f3c376c695b70e0f0507f18a2cfdd73c6e39dd75ce7c1c6e3ef238fd54465f053b25d21044ccb2093beb015015532b108313b5829c3621ce324b8e14229091b7c93f32db2e4e63126a377d2a63a3597997d4f1cba59309cb4af240ba70cebff9a23d5e3ff0cdae2cfd54e070022",
            .keyed_hash = "9ac301e9e39e45e3250a7e3b3df701aa0fb6889fbd80eeecf28dbc6300fbc539f3c184ca2f59780e27a576c1d1fb9772e99fd17881d02ac7dfd39675aca918453283ed8c3169085ef4a466b91c1649cc341dfdee60e32231fc34c9c4e0b9a2ba87ca8f372589c744c15fd6f985eec15e98136f25beeb4b13c4e43dc84abcc79cd4646c",
            .derive_key = "379bcc61d0051dd489f686c13de00d5b14c505245103dc040d9e4dd1facab8e5114493d029bdbd295aaa744a59e31f35c7f52dba9c3642f773dd0b4262a9980a2aef811697e1305d37ba9d8b6d850ef07fe41108993180cf779aeece363704c76483458603bbeeb693cffbbe5588d1f3535dcad888893e53d977424bb707201569a8d2",
        },
        .{
            .input_len = 7168,
            .hash = "61da957ec2499a95d6b8023e2b0e604ec7f6b50e80a9678b89d2628e99ada77a5707c321c83361793b9af62a40f43b523df1c8633cecb4cd14d00bdc79c78fca5165b863893f6d38b02ff7236c5a9a8ad2dba87d24c547cab046c29fc5bc1ed142e1de4763613bb162a5a538e6ef05ed05199d751f9eb58d332791b8d73fb74e4fce95",
            .keyed_hash = "b42835e40e9d4a7f42ad8cc04f85a963a76e18198377ed84adddeaecacc6f3fca2f01d5277d69bb681c70fa8d36094f73ec06e452c80d2ff2257ed82e7ba348400989a65ee8daa7094ae0933e3d2210ac6395c4af24f91c2b590ef87d7788d7066ea3eaebca4c08a4f14b9a27644f99084c3543711b64a070b94f2c9d1d8a90d035d52",
            .derive_key = "11c37a112765370c94a51415d0d651190c288566e295d505defdad895dae223730d5a5175a38841693020669c7638f40b9bc1f9f39cf98bda7a5b54ae24218a800a2116b34665aa95d846d97ea988bfcb53dd9c055d588fa21ba78996776ea6c40bc428b53c62b5f3ccf200f647a5aae8067f0ea1976391fcc72af1945100e2a6dcb88",
        },
        .{
            .input_len = 7169,
            .hash = "a003fc7a51754a9b3c7fae0367ab3d782dccf28855a03d435f8cfe74605e781798a8b20534be1ca9eb2ae2df3fae2ea60e48c6fb0b850b1385b5de0fe460dbe9d9f9b0d8db4435da75c601156df9d047f4ede008732eb17adc05d96180f8a73548522840779e6062d643b79478a6e8dbce68927f36ebf676ffa7d72d5f68f050b119c8",
            .keyed_hash = "ed9b1a922c046fdb3d423ae34e143b05ca1bf28b710432857bf738bcedbfa5113c9e28d72fcbfc020814ce3f5d4fc867f01c8f5b6caf305b3ea8a8ba2da3ab69fabcb438f19ff11f5378ad4484d75c478de425fb8e6ee809b54eec9bdb184315dc856617c09f5340451bf42fd3270a7b0b6566169f242e533777604c118a6358250f54",
            .derive_key = "554b0a5efea9ef183f2f9b931b7497995d9eb26f5c5c6dad2b97d62fc5ac31d99b20652c016d88ba2a611bbd761668d5eda3e568e940faae24b0d9991c3bd25a65f770b89fdcadabcb3d1a9c1cb63e69721cacf1ae69fefdcef1e3ef41bc5312ccc17222199e47a26552c6adc460cf47a72319cb5039369d0060eaea59d6c65130f1dd",
        },
        .{
            .input_len = 8192,
            .hash = "aae792484c8efe4f19e2ca7d371d8c467ffb10748d8a5a1ae579948f718a2a635fe51a27db045a567c1ad51be5aa34c01c6651c4d9b5b5ac5d0fd58cf18dd61a47778566b797a8c67df7b1d60b97b19288d2d877bb2df417ace009dcb0241ca1257d62712b6a4043b4ff33f690d849da91ea3bf711ed583cb7b7a7da2839ba71309bbf",
            .keyed_hash = "dc9637c8845a770b4cbf76b8daec0eebf7dc2eac11498517f08d44c8fc00d58a4834464159dcbc12a0ba0c6d6eb41bac0ed6585cabfe0aca36a375e6c5480c22afdc40785c170f5a6b8a1107dbee282318d00d915ac9ed1143ad40765ec120042ee121cd2baa36250c618adaf9e27260fda2f94dea8fb6f08c04f8f10c78292aa46102",
            .derive_key = "ad01d7ae4ad059b0d33baa3c01319dcf8088094d0359e5fd45d6aeaa8b2d0c3d4c9e58958553513b67f84f8eac653aeeb02ae1d5672dcecf91cd9985a0e67f4501910ecba25555395427ccc7241d70dc21c190e2aadee875e5aae6bf1912837e53411dabf7a56cbf8e4fb780432b0d7fe6cec45024a0788cf5874616407757e9e6bef7",
        },
        .{
            .input_len = 8193,
            .hash = "bab6c09cb8ce8cf459261398d2e7aef35700bf488116ceb94a36d0f5f1b7bc3bb2282aa69be089359ea1154b9a9286c4a56af4de975a9aa4a5c497654914d279bea60bb6d2cf7225a2fa0ff5ef56bbe4b149f3ed15860f78b4e2ad04e158e375c1e0c0b551cd7dfc82f1b155c11b6b3ed51ec9edb30d133653bb5709d1dbd55f4e1ff6",
            .keyed_hash = "954a2a75420c8d6547e3ba5b98d963e6fa6491addc8c023189cc519821b4a1f5f03228648fd983aef045c2fa8290934b0866b615f585149587dda2299039965328835a2b18f1d63b7e300fc76ff260b571839fe44876a4eae66cbac8c67694411ed7e09df51068a22c6e67d6d3dd2cca8ff12e3275384006c80f4db68023f24eebba57",
            .derive_key = "af1e0346e389b17c23200270a64aa4e1ead98c61695d917de7d5b00491c9b0f12f20a01d6d622edf3de026a4db4e4526225debb93c1237934d71c7340bb5916158cbdafe9ac3225476b6ab57a12357db3abbad7a26c6e66290e44034fb08a20a8d0ec264f309994d2810c49cfba6989d7abb095897459f5425adb48aba07c5fb3c83c0",
        },
        .{
            .input_len = 16384,
            .hash = "f875d6646de28985646f34ee13be9a576fd515f76b5b0a26bb324735041ddde49d764c270176e53e97bdffa58d549073f2c660be0e81293767ed4e4929f9ad34bbb39a529334c57c4a381ffd2a6d4bfdbf1482651b172aa883cc13408fa67758a3e47503f93f87720a3177325f7823251b85275f64636a8f1d599c2e49722f42e93893",
            .keyed_hash = "9e9fc4eb7cf081ea7c47d1807790ed211bfec56aa25bb7037784c13c4b707b0df9e601b101e4cf63a404dfe50f2e1865bb12edc8fca166579ce0c70dba5a5c0fc960ad6f3772183416a00bd29d4c6e651ea7620bb100c9449858bf14e1ddc9ecd35725581ca5b9160de04060045993d972571c3e8f71e9d0496bfa744656861b169d65",
            .derive_key = "160e18b5878cd0df1c3af85eb25a0db5344d43a6fbd7a8ef4ed98d0714c3f7e160dc0b1f09caa35f2f417b9ef309dfe5ebd67f4c9507995a531374d099cf8ae317542e885ec6f589378864d3ea98716b3bbb65ef4ab5e0ab5bb298a501f19a41ec19af84a5e6b428ecd813b1a47ed91c9657c3fba11c406bc316768b58f6802c9e9b57",
        },
        .{
            .input_len = 31744,
            .hash = "62b6960e1a44bcc1eb1a611a8d6235b6b4b78f32e7abc4fb4c6cdcce94895c47860cc51f2b0c28a7b77304bd55fe73af663c02d3f52ea053ba43431ca5bab7bfea2f5e9d7121770d88f70ae9649ea713087d1914f7f312147e247f87eb2d4ffef0ac978bf7b6579d57d533355aa20b8b77b13fd09748728a5cc327a8ec470f4013226f",
            .keyed_hash = "efa53b389ab67c593dba624d898d0f7353ab99e4ac9d42302ee64cbf9939a4193a7258db2d9cd32a7a3ecfce46144114b15c2fcb68a618a976bd74515d47be08b628be420b5e830fade7c080e351a076fbc38641ad80c736c8a18fe3c66ce12f95c61c2462a9770d60d0f77115bbcd3782b593016a4e728d4c06cee4505cb0c08a42ec",
            .derive_key = "39772aef80e0ebe60596361e45b061e8f417429d529171b6764468c22928e28e9759adeb797a3fbf771b1bcea30150a020e317982bf0d6e7d14dd9f064bc11025c25f31e81bd78a921db0174f03dd481d30e93fd8e90f8b2fee209f849f2d2a52f31719a490fb0ba7aea1e09814ee912eba111a9fde9d5c274185f7bae8ba85d300a2b",
        },
        .{
            .input_len = 102400,
            .hash = "bc3e3d41a1146b069abffad3c0d44860cf664390afce4d9661f7902e7943e085e01c59dab908c04c3342b816941a26d69c2605ebee5ec5291cc55e15b76146e6745f0601156c3596cb75065a9c57f35585a52e1ac70f69131c23d611ce11ee4ab1ec2c009012d236648e77be9295dd0426f29b764d65de58eb7d01dd42248204f45f8e",
            .keyed_hash = "1c35d1a5811083fd7119f5d5d1ba027b4d01c0c6c49fb6ff2cf75393ea5db4a7f9dbdd3e1d81dcbca3ba241bb18760f207710b751846faaeb9dff8262710999a59b2aa1aca298a032d94eacfadf1aa192418eb54808db23b56e34213266aa08499a16b354f018fc4967d05f8b9d2ad87a7278337be9693fc638a3bfdbe314574ee6fc4",
            .derive_key = "4652cff7a3f385a6103b5c260fc1593e13c778dbe608efb092fe7ee69df6e9c6d83a3e041bc3a48df2879f4a0a3ed40e7c961c73eff740f3117a0504c2dff4786d44fb17f1549eb0ba585e40ec29bf7732f0b7e286ff8acddc4cb1e23b87ff5d824a986458dcc6a04ac83969b80637562953df51ed1a7e90a7926924d2763778be8560",
        },
    },
};

fn testBlake3(hasher: *Blake3, input_len: usize, expected_hex: [262]u8) !void {
    // Save the initial state.
    const initial_state = hasher.*;

    // Set up the input pattern.
    var input_pattern: [251]u8 = undefined;
    for (&input_pattern, 0..) |*e, i| e.* = @as(u8, @truncate(i));

    // Write the repeating input pattern to the hasher.
    var input_counter = input_len;
    while (input_counter > 0) {
        const update_len = @min(input_counter, input_pattern.len);
        hasher.update(input_pattern[0..update_len]);
        input_counter -= update_len;
    }

    // Read the final hash value.
    var actual_bytes: [expected_hex.len / 2]u8 = undefined;
    hasher.final(actual_bytes[0..]);

    // Compare to the expected value.
    var expected_bytes: [expected_hex.len / 2]u8 = undefined;
    _ = fmt.hexToBytes(expected_bytes[0..], expected_hex[0..]) catch unreachable;
    try std.testing.expectEqual(actual_bytes, expected_bytes);

    // Restore the initial state.
    hasher.* = initial_state;
}

test "BLAKE3 reference test cases" {
    var hash_state = Blake3.init(.{});
    const hash = &hash_state;
    var keyed_hash_state = Blake3.init(.{ .key = reference_test.key.* });
    const keyed_hash = &keyed_hash_state;
    var derive_key_state = Blake3.initKdf(reference_test.context_string, .{});
    const derive_key = &derive_key_state;

    for (reference_test.cases) |t| {
        try testBlake3(hash, t.input_len, t.hash.*);
        try testBlake3(keyed_hash, t.input_len, t.keyed_hash.*);
        try testBlake3(derive_key, t.input_len, t.derive_key.*);
    }
}

test "BLAKE3 parallel vs sequential" {
    const allocator = std.testing.allocator;
    const io = std.testing.io;

    // Test various sizes, including some above the parallelization threshold.
    const test_sizes = [_]usize{
        0, // Empty
        64, // One block
        1024, // One chunk
        1024 * 10, // Multiple chunks
        1024 * 100, // 100 KiB
        1024 * 1000, // ~1 MiB
        1024 * 5000, // ~5 MiB (above threshold)
        1024 * 10000, // ~10 MiB (above threshold)
    };

    for (test_sizes) |size| {
        // Allocate and fill the test data with a pattern.
        const input = try allocator.alloc(u8, size);
        defer allocator.free(input);
        for (input, 0..) |*byte, i| {
            byte.* = @truncate(i);
        }

        // Test the regular hash.
        var expected: [32]u8 = undefined;
        Blake3.hash(input, &expected, .{});

        var actual: [32]u8 = undefined;
        try Blake3.hashParallel(input, &actual, .{}, allocator, io);

        try std.testing.expectEqualSlices(u8, &expected, &actual);

        // Test the keyed hash.
        const key: [32]u8 = @splat(0x42);
        var expected_keyed: [32]u8 = undefined;
        Blake3.hash(input, &expected_keyed, .{ .key = key });

        var actual_keyed: [32]u8 = undefined;
        try Blake3.hashParallel(input, &actual_keyed, .{ .key = key }, allocator, io);

        try std.testing.expectEqualSlices(u8, &expected_keyed, &actual_keyed);
    }
}
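
// An illustrative XOF property test: reading 128 output bytes at seek 0 must
// equal the concatenation of two 64-byte reads at seeks 0 and 64.
test "BLAKE3 XOF seek consistency" {
    var hasher = Blake3.init(.{});
    hasher.update("seek test input");

    var full: [128]u8 = undefined;
    hasher.finalizeSeek(0, &full);

    var first: [64]u8 = undefined;
    var second: [64]u8 = undefined;
    hasher.finalizeSeek(0, &first);
    hasher.finalizeSeek(64, &second);

    try std.testing.expectEqualSlices(u8, full[0..64], &first);
    try std.testing.expectEqualSlices(u8, full[64..], &second);
}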