master
1const std = @import("std");
2
3pub const Wyhash = struct {
4 const secret = [_]u64{
5 0xa0761d6478bd642f,
6 0xe7037ed1a0b428db,
7 0x8ebc6af09c88c6e3,
8 0x589965cc75374cc3,
9 };
10
11 a: u64,
12 b: u64,
13 state: [3]u64,
14 total_len: usize,
15
16 buf: [48]u8,
17 buf_len: usize,
18
19 pub fn init(seed: u64) Wyhash {
20 var self = Wyhash{
21 .a = undefined,
22 .b = undefined,
23 .state = undefined,
24 .total_len = 0,
25 .buf = undefined,
26 .buf_len = 0,
27 };
28
29 self.state[0] = seed ^ mix(seed ^ secret[0], secret[1]);
30 self.state[1] = self.state[0];
31 self.state[2] = self.state[0];
32 return self;
33 }
34
35 // This is subtly different from other hash function update calls. Wyhash requires the last
36 // full 48-byte block to be run through final1 if is exactly aligned to 48-bytes.
37 pub fn update(self: *Wyhash, input: []const u8) void {
38 self.total_len += input.len;
39
40 if (input.len <= 48 - self.buf_len) {
41 @memcpy(self.buf[self.buf_len..][0..input.len], input);
42 self.buf_len += input.len;
43 return;
44 }
45
46 var i: usize = 0;
47
48 if (self.buf_len > 0) {
49 i = 48 - self.buf_len;
50 @memcpy(self.buf[self.buf_len..][0..i], input[0..i]);
51 self.round(&self.buf);
52 self.buf_len = 0;
53 }
54
55 while (i + 48 < input.len) : (i += 48) {
56 self.round(input[i..][0..48]);
57 }
58
59 const remaining_bytes = input[i..];
60 if (remaining_bytes.len < 16 and i >= 48) {
61 const rem = 16 - remaining_bytes.len;
62 @memcpy(self.buf[self.buf.len - rem ..], input[i - rem .. i]);
63 }
64 @memcpy(self.buf[0..remaining_bytes.len], remaining_bytes);
65 self.buf_len = remaining_bytes.len;
66 }
67
68 pub fn final(self: *Wyhash) u64 {
69 var input: []const u8 = self.buf[0..self.buf_len];
70 var newSelf = self.shallowCopy(); // ensure idempotency
71
72 if (self.total_len <= 16) {
73 newSelf.smallKey(input);
74 } else {
75 var offset: usize = 0;
76 var scratch: [16]u8 = undefined;
77 if (self.buf_len < 16) {
78 const rem = 16 - self.buf_len;
79 @memcpy(scratch[0..rem], self.buf[self.buf.len - rem ..][0..rem]);
80 @memcpy(scratch[rem..][0..self.buf_len], self.buf[0..self.buf_len]);
81
82 // Same as input but with additional bytes preceding start in case of a short buffer
83 input = &scratch;
84 offset = rem;
85 }
86
87 newSelf.final0();
88 newSelf.final1(input, offset);
89 }
90
91 return newSelf.final2();
92 }
93
94 // Copies the core wyhash state but not any internal buffers.
95 inline fn shallowCopy(self: *Wyhash) Wyhash {
96 return .{
97 .a = self.a,
98 .b = self.b,
99 .state = self.state,
100 .total_len = self.total_len,
101 .buf = undefined,
102 .buf_len = undefined,
103 };
104 }
105
106 inline fn smallKey(self: *Wyhash, input: []const u8) void {
107 std.debug.assert(input.len <= 16);
108
109 if (input.len >= 4) {
110 const end = input.len - 4;
111 const quarter = (input.len >> 3) << 2;
112 self.a = (read(4, input[0..]) << 32) | read(4, input[quarter..]);
113 self.b = (read(4, input[end..]) << 32) | read(4, input[end - quarter ..]);
114 } else if (input.len > 0) {
115 self.a = (@as(u64, input[0]) << 16) | (@as(u64, input[input.len >> 1]) << 8) | input[input.len - 1];
116 self.b = 0;
117 } else {
118 self.a = 0;
119 self.b = 0;
120 }
121 }
122
123 inline fn round(self: *Wyhash, input: *const [48]u8) void {
124 inline for (0..3) |i| {
125 const a = read(8, input[8 * (2 * i) ..]);
126 const b = read(8, input[8 * (2 * i + 1) ..]);
127 self.state[i] = mix(a ^ secret[i + 1], b ^ self.state[i]);
128 }
129 }
130
131 inline fn read(comptime bytes: usize, data: []const u8) u64 {
132 std.debug.assert(bytes <= 8);
133 const T = std.meta.Int(.unsigned, 8 * bytes);
134 return @as(u64, std.mem.readInt(T, data[0..bytes], .little));
135 }
136
137 inline fn mum(a: *u64, b: *u64) void {
138 const x = @as(u128, a.*) *% b.*;
139 a.* = @as(u64, @truncate(x));
140 b.* = @as(u64, @truncate(x >> 64));
141 }
142
143 inline fn mix(a_: u64, b_: u64) u64 {
144 var a = a_;
145 var b = b_;
146 mum(&a, &b);
147 return a ^ b;
148 }
149
150 inline fn final0(self: *Wyhash) void {
151 self.state[0] ^= self.state[1] ^ self.state[2];
152 }
153
154 // input_lb must be at least 16-bytes long (in shorter key cases the smallKey function will be
155 // used instead). We use an index into a slice to for comptime processing as opposed to if we
156 // used pointers.
157 inline fn final1(self: *Wyhash, input_lb: []const u8, start_pos: usize) void {
158 std.debug.assert(input_lb.len >= 16);
159 std.debug.assert(input_lb.len - start_pos <= 48);
160 const input = input_lb[start_pos..];
161
162 var i: usize = 0;
163 while (i + 16 < input.len) : (i += 16) {
164 self.state[0] = mix(read(8, input[i..]) ^ secret[1], read(8, input[i + 8 ..]) ^ self.state[0]);
165 }
166
167 self.a = read(8, input_lb[input_lb.len - 16 ..][0..8]);
168 self.b = read(8, input_lb[input_lb.len - 8 ..][0..8]);
169 }
170
171 inline fn final2(self: *Wyhash) u64 {
172 self.a ^= secret[1];
173 self.b ^= self.state[0];
174 mum(&self.a, &self.b);
175 return mix(self.a ^ secret[0] ^ self.total_len, self.b ^ secret[1]);
176 }
177
178 pub fn hash(seed: u64, input: []const u8) u64 {
179 var self = Wyhash.init(seed);
180
181 if (input.len <= 16) {
182 self.smallKey(input);
183 } else {
184 var i: usize = 0;
185 if (input.len >= 48) {
186 while (i + 48 < input.len) : (i += 48) {
187 self.round(input[i..][0..48]);
188 }
189 self.final0();
190 }
191 self.final1(input, i);
192 }
193
194 self.total_len = input.len;
195 return self.final2();
196 }
197};
198
199const verify = @import("verify.zig");
200const expectEqual = std.testing.expectEqual;
201
202const TestVector = struct {
203 expected: u64,
204 seed: u64,
205 input: []const u8,
206};
207
208// Run https://github.com/wangyi-fudan/wyhash/blob/77e50f267fbc7b8e2d09f2d455219adb70ad4749/test_vector.cpp directly.
209const vectors = [_]TestVector{
210 .{ .seed = 0, .expected = 0x409638ee2bde459, .input = "" },
211 .{ .seed = 1, .expected = 0xa8412d091b5fe0a9, .input = "a" },
212 .{ .seed = 2, .expected = 0x32dd92e4b2915153, .input = "abc" },
213 .{ .seed = 3, .expected = 0x8619124089a3a16b, .input = "message digest" },
214 .{ .seed = 4, .expected = 0x7a43afb61d7f5f40, .input = "abcdefghijklmnopqrstuvwxyz" },
215 .{ .seed = 5, .expected = 0xff42329b90e50d58, .input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" },
216 .{ .seed = 6, .expected = 0xc39cab13b115aad3, .input = "12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
217};
218
219test "test vectors" {
220 for (vectors) |e| {
221 try expectEqual(e.expected, Wyhash.hash(e.seed, e.input));
222 }
223}
224
225test "test vectors at comptime" {
226 comptime {
227 for (vectors) |e| {
228 try expectEqual(e.expected, Wyhash.hash(e.seed, e.input));
229 }
230 }
231}
232
233test "smhasher" {
234 const Test = struct {
235 fn do() !void {
236 try expectEqual(verify.smhasher(Wyhash.hash), 0xBD5E840C);
237 }
238 };
239 try Test.do();
240 @setEvalBranchQuota(50000);
241 try comptime Test.do();
242}
243
244test "iterative api" {
245 const Test = struct {
246 fn do() !void {
247 try verify.iterativeApi(Wyhash);
248 }
249 };
250 try Test.do();
251 @setEvalBranchQuota(50000);
252 try comptime Test.do();
253}
254
255test "iterative maintains last sixteen" {
256 const input = "Z" ** 48 ++ "01234567890abcdefg";
257 const seed = 0;
258
259 for (0..17) |i| {
260 const payload = input[0 .. input.len - i];
261 const non_iterative_hash = Wyhash.hash(seed, payload);
262
263 var wh = Wyhash.init(seed);
264 wh.update(payload);
265 const iterative_hash = wh.final();
266
267 try expectEqual(non_iterative_hash, iterative_hash);
268 }
269}