master
  1const std = @import("std");
  2
  3pub const Wyhash = struct {
  4    const secret = [_]u64{
  5        0xa0761d6478bd642f,
  6        0xe7037ed1a0b428db,
  7        0x8ebc6af09c88c6e3,
  8        0x589965cc75374cc3,
  9    };
 10
 11    a: u64,
 12    b: u64,
 13    state: [3]u64,
 14    total_len: usize,
 15
 16    buf: [48]u8,
 17    buf_len: usize,
 18
 19    pub fn init(seed: u64) Wyhash {
 20        var self = Wyhash{
 21            .a = undefined,
 22            .b = undefined,
 23            .state = undefined,
 24            .total_len = 0,
 25            .buf = undefined,
 26            .buf_len = 0,
 27        };
 28
 29        self.state[0] = seed ^ mix(seed ^ secret[0], secret[1]);
 30        self.state[1] = self.state[0];
 31        self.state[2] = self.state[0];
 32        return self;
 33    }
 34
 35    // This is subtly different from other hash function update calls. Wyhash requires the last
 36    // full 48-byte block to be run through final1 if is exactly aligned to 48-bytes.
 37    pub fn update(self: *Wyhash, input: []const u8) void {
 38        self.total_len += input.len;
 39
 40        if (input.len <= 48 - self.buf_len) {
 41            @memcpy(self.buf[self.buf_len..][0..input.len], input);
 42            self.buf_len += input.len;
 43            return;
 44        }
 45
 46        var i: usize = 0;
 47
 48        if (self.buf_len > 0) {
 49            i = 48 - self.buf_len;
 50            @memcpy(self.buf[self.buf_len..][0..i], input[0..i]);
 51            self.round(&self.buf);
 52            self.buf_len = 0;
 53        }
 54
 55        while (i + 48 < input.len) : (i += 48) {
 56            self.round(input[i..][0..48]);
 57        }
 58
 59        const remaining_bytes = input[i..];
 60        if (remaining_bytes.len < 16 and i >= 48) {
 61            const rem = 16 - remaining_bytes.len;
 62            @memcpy(self.buf[self.buf.len - rem ..], input[i - rem .. i]);
 63        }
 64        @memcpy(self.buf[0..remaining_bytes.len], remaining_bytes);
 65        self.buf_len = remaining_bytes.len;
 66    }
 67
 68    pub fn final(self: *Wyhash) u64 {
 69        var input: []const u8 = self.buf[0..self.buf_len];
 70        var newSelf = self.shallowCopy(); // ensure idempotency
 71
 72        if (self.total_len <= 16) {
 73            newSelf.smallKey(input);
 74        } else {
 75            var offset: usize = 0;
 76            var scratch: [16]u8 = undefined;
 77            if (self.buf_len < 16) {
 78                const rem = 16 - self.buf_len;
 79                @memcpy(scratch[0..rem], self.buf[self.buf.len - rem ..][0..rem]);
 80                @memcpy(scratch[rem..][0..self.buf_len], self.buf[0..self.buf_len]);
 81
 82                // Same as input but with additional bytes preceding start in case of a short buffer
 83                input = &scratch;
 84                offset = rem;
 85            }
 86
 87            newSelf.final0();
 88            newSelf.final1(input, offset);
 89        }
 90
 91        return newSelf.final2();
 92    }
 93
 94    // Copies the core wyhash state but not any internal buffers.
 95    inline fn shallowCopy(self: *Wyhash) Wyhash {
 96        return .{
 97            .a = self.a,
 98            .b = self.b,
 99            .state = self.state,
100            .total_len = self.total_len,
101            .buf = undefined,
102            .buf_len = undefined,
103        };
104    }
105
106    inline fn smallKey(self: *Wyhash, input: []const u8) void {
107        std.debug.assert(input.len <= 16);
108
109        if (input.len >= 4) {
110            const end = input.len - 4;
111            const quarter = (input.len >> 3) << 2;
112            self.a = (read(4, input[0..]) << 32) | read(4, input[quarter..]);
113            self.b = (read(4, input[end..]) << 32) | read(4, input[end - quarter ..]);
114        } else if (input.len > 0) {
115            self.a = (@as(u64, input[0]) << 16) | (@as(u64, input[input.len >> 1]) << 8) | input[input.len - 1];
116            self.b = 0;
117        } else {
118            self.a = 0;
119            self.b = 0;
120        }
121    }
122
123    inline fn round(self: *Wyhash, input: *const [48]u8) void {
124        inline for (0..3) |i| {
125            const a = read(8, input[8 * (2 * i) ..]);
126            const b = read(8, input[8 * (2 * i + 1) ..]);
127            self.state[i] = mix(a ^ secret[i + 1], b ^ self.state[i]);
128        }
129    }
130
131    inline fn read(comptime bytes: usize, data: []const u8) u64 {
132        std.debug.assert(bytes <= 8);
133        const T = std.meta.Int(.unsigned, 8 * bytes);
134        return @as(u64, std.mem.readInt(T, data[0..bytes], .little));
135    }
136
137    inline fn mum(a: *u64, b: *u64) void {
138        const x = @as(u128, a.*) *% b.*;
139        a.* = @as(u64, @truncate(x));
140        b.* = @as(u64, @truncate(x >> 64));
141    }
142
143    inline fn mix(a_: u64, b_: u64) u64 {
144        var a = a_;
145        var b = b_;
146        mum(&a, &b);
147        return a ^ b;
148    }
149
150    inline fn final0(self: *Wyhash) void {
151        self.state[0] ^= self.state[1] ^ self.state[2];
152    }
153
154    // input_lb must be at least 16-bytes long (in shorter key cases the smallKey function will be
155    // used instead). We use an index into a slice to for comptime processing as opposed to if we
156    // used pointers.
157    inline fn final1(self: *Wyhash, input_lb: []const u8, start_pos: usize) void {
158        std.debug.assert(input_lb.len >= 16);
159        std.debug.assert(input_lb.len - start_pos <= 48);
160        const input = input_lb[start_pos..];
161
162        var i: usize = 0;
163        while (i + 16 < input.len) : (i += 16) {
164            self.state[0] = mix(read(8, input[i..]) ^ secret[1], read(8, input[i + 8 ..]) ^ self.state[0]);
165        }
166
167        self.a = read(8, input_lb[input_lb.len - 16 ..][0..8]);
168        self.b = read(8, input_lb[input_lb.len - 8 ..][0..8]);
169    }
170
171    inline fn final2(self: *Wyhash) u64 {
172        self.a ^= secret[1];
173        self.b ^= self.state[0];
174        mum(&self.a, &self.b);
175        return mix(self.a ^ secret[0] ^ self.total_len, self.b ^ secret[1]);
176    }
177
178    pub fn hash(seed: u64, input: []const u8) u64 {
179        var self = Wyhash.init(seed);
180
181        if (input.len <= 16) {
182            self.smallKey(input);
183        } else {
184            var i: usize = 0;
185            if (input.len >= 48) {
186                while (i + 48 < input.len) : (i += 48) {
187                    self.round(input[i..][0..48]);
188                }
189                self.final0();
190            }
191            self.final1(input, i);
192        }
193
194        self.total_len = input.len;
195        return self.final2();
196    }
197};
198
199const verify = @import("verify.zig");
200const expectEqual = std.testing.expectEqual;
201
202const TestVector = struct {
203    expected: u64,
204    seed: u64,
205    input: []const u8,
206};
207
208// Run https://github.com/wangyi-fudan/wyhash/blob/77e50f267fbc7b8e2d09f2d455219adb70ad4749/test_vector.cpp directly.
209const vectors = [_]TestVector{
210    .{ .seed = 0, .expected = 0x409638ee2bde459, .input = "" },
211    .{ .seed = 1, .expected = 0xa8412d091b5fe0a9, .input = "a" },
212    .{ .seed = 2, .expected = 0x32dd92e4b2915153, .input = "abc" },
213    .{ .seed = 3, .expected = 0x8619124089a3a16b, .input = "message digest" },
214    .{ .seed = 4, .expected = 0x7a43afb61d7f5f40, .input = "abcdefghijklmnopqrstuvwxyz" },
215    .{ .seed = 5, .expected = 0xff42329b90e50d58, .input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" },
216    .{ .seed = 6, .expected = 0xc39cab13b115aad3, .input = "12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
217};
218
219test "test vectors" {
220    for (vectors) |e| {
221        try expectEqual(e.expected, Wyhash.hash(e.seed, e.input));
222    }
223}
224
225test "test vectors at comptime" {
226    comptime {
227        for (vectors) |e| {
228            try expectEqual(e.expected, Wyhash.hash(e.seed, e.input));
229        }
230    }
231}
232
233test "smhasher" {
234    const Test = struct {
235        fn do() !void {
236            try expectEqual(verify.smhasher(Wyhash.hash), 0xBD5E840C);
237        }
238    };
239    try Test.do();
240    @setEvalBranchQuota(50000);
241    try comptime Test.do();
242}
243
244test "iterative api" {
245    const Test = struct {
246        fn do() !void {
247            try verify.iterativeApi(Wyhash);
248        }
249    };
250    try Test.do();
251    @setEvalBranchQuota(50000);
252    try comptime Test.do();
253}
254
255test "iterative maintains last sixteen" {
256    const input = "Z" ** 48 ++ "01234567890abcdefg";
257    const seed = 0;
258
259    for (0..17) |i| {
260        const payload = input[0 .. input.len - i];
261        const non_iterative_hash = Wyhash.hash(seed, payload);
262
263        var wh = Wyhash.init(seed);
264        wh.update(payload);
265        const iterative_hash = wh.final();
266
267        try expectEqual(non_iterative_hash, iterative_hash);
268    }
269}