Commit 83dffc70af

Marc Tiehuis <marc@tiehu.is>
2019-07-01 13:23:26
Add iterative wyhash api
1 parent c9ce43f
Changed files (3)
std/hash/wyhash.zig
@@ -10,7 +10,7 @@ const primes = [_]u64{
 };
 
 fn read_bytes(comptime bytes: u8, data: []const u8) u64 {
-    return mem.readVarInt(u64, data[0..bytes], @import("builtin").endian);
+    return mem.readVarInt(u64, data[0..bytes], .Little);
 }
 
 fn read_8bytes_swapped(data: []const u8) u64 {
@@ -18,7 +18,7 @@ fn read_8bytes_swapped(data: []const u8) u64 {
 }
 
 fn mum(a: u64, b: u64) u64 {
-    var r: u128 = @intCast(u128, a) * @intCast(u128, b);
+    var r = std.math.mulWide(u64, a, b);
     r = (r >> 64) ^ r;
     return @truncate(u64, r);
 }
@@ -31,69 +31,117 @@ fn mix1(a: u64, b: u64, seed: u64) u64 {
     return mum(a ^ seed ^ primes[2], b ^ seed ^ primes[3]);
 }
 
-pub fn hash(key: []const u8, initial_seed: u64) u64 {
-    var seed = initial_seed;
+pub const Wyhash = struct {
+    seed: u64,
 
-    var i: usize = 0;
-    while (i + 32 <= key.len) : (i += 32) {
-        seed = mix0(
-            read_bytes(8, key[i..]),
-            read_bytes(8, key[i + 8 ..]),
-            seed,
+    buf: [32]u8,
+    buf_len: usize,
+    msg_len: usize,
+
+    pub fn init(seed: u64) Wyhash {
+        return Wyhash{
+            .seed = seed,
+            .buf = undefined,
+            .buf_len = 0,
+            .msg_len = 0,
+        };
+    }
+
+    fn round(self: *Wyhash, b: []const u8) void {
+        std.debug.assert(b.len == 32);
+
+        self.seed = mix0(
+            read_bytes(8, b[0..]),
+            read_bytes(8, b[8..]),
+            self.seed,
         ) ^ mix1(
-            read_bytes(8, key[i + 16 ..]),
-            read_bytes(8, key[i + 24 ..]),
-            seed,
+            read_bytes(8, b[16..]),
+            read_bytes(8, b[24..]),
+            self.seed,
         );
     }
 
-    const rem_len = @truncate(u5, key.len);
-    const rem_key = key[i..];
-    seed = switch (rem_len) {
-        0 => seed,
-        1 => mix0(read_bytes(1, rem_key), primes[4], seed),
-        2 => mix0(read_bytes(2, rem_key), primes[4], seed),
-        3 => mix0((read_bytes(2, rem_key) << 8) | read_bytes(1, rem_key[2..]), primes[4], seed),
-        4 => mix0(read_bytes(4, rem_key), primes[4], seed),
-        5 => mix0((read_bytes(4, rem_key) << 8) | read_bytes(1, rem_key[4..]), primes[4], seed),
-        6 => mix0((read_bytes(4, rem_key) << 16) | read_bytes(2, rem_key[4..]), primes[4], seed),
-        7 => mix0((read_bytes(4, rem_key) << 24) | (read_bytes(2, rem_key[4..]) << 8) | read_bytes(1, rem_key[6..]), primes[4], seed),
-        8 => mix0(read_8bytes_swapped(rem_key), primes[4], seed),
-        9 => mix0(read_8bytes_swapped(rem_key), read_bytes(1, rem_key[8..]), seed),
-        10 => mix0(read_8bytes_swapped(rem_key), read_bytes(2, rem_key[8..]), seed),
-        11 => mix0(read_8bytes_swapped(rem_key), (read_bytes(2, rem_key[8..]) << 8) | read_bytes(1, rem_key[10..]), seed),
-        12 => mix0(read_8bytes_swapped(rem_key), read_bytes(4, rem_key[8..]), seed),
-        13 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 8) | read_bytes(1, rem_key[12..]), seed),
-        14 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 16) | read_bytes(2, rem_key[12..]), seed),
-        15 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 24) | (read_bytes(2, rem_key[12..]) << 8) | read_bytes(1, rem_key[14..]), seed),
-        16 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed),
-        17 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(1, rem_key[16..]), primes[4], seed),
-        18 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(2, rem_key[16..]), primes[4], seed),
-        19 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(2, rem_key[16..]) << 8) | read_bytes(1, rem_key[18..]), primes[4], seed),
-        20 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(4, rem_key[16..]), primes[4], seed),
-        21 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 8) | read_bytes(1, rem_key[20..]), primes[4], seed),
-        22 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 16) | read_bytes(2, rem_key[20..]), primes[4], seed),
-        23 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 24) | (read_bytes(2, rem_key[20..]) << 8) | read_bytes(1, rem_key[22..]), primes[4], seed),
-        24 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), primes[4], seed),
-        25 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(1, rem_key[24..]), seed),
-        26 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(2, rem_key[24..]), seed),
-        27 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(2, rem_key[24..]) << 8) | read_bytes(1, rem_key[26..]), seed),
-        28 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(4, rem_key[24..]), seed),
-        29 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 8) | read_bytes(1, rem_key[28..]), seed),
-        30 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 16) | read_bytes(2, rem_key[28..]), seed),
-        31 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 24) | (read_bytes(2, rem_key[28..]) << 8) | read_bytes(1, rem_key[30..]), seed),
-    };
-
-    return mum(seed ^ key.len, primes[4]);
-}
+    pub fn update(self: *Wyhash, b: []const u8) void {
+        var off: usize = 0;
+
+        // Partial from previous.
+        if (self.buf_len != 0 and self.buf_len + b.len > 32) {
+            off += 32 - self.buf_len;
+            mem.copy(u8, self.buf[self.buf_len..], b[0..off]);
+            self.round(self.buf[0..]);
+            self.buf_len = 0;
+        }
+
+        // Full middle blocks.
+        while (off + 32 <= b.len) : (off += 32) {
+            @inlineCall(self.round, b[off .. off + 32]);
+        }
+
+        // Remainder for next pass.
+        mem.copy(u8, self.buf[self.buf_len..], b[off..]);
+        self.buf_len += @intCast(u8, b[off..].len);
+        self.msg_len += b.len;
+    }
+
+    pub fn final(self: *Wyhash) u64 {
+        const seed = self.seed;
+        const rem_len = @intCast(u5, self.buf_len);
+        const rem_key = self.buf[0..self.buf_len];
+
+        self.seed = switch (rem_len) {
+            0 => seed,
+            1 => mix0(read_bytes(1, rem_key), primes[4], seed),
+            2 => mix0(read_bytes(2, rem_key), primes[4], seed),
+            3 => mix0((read_bytes(2, rem_key) << 8) | read_bytes(1, rem_key[2..]), primes[4], seed),
+            4 => mix0(read_bytes(4, rem_key), primes[4], seed),
+            5 => mix0((read_bytes(4, rem_key) << 8) | read_bytes(1, rem_key[4..]), primes[4], seed),
+            6 => mix0((read_bytes(4, rem_key) << 16) | read_bytes(2, rem_key[4..]), primes[4], seed),
+            7 => mix0((read_bytes(4, rem_key) << 24) | (read_bytes(2, rem_key[4..]) << 8) | read_bytes(1, rem_key[6..]), primes[4], seed),
+            8 => mix0(read_8bytes_swapped(rem_key), primes[4], seed),
+            9 => mix0(read_8bytes_swapped(rem_key), read_bytes(1, rem_key[8..]), seed),
+            10 => mix0(read_8bytes_swapped(rem_key), read_bytes(2, rem_key[8..]), seed),
+            11 => mix0(read_8bytes_swapped(rem_key), (read_bytes(2, rem_key[8..]) << 8) | read_bytes(1, rem_key[10..]), seed),
+            12 => mix0(read_8bytes_swapped(rem_key), read_bytes(4, rem_key[8..]), seed),
+            13 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 8) | read_bytes(1, rem_key[12..]), seed),
+            14 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 16) | read_bytes(2, rem_key[12..]), seed),
+            15 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 24) | (read_bytes(2, rem_key[12..]) << 8) | read_bytes(1, rem_key[14..]), seed),
+            16 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed),
+            17 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(1, rem_key[16..]), primes[4], seed),
+            18 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(2, rem_key[16..]), primes[4], seed),
+            19 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(2, rem_key[16..]) << 8) | read_bytes(1, rem_key[18..]), primes[4], seed),
+            20 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(4, rem_key[16..]), primes[4], seed),
+            21 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 8) | read_bytes(1, rem_key[20..]), primes[4], seed),
+            22 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 16) | read_bytes(2, rem_key[20..]), primes[4], seed),
+            23 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 24) | (read_bytes(2, rem_key[20..]) << 8) | read_bytes(1, rem_key[22..]), primes[4], seed),
+            24 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), primes[4], seed),
+            25 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(1, rem_key[24..]), seed),
+            26 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(2, rem_key[24..]), seed),
+            27 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(2, rem_key[24..]) << 8) | read_bytes(1, rem_key[26..]), seed),
+            28 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(4, rem_key[24..]), seed),
+            29 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 8) | read_bytes(1, rem_key[28..]), seed),
+            30 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 16) | read_bytes(2, rem_key[28..]), seed),
+            31 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 24) | (read_bytes(2, rem_key[28..]) << 8) | read_bytes(1, rem_key[30..]), seed),
+        };
+
+        return mum(self.seed ^ self.msg_len, primes[4]);
+    }
+
+    pub fn hash(seed: u64, input: []const u8) u64 {
+        var c = Wyhash.init(seed);
+        c.update(input);
+        return c.final();
+    }
+};
 
 test "test vectors" {
     const expectEqual = std.testing.expectEqual;
-    expectEqual(hash("", 0), 0x0);
-    expectEqual(hash("a", 1), 0xbed235177f41d328);
-    expectEqual(hash("abc", 2), 0xbe348debe59b27c3);
-    expectEqual(hash("message digest", 3), 0x37320f657213a290);
-    expectEqual(hash("abcdefghijklmnopqrstuvwxyz", 4), 0xd0b270e1d8a7019c);
-    expectEqual(hash("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 5), 0x602a1894d3bbfe7f);
-    expectEqual(hash("12345678901234567890123456789012345678901234567890123456789012345678901234567890", 6), 0x829e9c148b75970e);
+    const hash = Wyhash.hash;
+
+    expectEqual(hash(0, ""), 0x0);
+    expectEqual(hash(1, "a"), 0xbed235177f41d328);
+    expectEqual(hash(2, "abc"), 0xbe348debe59b27c3);
+    expectEqual(hash(3, "message digest"), 0x37320f657213a290);
+    expectEqual(hash(4, "abcdefghijklmnopqrstuvwxyz"), 0xd0b270e1d8a7019c);
+    expectEqual(hash(5, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), 0x602a1894d3bbfe7f);
+    expectEqual(hash(6, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"), 0x829e9c148b75970e);
 }
std/hash.zig
@@ -17,6 +17,7 @@ pub const SipHash128 = siphash.SipHash128;
 pub const murmur = @import("hash/murmur.zig");
 pub const Murmur2_32 = murmur.Murmur2_32;
 
+
 pub const Murmur2_64 = murmur.Murmur2_64;
 pub const Murmur3_32 = murmur.Murmur3_32;
 
@@ -24,7 +25,8 @@ pub const cityhash = @import("hash/cityhash.zig");
 pub const CityHash32 = cityhash.CityHash32;
 pub const CityHash64 = cityhash.CityHash64;
 
-pub const wyhash = @import("hash/wyhash.zig").hash;
+const wyhash = @import("hash/wyhash.zig");
+pub const Wyhash = wyhash.Wyhash;
 
 test "hash" {
     _ = @import("hash/adler.zig");
std/hash_map.zig
@@ -5,7 +5,7 @@ const testing = std.testing;
 const math = std.math;
 const mem = std.mem;
 const meta = std.meta;
-const wyhash = std.hash.wyhash;
+const Wyhash = std.hash.Wyhash;
 const Allocator = mem.Allocator;
 const builtin = @import("builtin");
 
@@ -557,7 +557,7 @@ pub fn autoHash(key: var, seed: u64) u64 {
         builtin.TypeId.EnumLiteral,
         => @compileError("cannot hash this type"),
 
-        builtin.TypeId.Int => return wyhash(std.mem.asBytes(&key), seed),
+        builtin.TypeId.Int => return Wyhash.hash(seed, std.mem.asBytes(&key)),
 
         builtin.TypeId.Float => |info| return autoHash(@bitCast(@IntType(false, info.bits), key), seed),
 
@@ -594,7 +594,7 @@ pub fn autoHash(key: var, seed: u64) u64 {
             // If there's no unused bits in the child type, we can just hash
             // this as an array of bytes.
             if (info.child.bit_count % 8 == 0) {
-                return wyhash(mem.asBytes(&key), seed);
+                return Wyhash.hash(seed, mem.asBytes(&key));
             }
 
             // Otherwise, hash every element.