Commit 6150da3df9

Sahnvour <sahnvour@pm.me>
2019-06-27 23:21:35
direct port of wyhash v2; also inspired by https://github.com/ManDeJan/zig-wyhash
1 parent 5687323
Changed files (2)
std/hash/wyhash.zig
@@ -0,0 +1,99 @@
+const std = @import("std");
+const mem = std.mem;
+
+// Five 64-bit constants consumed by the mixing steps in this file:
+// mix0 XORs its operands with primes[0] and primes[1], mix1 uses primes[2]
+// and primes[3], and hash() passes primes[4] both as the second operand when
+// a tail has no bytes left for it and as the multiplier of the final mum().
+// NOTE(review): presumably the constants of the reference wyhash v2 — confirm
+// against upstream before changing any of them.
+const primes = [_]u64{
+    0xa0761d6478bd642f,
+    0xe7037ed1a0b428db,
+    0x8ebc6af09c88c6e3,
+    0x589965cc75374cc3,
+    0x1d8e4e27c47d124f,
+};
+
+/// Reads the first `bytes` bytes of `data` as an unsigned little-endian
+/// integer and returns it zero-extended to 64 bits.
+///
+/// The byte order is pinned to little-endian instead of following the host,
+/// so the value read for a given byte sequence (and therefore the resulting
+/// hash) does not change between little- and big-endian targets. On
+/// little-endian hosts this is identical to a native-order read.
+///
+/// `data` must contain at least `bytes` bytes.
+fn read_bytes(comptime bytes: u8, data: []const u8) u64 {
+    return mem.readVarInt(u64, data[0..bytes], @import("builtin").Endian.Little);
+}
+
+/// Reads 8 bytes from `data` as two consecutive 4-byte words (each obtained
+/// through read_bytes) and combines them with the first word in the upper
+/// 32 bits of the result and the second word in the lower 32 bits.
+///
+/// `data` must contain at least 8 bytes.
+fn read_8bytes_swapped(data: []const u8) u64 {
+    const first_word = read_bytes(4, data);
+    const second_word = read_bytes(4, data[4..]);
+    return (first_word << 32) | second_word;
+}
+
+/// Multiplies `a` and `b` into a full 128-bit product and folds it back to
+/// 64 bits by XORing the upper half with the lower half.
+fn mum(a: u64, b: u64) u64 {
+    const product: u128 = @intCast(u128, a) * @intCast(u128, b);
+    const upper = @truncate(u64, product >> 64);
+    const lower = @truncate(u64, product);
+    return upper ^ lower;
+}
+
+/// Mixes two 64-bit words with the seed: `a` is XORed with `seed` and
+/// primes[0], `b` with `seed` and primes[1], and the two results are
+/// folded together by mum().
+fn mix0(a: u64, b: u64, seed: u64) u64 {
+    const lhs = a ^ seed ^ primes[0];
+    const rhs = b ^ seed ^ primes[1];
+    return mum(lhs, rhs);
+}
+
+/// Same mixing step as mix0 but keyed with a different pair of constants:
+/// `a` is XORed with `seed` and primes[2], `b` with `seed` and primes[3],
+/// and the two results are folded together by mum().
+fn mix1(a: u64, b: u64, seed: u64) u64 {
+    const lhs = a ^ seed ^ primes[2];
+    const rhs = b ^ seed ^ primes[3];
+    return mum(lhs, rhs);
+}
+
+/// Computes a 64-bit hash of `key`, starting from `initial_seed`.
+///
+/// The key is consumed in 32-byte blocks; each block replaces the running
+/// seed with mix0(first 16 bytes) ^ mix1(last 16 bytes), both keyed with the
+/// seed from before the block. The remaining 0..31 bytes are folded in by the
+/// switch below, and the result is finished with mum(seed ^ key.len, primes[4]).
+///
+/// Returns the final 64-bit value; the function does not allocate and has no
+/// error return.
+pub fn hash(key: []const u8, initial_seed: u64) u64 {
+    var seed = initial_seed;
+
+    // Full 32-byte blocks: four 8-byte words per iteration.
+    var i: usize = 0;
+    while (i + 32 <= key.len) : (i += 32) {
+        seed = mix0(
+            read_bytes(8, key[i..]),
+            read_bytes(8, key[i + 8 ..]),
+            seed,
+        ) ^ mix1(
+            read_bytes(8, key[i + 16 ..]),
+            read_bytes(8, key[i + 24 ..]),
+            seed,
+        );
+    }
+
+    // Truncating the length to 5 bits yields key.len mod 32, i.e. the number
+    // of bytes left after the loop; rem_key is exactly that tail.
+    const rem_len = @truncate(u5, key.len);
+    const rem_key = key[i..];
+    // One arm per possible tail length. The tail is packed from 4-, 2- and
+    // 1-byte reads shifted together:
+    //   1..8   -> mix0(tail, primes[4])
+    //   9..16  -> mix0(first 8 bytes, rest of tail)
+    //   17..24 -> mix0(first 16 bytes) ^ mix1(rest of tail, primes[4])
+    //   25..31 -> mix0(first 16 bytes) ^ mix1(next 8 bytes, rest of tail)
+    seed = switch (rem_len) {
+        0 => seed,
+        1 => mix0(read_bytes(1, rem_key), primes[4], seed),
+        2 => mix0(read_bytes(2, rem_key), primes[4], seed),
+        3 => mix0((read_bytes(2, rem_key) << 8) | read_bytes(1, rem_key[2..]), primes[4], seed),
+        4 => mix0(read_bytes(4, rem_key), primes[4], seed),
+        5 => mix0((read_bytes(4, rem_key) << 8) | read_bytes(1, rem_key[4..]), primes[4], seed),
+        6 => mix0((read_bytes(4, rem_key) << 16) | read_bytes(2, rem_key[4..]), primes[4], seed),
+        7 => mix0((read_bytes(4, rem_key) << 24) | (read_bytes(2, rem_key[4..]) << 8) | read_bytes(1, rem_key[6..]), primes[4], seed),
+        8 => mix0(read_8bytes_swapped(rem_key), primes[4], seed),
+        9 => mix0(read_8bytes_swapped(rem_key), read_bytes(1, rem_key[8..]), seed),
+        10 => mix0(read_8bytes_swapped(rem_key), read_bytes(2, rem_key[8..]), seed),
+        11 => mix0(read_8bytes_swapped(rem_key), (read_bytes(2, rem_key[8..]) << 8) | read_bytes(1, rem_key[10..]), seed),
+        12 => mix0(read_8bytes_swapped(rem_key), read_bytes(4, rem_key[8..]), seed),
+        13 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 8) | read_bytes(1, rem_key[12..]), seed),
+        14 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 16) | read_bytes(2, rem_key[12..]), seed),
+        15 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 24) | (read_bytes(2, rem_key[12..]) << 8) | read_bytes(1, rem_key[14..]), seed),
+        16 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed),
+        17 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(1, rem_key[16..]), primes[4], seed),
+        18 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(2, rem_key[16..]), primes[4], seed),
+        19 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(2, rem_key[16..]) << 8) | read_bytes(1, rem_key[18..]), primes[4], seed),
+        20 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(4, rem_key[16..]), primes[4], seed),
+        21 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 8) | read_bytes(1, rem_key[20..]), primes[4], seed),
+        22 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 16) | read_bytes(2, rem_key[20..]), primes[4], seed),
+        23 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 24) | (read_bytes(2, rem_key[20..]) << 8) | read_bytes(1, rem_key[22..]), primes[4], seed),
+        24 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), primes[4], seed),
+        25 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(1, rem_key[24..]), seed),
+        26 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(2, rem_key[24..]), seed),
+        27 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(2, rem_key[24..]) << 8) | read_bytes(1, rem_key[26..]), seed),
+        28 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(4, rem_key[24..]), seed),
+        29 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 8) | read_bytes(1, rem_key[28..]), seed),
+        30 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 16) | read_bytes(2, rem_key[28..]), seed),
+        31 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 24) | (read_bytes(2, rem_key[28..]) << 8) | read_bytes(1, rem_key[30..]), seed),
+    };
+
+    // Final step: fold the total key length into the seed and mix once more.
+    return mum(seed ^ key.len, primes[4]);
+}
+
+test "test vectors" {
+    // Each case pairs a key and seed with the 64-bit value hash() must return.
+    // Key lengths are 0, 1, 3, 14, 26, 62 and 80 bytes, so the empty input,
+    // several tail lengths and the 32-byte block loop are all exercised.
+    const Case = struct {
+        key: []const u8,
+        seed: u64,
+        digest: u64,
+    };
+    const cases = [_]Case{
+        Case{ .key = "", .seed = 0, .digest = 0x0 },
+        Case{ .key = "a", .seed = 1, .digest = 0xbed235177f41d328 },
+        Case{ .key = "abc", .seed = 2, .digest = 0xbe348debe59b27c3 },
+        Case{ .key = "message digest", .seed = 3, .digest = 0x37320f657213a290 },
+        Case{ .key = "abcdefghijklmnopqrstuvwxyz", .seed = 4, .digest = 0xd0b270e1d8a7019c },
+        Case{ .key = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", .seed = 5, .digest = 0x602a1894d3bbfe7f },
+        Case{ .key = "12345678901234567890123456789012345678901234567890123456789012345678901234567890", .seed = 6, .digest = 0x829e9c148b75970e },
+    };
+    for (cases) |case| {
+        std.testing.expectEqual(hash(case.key, case.seed), case.digest);
+    }
+}
std/hash.zig
@@ -16,6 +16,7 @@ pub const SipHash128 = siphash.SipHash128;
 
 pub const murmur = @import("hash/murmur.zig");
 pub const Murmur2_32 = murmur.Murmur2_32;
+
 pub const Murmur2_64 = murmur.Murmur2_64;
 pub const Murmur3_32 = murmur.Murmur3_32;
 
@@ -23,6 +24,8 @@ pub const cityhash = @import("hash/cityhash.zig");
 pub const CityHash32 = cityhash.CityHash32;
 pub const CityHash64 = cityhash.CityHash64;
 
+/// The `hash` function of hash/wyhash.zig, re-exported directly (this name is
+/// the function itself, not the module): call it as `wyhash(key, seed)` to get
+/// a 64-bit hash of a byte slice.
+pub const wyhash = @import("hash/wyhash.zig").hash;
+
 test "hash" {
     _ = @import("hash/adler.zig");
     _ = @import("hash/crc.zig");
@@ -30,4 +33,5 @@ test "hash" {
     _ = @import("hash/siphash.zig");
     _ = @import("hash/murmur.zig");
     _ = @import("hash/cityhash.zig");
+    _ = @import("hash/wyhash.zig");
 }