Commit 26d61812a8

Marc Tiehuis <marc@tiehu.is>
2023-09-01 09:09:05
std/hash: add smhasher verification tests
Not all hashes are added just yet as these need to be generated manually from reference implementations as they are not included by default in smhasher.
1 parent bb2eb44
lib/std/hash/cityhash.zig
@@ -342,64 +342,21 @@ pub const CityHash64 = struct {
     }
 };
 
-fn SMHasherTest(comptime hash_fn: anytype) u32 {
-    const HashResult = @typeInfo(@TypeOf(hash_fn)).Fn.return_type.?;
-
-    var key: [256]u8 = undefined;
-    var hashes_bytes: [256 * @sizeOf(HashResult)]u8 = undefined;
-
-    @memset(&key, 0);
-    @memset(&hashes_bytes, 0);
-
-    var i: u32 = 0;
-    while (i < 256) : (i += 1) {
-        key[i] = @as(u8, @intCast(i));
-
-        var h: HashResult = hash_fn(key[0..i], 256 - i);
-
-        // comptime can't really do reinterpret casting yet,
-        // so we need to write the bytes manually.
-        for (hashes_bytes[i * @sizeOf(HashResult) ..][0..@sizeOf(HashResult)]) |*byte| {
-            byte.* = @as(u8, @truncate(h));
-            h = h >> 8;
-        }
-    }
-
-    return @as(u32, @truncate(hash_fn(&hashes_bytes, 0)));
-}
-
 fn CityHash32hashIgnoreSeed(str: []const u8, seed: u32) u32 {
     _ = seed;
     return CityHash32.hash(str);
 }
 
+const verify = @import("verify.zig");
+
 test "cityhash32" {
-    const Test = struct {
-        fn doTest() !void {
-            // Note: SMHasher doesn't provide a 32bit version of the algorithm.
-            // Note: The implementation was verified against the Google Abseil version.
-            try std.testing.expectEqual(SMHasherTest(CityHash32hashIgnoreSeed), 0x68254F81);
-            try std.testing.expectEqual(SMHasherTest(CityHash32hashIgnoreSeed), 0x68254F81);
-        }
-    };
-    try Test.doTest();
-    // TODO This is uncommented to prevent OOM on the CI server. Re-enable this test
-    // case once we ship stage2.
-    //@setEvalBranchQuota(50000);
-    //comptime Test.doTest();
+    // Note: SMHasher doesn't provide a 32bit version of the algorithm.
+    // Note: The implementation was verified against the Google Abseil version.
+    try std.testing.expectEqual(verify.smhasher(CityHash32hashIgnoreSeed), 0x68254F81);
 }
 
 test "cityhash64" {
-    const Test = struct {
-        fn doTest() !void {
-            // Note: This is not compliant with the SMHasher implementation of CityHash64!
-            // Note: The implementation was verified against the Google Abseil version.
-            try std.testing.expectEqual(SMHasherTest(CityHash64.hashWithSeed), 0x5FABC5C5);
-        }
-    };
-    try Test.doTest();
-    // TODO This is uncommented to prevent OOM on the CI server. Re-enable this test
-    // case once we ship stage2.
-    //@setEvalBranchQuota(50000);
-    //comptime Test.doTest();
+    // Note: This is not compliant with the SMHasher implementation of CityHash64!
+    // Note: The implementation was verified against the Google Abseil version.
+    try std.testing.expectEqual(verify.smhasher(CityHash64.hashWithSeed), 0x5FABC5C5);
 }
lib/std/hash/fnv.zig
@@ -4,7 +4,7 @@
 //
 // https://tools.ietf.org/html/draft-eastlake-fnv-14
 
-const std = @import("../std.zig");
+const std = @import("std");
 const testing = std.testing;
 
 pub const Fnv1a_32 = Fnv1a(u32, 0x01000193, 0x811c9dc5);
lib/std/hash/murmur.zig
@@ -279,26 +279,10 @@ pub const Murmur3_32 = struct {
     }
 };
 
-fn SMHasherTest(comptime hash_fn: anytype, comptime hashbits: u32) u32 {
-    const hashbytes = hashbits / 8;
-    var key: [256]u8 = [1]u8{0} ** 256;
-    var hashes: [hashbytes * 256]u8 = [1]u8{0} ** (hashbytes * 256);
-
-    var i: u32 = 0;
-    while (i < 256) : (i += 1) {
-        key[i] = @as(u8, @truncate(i));
-
-        var h = hash_fn(key[0..i], 256 - i);
-        if (native_endian == .Big)
-            h = @byteSwap(h);
-        @memcpy(hashes[i * hashbytes ..][0..hashbytes], @as([*]u8, @ptrCast(&h)));
-    }
-
-    return @as(u32, @truncate(hash_fn(&hashes, 0)));
-}
+const verify = @import("verify.zig");
 
 test "murmur2_32" {
-    try testing.expectEqual(SMHasherTest(Murmur2_32.hashWithSeed, 32), 0x27864C1E);
+    try testing.expectEqual(verify.smhasher(Murmur2_32.hashWithSeed), 0x27864C1E);
     var v0: u32 = 0x12345678;
     var v1: u64 = 0x1234567812345678;
     var v0le: u32 = v0;
@@ -312,7 +296,7 @@ test "murmur2_32" {
 }
 
 test "murmur2_64" {
-    try std.testing.expectEqual(SMHasherTest(Murmur2_64.hashWithSeed, 64), 0x1F0D3804);
+    try std.testing.expectEqual(verify.smhasher(Murmur2_64.hashWithSeed), 0x1F0D3804);
     var v0: u32 = 0x12345678;
     var v1: u64 = 0x1234567812345678;
     var v0le: u32 = v0;
@@ -326,7 +310,7 @@ test "murmur2_64" {
 }
 
 test "murmur3_32" {
-    try std.testing.expectEqual(SMHasherTest(Murmur3_32.hashWithSeed, 32), 0xB0F57EE3);
+    try std.testing.expectEqual(verify.smhasher(Murmur3_32.hashWithSeed), 0xB0F57EE3);
     var v0: u32 = 0x12345678;
     var v1: u64 = 0x1234567812345678;
     var v0le: u32 = v0;
lib/std/hash/verify.zig
@@ -0,0 +1,35 @@
+const std = @import("std");
+
+fn hashMaybeSeed(comptime hash_fn: anytype, seed: anytype, buf: []const u8) @typeInfo(@TypeOf(hash_fn)).Fn.return_type.? {
+    const HashFn = @typeInfo(@TypeOf(hash_fn)).Fn;
+    if (HashFn.params.len > 1) {
+        if (@typeInfo(HashFn.params[0].type.?) == .Int) {
+            return hash_fn(@intCast(seed), buf);
+        } else {
+            return hash_fn(buf, @intCast(seed));
+        }
+    } else {
+        return hash_fn(buf);
+    }
+}
+
+// Returns a verification code, the same as used by SMHasher.
+//
+// Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255, using 256-N as seed, then hash all of the results together with seed 0.
+// The first four bytes of that final hash, interpreted as little-endian, are the verification code.
+pub fn smhasher(comptime hash_fn: anytype) u32 {
+    const HashFnTy = @typeInfo(@TypeOf(hash_fn)).Fn;
+    const HashResult = HashFnTy.return_type.?;
+    const hash_size = @sizeOf(HashResult);
+
+    var buf: [256]u8 = undefined;
+    var buf_all: [256 * hash_size]u8 = undefined;
+
+    for (0..256) |i| {
+        buf[i] = @intCast(i);
+        const h = hashMaybeSeed(hash_fn, 256 - i, buf[0..i]);
+        std.mem.writeIntLittle(HashResult, buf_all[i * hash_size ..][0..hash_size], h);
+    }
+
+    return @truncate(hashMaybeSeed(hash_fn, 0, buf_all[0..]));
+}
lib/std/hash/wyhash.zig
@@ -196,6 +196,7 @@ pub const Wyhash = struct {
     }
 };
 
+const verify = @import("verify.zig");
 const expectEqual = std.testing.expectEqual;
 
 const TestVector = struct {
@@ -229,6 +230,10 @@ test "test vectors at comptime" {
     }
 }
 
+test "smhasher" {
+    try expectEqual(verify.smhasher(Wyhash.hash), 0xBD5E840C);
+}
+
 test "test vectors streaming" {
     const step = 5;
 
lib/std/hash/xxhash.zig
@@ -438,6 +438,8 @@ fn validateType(comptime T: type) void {
     }
 }
 
+const verify = @import("verify.zig");
+
 fn testExpect(comptime H: type, seed: anytype, input: []const u8, expected: u64) !void {
     try expectEqual(expected, H.hash(0, input));
 
@@ -455,6 +457,8 @@ test "xxhash64" {
     try testExpect(H, 0, "abcdefghijklmnopqrstuvwxyz", 0xcfe1f278fa89835c);
     try testExpect(H, 0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 0xaaa46907d3047814);
     try testExpect(H, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890", 0xe04a477f19ee145d);
+
+    try expectEqual(verify.smhasher(H.hash), 0x024B7CF4);
 }
 
 test "xxhash32" {
@@ -467,4 +471,6 @@ test "xxhash32" {
     try testExpect(H, 0, "abcdefghijklmnopqrstuvwxyz", 0x63a14d5f);
     try testExpect(H, 0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 0x9c285e64);
     try testExpect(H, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890", 0x9c05f475);
+
+    try expectEqual(verify.smhasher(H.hash), 0xBA88B743);
 }