Commit 1498ccac2a

Sahnvour <sahnvour@pm.me>
2019-08-13 21:14:21
auto_hash: better generic hashing implementation
autoHash forbids slices as input hash was added to handle all types from autoHash plus slices, with a specified strategy
1 parent 50a8026
Changed files (1)
std
std/hash/auto_hash.zig
@@ -3,9 +3,76 @@ const builtin = @import("builtin");
 const mem = std.mem;
 const meta = std.meta;
 
+/// Describes how pointer types should be hashed.
+pub const HashStrategy = enum {
+    /// Do not follow pointers, only hash their value.
+    Shallow,
+
+    /// Follow pointers, hash the pointee content.
+    /// Only dereferences one level, ie. it is changed into .Shallow when a
+    /// pointer type is encountered.
+    Deep,
+
+    /// Follow pointers, hash the pointee content.
+    /// Dereferences all pointers encountered.
+    /// Assumes no cycle.
+    DeepRecursive,
+};
+
+/// Helper function to hash a pointer and mutate the strategy if needed.
+pub fn hashPointer(hasher: var, key: var, comptime strat: HashStrategy) void {
+    const info = @typeInfo(@typeOf(key));
+
+    switch (info.Pointer.size) {
+        builtin.TypeInfo.Pointer.Size.One => switch (strat) {
+            .Shallow => hash(hasher, @ptrToInt(key), .Shallow),
+            .Deep => hash(hasher, key.*, .Shallow),
+            .DeepRecursive => hash(hasher, key.*, .DeepRecursive),
+        },
+
+        builtin.TypeInfo.Pointer.Size.Slice => switch (strat) {
+            .Shallow => {
+                hashPointer(hasher, key.ptr, .Shallow);
+                hash(hasher, key.len, .Shallow);
+            },
+            .Deep => hashArray(hasher, key, .Shallow),
+            .DeepRecursive => hashArray(hasher, key, .DeepRecursive),
+        },
+
+        builtin.TypeInfo.Pointer.Size.Many,
+        builtin.TypeInfo.Pointer.Size.C,
+        => switch (strat) {
+            .Shallow => hash(hasher, @ptrToInt(key), .Shallow),
+            else => @compileError(
+                \\ unknown-length pointers and C pointers cannot be hashed deeply.
+                \\ Consider providing your own hash function.
+            ),
+        },
+    }
+}
+
+/// Helper function to hash a set of contiguous objects, from an array or slice.
+pub fn hashArray(hasher: var, key: var, comptime strat: HashStrategy) void {
+    switch (strat) {
+        .Shallow => {
+            // TODO detect via a trait when Key has no padding bits to
+            // hash it as an array of bytes.
+            // Otherwise, hash every element.
+            for (key) |element| {
+                hash(hasher, element, .Shallow);
+            }
+        },
+        else => {
+            for (key) |element| {
+                hash(hasher, element, strat);
+            }
+        },
+    }
+}
+
 /// Provides generic hashing for any eligible type.
-/// Only hashes `key` itself, pointers are not followed.
-pub fn autoHash(hasher: var, key: var) void {
+/// Strategy is provided to determine if pointers should be followed or not.
+pub fn hash(hasher: var, key: var, comptime strat: HashStrategy) void {
     const Key = @typeOf(key);
     switch (@typeInfo(Key)) {
         .NoReturn,
@@ -26,35 +93,18 @@ pub fn autoHash(hasher: var, key: var) void {
         // TODO Check if the situation is better after #561 is resolved.
         .Int => @inlineCall(hasher.update, std.mem.asBytes(&key)),
 
-        .Float => |info| autoHash(hasher, @bitCast(@IntType(false, info.bits), key)),
+        .Float => |info| hash(hasher, @bitCast(@IntType(false, info.bits), key), strat),
 
-        .Bool => autoHash(hasher, @boolToInt(key)),
-        .Enum => autoHash(hasher, @enumToInt(key)),
-        .ErrorSet => autoHash(hasher, @errorToInt(key)),
-        .AnyFrame, .Fn => autoHash(hasher, @ptrToInt(key)),
+        .Bool => hash(hasher, @boolToInt(key), strat),
+        .Enum => hash(hasher, @enumToInt(key), strat),
+        .ErrorSet => hash(hasher, @errorToInt(key), strat),
+        .AnyFrame, .Fn => hash(hasher, @ptrToInt(key), strat),
 
-        .Pointer => |info| switch (info.size) {
-            builtin.TypeInfo.Pointer.Size.One,
-            builtin.TypeInfo.Pointer.Size.Many,
-            builtin.TypeInfo.Pointer.Size.C,
-            => autoHash(hasher, @ptrToInt(key)),
+        .Pointer => @inlineCall(hashPointer, hasher, key, strat),
 
-            builtin.TypeInfo.Pointer.Size.Slice => {
-                autoHash(hasher, key.ptr);
-                autoHash(hasher, key.len);
-            },
-        },
+        .Optional => if (key) |k| hash(hasher, k, strat),
 
-        .Optional => if (key) |k| autoHash(hasher, k),
-
-        .Array => {
-            // TODO detect via a trait when Key has no padding bits to
-            // hash it as an array of bytes.
-            // Otherwise, hash every element.
-            for (key) |element| {
-                autoHash(hasher, element);
-            }
-        },
+        .Array => hashArray(hasher, key, strat),
 
         .Vector => |info| {
             if (info.child.bit_count % 8 == 0) {
@@ -67,7 +117,7 @@ pub fn autoHash(hasher: var, key: var) void {
                 const array: [info.len]info.child = key;
                 comptime var i: u32 = 0;
                 inline while (i < info.len) : (i += 1) {
-                    autoHash(hasher, array[i]);
+                    hash(hasher, array[i], strat);
                 }
             }
         },
@@ -79,19 +129,19 @@ pub fn autoHash(hasher: var, key: var) void {
             inline for (info.fields) |field| {
                 // We reuse the hash of the previous field as the seed for the
                 // next one so that they're dependant.
-                autoHash(hasher, @field(key, field.name));
+                hash(hasher, @field(key, field.name), strat);
             }
         },
 
         .Union => |info| blk: {
             if (info.tag_type) |tag_type| {
                 const tag = meta.activeTag(key);
-                const s = autoHash(hasher, tag);
+                const s = hash(hasher, tag, strat);
                 inline for (info.fields) |field| {
                     const enum_field = field.enum_field.?;
                     if (enum_field.value == @enumToInt(tag)) {
-                        autoHash(hasher, @field(key, enum_field.name));
-                        // TODO use a labelled break when it does not crash the compiler.
+                        hash(hasher, @field(key, enum_field.name), strat);
+                        // TODO use a labelled break when it does not crash the compiler. cf #2908
                         // break :blk;
                         return;
                     }
@@ -102,25 +152,77 @@ pub fn autoHash(hasher: var, key: var) void {
 
         .ErrorUnion => blk: {
             const payload = key catch |err| {
-                autoHash(hasher, err);
+                hash(hasher, err, strat);
                 break :blk;
             };
-            autoHash(hasher, payload);
+            hash(hasher, payload, strat);
         },
     }
 }
 
+/// Provides generic hashing for any eligible type.
+/// Only hashes `key` itself, pointers are not followed.
+/// Slices are rejected to avoid ambiguity on the user's intention.
+pub fn autoHash(hasher: var, key: var) void {
+    const Key = @typeOf(key);
+    if (comptime meta.trait.isSlice(Key))
+        @compileError("std.auto_hash.autoHash does not allow slices (here " ++ @typeName(Key) ++ " because the intent is unclear. Consider using std.auto_hash.hash or providing your own hash function instead.");
+
+    hash(hasher, key, .Shallow);
+}
+
 const testing = std.testing;
 const Wyhash = std.hash.Wyhash;
 
-fn testAutoHash(key: var) u64 {
+fn testHash(key: var) u64 {
     // Any hash could be used here, for testing autoHash.
     var hasher = Wyhash.init(0);
-    autoHash(&hasher, key);
+    hash(&hasher, key, .Shallow);
     return hasher.final();
 }
 
-test "autoHash slice" {
+fn testHashShallow(key: var) u64 {
+    // Any hash could be used here, for testing autoHash.
+    var hasher = Wyhash.init(0);
+    hash(&hasher, key, .Shallow);
+    return hasher.final();
+}
+
+fn testHashDeep(key: var) u64 {
+    // Any hash could be used here, for testing autoHash.
+    var hasher = Wyhash.init(0);
+    hash(&hasher, key, .Deep);
+    return hasher.final();
+}
+
+fn testHashDeepRecursive(key: var) u64 {
+    // Any hash could be used here, for testing autoHash.
+    var hasher = Wyhash.init(0);
+    hash(&hasher, key, .DeepRecursive);
+    return hasher.final();
+}
+
+test "hash pointer" {
+    const array = [_]u32{ 123, 123, 123 };
+    const a = &array[0];
+    const b = &array[1];
+    const c = &array[2];
+    const d = a;
+
+    testing.expect(testHashShallow(a) == testHashShallow(d));
+    testing.expect(testHashShallow(a) != testHashShallow(c));
+    testing.expect(testHashShallow(a) != testHashShallow(b));
+
+    testing.expect(testHashDeep(a) == testHashDeep(a));
+    testing.expect(testHashDeep(a) == testHashDeep(c));
+    testing.expect(testHashDeep(a) == testHashDeep(b));
+
+    testing.expect(testHashDeepRecursive(a) == testHashDeepRecursive(a));
+    testing.expect(testHashDeepRecursive(a) == testHashDeepRecursive(c));
+    testing.expect(testHashDeepRecursive(a) == testHashDeepRecursive(b));
+}
+
+test "hash slice shallow" {
     // Allocate one array dynamically so that we're assured it is not merged
     // with the other by the optimization passes.
     const array1 = try std.heap.direct_allocator.create([6]u32);
@@ -130,23 +232,78 @@ test "autoHash slice" {
     const a = array1[0..];
     const b = array2[0..];
     const c = array1[0..3];
-    testing.expect(testAutoHash(a) == testAutoHash(a));
-    testing.expect(testAutoHash(a) != testAutoHash(array1));
-    testing.expect(testAutoHash(a) != testAutoHash(b));
-    testing.expect(testAutoHash(a) != testAutoHash(c));
+    testing.expect(testHashShallow(a) == testHashShallow(a));
+    testing.expect(testHashShallow(a) != testHashShallow(array1));
+    testing.expect(testHashShallow(a) != testHashShallow(b));
+    testing.expect(testHashShallow(a) != testHashShallow(c));
+}
+
+test "hash slice deep" {
+    // Allocate one array dynamically so that we're assured it is not merged
+    // with the other by the optimization passes.
+    const array1 = try std.heap.direct_allocator.create([6]u32);
+    defer std.heap.direct_allocator.destroy(array1);
+    array1.* = [_]u32{ 1, 2, 3, 4, 5, 6 };
+    const array2 = [_]u32{ 1, 2, 3, 4, 5, 6 };
+    const a = array1[0..];
+    const b = array2[0..];
+    const c = array1[0..3];
+    testing.expect(testHashDeep(a) == testHashDeep(a));
+    testing.expect(testHashDeep(a) == testHashDeep(array1));
+    testing.expect(testHashDeep(a) == testHashDeep(b));
+    testing.expect(testHashDeep(a) != testHashDeep(c));
+}
+
+test "hash struct deep" {
+    const Foo = struct {
+        a: u32,
+        b: f64,
+        c: *bool,
+
+        const Self = @This();
+
+        pub fn init(allocator: *mem.Allocator, a_: u32, b_: f64, c_: bool) !Self {
+            const ptr = try allocator.create(bool);
+            ptr.* = c_;
+            return Self{ .a = a_, .b = b_, .c = ptr };
+        }
+    };
+
+    const allocator = std.heap.direct_allocator;
+    const foo = try Foo.init(allocator, 123, 1.0, true);
+    const bar = try Foo.init(allocator, 123, 1.0, true);
+    const baz = try Foo.init(allocator, 123, 1.0, false);
+    defer allocator.destroy(foo.c);
+    defer allocator.destroy(bar.c);
+    defer allocator.destroy(baz.c);
+
+    testing.expect(testHashDeep(foo) == testHashDeep(bar));
+    testing.expect(testHashDeep(foo) != testHashDeep(baz));
+    testing.expect(testHashDeep(bar) != testHashDeep(baz));
+
+    var hasher = Wyhash.init(0);
+    const h = testHashDeep(foo);
+    autoHash(&hasher, foo.a);
+    autoHash(&hasher, foo.b);
+    autoHash(&hasher, foo.c.*);
+    testing.expectEqual(h, hasher.final());
+
+    const h2 = testHashDeepRecursive(&foo);
+    testing.expect(h2 != testHashDeep(&foo));
+    testing.expect(h2 == testHashDeep(foo));
 }
 
-test "testAutoHash optional" {
+test "testHash optional" {
     const a: ?u32 = 123;
     const b: ?u32 = null;
-    testing.expectEqual(testAutoHash(a), testAutoHash(u32(123)));
-    testing.expect(testAutoHash(a) != testAutoHash(b));
-    testing.expectEqual(testAutoHash(b), 0);
+    testing.expectEqual(testHash(a), testHash(u32(123)));
+    testing.expect(testHash(a) != testHash(b));
+    testing.expectEqual(testHash(b), 0);
 }
 
-test "testAutoHash array" {
+test "testHash array" {
     const a = [_]u32{ 1, 2, 3 };
-    const h = testAutoHash(a);
+    const h = testHash(a);
     var hasher = Wyhash.init(0);
     autoHash(&hasher, u32(1));
     autoHash(&hasher, u32(2));
@@ -154,14 +311,14 @@ test "testAutoHash array" {
     testing.expectEqual(h, hasher.final());
 }
 
-test "testAutoHash struct" {
+test "testHash struct" {
     const Foo = struct {
         a: u32 = 1,
         b: u32 = 2,
         c: u32 = 3,
     };
     const f = Foo{};
-    const h = testAutoHash(f);
+    const h = testHash(f);
     var hasher = Wyhash.init(0);
     autoHash(&hasher, u32(1));
     autoHash(&hasher, u32(2));
@@ -169,7 +326,7 @@ test "testAutoHash struct" {
     testing.expectEqual(h, hasher.final());
 }
 
-test "testAutoHash union" {
+test "testHash union" {
     const Foo = union(enum) {
         A: u32,
         B: f32,
@@ -179,24 +336,24 @@ test "testAutoHash union" {
     const a = Foo{ .A = 18 };
     var b = Foo{ .B = 12.34 };
     const c = Foo{ .C = 18 };
-    testing.expect(testAutoHash(a) == testAutoHash(a));
-    testing.expect(testAutoHash(a) != testAutoHash(b));
-    testing.expect(testAutoHash(a) != testAutoHash(c));
+    testing.expect(testHash(a) == testHash(a));
+    testing.expect(testHash(a) != testHash(b));
+    testing.expect(testHash(a) != testHash(c));
 
     b = Foo{ .A = 18 };
-    testing.expect(testAutoHash(a) == testAutoHash(b));
+    testing.expect(testHash(a) == testHash(b));
 }
 
-test "testAutoHash vector" {
+test "testHash vector" {
     const a: @Vector(4, u32) = [_]u32{ 1, 2, 3, 4 };
     const b: @Vector(4, u32) = [_]u32{ 1, 2, 3, 5 };
     const c: @Vector(4, u31) = [_]u31{ 1, 2, 3, 4 };
-    testing.expect(testAutoHash(a) == testAutoHash(a));
-    testing.expect(testAutoHash(a) != testAutoHash(b));
-    testing.expect(testAutoHash(a) != testAutoHash(c));
+    testing.expect(testHash(a) == testHash(a));
+    testing.expect(testHash(a) != testHash(b));
+    testing.expect(testHash(a) != testHash(c));
 }
 
-test "testAutoHash error union" {
+test "testHash error union" {
     const Errors = error{Test};
     const Foo = struct {
         a: u32 = 1,
@@ -205,7 +362,7 @@ test "testAutoHash error union" {
     };
     const f = Foo{};
     const g: Errors!Foo = Errors.Test;
-    testing.expect(testAutoHash(f) != testAutoHash(g));
-    testing.expect(testAutoHash(f) == testAutoHash(Foo{}));
-    testing.expect(testAutoHash(g) == testAutoHash(Errors.Test));
+    testing.expect(testHash(f) != testHash(g));
+    testing.expect(testHash(f) == testHash(Foo{}));
+    testing.expect(testHash(g) == testHash(Errors.Test));
 }