master
  1const std = @import("std");
  2
  3inline fn offsetPtr(ptr: [*]const u8, offset: usize) [*]const u8 {
  4    // ptr + offset doesn't work at comptime so we need this instead.
  5    return @as([*]const u8, @ptrCast(&ptr[offset]));
  6}
  7
  8fn fetch32(ptr: [*]const u8, offset: usize) u32 {
  9    return std.mem.readInt(u32, offsetPtr(ptr, offset)[0..4], .little);
 10}
 11
 12fn fetch64(ptr: [*]const u8, offset: usize) u64 {
 13    return std.mem.readInt(u64, offsetPtr(ptr, offset)[0..8], .little);
 14}
 15
 16pub const CityHash32 = struct {
 17    const Self = @This();
 18
 19    // Magic numbers for 32-bit hashing.  Copied from Murmur3.
 20    const c1: u32 = 0xcc9e2d51;
 21    const c2: u32 = 0x1b873593;
 22
 23    // A 32-bit to 32-bit integer hash copied from Murmur3.
 24    fn fmix(h: u32) u32 {
 25        var h1: u32 = h;
 26        h1 ^= h1 >> 16;
 27        h1 *%= 0x85ebca6b;
 28        h1 ^= h1 >> 13;
 29        h1 *%= 0xc2b2ae35;
 30        h1 ^= h1 >> 16;
 31        return h1;
 32    }
 33
 34    // Rotate right helper
 35    fn rotr32(x: u32, comptime r: u32) u32 {
 36        return (x >> r) | (x << (32 - r));
 37    }
 38
 39    // Helper from Murmur3 for combining two 32-bit values.
 40    fn mur(a: u32, h: u32) u32 {
 41        var a1: u32 = a;
 42        var h1: u32 = h;
 43        a1 *%= c1;
 44        a1 = rotr32(a1, 17);
 45        a1 *%= c2;
 46        h1 ^= a1;
 47        h1 = rotr32(h1, 19);
 48        return h1 *% 5 +% 0xe6546b64;
 49    }
 50
 51    fn hash32Len0To4(str: []const u8) u32 {
 52        const len: u32 = @as(u32, @truncate(str.len));
 53        var b: u32 = 0;
 54        var c: u32 = 9;
 55        for (str) |v| {
 56            b = b *% c1 +% @as(u32, @bitCast(@as(i32, @intCast(@as(i8, @bitCast(v))))));
 57            c ^= b;
 58        }
 59        return fmix(mur(b, mur(len, c)));
 60    }
 61
 62    fn hash32Len5To12(str: []const u8) u32 {
 63        var a: u32 = @as(u32, @truncate(str.len));
 64        var b: u32 = a *% 5;
 65        var c: u32 = 9;
 66        const d: u32 = b;
 67
 68        a +%= fetch32(str.ptr, 0);
 69        b +%= fetch32(str.ptr, str.len - 4);
 70        c +%= fetch32(str.ptr, (str.len >> 1) & 4);
 71
 72        return fmix(mur(c, mur(b, mur(a, d))));
 73    }
 74
 75    fn hash32Len13To24(str: []const u8) u32 {
 76        const len: u32 = @as(u32, @truncate(str.len));
 77        const a: u32 = fetch32(str.ptr, (str.len >> 1) - 4);
 78        const b: u32 = fetch32(str.ptr, 4);
 79        const c: u32 = fetch32(str.ptr, str.len - 8);
 80        const d: u32 = fetch32(str.ptr, str.len >> 1);
 81        const e: u32 = fetch32(str.ptr, 0);
 82        const f: u32 = fetch32(str.ptr, str.len - 4);
 83
 84        return fmix(mur(f, mur(e, mur(d, mur(c, mur(b, mur(a, len)))))));
 85    }
 86
 87    pub fn hash(str: []const u8) u32 {
 88        if (str.len <= 24) {
 89            if (str.len <= 4) {
 90                return hash32Len0To4(str);
 91            } else {
 92                if (str.len <= 12)
 93                    return hash32Len5To12(str);
 94                return hash32Len13To24(str);
 95            }
 96        }
 97
 98        const len: u32 = @as(u32, @truncate(str.len));
 99        var h: u32 = len;
100        var g: u32 = c1 *% len;
101        var f: u32 = g;
102
103        const a0: u32 = rotr32(fetch32(str.ptr, str.len - 4) *% c1, 17) *% c2;
104        const a1: u32 = rotr32(fetch32(str.ptr, str.len - 8) *% c1, 17) *% c2;
105        const a2: u32 = rotr32(fetch32(str.ptr, str.len - 16) *% c1, 17) *% c2;
106        const a3: u32 = rotr32(fetch32(str.ptr, str.len - 12) *% c1, 17) *% c2;
107        const a4: u32 = rotr32(fetch32(str.ptr, str.len - 20) *% c1, 17) *% c2;
108
109        h ^= a0;
110        h = rotr32(h, 19);
111        h = h *% 5 +% 0xe6546b64;
112        h ^= a2;
113        h = rotr32(h, 19);
114        h = h *% 5 +% 0xe6546b64;
115        g ^= a1;
116        g = rotr32(g, 19);
117        g = g *% 5 +% 0xe6546b64;
118        g ^= a3;
119        g = rotr32(g, 19);
120        g = g *% 5 +% 0xe6546b64;
121        f +%= a4;
122        f = rotr32(f, 19);
123        f = f *% 5 +% 0xe6546b64;
124        var iters = (str.len - 1) / 20;
125        var ptr = str.ptr;
126        while (iters != 0) : (iters -= 1) {
127            const b0: u32 = rotr32(fetch32(ptr, 0) *% c1, 17) *% c2;
128            const b1: u32 = fetch32(ptr, 4);
129            const b2: u32 = rotr32(fetch32(ptr, 8) *% c1, 17) *% c2;
130            const b3: u32 = rotr32(fetch32(ptr, 12) *% c1, 17) *% c2;
131            const b4: u32 = fetch32(ptr, 16);
132
133            h ^= b0;
134            h = rotr32(h, 18);
135            h = h *% 5 +% 0xe6546b64;
136            f +%= b1;
137            f = rotr32(f, 19);
138            f = f *% c1;
139            g +%= b2;
140            g = rotr32(g, 18);
141            g = g *% 5 +% 0xe6546b64;
142            h ^= b3 +% b1;
143            h = rotr32(h, 19);
144            h = h *% 5 +% 0xe6546b64;
145            g ^= b4;
146            g = @byteSwap(g) *% 5;
147            h +%= b4 *% 5;
148            h = @byteSwap(h);
149            f +%= b0;
150            const t: u32 = h;
151            h = f;
152            f = g;
153            g = t;
154            ptr = offsetPtr(ptr, 20);
155        }
156        g = rotr32(g, 11) *% c1;
157        g = rotr32(g, 17) *% c1;
158        f = rotr32(f, 11) *% c1;
159        f = rotr32(f, 17) *% c1;
160        h = rotr32(h +% g, 19);
161        h = h *% 5 +% 0xe6546b64;
162        h = rotr32(h, 17) *% c1;
163        h = rotr32(h +% f, 19);
164        h = h *% 5 +% 0xe6546b64;
165        h = rotr32(h, 17) *% c1;
166        return h;
167    }
168};
169
170pub const CityHash64 = struct {
171    const Self = @This();
172
173    // Some primes between 2^63 and 2^64 for various uses.
174    const k0: u64 = 0xc3a5c85c97cb3127;
175    const k1: u64 = 0xb492b66fbe98f273;
176    const k2: u64 = 0x9ae16a3b2f90404f;
177
178    // Rotate right helper
179    fn rotr64(x: u64, comptime r: u64) u64 {
180        return (x >> r) | (x << (64 - r));
181    }
182
183    fn shiftmix(v: u64) u64 {
184        return v ^ (v >> 47);
185    }
186
187    fn hashLen16(u: u64, v: u64) u64 {
188        return @call(.always_inline, hash128To64, .{ u, v });
189    }
190
191    fn hashLen16Mul(low: u64, high: u64, mul: u64) u64 {
192        var a: u64 = (low ^ high) *% mul;
193        a ^= (a >> 47);
194        var b: u64 = (high ^ a) *% mul;
195        b ^= (b >> 47);
196        b *%= mul;
197        return b;
198    }
199
200    fn hash128To64(low: u64, high: u64) u64 {
201        return @call(.always_inline, hashLen16Mul, .{ low, high, 0x9ddfea08eb382d69 });
202    }
203
204    fn hashLen0To16(str: []const u8) u64 {
205        const len: u64 = @as(u64, str.len);
206        if (len >= 8) {
207            const mul: u64 = k2 +% len *% 2;
208            const a: u64 = fetch64(str.ptr, 0) +% k2;
209            const b: u64 = fetch64(str.ptr, str.len - 8);
210            const c: u64 = rotr64(b, 37) *% mul +% a;
211            const d: u64 = (rotr64(a, 25) +% b) *% mul;
212            return hashLen16Mul(c, d, mul);
213        }
214        if (len >= 4) {
215            const mul: u64 = k2 +% len *% 2;
216            const a: u64 = fetch32(str.ptr, 0);
217            return hashLen16Mul(len +% (a << 3), fetch32(str.ptr, str.len - 4), mul);
218        }
219        if (len > 0) {
220            const a: u8 = str[0];
221            const b: u8 = str[str.len >> 1];
222            const c: u8 = str[str.len - 1];
223            const y: u32 = @as(u32, @intCast(a)) +% (@as(u32, @intCast(b)) << 8);
224            const z: u32 = @as(u32, @truncate(str.len)) +% (@as(u32, @intCast(c)) << 2);
225            return shiftmix(@as(u64, @intCast(y)) *% k2 ^ @as(u64, @intCast(z)) *% k0) *% k2;
226        }
227        return k2;
228    }
229
230    fn hashLen17To32(str: []const u8) u64 {
231        const len: u64 = @as(u64, str.len);
232        const mul: u64 = k2 +% len *% 2;
233        const a: u64 = fetch64(str.ptr, 0) *% k1;
234        const b: u64 = fetch64(str.ptr, 8);
235        const c: u64 = fetch64(str.ptr, str.len - 8) *% mul;
236        const d: u64 = fetch64(str.ptr, str.len - 16) *% k2;
237
238        return hashLen16Mul(rotr64(a +% b, 43) +% rotr64(c, 30) +% d, a +% rotr64(b +% k2, 18) +% c, mul);
239    }
240
241    fn hashLen33To64(str: []const u8) u64 {
242        const len: u64 = @as(u64, str.len);
243        const mul: u64 = k2 +% len *% 2;
244        const a: u64 = fetch64(str.ptr, 0) *% k2;
245        const b: u64 = fetch64(str.ptr, 8);
246        const c: u64 = fetch64(str.ptr, str.len - 24);
247        const d: u64 = fetch64(str.ptr, str.len - 32);
248        const e: u64 = fetch64(str.ptr, 16) *% k2;
249        const f: u64 = fetch64(str.ptr, 24) *% 9;
250        const g: u64 = fetch64(str.ptr, str.len - 8);
251        const h: u64 = fetch64(str.ptr, str.len - 16) *% mul;
252
253        const u: u64 = rotr64(a +% g, 43) +% (rotr64(b, 30) +% c) *% 9;
254        const v: u64 = ((a +% g) ^ d) +% f +% 1;
255        const w: u64 = @byteSwap((u +% v) *% mul) +% h;
256        const x: u64 = rotr64(e +% f, 42) +% c;
257        const y: u64 = (@byteSwap((v +% w) *% mul) +% g) *% mul;
258        const z: u64 = e +% f +% c;
259        const a1: u64 = @byteSwap((x +% z) *% mul +% y) +% b;
260        const b1: u64 = shiftmix((z +% a1) *% mul +% d +% h) *% mul;
261        return b1 +% x;
262    }
263
264    const WeakPair = struct {
265        first: u64,
266        second: u64,
267    };
268
269    fn weakHashLen32WithSeedsHelper(w: u64, x: u64, y: u64, z: u64, a: u64, b: u64) WeakPair {
270        var a1: u64 = a;
271        var b1: u64 = b;
272        a1 +%= w;
273        b1 = rotr64(b1 +% a1 +% z, 21);
274        const c: u64 = a1;
275        a1 +%= x;
276        a1 +%= y;
277        b1 +%= rotr64(a1, 44);
278        return WeakPair{ .first = a1 +% z, .second = b1 +% c };
279    }
280
281    fn weakHashLen32WithSeeds(ptr: [*]const u8, a: u64, b: u64) WeakPair {
282        return @call(.always_inline, weakHashLen32WithSeedsHelper, .{
283            fetch64(ptr, 0),
284            fetch64(ptr, 8),
285            fetch64(ptr, 16),
286            fetch64(ptr, 24),
287            a,
288            b,
289        });
290    }
291
292    pub fn hash(str: []const u8) u64 {
293        if (str.len <= 32) {
294            if (str.len <= 16) {
295                return hashLen0To16(str);
296            } else {
297                return hashLen17To32(str);
298            }
299        } else if (str.len <= 64) {
300            return hashLen33To64(str);
301        }
302
303        var len: u64 = @as(u64, str.len);
304
305        var x: u64 = fetch64(str.ptr, str.len - 40);
306        var y: u64 = fetch64(str.ptr, str.len - 16) +% fetch64(str.ptr, str.len - 56);
307        var z: u64 = hashLen16(fetch64(str.ptr, str.len - 48) +% len, fetch64(str.ptr, str.len - 24));
308        var v: WeakPair = weakHashLen32WithSeeds(offsetPtr(str.ptr, str.len - 64), len, z);
309        var w: WeakPair = weakHashLen32WithSeeds(offsetPtr(str.ptr, str.len - 32), y +% k1, x);
310
311        x = x *% k1 +% fetch64(str.ptr, 0);
312        len = (len - 1) & ~@as(u64, @intCast(63));
313
314        var ptr: [*]const u8 = str.ptr;
315        while (true) {
316            x = rotr64(x +% y +% v.first +% fetch64(ptr, 8), 37) *% k1;
317            y = rotr64(y +% v.second +% fetch64(ptr, 48), 42) *% k1;
318            x ^= w.second;
319            y +%= v.first +% fetch64(ptr, 40);
320            z = rotr64(z +% w.first, 33) *% k1;
321            v = weakHashLen32WithSeeds(ptr, v.second *% k1, x +% w.first);
322            w = weakHashLen32WithSeeds(offsetPtr(ptr, 32), z +% w.second, y +% fetch64(ptr, 16));
323            const t: u64 = z;
324            z = x;
325            x = t;
326
327            ptr = offsetPtr(ptr, 64);
328            len -= 64;
329            if (len == 0)
330                break;
331        }
332
333        return hashLen16(hashLen16(v.first, w.first) +% shiftmix(y) *% k1 +% z, hashLen16(v.second, w.second) +% x);
334    }
335
336    pub fn hashWithSeed(str: []const u8, seed: u64) u64 {
337        return @call(.always_inline, Self.hashWithSeeds, .{ str, k2, seed });
338    }
339
340    pub fn hashWithSeeds(str: []const u8, seed0: u64, seed1: u64) u64 {
341        return hashLen16(hash(str) -% seed0, seed1);
342    }
343};
344
345fn CityHash32hashIgnoreSeed(str: []const u8, seed: u32) u32 {
346    _ = seed;
347    return CityHash32.hash(str);
348}
349
350const verify = @import("verify.zig");
351
352test "cityhash32" {
353    const Test = struct {
354        fn do() !void {
355            // SMHasher doesn't provide a 32bit version of the algorithm.
356            // The implementation was verified against the Google Abseil version.
357            try std.testing.expectEqual(verify.smhasher(CityHash32hashIgnoreSeed), 0x68254F81);
358        }
359    };
360    try Test.do();
361    @setEvalBranchQuota(75000);
362    try comptime Test.do();
363}
364
365test "cityhash64" {
366    const Test = struct {
367        fn do() !void {
368            // This is not compliant with the SMHasher implementation of CityHash64!
369            // The implementation was verified against the Google Abseil version.
370            try std.testing.expectEqual(verify.smhasher(CityHash64.hashWithSeed), 0x5FABC5C5);
371        }
372    };
373    try Test.do();
374    @setEvalBranchQuota(75000);
375    try comptime Test.do();
376}