master
1const std = @import("std");
2
3inline fn offsetPtr(ptr: [*]const u8, offset: usize) [*]const u8 {
4 // ptr + offset doesn't work at comptime so we need this instead.
5 return @as([*]const u8, @ptrCast(&ptr[offset]));
6}
7
8fn fetch32(ptr: [*]const u8, offset: usize) u32 {
9 return std.mem.readInt(u32, offsetPtr(ptr, offset)[0..4], .little);
10}
11
12fn fetch64(ptr: [*]const u8, offset: usize) u64 {
13 return std.mem.readInt(u64, offsetPtr(ptr, offset)[0..8], .little);
14}
15
16pub const CityHash32 = struct {
17 const Self = @This();
18
19 // Magic numbers for 32-bit hashing. Copied from Murmur3.
20 const c1: u32 = 0xcc9e2d51;
21 const c2: u32 = 0x1b873593;
22
23 // A 32-bit to 32-bit integer hash copied from Murmur3.
24 fn fmix(h: u32) u32 {
25 var h1: u32 = h;
26 h1 ^= h1 >> 16;
27 h1 *%= 0x85ebca6b;
28 h1 ^= h1 >> 13;
29 h1 *%= 0xc2b2ae35;
30 h1 ^= h1 >> 16;
31 return h1;
32 }
33
34 // Rotate right helper
35 fn rotr32(x: u32, comptime r: u32) u32 {
36 return (x >> r) | (x << (32 - r));
37 }
38
39 // Helper from Murmur3 for combining two 32-bit values.
40 fn mur(a: u32, h: u32) u32 {
41 var a1: u32 = a;
42 var h1: u32 = h;
43 a1 *%= c1;
44 a1 = rotr32(a1, 17);
45 a1 *%= c2;
46 h1 ^= a1;
47 h1 = rotr32(h1, 19);
48 return h1 *% 5 +% 0xe6546b64;
49 }
50
51 fn hash32Len0To4(str: []const u8) u32 {
52 const len: u32 = @as(u32, @truncate(str.len));
53 var b: u32 = 0;
54 var c: u32 = 9;
55 for (str) |v| {
56 b = b *% c1 +% @as(u32, @bitCast(@as(i32, @intCast(@as(i8, @bitCast(v))))));
57 c ^= b;
58 }
59 return fmix(mur(b, mur(len, c)));
60 }
61
62 fn hash32Len5To12(str: []const u8) u32 {
63 var a: u32 = @as(u32, @truncate(str.len));
64 var b: u32 = a *% 5;
65 var c: u32 = 9;
66 const d: u32 = b;
67
68 a +%= fetch32(str.ptr, 0);
69 b +%= fetch32(str.ptr, str.len - 4);
70 c +%= fetch32(str.ptr, (str.len >> 1) & 4);
71
72 return fmix(mur(c, mur(b, mur(a, d))));
73 }
74
75 fn hash32Len13To24(str: []const u8) u32 {
76 const len: u32 = @as(u32, @truncate(str.len));
77 const a: u32 = fetch32(str.ptr, (str.len >> 1) - 4);
78 const b: u32 = fetch32(str.ptr, 4);
79 const c: u32 = fetch32(str.ptr, str.len - 8);
80 const d: u32 = fetch32(str.ptr, str.len >> 1);
81 const e: u32 = fetch32(str.ptr, 0);
82 const f: u32 = fetch32(str.ptr, str.len - 4);
83
84 return fmix(mur(f, mur(e, mur(d, mur(c, mur(b, mur(a, len)))))));
85 }
86
87 pub fn hash(str: []const u8) u32 {
88 if (str.len <= 24) {
89 if (str.len <= 4) {
90 return hash32Len0To4(str);
91 } else {
92 if (str.len <= 12)
93 return hash32Len5To12(str);
94 return hash32Len13To24(str);
95 }
96 }
97
98 const len: u32 = @as(u32, @truncate(str.len));
99 var h: u32 = len;
100 var g: u32 = c1 *% len;
101 var f: u32 = g;
102
103 const a0: u32 = rotr32(fetch32(str.ptr, str.len - 4) *% c1, 17) *% c2;
104 const a1: u32 = rotr32(fetch32(str.ptr, str.len - 8) *% c1, 17) *% c2;
105 const a2: u32 = rotr32(fetch32(str.ptr, str.len - 16) *% c1, 17) *% c2;
106 const a3: u32 = rotr32(fetch32(str.ptr, str.len - 12) *% c1, 17) *% c2;
107 const a4: u32 = rotr32(fetch32(str.ptr, str.len - 20) *% c1, 17) *% c2;
108
109 h ^= a0;
110 h = rotr32(h, 19);
111 h = h *% 5 +% 0xe6546b64;
112 h ^= a2;
113 h = rotr32(h, 19);
114 h = h *% 5 +% 0xe6546b64;
115 g ^= a1;
116 g = rotr32(g, 19);
117 g = g *% 5 +% 0xe6546b64;
118 g ^= a3;
119 g = rotr32(g, 19);
120 g = g *% 5 +% 0xe6546b64;
121 f +%= a4;
122 f = rotr32(f, 19);
123 f = f *% 5 +% 0xe6546b64;
124 var iters = (str.len - 1) / 20;
125 var ptr = str.ptr;
126 while (iters != 0) : (iters -= 1) {
127 const b0: u32 = rotr32(fetch32(ptr, 0) *% c1, 17) *% c2;
128 const b1: u32 = fetch32(ptr, 4);
129 const b2: u32 = rotr32(fetch32(ptr, 8) *% c1, 17) *% c2;
130 const b3: u32 = rotr32(fetch32(ptr, 12) *% c1, 17) *% c2;
131 const b4: u32 = fetch32(ptr, 16);
132
133 h ^= b0;
134 h = rotr32(h, 18);
135 h = h *% 5 +% 0xe6546b64;
136 f +%= b1;
137 f = rotr32(f, 19);
138 f = f *% c1;
139 g +%= b2;
140 g = rotr32(g, 18);
141 g = g *% 5 +% 0xe6546b64;
142 h ^= b3 +% b1;
143 h = rotr32(h, 19);
144 h = h *% 5 +% 0xe6546b64;
145 g ^= b4;
146 g = @byteSwap(g) *% 5;
147 h +%= b4 *% 5;
148 h = @byteSwap(h);
149 f +%= b0;
150 const t: u32 = h;
151 h = f;
152 f = g;
153 g = t;
154 ptr = offsetPtr(ptr, 20);
155 }
156 g = rotr32(g, 11) *% c1;
157 g = rotr32(g, 17) *% c1;
158 f = rotr32(f, 11) *% c1;
159 f = rotr32(f, 17) *% c1;
160 h = rotr32(h +% g, 19);
161 h = h *% 5 +% 0xe6546b64;
162 h = rotr32(h, 17) *% c1;
163 h = rotr32(h +% f, 19);
164 h = h *% 5 +% 0xe6546b64;
165 h = rotr32(h, 17) *% c1;
166 return h;
167 }
168};
169
170pub const CityHash64 = struct {
171 const Self = @This();
172
173 // Some primes between 2^63 and 2^64 for various uses.
174 const k0: u64 = 0xc3a5c85c97cb3127;
175 const k1: u64 = 0xb492b66fbe98f273;
176 const k2: u64 = 0x9ae16a3b2f90404f;
177
178 // Rotate right helper
179 fn rotr64(x: u64, comptime r: u64) u64 {
180 return (x >> r) | (x << (64 - r));
181 }
182
183 fn shiftmix(v: u64) u64 {
184 return v ^ (v >> 47);
185 }
186
187 fn hashLen16(u: u64, v: u64) u64 {
188 return @call(.always_inline, hash128To64, .{ u, v });
189 }
190
191 fn hashLen16Mul(low: u64, high: u64, mul: u64) u64 {
192 var a: u64 = (low ^ high) *% mul;
193 a ^= (a >> 47);
194 var b: u64 = (high ^ a) *% mul;
195 b ^= (b >> 47);
196 b *%= mul;
197 return b;
198 }
199
200 fn hash128To64(low: u64, high: u64) u64 {
201 return @call(.always_inline, hashLen16Mul, .{ low, high, 0x9ddfea08eb382d69 });
202 }
203
204 fn hashLen0To16(str: []const u8) u64 {
205 const len: u64 = @as(u64, str.len);
206 if (len >= 8) {
207 const mul: u64 = k2 +% len *% 2;
208 const a: u64 = fetch64(str.ptr, 0) +% k2;
209 const b: u64 = fetch64(str.ptr, str.len - 8);
210 const c: u64 = rotr64(b, 37) *% mul +% a;
211 const d: u64 = (rotr64(a, 25) +% b) *% mul;
212 return hashLen16Mul(c, d, mul);
213 }
214 if (len >= 4) {
215 const mul: u64 = k2 +% len *% 2;
216 const a: u64 = fetch32(str.ptr, 0);
217 return hashLen16Mul(len +% (a << 3), fetch32(str.ptr, str.len - 4), mul);
218 }
219 if (len > 0) {
220 const a: u8 = str[0];
221 const b: u8 = str[str.len >> 1];
222 const c: u8 = str[str.len - 1];
223 const y: u32 = @as(u32, @intCast(a)) +% (@as(u32, @intCast(b)) << 8);
224 const z: u32 = @as(u32, @truncate(str.len)) +% (@as(u32, @intCast(c)) << 2);
225 return shiftmix(@as(u64, @intCast(y)) *% k2 ^ @as(u64, @intCast(z)) *% k0) *% k2;
226 }
227 return k2;
228 }
229
230 fn hashLen17To32(str: []const u8) u64 {
231 const len: u64 = @as(u64, str.len);
232 const mul: u64 = k2 +% len *% 2;
233 const a: u64 = fetch64(str.ptr, 0) *% k1;
234 const b: u64 = fetch64(str.ptr, 8);
235 const c: u64 = fetch64(str.ptr, str.len - 8) *% mul;
236 const d: u64 = fetch64(str.ptr, str.len - 16) *% k2;
237
238 return hashLen16Mul(rotr64(a +% b, 43) +% rotr64(c, 30) +% d, a +% rotr64(b +% k2, 18) +% c, mul);
239 }
240
241 fn hashLen33To64(str: []const u8) u64 {
242 const len: u64 = @as(u64, str.len);
243 const mul: u64 = k2 +% len *% 2;
244 const a: u64 = fetch64(str.ptr, 0) *% k2;
245 const b: u64 = fetch64(str.ptr, 8);
246 const c: u64 = fetch64(str.ptr, str.len - 24);
247 const d: u64 = fetch64(str.ptr, str.len - 32);
248 const e: u64 = fetch64(str.ptr, 16) *% k2;
249 const f: u64 = fetch64(str.ptr, 24) *% 9;
250 const g: u64 = fetch64(str.ptr, str.len - 8);
251 const h: u64 = fetch64(str.ptr, str.len - 16) *% mul;
252
253 const u: u64 = rotr64(a +% g, 43) +% (rotr64(b, 30) +% c) *% 9;
254 const v: u64 = ((a +% g) ^ d) +% f +% 1;
255 const w: u64 = @byteSwap((u +% v) *% mul) +% h;
256 const x: u64 = rotr64(e +% f, 42) +% c;
257 const y: u64 = (@byteSwap((v +% w) *% mul) +% g) *% mul;
258 const z: u64 = e +% f +% c;
259 const a1: u64 = @byteSwap((x +% z) *% mul +% y) +% b;
260 const b1: u64 = shiftmix((z +% a1) *% mul +% d +% h) *% mul;
261 return b1 +% x;
262 }
263
264 const WeakPair = struct {
265 first: u64,
266 second: u64,
267 };
268
269 fn weakHashLen32WithSeedsHelper(w: u64, x: u64, y: u64, z: u64, a: u64, b: u64) WeakPair {
270 var a1: u64 = a;
271 var b1: u64 = b;
272 a1 +%= w;
273 b1 = rotr64(b1 +% a1 +% z, 21);
274 const c: u64 = a1;
275 a1 +%= x;
276 a1 +%= y;
277 b1 +%= rotr64(a1, 44);
278 return WeakPair{ .first = a1 +% z, .second = b1 +% c };
279 }
280
281 fn weakHashLen32WithSeeds(ptr: [*]const u8, a: u64, b: u64) WeakPair {
282 return @call(.always_inline, weakHashLen32WithSeedsHelper, .{
283 fetch64(ptr, 0),
284 fetch64(ptr, 8),
285 fetch64(ptr, 16),
286 fetch64(ptr, 24),
287 a,
288 b,
289 });
290 }
291
292 pub fn hash(str: []const u8) u64 {
293 if (str.len <= 32) {
294 if (str.len <= 16) {
295 return hashLen0To16(str);
296 } else {
297 return hashLen17To32(str);
298 }
299 } else if (str.len <= 64) {
300 return hashLen33To64(str);
301 }
302
303 var len: u64 = @as(u64, str.len);
304
305 var x: u64 = fetch64(str.ptr, str.len - 40);
306 var y: u64 = fetch64(str.ptr, str.len - 16) +% fetch64(str.ptr, str.len - 56);
307 var z: u64 = hashLen16(fetch64(str.ptr, str.len - 48) +% len, fetch64(str.ptr, str.len - 24));
308 var v: WeakPair = weakHashLen32WithSeeds(offsetPtr(str.ptr, str.len - 64), len, z);
309 var w: WeakPair = weakHashLen32WithSeeds(offsetPtr(str.ptr, str.len - 32), y +% k1, x);
310
311 x = x *% k1 +% fetch64(str.ptr, 0);
312 len = (len - 1) & ~@as(u64, @intCast(63));
313
314 var ptr: [*]const u8 = str.ptr;
315 while (true) {
316 x = rotr64(x +% y +% v.first +% fetch64(ptr, 8), 37) *% k1;
317 y = rotr64(y +% v.second +% fetch64(ptr, 48), 42) *% k1;
318 x ^= w.second;
319 y +%= v.first +% fetch64(ptr, 40);
320 z = rotr64(z +% w.first, 33) *% k1;
321 v = weakHashLen32WithSeeds(ptr, v.second *% k1, x +% w.first);
322 w = weakHashLen32WithSeeds(offsetPtr(ptr, 32), z +% w.second, y +% fetch64(ptr, 16));
323 const t: u64 = z;
324 z = x;
325 x = t;
326
327 ptr = offsetPtr(ptr, 64);
328 len -= 64;
329 if (len == 0)
330 break;
331 }
332
333 return hashLen16(hashLen16(v.first, w.first) +% shiftmix(y) *% k1 +% z, hashLen16(v.second, w.second) +% x);
334 }
335
336 pub fn hashWithSeed(str: []const u8, seed: u64) u64 {
337 return @call(.always_inline, Self.hashWithSeeds, .{ str, k2, seed });
338 }
339
340 pub fn hashWithSeeds(str: []const u8, seed0: u64, seed1: u64) u64 {
341 return hashLen16(hash(str) -% seed0, seed1);
342 }
343};
344
345fn CityHash32hashIgnoreSeed(str: []const u8, seed: u32) u32 {
346 _ = seed;
347 return CityHash32.hash(str);
348}
349
350const verify = @import("verify.zig");
351
352test "cityhash32" {
353 const Test = struct {
354 fn do() !void {
355 // SMHasher doesn't provide a 32bit version of the algorithm.
356 // The implementation was verified against the Google Abseil version.
357 try std.testing.expectEqual(verify.smhasher(CityHash32hashIgnoreSeed), 0x68254F81);
358 }
359 };
360 try Test.do();
361 @setEvalBranchQuota(75000);
362 try comptime Test.do();
363}
364
365test "cityhash64" {
366 const Test = struct {
367 fn do() !void {
368 // This is not compliant with the SMHasher implementation of CityHash64!
369 // The implementation was verified against the Google Abseil version.
370 try std.testing.expectEqual(verify.smhasher(CityHash64.hashWithSeed), 0x5FABC5C5);
371 }
372 };
373 try Test.do();
374 @setEvalBranchQuota(75000);
375 try comptime Test.do();
376}