master
1// zig run -O ReleaseFast --zig-lib-dir ../.. benchmark.zig
2
3const std = @import("std");
4const builtin = @import("builtin");
5const time = std.time;
6const Timer = time.Timer;
7const hash = std.hash;
8
9const KiB = 1024;
10const MiB = 1024 * KiB;
11const GiB = 1024 * MiB;
12
13var prng = std.Random.DefaultPrng.init(0);
14const random = prng.random();
15
16const Hash = struct {
17 ty: type,
18 name: []const u8,
19 has_iterative_api: bool = true,
20 has_crypto_api: bool = false,
21 has_anytype_api: ?[]const comptime_int = null,
22 /// `final` value should be read from this field.
23 has_struct_api: ?[]const u8 = null,
24 init_u8s: ?[]const u8 = null,
25 init_u64: ?u64 = null,
26 init_default: bool = false,
27};
28
29const hashes = [_]Hash{
30 Hash{
31 .ty = hash.XxHash3,
32 .name = "xxh3",
33 .init_u64 = 0,
34 .has_anytype_api = @as([]const comptime_int, &[_]comptime_int{ 8, 16, 32, 48, 64, 80, 96, 112, 128 }),
35 },
36 Hash{
37 .ty = hash.XxHash64,
38 .name = "xxhash64",
39 .init_u64 = 0,
40 .has_anytype_api = @as([]const comptime_int, &[_]comptime_int{ 8, 16, 32, 48, 64, 80, 96, 112, 128 }),
41 },
42 Hash{
43 .ty = hash.XxHash32,
44 .name = "xxhash32",
45 .init_u64 = 0,
46 .has_anytype_api = @as([]const comptime_int, &[_]comptime_int{ 8, 16, 32, 48, 64, 80, 96, 112, 128 }),
47 },
48 Hash{
49 .ty = hash.Wyhash,
50 .name = "wyhash",
51 .init_u64 = 0,
52 },
53 Hash{
54 .ty = hash.Fnv1a_64,
55 .name = "fnv1a",
56 },
57 Hash{
58 .ty = hash.Adler32,
59 .name = "adler32",
60 .has_struct_api = "adler",
61 .init_default = true,
62 },
63 Hash{
64 .ty = hash.crc.Crc32,
65 .name = "crc32",
66 },
67 Hash{
68 .ty = hash.CityHash32,
69 .name = "cityhash-32",
70 .has_iterative_api = false,
71 },
72 Hash{
73 .ty = hash.CityHash64,
74 .name = "cityhash-64",
75 .has_iterative_api = false,
76 },
77 Hash{
78 .ty = hash.Murmur2_32,
79 .name = "murmur2-32",
80 .has_iterative_api = false,
81 },
82 Hash{
83 .ty = hash.Murmur2_64,
84 .name = "murmur2-64",
85 .has_iterative_api = false,
86 },
87 Hash{
88 .ty = hash.Murmur3_32,
89 .name = "murmur3-32",
90 .has_iterative_api = false,
91 },
92 Hash{
93 .ty = hash.SipHash64(1, 3),
94 .name = "siphash64",
95 .has_crypto_api = true,
96 .init_u8s = &[_]u8{0} ** 16,
97 },
98 Hash{
99 .ty = hash.SipHash128(1, 3),
100 .name = "siphash128",
101 .has_crypto_api = true,
102 .init_u8s = &[_]u8{0} ** 16,
103 },
104};
105
106const Result = struct {
107 hash: u64,
108 throughput: u64,
109};
110
111const block_size: usize = 8 * 8192;
112
113pub fn benchmarkHash(comptime H: anytype, bytes: usize, allocator: std.mem.Allocator) !Result {
114 var blocks = try allocator.alloc(u8, bytes);
115 defer allocator.free(blocks);
116 random.bytes(blocks);
117
118 const block_count = bytes / block_size;
119
120 var h: H.ty = blk: {
121 if (H.init_u8s) |init| {
122 break :blk .init(init[0..H.ty.key_length]);
123 }
124 if (H.init_u64) |init| {
125 break :blk .init(init);
126 }
127 if (H.init_default) {
128 break :blk .{};
129 }
130 break :blk .init();
131 };
132
133 var timer = try Timer.start();
134 for (0..block_count) |i| {
135 h.update(blocks[i * block_size ..][0..block_size]);
136 }
137 const final = if (H.has_struct_api) |field_name|
138 @field(h, field_name)
139 else if (H.has_crypto_api)
140 @as(u64, @truncate(h.finalInt()))
141 else
142 h.final();
143 std.mem.doNotOptimizeAway(final);
144
145 const elapsed_ns = timer.read();
146
147 const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;
148 const size_float: f64 = @floatFromInt(block_size * block_count);
149 const throughput: u64 = @intFromFloat(size_float / elapsed_s);
150
151 return Result{
152 .hash = final,
153 .throughput = throughput,
154 };
155}
156
157pub fn benchmarkHashSmallKeys(comptime H: anytype, key_size: usize, bytes: usize, allocator: std.mem.Allocator) !Result {
158 var blocks = try allocator.alloc(u8, bytes);
159 defer allocator.free(blocks);
160 random.bytes(blocks);
161
162 const key_count = bytes / key_size;
163
164 var timer = try Timer.start();
165
166 var sum: u64 = 0;
167 for (0..key_count) |i| {
168 const small_key = blocks[i * key_size ..][0..key_size];
169 const final = blk: {
170 if (H.init_u8s) |init| {
171 if (H.has_crypto_api) {
172 break :blk @as(u64, @truncate(H.ty.toInt(small_key, init[0..H.ty.key_length])));
173 } else {
174 break :blk H.ty.hash(init, small_key);
175 }
176 }
177 if (H.init_u64) |init| {
178 break :blk H.ty.hash(init, small_key);
179 }
180 break :blk H.ty.hash(small_key);
181 };
182 sum +%= final;
183 }
184 const elapsed_ns = timer.read();
185
186 const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;
187 const size_float: f64 = @floatFromInt(key_count * key_size);
188 const throughput: u64 = @intFromFloat(size_float / elapsed_s);
189
190 std.mem.doNotOptimizeAway(sum);
191
192 return Result{
193 .hash = sum,
194 .throughput = throughput,
195 };
196}
197
198// the array and array pointer benchmarks for xxhash are very sensitive to in-lining,
199// if you see strange performance changes consider using `.never_inline` or `.always_inline`
200// to ensure the changes are not only due to the optimiser inlining the benchmark differently
201pub fn benchmarkHashSmallKeysArrayPtr(
202 comptime H: anytype,
203 comptime key_size: usize,
204 bytes: usize,
205 allocator: std.mem.Allocator,
206) !Result {
207 var blocks = try allocator.alloc(u8, bytes);
208 defer allocator.free(blocks);
209 random.bytes(blocks);
210
211 const key_count = bytes / key_size;
212
213 var timer = try Timer.start();
214
215 var sum: u64 = 0;
216 for (0..key_count) |i| {
217 const small_key = blocks[i * key_size ..][0..key_size];
218 const final: u64 = blk: {
219 if (H.init_u8s) |init| {
220 if (H.has_crypto_api) {
221 break :blk @truncate(H.ty.toInt(small_key, init[0..H.ty.key_length]));
222 } else {
223 break :blk H.ty.hash(init, small_key);
224 }
225 }
226 if (H.init_u64) |init| {
227 break :blk H.ty.hash(init, small_key);
228 }
229 break :blk H.ty.hash(small_key);
230 };
231 sum +%= final;
232 }
233 const elapsed_ns = timer.read();
234
235 const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;
236 const throughput: u64 = @intFromFloat(@as(f64, @floatFromInt(bytes)) / elapsed_s);
237
238 std.mem.doNotOptimizeAway(sum);
239
240 return Result{
241 .hash = sum,
242 .throughput = throughput,
243 };
244}
245
246// the array and array pointer benchmarks for xxhash are very sensitive to in-lining,
247// if you see strange performance changes consider using `.never_inline` or `.always_inline`
248// to ensure the changes are not only due to the optimiser inlining the benchmark differently
249pub fn benchmarkHashSmallKeysArray(
250 comptime H: anytype,
251 comptime key_size: usize,
252 bytes: usize,
253 allocator: std.mem.Allocator,
254) !Result {
255 var blocks = try allocator.alloc(u8, bytes);
256 defer allocator.free(blocks);
257 random.bytes(blocks);
258
259 const key_count = bytes / key_size;
260
261 var i: usize = 0;
262 var timer = try Timer.start();
263
264 var sum: u64 = 0;
265 while (i < key_count) : (i += 1) {
266 const small_key = blocks[i * key_size ..][0..key_size];
267 const final: u64 = blk: {
268 if (H.init_u8s) |init| {
269 if (H.has_crypto_api) {
270 break :blk @truncate(H.ty.toInt(small_key, init[0..H.ty.key_length]));
271 } else {
272 break :blk H.ty.hash(init, small_key.*);
273 }
274 }
275 if (H.init_u64) |init| {
276 break :blk H.ty.hash(init, small_key.*);
277 }
278 break :blk H.ty.hash(small_key.*);
279 };
280 sum +%= final;
281 }
282 const elapsed_ns = timer.read();
283
284 const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;
285 const throughput: u64 = @intFromFloat(@as(f64, @floatFromInt(bytes)) / elapsed_s);
286
287 std.mem.doNotOptimizeAway(sum);
288
289 return Result{
290 .hash = sum,
291 .throughput = throughput,
292 };
293}
294
295pub fn benchmarkHashSmallApi(comptime H: anytype, key_size: usize, bytes: usize, allocator: std.mem.Allocator) !Result {
296 var blocks = try allocator.alloc(u8, bytes);
297 defer allocator.free(blocks);
298 random.bytes(blocks);
299
300 const key_count = bytes / key_size;
301
302 var timer = try Timer.start();
303
304 var sum: u64 = 0;
305 for (0..key_count) |i| {
306 const small_key = blocks[i * key_size ..][0..key_size];
307 const final: u64 = blk: {
308 if (H.init_u8s) |init| {
309 if (H.has_crypto_api) {
310 break :blk @truncate(H.ty.toInt(small_key, init[0..H.ty.key_length]));
311 } else {
312 break :blk H.ty.hashSmall(init, small_key);
313 }
314 }
315 if (H.init_u64) |init| {
316 break :blk H.ty.hashSmall(init, small_key);
317 }
318 break :blk H.ty.hashSmall(small_key);
319 };
320 sum +%= final;
321 }
322 const elapsed_ns = timer.read();
323
324 const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;
325 const throughput: u64 = @intFromFloat(@as(f64, @floatFromInt(bytes)) / elapsed_s);
326
327 std.mem.doNotOptimizeAway(sum);
328
329 return Result{
330 .throughput = throughput,
331 .hash = sum,
332 };
333}
334
335fn usage() void {
336 std.debug.print(
337 \\throughput_test [options]
338 \\
339 \\Options:
340 \\ --filter [test-name]
341 \\ --seed [int]
342 \\ --count [int]
343 \\ --key-size [int]
344 \\ --iterative-only
345 \\ --small-key-only
346 \\ --help
347 \\
348 , .{});
349}
350
351fn mode(comptime x: comptime_int) comptime_int {
352 return if (builtin.mode == .Debug) x / 64 else x;
353}
354
355pub fn main() !void {
356 var stdout_buffer: [0x100]u8 = undefined;
357 var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
358 const stdout = &stdout_writer.interface;
359
360 var buffer: [1024]u8 = undefined;
361 var fixed = std.heap.FixedBufferAllocator.init(buffer[0..]);
362 const args = try std.process.argsAlloc(fixed.allocator());
363
364 var filter: ?[]u8 = "";
365 var count: usize = mode(128 * MiB);
366 var key_size: ?usize = null;
367 var seed: u32 = 0;
368 var test_small_key_only = false;
369 var test_iterative_only = false;
370 var test_arrays = false;
371
372 const default_small_key_size = 32;
373
374 var i: usize = 1;
375 while (i < args.len) : (i += 1) {
376 if (std.mem.eql(u8, args[i], "--mode")) {
377 try stdout.print("{}\n", .{builtin.mode});
378 try stdout.flush();
379 return;
380 } else if (std.mem.eql(u8, args[i], "--seed")) {
381 i += 1;
382 if (i == args.len) {
383 usage();
384 std.process.exit(1);
385 }
386
387 seed = try std.fmt.parseUnsigned(u32, args[i], 10);
388 // we seed later
389 } else if (std.mem.eql(u8, args[i], "--filter")) {
390 i += 1;
391 if (i == args.len) {
392 usage();
393 std.process.exit(1);
394 }
395
396 filter = args[i];
397 } else if (std.mem.eql(u8, args[i], "--count")) {
398 i += 1;
399 if (i == args.len) {
400 usage();
401 std.process.exit(1);
402 }
403
404 const c = try std.fmt.parseUnsigned(usize, args[i], 10);
405 count = c * MiB;
406 } else if (std.mem.eql(u8, args[i], "--key-size")) {
407 i += 1;
408 if (i == args.len) {
409 usage();
410 std.process.exit(1);
411 }
412
413 key_size = try std.fmt.parseUnsigned(usize, args[i], 10);
414 if (key_size.? > block_size) {
415 try stdout.print("key_size cannot exceed block size of {}\n", .{block_size});
416 try stdout.flush();
417 std.process.exit(1);
418 }
419 } else if (std.mem.eql(u8, args[i], "--iterative-only")) {
420 test_iterative_only = true;
421 } else if (std.mem.eql(u8, args[i], "--small-key-only")) {
422 test_small_key_only = true;
423 } else if (std.mem.eql(u8, args[i], "--include-array")) {
424 test_arrays = true;
425 } else if (std.mem.eql(u8, args[i], "--help")) {
426 usage();
427 return;
428 } else {
429 usage();
430 std.process.exit(1);
431 }
432 }
433
434 if (test_iterative_only and test_small_key_only) {
435 try stdout.print("Cannot use iterative-only and small-key-only together!\n", .{});
436 try stdout.flush();
437 usage();
438 std.process.exit(1);
439 }
440
441 var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
442 defer std.testing.expect(gpa.deinit() == .ok) catch @panic("leak");
443 const allocator = gpa.allocator();
444
445 inline for (hashes) |H| {
446 if (filter == null or std.mem.indexOf(u8, H.name, filter.?) != null) hash: {
447 if (!test_iterative_only or H.has_iterative_api) {
448 try stdout.print("{s}\n", .{H.name});
449 try stdout.flush();
450
451 // Always reseed prior to every call so we are hashing the same buffer contents.
452 // This allows easier comparison between different implementations.
453 if (H.has_iterative_api and !test_small_key_only) {
454 prng.seed(seed);
455 const result = try benchmarkHash(H, count, allocator);
456 try stdout.print(" iterative: {:5} MiB/s [{x:0<16}]\n", .{ result.throughput / (1 * MiB), result.hash });
457 try stdout.flush();
458 }
459
460 if (!test_iterative_only) {
461 if (key_size) |size| {
462 prng.seed(seed);
463 const result_small = try benchmarkHashSmallKeys(H, size, count, allocator);
464 try stdout.print(" small keys: {:3}B {:5} MiB/s {} Hashes/s [{x:0<16}]\n", .{
465 size,
466 result_small.throughput / (1 * MiB),
467 result_small.throughput / size,
468 result_small.hash,
469 });
470 try stdout.flush();
471
472 if (!test_arrays) break :hash;
473 if (H.has_anytype_api) |sizes| {
474 inline for (sizes) |exact_size| {
475 if (size == exact_size) {
476 prng.seed(seed);
477 const result_array = try benchmarkHashSmallKeysArray(H, exact_size, count, allocator);
478 prng.seed(seed);
479 const result_ptr = try benchmarkHashSmallKeysArrayPtr(H, exact_size, count, allocator);
480 try stdout.print(" array: {:5} MiB/s [{x:0<16}]\n", .{
481 result_array.throughput / (1 * MiB),
482 result_array.hash,
483 });
484 try stdout.print(" array ptr: {:5} MiB/s [{x:0<16}]\n", .{
485 result_ptr.throughput / (1 * MiB),
486 result_ptr.hash,
487 });
488 try stdout.flush();
489 }
490 }
491 }
492 } else {
493 prng.seed(seed);
494 const result_small = try benchmarkHashSmallKeys(H, default_small_key_size, count, allocator);
495 try stdout.print(" small keys: {:3}B {:5} MiB/s {} Hashes/s [{x:0<16}]\n", .{
496 default_small_key_size,
497 result_small.throughput / (1 * MiB),
498 result_small.throughput / default_small_key_size,
499 result_small.hash,
500 });
501 try stdout.flush();
502
503 if (!test_arrays) break :hash;
504 if (H.has_anytype_api) |sizes| {
505 try stdout.print(" array:\n", .{});
506 inline for (sizes) |exact_size| {
507 prng.seed(seed);
508 const result = try benchmarkHashSmallKeysArray(H, exact_size, count, allocator);
509 try stdout.print(" {d: >3}B {:5} MiB/s [{x:0<16}]\n", .{
510 exact_size,
511 result.throughput / (1 * MiB),
512 result.hash,
513 });
514 try stdout.flush();
515 }
516 try stdout.print(" array ptr: \n", .{});
517 inline for (sizes) |exact_size| {
518 prng.seed(seed);
519 const result = try benchmarkHashSmallKeysArrayPtr(H, exact_size, count, allocator);
520 try stdout.print(" {d: >3}B {:5} MiB/s [{x:0<16}]\n", .{
521 exact_size,
522 result.throughput / (1 * MiB),
523 result.hash,
524 });
525 try stdout.flush();
526 }
527 }
528 }
529 }
530 }
531 }
532 }
533}