master
  1// zig run -O ReleaseFast --zig-lib-dir ../.. benchmark.zig
  2
  3const std = @import("std");
  4const builtin = @import("builtin");
  5const time = std.time;
  6const Timer = time.Timer;
  7const hash = std.hash;
  8
  9const KiB = 1024;
 10const MiB = 1024 * KiB;
 11const GiB = 1024 * MiB;
 12
 13var prng = std.Random.DefaultPrng.init(0);
 14const random = prng.random();
 15
 16const Hash = struct {
 17    ty: type,
 18    name: []const u8,
 19    has_iterative_api: bool = true,
 20    has_crypto_api: bool = false,
 21    has_anytype_api: ?[]const comptime_int = null,
 22    /// `final` value should be read from this field.
 23    has_struct_api: ?[]const u8 = null,
 24    init_u8s: ?[]const u8 = null,
 25    init_u64: ?u64 = null,
 26    init_default: bool = false,
 27};
 28
 29const hashes = [_]Hash{
 30    Hash{
 31        .ty = hash.XxHash3,
 32        .name = "xxh3",
 33        .init_u64 = 0,
 34        .has_anytype_api = @as([]const comptime_int, &[_]comptime_int{ 8, 16, 32, 48, 64, 80, 96, 112, 128 }),
 35    },
 36    Hash{
 37        .ty = hash.XxHash64,
 38        .name = "xxhash64",
 39        .init_u64 = 0,
 40        .has_anytype_api = @as([]const comptime_int, &[_]comptime_int{ 8, 16, 32, 48, 64, 80, 96, 112, 128 }),
 41    },
 42    Hash{
 43        .ty = hash.XxHash32,
 44        .name = "xxhash32",
 45        .init_u64 = 0,
 46        .has_anytype_api = @as([]const comptime_int, &[_]comptime_int{ 8, 16, 32, 48, 64, 80, 96, 112, 128 }),
 47    },
 48    Hash{
 49        .ty = hash.Wyhash,
 50        .name = "wyhash",
 51        .init_u64 = 0,
 52    },
 53    Hash{
 54        .ty = hash.Fnv1a_64,
 55        .name = "fnv1a",
 56    },
 57    Hash{
 58        .ty = hash.Adler32,
 59        .name = "adler32",
 60        .has_struct_api = "adler",
 61        .init_default = true,
 62    },
 63    Hash{
 64        .ty = hash.crc.Crc32,
 65        .name = "crc32",
 66    },
 67    Hash{
 68        .ty = hash.CityHash32,
 69        .name = "cityhash-32",
 70        .has_iterative_api = false,
 71    },
 72    Hash{
 73        .ty = hash.CityHash64,
 74        .name = "cityhash-64",
 75        .has_iterative_api = false,
 76    },
 77    Hash{
 78        .ty = hash.Murmur2_32,
 79        .name = "murmur2-32",
 80        .has_iterative_api = false,
 81    },
 82    Hash{
 83        .ty = hash.Murmur2_64,
 84        .name = "murmur2-64",
 85        .has_iterative_api = false,
 86    },
 87    Hash{
 88        .ty = hash.Murmur3_32,
 89        .name = "murmur3-32",
 90        .has_iterative_api = false,
 91    },
 92    Hash{
 93        .ty = hash.SipHash64(1, 3),
 94        .name = "siphash64",
 95        .has_crypto_api = true,
 96        .init_u8s = &[_]u8{0} ** 16,
 97    },
 98    Hash{
 99        .ty = hash.SipHash128(1, 3),
100        .name = "siphash128",
101        .has_crypto_api = true,
102        .init_u8s = &[_]u8{0} ** 16,
103    },
104};
105
106const Result = struct {
107    hash: u64,
108    throughput: u64,
109};
110
111const block_size: usize = 8 * 8192;
112
113pub fn benchmarkHash(comptime H: anytype, bytes: usize, allocator: std.mem.Allocator) !Result {
114    var blocks = try allocator.alloc(u8, bytes);
115    defer allocator.free(blocks);
116    random.bytes(blocks);
117
118    const block_count = bytes / block_size;
119
120    var h: H.ty = blk: {
121        if (H.init_u8s) |init| {
122            break :blk .init(init[0..H.ty.key_length]);
123        }
124        if (H.init_u64) |init| {
125            break :blk .init(init);
126        }
127        if (H.init_default) {
128            break :blk .{};
129        }
130        break :blk .init();
131    };
132
133    var timer = try Timer.start();
134    for (0..block_count) |i| {
135        h.update(blocks[i * block_size ..][0..block_size]);
136    }
137    const final = if (H.has_struct_api) |field_name|
138        @field(h, field_name)
139    else if (H.has_crypto_api)
140        @as(u64, @truncate(h.finalInt()))
141    else
142        h.final();
143    std.mem.doNotOptimizeAway(final);
144
145    const elapsed_ns = timer.read();
146
147    const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;
148    const size_float: f64 = @floatFromInt(block_size * block_count);
149    const throughput: u64 = @intFromFloat(size_float / elapsed_s);
150
151    return Result{
152        .hash = final,
153        .throughput = throughput,
154    };
155}
156
157pub fn benchmarkHashSmallKeys(comptime H: anytype, key_size: usize, bytes: usize, allocator: std.mem.Allocator) !Result {
158    var blocks = try allocator.alloc(u8, bytes);
159    defer allocator.free(blocks);
160    random.bytes(blocks);
161
162    const key_count = bytes / key_size;
163
164    var timer = try Timer.start();
165
166    var sum: u64 = 0;
167    for (0..key_count) |i| {
168        const small_key = blocks[i * key_size ..][0..key_size];
169        const final = blk: {
170            if (H.init_u8s) |init| {
171                if (H.has_crypto_api) {
172                    break :blk @as(u64, @truncate(H.ty.toInt(small_key, init[0..H.ty.key_length])));
173                } else {
174                    break :blk H.ty.hash(init, small_key);
175                }
176            }
177            if (H.init_u64) |init| {
178                break :blk H.ty.hash(init, small_key);
179            }
180            break :blk H.ty.hash(small_key);
181        };
182        sum +%= final;
183    }
184    const elapsed_ns = timer.read();
185
186    const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;
187    const size_float: f64 = @floatFromInt(key_count * key_size);
188    const throughput: u64 = @intFromFloat(size_float / elapsed_s);
189
190    std.mem.doNotOptimizeAway(sum);
191
192    return Result{
193        .hash = sum,
194        .throughput = throughput,
195    };
196}
197
198// the array and array pointer benchmarks for xxhash are very sensitive to in-lining,
199// if you see strange performance changes consider using `.never_inline` or `.always_inline`
200// to ensure the changes are not only due to the optimiser inlining the benchmark differently
201pub fn benchmarkHashSmallKeysArrayPtr(
202    comptime H: anytype,
203    comptime key_size: usize,
204    bytes: usize,
205    allocator: std.mem.Allocator,
206) !Result {
207    var blocks = try allocator.alloc(u8, bytes);
208    defer allocator.free(blocks);
209    random.bytes(blocks);
210
211    const key_count = bytes / key_size;
212
213    var timer = try Timer.start();
214
215    var sum: u64 = 0;
216    for (0..key_count) |i| {
217        const small_key = blocks[i * key_size ..][0..key_size];
218        const final: u64 = blk: {
219            if (H.init_u8s) |init| {
220                if (H.has_crypto_api) {
221                    break :blk @truncate(H.ty.toInt(small_key, init[0..H.ty.key_length]));
222                } else {
223                    break :blk H.ty.hash(init, small_key);
224                }
225            }
226            if (H.init_u64) |init| {
227                break :blk H.ty.hash(init, small_key);
228            }
229            break :blk H.ty.hash(small_key);
230        };
231        sum +%= final;
232    }
233    const elapsed_ns = timer.read();
234
235    const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;
236    const throughput: u64 = @intFromFloat(@as(f64, @floatFromInt(bytes)) / elapsed_s);
237
238    std.mem.doNotOptimizeAway(sum);
239
240    return Result{
241        .hash = sum,
242        .throughput = throughput,
243    };
244}
245
246// the array and array pointer benchmarks for xxhash are very sensitive to in-lining,
247// if you see strange performance changes consider using `.never_inline` or `.always_inline`
248// to ensure the changes are not only due to the optimiser inlining the benchmark differently
249pub fn benchmarkHashSmallKeysArray(
250    comptime H: anytype,
251    comptime key_size: usize,
252    bytes: usize,
253    allocator: std.mem.Allocator,
254) !Result {
255    var blocks = try allocator.alloc(u8, bytes);
256    defer allocator.free(blocks);
257    random.bytes(blocks);
258
259    const key_count = bytes / key_size;
260
261    var i: usize = 0;
262    var timer = try Timer.start();
263
264    var sum: u64 = 0;
265    while (i < key_count) : (i += 1) {
266        const small_key = blocks[i * key_size ..][0..key_size];
267        const final: u64 = blk: {
268            if (H.init_u8s) |init| {
269                if (H.has_crypto_api) {
270                    break :blk @truncate(H.ty.toInt(small_key, init[0..H.ty.key_length]));
271                } else {
272                    break :blk H.ty.hash(init, small_key.*);
273                }
274            }
275            if (H.init_u64) |init| {
276                break :blk H.ty.hash(init, small_key.*);
277            }
278            break :blk H.ty.hash(small_key.*);
279        };
280        sum +%= final;
281    }
282    const elapsed_ns = timer.read();
283
284    const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;
285    const throughput: u64 = @intFromFloat(@as(f64, @floatFromInt(bytes)) / elapsed_s);
286
287    std.mem.doNotOptimizeAway(sum);
288
289    return Result{
290        .hash = sum,
291        .throughput = throughput,
292    };
293}
294
295pub fn benchmarkHashSmallApi(comptime H: anytype, key_size: usize, bytes: usize, allocator: std.mem.Allocator) !Result {
296    var blocks = try allocator.alloc(u8, bytes);
297    defer allocator.free(blocks);
298    random.bytes(blocks);
299
300    const key_count = bytes / key_size;
301
302    var timer = try Timer.start();
303
304    var sum: u64 = 0;
305    for (0..key_count) |i| {
306        const small_key = blocks[i * key_size ..][0..key_size];
307        const final: u64 = blk: {
308            if (H.init_u8s) |init| {
309                if (H.has_crypto_api) {
310                    break :blk @truncate(H.ty.toInt(small_key, init[0..H.ty.key_length]));
311                } else {
312                    break :blk H.ty.hashSmall(init, small_key);
313                }
314            }
315            if (H.init_u64) |init| {
316                break :blk H.ty.hashSmall(init, small_key);
317            }
318            break :blk H.ty.hashSmall(small_key);
319        };
320        sum +%= final;
321    }
322    const elapsed_ns = timer.read();
323
324    const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;
325    const throughput: u64 = @intFromFloat(@as(f64, @floatFromInt(bytes)) / elapsed_s);
326
327    std.mem.doNotOptimizeAway(sum);
328
329    return Result{
330        .throughput = throughput,
331        .hash = sum,
332    };
333}
334
335fn usage() void {
336    std.debug.print(
337        \\throughput_test [options]
338        \\
339        \\Options:
340        \\  --filter    [test-name]
341        \\  --seed      [int]
342        \\  --count     [int]
343        \\  --key-size  [int]
344        \\  --iterative-only
345        \\  --small-key-only
346        \\  --help
347        \\
348    , .{});
349}
350
351fn mode(comptime x: comptime_int) comptime_int {
352    return if (builtin.mode == .Debug) x / 64 else x;
353}
354
355pub fn main() !void {
356    var stdout_buffer: [0x100]u8 = undefined;
357    var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
358    const stdout = &stdout_writer.interface;
359
360    var buffer: [1024]u8 = undefined;
361    var fixed = std.heap.FixedBufferAllocator.init(buffer[0..]);
362    const args = try std.process.argsAlloc(fixed.allocator());
363
364    var filter: ?[]u8 = "";
365    var count: usize = mode(128 * MiB);
366    var key_size: ?usize = null;
367    var seed: u32 = 0;
368    var test_small_key_only = false;
369    var test_iterative_only = false;
370    var test_arrays = false;
371
372    const default_small_key_size = 32;
373
374    var i: usize = 1;
375    while (i < args.len) : (i += 1) {
376        if (std.mem.eql(u8, args[i], "--mode")) {
377            try stdout.print("{}\n", .{builtin.mode});
378            try stdout.flush();
379            return;
380        } else if (std.mem.eql(u8, args[i], "--seed")) {
381            i += 1;
382            if (i == args.len) {
383                usage();
384                std.process.exit(1);
385            }
386
387            seed = try std.fmt.parseUnsigned(u32, args[i], 10);
388            // we seed later
389        } else if (std.mem.eql(u8, args[i], "--filter")) {
390            i += 1;
391            if (i == args.len) {
392                usage();
393                std.process.exit(1);
394            }
395
396            filter = args[i];
397        } else if (std.mem.eql(u8, args[i], "--count")) {
398            i += 1;
399            if (i == args.len) {
400                usage();
401                std.process.exit(1);
402            }
403
404            const c = try std.fmt.parseUnsigned(usize, args[i], 10);
405            count = c * MiB;
406        } else if (std.mem.eql(u8, args[i], "--key-size")) {
407            i += 1;
408            if (i == args.len) {
409                usage();
410                std.process.exit(1);
411            }
412
413            key_size = try std.fmt.parseUnsigned(usize, args[i], 10);
414            if (key_size.? > block_size) {
415                try stdout.print("key_size cannot exceed block size of {}\n", .{block_size});
416                try stdout.flush();
417                std.process.exit(1);
418            }
419        } else if (std.mem.eql(u8, args[i], "--iterative-only")) {
420            test_iterative_only = true;
421        } else if (std.mem.eql(u8, args[i], "--small-key-only")) {
422            test_small_key_only = true;
423        } else if (std.mem.eql(u8, args[i], "--include-array")) {
424            test_arrays = true;
425        } else if (std.mem.eql(u8, args[i], "--help")) {
426            usage();
427            return;
428        } else {
429            usage();
430            std.process.exit(1);
431        }
432    }
433
434    if (test_iterative_only and test_small_key_only) {
435        try stdout.print("Cannot use iterative-only and small-key-only together!\n", .{});
436        try stdout.flush();
437        usage();
438        std.process.exit(1);
439    }
440
441    var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
442    defer std.testing.expect(gpa.deinit() == .ok) catch @panic("leak");
443    const allocator = gpa.allocator();
444
445    inline for (hashes) |H| {
446        if (filter == null or std.mem.indexOf(u8, H.name, filter.?) != null) hash: {
447            if (!test_iterative_only or H.has_iterative_api) {
448                try stdout.print("{s}\n", .{H.name});
449                try stdout.flush();
450
451                // Always reseed prior to every call so we are hashing the same buffer contents.
452                // This allows easier comparison between different implementations.
453                if (H.has_iterative_api and !test_small_key_only) {
454                    prng.seed(seed);
455                    const result = try benchmarkHash(H, count, allocator);
456                    try stdout.print("   iterative: {:5} MiB/s [{x:0<16}]\n", .{ result.throughput / (1 * MiB), result.hash });
457                    try stdout.flush();
458                }
459
460                if (!test_iterative_only) {
461                    if (key_size) |size| {
462                        prng.seed(seed);
463                        const result_small = try benchmarkHashSmallKeys(H, size, count, allocator);
464                        try stdout.print("  small keys: {:3}B {:5} MiB/s {} Hashes/s [{x:0<16}]\n", .{
465                            size,
466                            result_small.throughput / (1 * MiB),
467                            result_small.throughput / size,
468                            result_small.hash,
469                        });
470                        try stdout.flush();
471
472                        if (!test_arrays) break :hash;
473                        if (H.has_anytype_api) |sizes| {
474                            inline for (sizes) |exact_size| {
475                                if (size == exact_size) {
476                                    prng.seed(seed);
477                                    const result_array = try benchmarkHashSmallKeysArray(H, exact_size, count, allocator);
478                                    prng.seed(seed);
479                                    const result_ptr = try benchmarkHashSmallKeysArrayPtr(H, exact_size, count, allocator);
480                                    try stdout.print("       array: {:5} MiB/s [{x:0<16}]\n", .{
481                                        result_array.throughput / (1 * MiB),
482                                        result_array.hash,
483                                    });
484                                    try stdout.print("   array ptr: {:5} MiB/s [{x:0<16}]\n", .{
485                                        result_ptr.throughput / (1 * MiB),
486                                        result_ptr.hash,
487                                    });
488                                    try stdout.flush();
489                                }
490                            }
491                        }
492                    } else {
493                        prng.seed(seed);
494                        const result_small = try benchmarkHashSmallKeys(H, default_small_key_size, count, allocator);
495                        try stdout.print("  small keys: {:3}B {:5} MiB/s {} Hashes/s [{x:0<16}]\n", .{
496                            default_small_key_size,
497                            result_small.throughput / (1 * MiB),
498                            result_small.throughput / default_small_key_size,
499                            result_small.hash,
500                        });
501                        try stdout.flush();
502
503                        if (!test_arrays) break :hash;
504                        if (H.has_anytype_api) |sizes| {
505                            try stdout.print("       array:\n", .{});
506                            inline for (sizes) |exact_size| {
507                                prng.seed(seed);
508                                const result = try benchmarkHashSmallKeysArray(H, exact_size, count, allocator);
509                                try stdout.print("       {d: >3}B {:5} MiB/s [{x:0<16}]\n", .{
510                                    exact_size,
511                                    result.throughput / (1 * MiB),
512                                    result.hash,
513                                });
514                                try stdout.flush();
515                            }
516                            try stdout.print("   array ptr: \n", .{});
517                            inline for (sizes) |exact_size| {
518                                prng.seed(seed);
519                                const result = try benchmarkHashSmallKeysArrayPtr(H, exact_size, count, allocator);
520                                try stdout.print("       {d: >3}B {:5} MiB/s [{x:0<16}]\n", .{
521                                    exact_size,
522                                    result.throughput / (1 * MiB),
523                                    result.hash,
524                                });
525                                try stdout.flush();
526                            }
527                        }
528                    }
529                }
530            }
531        }
532    }
533}