/// This is a thin wrapper around a primitive value to prevent accidental data races.
pub fn Value(comptime T: type) type {
    return extern struct {
        /// Care must be taken to avoid data races when interacting with this field directly.
        raw: T,

        const Self = @This();

        pub fn init(value: T) Self {
            return .{ .raw = value };
        }

        pub inline fn load(self: *const Self, comptime order: AtomicOrder) T {
            return @atomicLoad(T, &self.raw, order);
        }

        pub inline fn store(self: *Self, value: T, comptime order: AtomicOrder) void {
            @atomicStore(T, &self.raw, value, order);
        }

        pub inline fn swap(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Xchg, operand, order);
        }

        /// Atomically compares the value with `expected_value` and, if equal, replaces
        /// it with `new_value`. Returns `null` on success, otherwise the value that was
        /// observed. May fail spuriously (i.e. return a value equal to `expected_value`),
        /// so it is intended to be called in a retry loop; see also `cmpxchgStrong`.
        pub inline fn cmpxchgWeak(
            self: *Self,
            expected_value: T,
            new_value: T,
            comptime success_order: AtomicOrder,
            comptime fail_order: AtomicOrder,
        ) ?T {
            return @cmpxchgWeak(T, &self.raw, expected_value, new_value, success_order, fail_order);
        }

        /// Like `cmpxchgWeak`, but never fails spuriously: a non-`null` return value
        /// always differs from `expected_value`. On some targets this guarantee makes it
        /// more expensive than the weak variant inside a retry loop.
        pub inline fn cmpxchgStrong(
            self: *Self,
            expected_value: T,
            new_value: T,
            comptime success_order: AtomicOrder,
            comptime fail_order: AtomicOrder,
        ) ?T {
            return @cmpxchgStrong(T, &self.raw, expected_value, new_value, success_order, fail_order);
        }

        pub inline fn fetchAdd(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Add, operand, order);
        }

        pub inline fn fetchSub(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Sub, operand, order);
        }

        pub inline fn fetchMin(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Min, operand, order);
        }

        pub inline fn fetchMax(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Max, operand, order);
        }

        pub inline fn fetchAnd(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .And, operand, order);
        }

        pub inline fn fetchNand(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Nand, operand, order);
        }

        pub inline fn fetchXor(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Xor, operand, order);
        }

        pub inline fn fetchOr(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Or, operand, order);
        }

        /// Generic atomic read-modify-write wrapper over `@atomicRmw`, accepting any
        /// `std.builtin.AtomicRmwOp` (the named helpers above cover the common ops).
        pub inline fn rmw(
            self: *Self,
            comptime op: std.builtin.AtomicRmwOp,
            operand: T,
            comptime order: AtomicOrder,
        ) T {
            return @atomicRmw(T, &self.raw, op, operand, order);
        }

        const Bit = std.math.Log2Int(T);

        /// Marked `inline` so that if `bit` is comptime-known, the instruction
        /// can be lowered to a more efficient machine code instruction if
        /// possible.
        pub inline fn bitSet(self: *Self, bit: Bit, comptime order: AtomicOrder) u1 {
            const mask = @as(T, 1) << bit;
            const value = self.fetchOr(mask, order);
            return @intFromBool(value & mask != 0);
        }

        /// Marked `inline` so that if `bit` is comptime-known, the instruction
        /// can be lowered to a more efficient machine code instruction if
        /// possible.
        pub inline fn bitReset(self: *Self, bit: Bit, comptime order: AtomicOrder) u1 {
            const mask = @as(T, 1) << bit;
            const value = self.fetchAnd(~mask, order);
            return @intFromBool(value & mask != 0);
        }

        /// Marked `inline` so that if `bit` is comptime-known, the instruction
        /// can be lowered to a more efficient machine code instruction if
        /// possible.
        pub inline fn bitToggle(self: *Self, bit: Bit, comptime order: AtomicOrder) u1 {
            const mask = @as(T, 1) << bit;
            const value = self.fetchXor(mask, order);
            return @intFromBool(value & mask != 0);
        }
    };
}

test Value {
    const RefCount = struct {
        count: Value(usize),
        dropFn: *const fn (*RefCount) void,

        const RefCount = @This();

        fn ref(rc: *RefCount) void {
            // no synchronization necessary; just updating a counter.
            _ = rc.count.fetchAdd(1, .monotonic);
        }

        fn unref(rc: *RefCount) void {
            // release ensures code before unref() happens-before the
            // count is decremented as dropFn could be called by then.
            if (rc.count.fetchSub(1, .release) == 1) {
                // seeing 1 in the counter means that other unref()s have happened,
                // but it doesn't mean that uses before each unref() are visible.
                // The load acquires the release-sequence created by previous unref()s
                // in order to ensure visibility of uses before dropping.
                _ = rc.count.load(.acquire);
                (rc.dropFn)(rc);
            }
        }

        fn noop(rc: *RefCount) void {
            _ = rc;
        }
    };

    var ref_count: RefCount = .{
        .count = Value(usize).init(0),
        .dropFn = RefCount.noop,
    };
    ref_count.ref();
    ref_count.unref();
}

test "Value.swap" {
    var x = Value(usize).init(5);
    try testing.expectEqual(@as(usize, 5), x.swap(10, .seq_cst));
    try testing.expectEqual(@as(usize, 10), x.load(.seq_cst));

    const E = enum(usize) { a, b, c };
    var y = Value(E).init(.c);
    try testing.expectEqual(E.c, y.swap(.a, .seq_cst));
    try testing.expectEqual(E.a, y.load(.seq_cst));

    var z = Value(f32).init(5.0);
    try testing.expectEqual(@as(f32, 5.0), z.swap(10.0, .seq_cst));
    try testing.expectEqual(@as(f32, 10.0), z.load(.seq_cst));

    var a = Value(bool).init(false);
    try testing.expectEqual(false, a.swap(true, .seq_cst));
    try testing.expectEqual(true, a.load(.seq_cst));

    var b = Value(?*u8).init(null);
    try testing.expectEqual(@as(?*u8, null), b.swap(@as(?*u8, @ptrFromInt(@alignOf(u8))), .seq_cst));
    try testing.expectEqual(@as(?*u8, @ptrFromInt(@alignOf(u8))), b.load(.seq_cst));
}

test "Value.store" {
    var x = Value(usize).init(5);
    x.store(10, .seq_cst);
    try testing.expectEqual(@as(usize, 10), x.load(.seq_cst));
}

test "Value.cmpxchgWeak" {
    var x = Value(usize).init(0);

    try testing.expectEqual(@as(?usize, 0), x.cmpxchgWeak(1, 0, .seq_cst, .seq_cst));
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));

    while (x.cmpxchgWeak(0, 1, .seq_cst, .seq_cst)) |_| {}
    try testing.expectEqual(@as(usize, 1), x.load(.seq_cst));

    while (x.cmpxchgWeak(1, 0, .seq_cst, .seq_cst)) |_| {}
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));
}

test "Value.cmpxchgStrong" {
    var x = Value(usize).init(0);
    try testing.expectEqual(@as(?usize, 0), x.cmpxchgStrong(1, 0, .seq_cst, .seq_cst));
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));
    try testing.expectEqual(@as(?usize, null), x.cmpxchgStrong(0, 1, .seq_cst, .seq_cst));
    try testing.expectEqual(@as(usize, 1), x.load(.seq_cst));
    try testing.expectEqual(@as(?usize, null), x.cmpxchgStrong(1, 0, .seq_cst, .seq_cst));
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));
}

test "Value.fetchAdd" {
    var x = Value(usize).init(5);
    try testing.expectEqual(@as(usize, 5), x.fetchAdd(5, .seq_cst));
    try testing.expectEqual(@as(usize, 10), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 10), x.fetchAdd(std.math.maxInt(usize), .seq_cst));
    try testing.expectEqual(@as(usize, 9), x.load(.seq_cst));
}

test "Value.fetchSub" {
    var x = Value(usize).init(5);
    try testing.expectEqual(@as(usize, 5), x.fetchSub(5, .seq_cst));
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 0), x.fetchSub(1, .seq_cst));
    try testing.expectEqual(@as(usize, std.math.maxInt(usize)), x.load(.seq_cst));
}

test "Value.fetchMin" {
    var x = Value(usize).init(5);
    try testing.expectEqual(@as(usize, 5), x.fetchMin(0, .seq_cst));
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 0), x.fetchMin(10, .seq_cst));
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));
}

test "Value.fetchMax" {
    var x = Value(usize).init(5);
    try testing.expectEqual(@as(usize, 5), x.fetchMax(10, .seq_cst));
    try testing.expectEqual(@as(usize, 10), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 10), x.fetchMax(5, .seq_cst));
    try testing.expectEqual(@as(usize, 10), x.load(.seq_cst));
}

test "Value.fetchAnd" {
    var x = Value(usize).init(0b11);
    try testing.expectEqual(@as(usize, 0b11), x.fetchAnd(0b10, .seq_cst));
    try testing.expectEqual(@as(usize, 0b10), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 0b10), x.fetchAnd(0b00, .seq_cst));
    try testing.expectEqual(@as(usize, 0b00), x.load(.seq_cst));
}

test "Value.fetchNand" {
    var x = Value(usize).init(0b11);
    try testing.expectEqual(@as(usize, 0b11), x.fetchNand(0b10, .seq_cst));
    try testing.expectEqual(~@as(usize, 0b10), x.load(.seq_cst));
    try testing.expectEqual(~@as(usize, 0b10), x.fetchNand(0b00, .seq_cst));
    try testing.expectEqual(~@as(usize, 0b00), x.load(.seq_cst));
}

test "Value.fetchOr" {
    var x = Value(usize).init(0b11);
    try testing.expectEqual(@as(usize, 0b11), x.fetchOr(0b100, .seq_cst));
    try testing.expectEqual(@as(usize, 0b111), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 0b111), x.fetchOr(0b010, .seq_cst));
    try testing.expectEqual(@as(usize, 0b111), x.load(.seq_cst));
}

test "Value.fetchXor" {
    var x = Value(usize).init(0b11);
    try testing.expectEqual(@as(usize, 0b11), x.fetchXor(0b10, .seq_cst));
    try testing.expectEqual(@as(usize, 0b01), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 0b01), x.fetchXor(0b01, .seq_cst));
    try testing.expectEqual(@as(usize, 0b00), x.load(.seq_cst));
}
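
// Illustrative addition (not from the upstream file): a minimal sketch exercising the
// generic rmw() wrapper with a couple of ops that mirror the named helpers above.
test "Value.rmw" {
    var x = Value(usize).init(5);
    try testing.expectEqual(@as(usize, 5), x.rmw(.Add, 5, .seq_cst));
    try testing.expectEqual(@as(usize, 10), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 10), x.rmw(.Xchg, 3, .seq_cst));
    try testing.expectEqual(@as(usize, 3), x.load(.seq_cst));
}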

test "Value.bitSet" {
    var x = Value(usize).init(0);

    for (0..@bitSizeOf(usize)) |bit_index| {
        const bit = @as(std.math.Log2Int(usize), @intCast(bit_index));
        const mask = @as(usize, 1) << bit;

        // setting the bit should change the bit
        try testing.expect(x.load(.seq_cst) & mask == 0);
        try testing.expectEqual(@as(u1, 0), x.bitSet(bit, .seq_cst));
        try testing.expect(x.load(.seq_cst) & mask != 0);

        // setting it again shouldn't change the bit
        try testing.expectEqual(@as(u1, 1), x.bitSet(bit, .seq_cst));
        try testing.expect(x.load(.seq_cst) & mask != 0);

        // all the previous bits should not have changed (still be set)
        for (0..bit_index) |prev_bit_index| {
            const prev_bit = @as(std.math.Log2Int(usize), @intCast(prev_bit_index));
            const prev_mask = @as(usize, 1) << prev_bit;
            try testing.expect(x.load(.seq_cst) & prev_mask != 0);
        }
    }
}

test "Value.bitReset" {
    var x = Value(usize).init(0);

    for (0..@bitSizeOf(usize)) |bit_index| {
        const bit = @as(std.math.Log2Int(usize), @intCast(bit_index));
        const mask = @as(usize, 1) << bit;
        x.raw |= mask;

        // unsetting the bit should change the bit
        try testing.expect(x.load(.seq_cst) & mask != 0);
        try testing.expectEqual(@as(u1, 1), x.bitReset(bit, .seq_cst));
        try testing.expect(x.load(.seq_cst) & mask == 0);

        // unsetting it again shouldn't change the bit
        try testing.expectEqual(@as(u1, 0), x.bitReset(bit, .seq_cst));
        try testing.expect(x.load(.seq_cst) & mask == 0);

        // all the previous bits should not have changed (still be reset)
        for (0..bit_index) |prev_bit_index| {
            const prev_bit = @as(std.math.Log2Int(usize), @intCast(prev_bit_index));
            const prev_mask = @as(usize, 1) << prev_bit;
            try testing.expect(x.load(.seq_cst) & prev_mask == 0);
        }
    }
}

test "Value.bitToggle" {
    var x = Value(usize).init(0);

    for (0..@bitSizeOf(usize)) |bit_index| {
        const bit = @as(std.math.Log2Int(usize), @intCast(bit_index));
        const mask = @as(usize, 1) << bit;

        // toggling the bit should change the bit
        try testing.expect(x.load(.seq_cst) & mask == 0);
        try testing.expectEqual(@as(u1, 0), x.bitToggle(bit, .seq_cst));
        try testing.expect(x.load(.seq_cst) & mask != 0);

        // toggling it again *should* change the bit
        try testing.expectEqual(@as(u1, 1), x.bitToggle(bit, .seq_cst));
        try testing.expect(x.load(.seq_cst) & mask == 0);

        // all the previous bits should not have changed (still be toggled back)
        for (0..bit_index) |prev_bit_index| {
            const prev_bit = @as(std.math.Log2Int(usize), @intCast(prev_bit_index));
            const prev_mask = @as(usize, 1) << prev_bit;
            try testing.expect(x.load(.seq_cst) & prev_mask == 0);
        }
    }
}

/// Signals to the processor that the caller is inside a busy-wait spin-loop.
pub inline fn spinLoopHint() void {
    switch (builtin.target.cpu.arch) {
        // No-op instruction that can hint the core to save pipelining/power
        // resources (or share them with a sibling hardware thread).
        // https://software.intel.com/content/www/us/en/develop/articles/benefitting-power-and-performance-sleep-loops.html
        .x86,
        .x86_64,
        => asm volatile ("pause"),

        // No-op instruction that serves as a hardware-thread resource yield hint.
        // https://stackoverflow.com/a/7588941
        .powerpc,
        .powerpcle,
        .powerpc64,
        .powerpc64le,
        => asm volatile ("or 27, 27, 27"),

        // `isb` appears more reliable for releasing execution resources than `yield`
        // on common aarch64 CPUs.
        // https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8258604
        // https://bugs.mysql.com/bug.php?id=100664
        .aarch64,
        .aarch64_be,
        => asm volatile ("isb"),

        // `yield` was introduced in v6k but is also available on v6m.
        // https://www.keil.com/support/man/docs/armasm/armasm_dom1361289926796.htm
        .arm,
        .armeb,
        .thumb,
        .thumbeb,
        => if (comptime builtin.cpu.hasAny(.arm, &.{ .has_v6k, .has_v6m })) {
            asm volatile ("yield");
        },

        // The 8-bit immediate specifies the number of cycles to pause for. We can't really be too
        // opinionated here.
        .hexagon,
        => asm volatile ("pause(#1)"),

        .riscv32,
        .riscv32be,
        .riscv64,
        .riscv64be,
        => if (comptime builtin.cpu.has(.riscv, .zihintpause)) {
            asm volatile ("pause");
        },

        else => {},
    }
}

test spinLoopHint {
    for (0..10) |_| {
        spinLoopHint();
    }
}
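
// Illustrative sketch (not part of the upstream file): the conventional pairing of
// spinLoopHint() with an acquire load of an atomic flag. The flag is published
// up-front so this single-threaded test terminates; in real code another thread
// would set it with a .release store.
test "spinLoopHint busy-wait pattern" {
    var ready = Value(bool).init(false);
    ready.store(true, .release);

    while (!ready.load(.acquire)) {
        spinLoopHint();
    }
    try testing.expect(ready.load(.acquire));
}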

pub fn cacheLineForCpu(cpu: std.Target.Cpu) u16 {
    return switch (cpu.arch) {
        // x86_64: Starting from Intel's Sandy Bridge, the spatial prefetcher pulls in pairs of 64-byte cache lines at a time.
        // - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
        // - https://github.com/facebook/folly/blob/1b5288e6eea6df074758f877c849b6e73bbb9fbb/folly/lang/Align.h#L107
        //
        // aarch64: Some big.LITTLE ARM archs have "big" cores with 128-byte cache lines:
        // - https://www.mono-project.com/news/2016/09/12/arm64-icache/
        // - https://cpufun.substack.com/p/more-m1-fun-hardware-information
        //
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/arc/Kconfig#L212
        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9
        .x86_64,
        .aarch64,
        .aarch64_be,
        .arc,
        .arceb,
        .powerpc64,
        .powerpc64le,
        => 128,

        // https://github.com/llvm/llvm-project/blob/e379094328e49731a606304f7e3559d4f1fa96f9/clang/lib/Basic/Targets/Hexagon.h#L145-L151
        .hexagon,
        => if (cpu.has(.hexagon, .v73)) 64 else 32,

        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_arm.go#L7
        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips.go#L7
        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mipsle.go#L7
        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips64x.go#L9
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/sparc/include/asm/cache.h#L14
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/microblaze/include/asm/cache.h#L15
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/sh/include/cpu-sh4/cpu/cache.h#L10
        .arm,
        .armeb,
        .thumb,
        .thumbeb,
        .microblaze,
        .microblazeel,
        .mips,
        .mipsel,
        .mips64,
        .mips64el,
        .sh,
        .sheb,
        .sparc,
        .sparc64,
        => 32,

        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/m68k/include/asm/cache.h#L10
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/openrisc/include/asm/cache.h#L24
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/parisc/include/asm/cache.h#L16
        .hppa,
        .hppa64,
        .m68k,
        .or1k,
        => 16,

        // - https://www.ti.com/lit/pdf/slaa498
        .msp430,
        => 8,

        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_s390x.go#L7
        // - https://sxauroratsubasa.sakura.ne.jp/documents/guide/pdfs/Aurora_ISA_guide.pdf
        .s390x,
        .ve,
        => 256,

        // Other x86 and WASM platforms have 64-byte cache lines.
        // The rest of the architectures are assumed to be similar.
        // - https://github.com/golang/go/blob/dda2991c2ea0c5914714469c4defc2562a907230/src/internal/cpu/cpu_x86.go#L9
        // - https://github.com/golang/go/blob/0a9321ad7f8c91e1b0c7184731257df923977eb9/src/internal/cpu/cpu_loong64.go#L11
        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_wasm.go#L7
        // - https://github.com/golang/go/blob/19e923182e590ae6568c2c714f20f32512aeb3e3/src/internal/cpu/cpu_riscv64.go#L7
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/xtensa/variants/csp/include/variant/core.h#L209
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/csky/Kconfig#L183
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/alpha/include/asm/cache.h#L11
        // - https://www.xmos.com/download/The-XMOS-XS3-Architecture.pdf
        else => 64,
    };
}

/// The estimated size of the CPU's cache line when atomically updating memory.
/// Add this much padding, or align to this boundary, to prevent atomically-updated
/// memory from forcing cache invalidations on nearby, non-atomic memory.
///
/// https://en.wikipedia.org/wiki/False_sharing
/// https://github.com/golang/go/search?q=CacheLinePadSize
pub const cache_line: comptime_int = cacheLineForCpu(builtin.cpu);

test "current CPU has a cache line size" {
    _ = cache_line;
}
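
// Illustrative sketch (not part of the upstream file): padding/aligning hot,
// atomically-updated fields to `cache_line` so they don't share a line with
// unrelated state (false sharing). The `Counters` struct and its field names
// are made up for this example.
test "cache_line padding example" {
    const Counters = struct {
        hits: Value(u64) align(cache_line),
        misses: Value(u64) align(cache_line),
    };
    var c = Counters{
        .hits = Value(u64).init(0),
        .misses = Value(u64).init(0),
    };
    _ = c.hits.fetchAdd(1, .monotonic);
    // With both fields aligned to cache_line, the struct spans at least two lines.
    try testing.expect(@sizeOf(Counters) >= 2 * cache_line);
    try testing.expectEqual(@as(u64, 1), c.hits.load(.monotonic));
    try testing.expectEqual(@as(u64, 0), c.misses.load(.monotonic));
}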

const std = @import("std.zig");
const builtin = @import("builtin");
const AtomicOrder = std.builtin.AtomicOrder;
const testing = std.testing;