/// This is a thin wrapper around a primitive value to prevent accidental data races.
pub fn Value(comptime T: type) type {
    return extern struct {
        /// Care must be taken to avoid data races when interacting with this field directly.
        raw: T,

        const Self = @This();

        pub fn init(value: T) Self {
            return .{ .raw = value };
        }

        pub inline fn load(self: *const Self, comptime order: AtomicOrder) T {
            return @atomicLoad(T, &self.raw, order);
        }

        pub inline fn store(self: *Self, value: T, comptime order: AtomicOrder) void {
            @atomicStore(T, &self.raw, value, order);
        }

        pub inline fn swap(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Xchg, operand, order);
        }

        pub inline fn cmpxchgWeak(
            self: *Self,
            expected_value: T,
            new_value: T,
            comptime success_order: AtomicOrder,
            comptime fail_order: AtomicOrder,
        ) ?T {
            return @cmpxchgWeak(T, &self.raw, expected_value, new_value, success_order, fail_order);
        }

        pub inline fn cmpxchgStrong(
            self: *Self,
            expected_value: T,
            new_value: T,
            comptime success_order: AtomicOrder,
            comptime fail_order: AtomicOrder,
        ) ?T {
            return @cmpxchgStrong(T, &self.raw, expected_value, new_value, success_order, fail_order);
        }

        pub inline fn fetchAdd(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Add, operand, order);
        }

        pub inline fn fetchSub(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Sub, operand, order);
        }

        pub inline fn fetchMin(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Min, operand, order);
        }

        pub inline fn fetchMax(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Max, operand, order);
        }

        pub inline fn fetchAnd(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .And, operand, order);
        }

        pub inline fn fetchNand(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Nand, operand, order);
        }

        pub inline fn fetchXor(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Xor, operand, order);
        }

        pub inline fn fetchOr(self: *Self, operand: T, comptime order: AtomicOrder) T {
            return @atomicRmw(T, &self.raw, .Or, operand, order);
        }

        pub inline fn rmw(
            self: *Self,
            comptime op: std.builtin.AtomicRmwOp,
            operand: T,
            comptime order: AtomicOrder,
        ) T {
            return @atomicRmw(T, &self.raw, op, operand, order);
        }

        const Bit = std.math.Log2Int(T);

        /// Marked `inline` so that if `bit` is comptime-known, the instruction
        /// can be lowered to a more efficient machine code instruction if
        /// possible.
        pub inline fn bitSet(self: *Self, bit: Bit, comptime order: AtomicOrder) u1 {
            const mask = @as(T, 1) << bit;
            const value = self.fetchOr(mask, order);
            return @intFromBool(value & mask != 0);
        }

        /// Marked `inline` so that if `bit` is comptime-known, the instruction
        /// can be lowered to a more efficient machine code instruction if
        /// possible.
        pub inline fn bitReset(self: *Self, bit: Bit, comptime order: AtomicOrder) u1 {
            const mask = @as(T, 1) << bit;
            const value = self.fetchAnd(~mask, order);
            return @intFromBool(value & mask != 0);
        }

        /// Marked `inline` so that if `bit` is comptime-known, the instruction
        /// can be lowered to a more efficient machine code instruction if
        /// possible.
        pub inline fn bitToggle(self: *Self, bit: Bit, comptime order: AtomicOrder) u1 {
            const mask = @as(T, 1) << bit;
            const value = self.fetchXor(mask, order);
            return @intFromBool(value & mask != 0);
        }
    };
}

test Value {
    const RefCount = struct {
        count: Value(usize),
        dropFn: *const fn (*RefCount) void,

        const RefCount = @This();

        fn ref(rc: *RefCount) void {
            // no synchronization necessary; just updating a counter.
            _ = rc.count.fetchAdd(1, .monotonic);
        }

        fn unref(rc: *RefCount) void {
            // release ensures code before unref() happens-before the
            // count is decremented as dropFn could be called by then.
            if (rc.count.fetchSub(1, .release) == 1) {
                // seeing 1 in the counter means that other unref()s have happened,
                // but it doesn't mean that uses before each unref() are visible.
                // The load acquires the release-sequence created by previous unref()s
                // in order to ensure visibility of uses before dropping.
                _ = rc.count.load(.acquire);
                (rc.dropFn)(rc);
            }
        }

        fn noop(rc: *RefCount) void {
            _ = rc;
        }
    };

    var ref_count: RefCount = .{
        .count = Value(usize).init(0),
        .dropFn = RefCount.noop,
    };
    ref_count.ref();
    ref_count.unref();
}

test "Value.swap" {
    var x = Value(usize).init(5);
    try testing.expectEqual(@as(usize, 5), x.swap(10, .seq_cst));
    try testing.expectEqual(@as(usize, 10), x.load(.seq_cst));

    const E = enum(usize) { a, b, c };
    var y = Value(E).init(.c);
    try testing.expectEqual(E.c, y.swap(.a, .seq_cst));
    try testing.expectEqual(E.a, y.load(.seq_cst));

    var z = Value(f32).init(5.0);
    try testing.expectEqual(@as(f32, 5.0), z.swap(10.0, .seq_cst));
    try testing.expectEqual(@as(f32, 10.0), z.load(.seq_cst));

    var a = Value(bool).init(false);
    try testing.expectEqual(false, a.swap(true, .seq_cst));
    try testing.expectEqual(true, a.load(.seq_cst));

    var b = Value(?*u8).init(null);
    try testing.expectEqual(@as(?*u8, null), b.swap(@as(?*u8, @ptrFromInt(@alignOf(u8))), .seq_cst));
    try testing.expectEqual(@as(?*u8, @ptrFromInt(@alignOf(u8))), b.load(.seq_cst));
}

test "Value.store" {
    var x = Value(usize).init(5);
    x.store(10, .seq_cst);
    try testing.expectEqual(@as(usize, 10), x.load(.seq_cst));
}

test "Value.cmpxchgWeak" {
    var x = Value(usize).init(0);

    try testing.expectEqual(@as(?usize, 0), x.cmpxchgWeak(1, 0, .seq_cst, .seq_cst));
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));

    while (x.cmpxchgWeak(0, 1, .seq_cst, .seq_cst)) |_| {}
    try testing.expectEqual(@as(usize, 1), x.load(.seq_cst));

    while (x.cmpxchgWeak(1, 0, .seq_cst, .seq_cst)) |_| {}
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));
}

test "Value.cmpxchgStrong" {
    var x = Value(usize).init(0);
    try testing.expectEqual(@as(?usize, 0), x.cmpxchgStrong(1, 0, .seq_cst, .seq_cst));
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));
    try testing.expectEqual(@as(?usize, null), x.cmpxchgStrong(0, 1, .seq_cst, .seq_cst));
    try testing.expectEqual(@as(usize, 1), x.load(.seq_cst));
    try testing.expectEqual(@as(?usize, null), x.cmpxchgStrong(1, 0, .seq_cst, .seq_cst));
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));
}

test "Value.fetchAdd" {
    var x = Value(usize).init(5);
    try testing.expectEqual(@as(usize, 5), x.fetchAdd(5, .seq_cst));
    try testing.expectEqual(@as(usize, 10), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 10), x.fetchAdd(std.math.maxInt(usize), .seq_cst));
    try testing.expectEqual(@as(usize, 9), x.load(.seq_cst));
}

test "Value.fetchSub" {
    var x = Value(usize).init(5);
    try testing.expectEqual(@as(usize, 5), x.fetchSub(5, .seq_cst));
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 0), x.fetchSub(1, .seq_cst));
    try testing.expectEqual(@as(usize, std.math.maxInt(usize)), x.load(.seq_cst));
}

test "Value.fetchMin" {
    var x = Value(usize).init(5);
    try testing.expectEqual(@as(usize, 5), x.fetchMin(0, .seq_cst));
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 0), x.fetchMin(10, .seq_cst));
    try testing.expectEqual(@as(usize, 0), x.load(.seq_cst));
}

test "Value.fetchMax" {
    var x = Value(usize).init(5);
    try testing.expectEqual(@as(usize, 5), x.fetchMax(10, .seq_cst));
    try testing.expectEqual(@as(usize, 10), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 10), x.fetchMax(5, .seq_cst));
    try testing.expectEqual(@as(usize, 10), x.load(.seq_cst));
}

test "Value.fetchAnd" {
    var x = Value(usize).init(0b11);
    try testing.expectEqual(@as(usize, 0b11), x.fetchAnd(0b10, .seq_cst));
    try testing.expectEqual(@as(usize, 0b10), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 0b10), x.fetchAnd(0b00, .seq_cst));
    try testing.expectEqual(@as(usize, 0b00), x.load(.seq_cst));
}

test "Value.fetchNand" {
    var x = Value(usize).init(0b11);
    try testing.expectEqual(@as(usize, 0b11), x.fetchNand(0b10, .seq_cst));
    try testing.expectEqual(~@as(usize, 0b10), x.load(.seq_cst));
    try testing.expectEqual(~@as(usize, 0b10), x.fetchNand(0b00, .seq_cst));
    try testing.expectEqual(~@as(usize, 0b00), x.load(.seq_cst));
}

test "Value.fetchOr" {
    var x = Value(usize).init(0b11);
    try testing.expectEqual(@as(usize, 0b11), x.fetchOr(0b100, .seq_cst));
    try testing.expectEqual(@as(usize, 0b111), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 0b111), x.fetchOr(0b010, .seq_cst));
    try testing.expectEqual(@as(usize, 0b111), x.load(.seq_cst));
}

test "Value.fetchXor" {
    var x = Value(usize).init(0b11);
    try testing.expectEqual(@as(usize, 0b11), x.fetchXor(0b10, .seq_cst));
    try testing.expectEqual(@as(usize, 0b01), x.load(.seq_cst));
    try testing.expectEqual(@as(usize, 0b01), x.fetchXor(0b01, .seq_cst));
    try testing.expectEqual(@as(usize, 0b00), x.load(.seq_cst));
}

test "Value.bitSet" {
    var x = Value(usize).init(0);

    for (0..@bitSizeOf(usize)) |bit_index| {
        const bit = @as(std.math.Log2Int(usize), @intCast(bit_index));
        const mask = @as(usize, 1) << bit;

        // setting the bit should change the bit
        try testing.expect(x.load(.seq_cst) & mask == 0);
        try testing.expectEqual(@as(u1, 0), x.bitSet(bit, .seq_cst));
        try testing.expect(x.load(.seq_cst) & mask != 0);

        // setting it again shouldn't change the bit
        try testing.expectEqual(@as(u1, 1), x.bitSet(bit, .seq_cst));
        try testing.expect(x.load(.seq_cst) & mask != 0);

        // none of the previous bits should have changed (they remain set)
        for (0..bit_index) |prev_bit_index| {
            const prev_bit = @as(std.math.Log2Int(usize), @intCast(prev_bit_index));
            const prev_mask = @as(usize, 1) << prev_bit;
            try testing.expect(x.load(.seq_cst) & prev_mask != 0);
        }
    }
}

test "Value.bitReset" {
    var x = Value(usize).init(0);

    for (0..@bitSizeOf(usize)) |bit_index| {
        const bit = @as(std.math.Log2Int(usize), @intCast(bit_index));
        const mask = @as(usize, 1) << bit;
        x.raw |= mask;

        // unsetting the bit should change the bit
        try testing.expect(x.load(.seq_cst) & mask != 0);
        try testing.expectEqual(@as(u1, 1), x.bitReset(bit, .seq_cst));
        try testing.expect(x.load(.seq_cst) & mask == 0);

        // unsetting it again shouldn't change the bit
        try testing.expectEqual(@as(u1, 0), x.bitReset(bit, .seq_cst));
        try testing.expect(x.load(.seq_cst) & mask == 0);

        // none of the previous bits should have changed (they remain reset)
        for (0..bit_index) |prev_bit_index| {
            const prev_bit = @as(std.math.Log2Int(usize), @intCast(prev_bit_index));
            const prev_mask = @as(usize, 1) << prev_bit;
            try testing.expect(x.load(.seq_cst) & prev_mask == 0);
        }
    }
}

test "Value.bitToggle" {
    var x = Value(usize).init(0);

    for (0..@bitSizeOf(usize)) |bit_index| {
        const bit = @as(std.math.Log2Int(usize), @intCast(bit_index));
        const mask = @as(usize, 1) << bit;

        // toggling the bit should change the bit
        try testing.expect(x.load(.seq_cst) & mask == 0);
        try testing.expectEqual(@as(u1, 0), x.bitToggle(bit, .seq_cst));
        try testing.expect(x.load(.seq_cst) & mask != 0);

        // toggling it again *should* change the bit
        try testing.expectEqual(@as(u1, 1), x.bitToggle(bit, .seq_cst));
        try testing.expect(x.load(.seq_cst) & mask == 0);

        // none of the previous bits should have changed (they were toggled back)
        for (0..bit_index) |prev_bit_index| {
            const prev_bit = @as(std.math.Log2Int(usize), @intCast(prev_bit_index));
            const prev_mask = @as(usize, 1) << prev_bit;
            try testing.expect(x.load(.seq_cst) & prev_mask == 0);
        }
    }
}

/// Signals to the processor that the caller is inside a busy-wait spin-loop.
pub inline fn spinLoopHint() void {
    switch (builtin.target.cpu.arch) {
        // No-op instruction that can hint to save (or share with a hardware-thread)
        // pipelining/power resources
        // https://software.intel.com/content/www/us/en/develop/articles/benefitting-power-and-performance-sleep-loops.html
        .x86,
        .x86_64,
        => asm volatile ("pause"),

        // No-op instruction that serves as a hardware-thread resource yield hint.
        // https://stackoverflow.com/a/7588941
        .powerpc,
        .powerpcle,
        .powerpc64,
        .powerpc64le,
        => asm volatile ("or 27, 27, 27"),

        // `isb` appears more reliable for releasing execution resources than `yield`
        // on common aarch64 CPUs.
        // https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8258604
        // https://bugs.mysql.com/bug.php?id=100664
        .aarch64,
        .aarch64_be,
        => asm volatile ("isb"),

        // `yield` was introduced in v6k but is also available on v6m.
        // https://www.keil.com/support/man/docs/armasm/armasm_dom1361289926796.htm
        .arm,
        .armeb,
        .thumb,
        .thumbeb,
        => if (comptime builtin.cpu.hasAny(.arm, &.{ .has_v6k, .has_v6m })) {
            asm volatile ("yield");
        },

        // The 8-bit immediate specifies the number of cycles to pause for. We can't really be too
        // opinionated here.
        .hexagon,
        => asm volatile ("pause(#1)"),

        .riscv32,
        .riscv32be,
        .riscv64,
        .riscv64be,
        => if (comptime builtin.cpu.has(.riscv, .zihintpause)) {
            asm volatile ("pause");
        },

        else => {},
    }
}

test spinLoopHint {
    for (0..10) |_| {
        spinLoopHint();
    }
}

pub fn cacheLineForCpu(cpu: std.Target.Cpu) u16 {
    return switch (cpu.arch) {
        // x86_64: Starting from Intel's Sandy Bridge, the spatial prefetcher pulls in pairs of 64-byte cache lines at a time.
        // - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
        // - https://github.com/facebook/folly/blob/1b5288e6eea6df074758f877c849b6e73bbb9fbb/folly/lang/Align.h#L107
        //
        // aarch64: Some big.LITTLE ARM archs have "big" cores with 128-byte cache lines:
        // - https://www.mono-project.com/news/2016/09/12/arm64-icache/
        // - https://cpufun.substack.com/p/more-m1-fun-hardware-information
        //
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/arc/Kconfig#L212
        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9
        .x86_64,
        .aarch64,
        .aarch64_be,
        .arc,
        .arceb,
        .powerpc64,
        .powerpc64le,
        => 128,

        // https://github.com/llvm/llvm-project/blob/e379094328e49731a606304f7e3559d4f1fa96f9/clang/lib/Basic/Targets/Hexagon.h#L145-L151
        .hexagon,
        => if (cpu.has(.hexagon, .v73)) 64 else 32,

        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_arm.go#L7
        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips.go#L7
        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mipsle.go#L7
        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips64x.go#L9
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/sparc/include/asm/cache.h#L14
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/microblaze/include/asm/cache.h#L15
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/sh/include/cpu-sh4/cpu/cache.h#L10
        .arm,
        .armeb,
        .thumb,
        .thumbeb,
        .microblaze,
        .microblazeel,
        .mips,
        .mipsel,
        .mips64,
        .mips64el,
        .sh,
        .sheb,
        .sparc,
        .sparc64,
        => 32,

        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/m68k/include/asm/cache.h#L10
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/openrisc/include/asm/cache.h#L24
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/parisc/include/asm/cache.h#L16
        .hppa,
        .hppa64,
        .m68k,
        .or1k,
        => 16,

        // - https://www.ti.com/lit/pdf/slaa498
        .msp430,
        => 8,

        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_s390x.go#L7
        // - https://sxauroratsubasa.sakura.ne.jp/documents/guide/pdfs/Aurora_ISA_guide.pdf
        .s390x,
        .ve,
        => 256,

        // Other x86 and WASM platforms have 64-byte cache lines.
        // The rest of the architectures are assumed to be similar.
        // - https://github.com/golang/go/blob/dda2991c2ea0c5914714469c4defc2562a907230/src/internal/cpu/cpu_x86.go#L9
        // - https://github.com/golang/go/blob/0a9321ad7f8c91e1b0c7184731257df923977eb9/src/internal/cpu/cpu_loong64.go#L11
        // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_wasm.go#L7
        // - https://github.com/golang/go/blob/19e923182e590ae6568c2c714f20f32512aeb3e3/src/internal/cpu/cpu_riscv64.go#L7
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/xtensa/variants/csp/include/variant/core.h#L209
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/csky/Kconfig#L183
        // - https://github.com/torvalds/linux/blob/3a7e02c040b130b5545e4b115aada7bacd80a2b6/arch/alpha/include/asm/cache.h#L11
        // - https://www.xmos.com/download/The-XMOS-XS3-Architecture.pdf
        else => 64,
    };
}

/// The estimated size of the CPU's cache line when atomically updating memory.
/// Add this much padding or align to this boundary to keep atomically-updated
/// memory from forcing cache invalidations on nearby, non-atomic memory.
///
/// https://en.wikipedia.org/wiki/False_sharing
/// https://github.com/golang/go/search?q=CacheLinePadSize
pub const cache_line: comptime_int = cacheLineForCpu(builtin.cpu);

test "current CPU has a cache line size" {
    _ = cache_line;
}
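
// A minimal sketch of using `cache_line` to avoid false sharing: aligning each
// atomically-updated field to the cache-line boundary so writes to one don't
// invalidate the line holding the other. The struct and field names are
// illustrative only.
test "cache_line padding sketch" {
    const PaddedCounters = struct {
        hits: Value(usize) align(cache_line),
        misses: Value(usize) align(cache_line),
    };
    var counters: PaddedCounters = .{
        .hits = Value(usize).init(0),
        .misses = Value(usize).init(0),
    };
    _ = counters.hits.fetchAdd(1, .monotonic);
    try testing.expectEqual(@as(usize, 1), counters.hits.load(.monotonic));
    try testing.expectEqual(@as(usize, 0), counters.misses.load(.monotonic));
}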

const std = @import("std.zig");
const builtin = @import("builtin");
const AtomicOrder = std.builtin.AtomicOrder;
const testing = std.testing;