master
1const std = @import("std");
2const Allocator = std.mem.Allocator;
3
/// The kinds of atomic helper this tool generates. The tag name is spliced
/// verbatim into each generated symbol name, and `writeFunction` dispatches on
/// it to pick the assembly generator.
const AtomicOp = enum { cas, swp, ldadd, ldclr, ldeor, ldset };
12
/// Entry point: prints the complete generated Zig source file to stdout.
///
/// The output consists of a fixed header, then one naked function per
/// (size, ordering, operation) combination, and finally a `comptime` block
/// containing one `@export` statement per generated function.
pub fn main() !void {
    // All allocations live until the program exits, so one arena covers them.
    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    var out_buffer: [2000]u8 = undefined;
    var out_writer = std.fs.File.stdout().writerStreaming(&out_buffer);
    const w = &out_writer.interface;

    try w.writeAll(
        \\//! This file is generated by tools/gen_outline_atomics.zig.
        \\const builtin = @import("builtin");
        \\const std = @import("std");
        \\const common = @import("common.zig");
        \\const always_has_lse = builtin.cpu.has(.aarch64, .lse);
        \\
        \\/// This default is overridden at runtime after inspecting CPU properties.
        \\/// It is intentionally not exported in order to make the machine code that
        \\/// uses it a statically predicted direct branch rather than using the PLT,
        \\/// which ARM is concerned would have too much overhead.
        \\var __aarch64_have_lse_atomics: u8 = @intFromBool(always_has_lse);
        \\
        \\
    );

    // The export statements are emitted after every function, so they are
    // accumulated here while the functions themselves are streamed out.
    var exports = std.array_list.Managed(u8).init(arena);
    try exports.appendSlice("\ncomptime {\n");

    const sizes = [_]N{ .one, .two, .four, .eight, .sixteen };
    const orderings = [_]Ordering{ .relax, .acq, .rel, .acq_rel };
    const ops = [_]AtomicOp{ .cas, .swp, .ldadd, .ldclr, .ldeor, .ldset };

    for (sizes) |n| {
        for (orderings) |order| {
            for (ops) |op| {
                // At the 16-byte width only compare-and-swap is generated.
                const wanted = n != .sixteen or op == .cas;
                if (!wanted) continue;

                const symbol = try std.fmt.allocPrint(
                    arena,
                    "__aarch64_{s}{d}_{s}",
                    .{ @tagName(op), n.toBytes(), @tagName(order) },
                );
                try writeFunction(arena, w, symbol, op, n, order);
                try exports.print(
                    " @export(&{s}, .{{ .name = \"{s}\", .linkage = common.linkage, .visibility = common.visibility }});\n",
                    .{ symbol, symbol },
                );
            }
        }
    }

    try w.writeAll(exports.items);
    try w.writeAll("}\n");
    // The writer is buffered; push out whatever is still pending.
    try w.flush();
}
63
/// Writes one complete naked function definition named `name` to `w`.
///
/// The assembly text comes from the generator that matches `op`. It is split
/// on newlines and each line is emitted as one line of a Zig multiline string
/// literal inside an `asm volatile` expression. That expression's single input
/// places `__aarch64_have_lse_atomics` in `w16`.
///
/// Parameters:
/// - `arena`: allocator handed to the assembly generators; nothing is freed here.
/// - `w`: destination writer; must provide `print` and `writeAll`.
/// - `name`: identifier of the generated function.
/// - `op`, `n`, `order`: select the generator and its size/ordering variant.
///
/// Returns any error raised by the generators or by the writer.
fn writeFunction(
    arena: Allocator,
    w: anytype,
    name: []const u8,
    op: AtomicOp,
    n: N,
    order: Ordering,
) !void {
    const body = switch (op) {
        .cas => try generateCas(arena, n, order),
        .swp => try generateSwp(arena, n, order),
        .ldadd => try generateLd(arena, n, order, .ldadd),
        .ldclr => try generateLd(arena, n, order, .ldclr),
        .ldeor => try generateLd(arena, n, order, .ldeor),
        .ldset => try generateLd(arena, n, order, .ldset),
    };

    // Format the signature directly into the writer instead of building a
    // temporary heap string that would be written once and then abandoned.
    try w.print("fn {s}() align(16) callconv(.naked) void {{", .{name});
    try w.writeAll(
        \\
        \\ @setRuntimeSafety(false);
        \\ asm volatile (
        \\
    );

    // Prefix every assembly line with `\\` so that the generated file carries
    // the assembly as a multiline string literal.
    var lines = std.mem.splitScalar(u8, body, '\n');
    while (lines.next()) |line| {
        try w.writeAll(" \\\\");
        try w.writeAll(line);
        try w.writeAll("\n");
    }

    try w.writeAll(
        \\ :
        \\ : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
        \\ : "w15", "w16", "w17", "memory"
        \\ );
        \\ unreachable;
        \\}
        \\
    );
}
108
/// Operand width of a generated helper. Each tag's integer value is the width
/// in bytes.
const N = enum(u8) {
    one = 1,
    two = 2,
    four = 4,
    eight = 8,
    sixteen = 16,

    /// Width-dependent text fragments substituted into the assembly templates.
    const Defines = struct {
        /// Suffix appended to the exclusive load/store mnemonics.
        s: []const u8,
        /// Mnemonic used by the compare-and-swap fallback to copy the expected
        /// value into a scratch register.
        uxt: []const u8,
        /// Hex constant added to the base `.inst` word.
        b: []const u8,
    };

    /// Returns the template fragments for this width.
    fn defines(n: N) Defines {
        return switch (n) {
            .one => Defines{ .s = "b", .uxt = "uxtb", .b = "0x00000000" },
            .two => Defines{ .s = "h", .uxt = "uxth", .b = "0x40000000" },
            .four => Defines{ .s = "", .uxt = "mov", .b = "0x80000000" },
            .eight => Defines{ .s = "", .uxt = "mov", .b = "0xc0000000" },
            .sixteen => Defines{ .s = "", .uxt = "mov", .b = "0x00000000" },
        };
    }

    /// Returns the register-name prefix used for operands of this width:
    /// `"w"` below eight bytes, `"x"` otherwise.
    fn register(n: N) []const u8 {
        return switch (n) {
            .one, .two, .four => "w",
            .eight, .sixteen => "x",
        };
    }

    /// Width in bytes.
    fn toBytes(n: N) u8 {
        return @intFromEnum(n);
    }

    /// Width in bits.
    fn toBits(n: N) u8 {
        const bytes = n.toBytes();
        return bytes * 8;
    }
};
159
/// Memory-ordering variant of a generated helper. The tag name is used as the
/// trailing component of the generated symbol name.
const Ordering = enum {
    relax,
    acq,
    rel,
    acq_rel,

    /// Ordering-dependent text fragments substituted into the assembly templates.
    const Defines = struct {
        /// The tag name with a leading underscore.
        suff: []const u8,
        /// Inserted after `ld` in the exclusive-load mnemonic.
        a: []const u8,
        /// Inserted after `st` in the exclusive-store mnemonic.
        l: []const u8,
        /// Hex constant added to the `.inst` word by `generateCas`.
        m: []const u8,
        /// Hex constant added to the `.inst` word by `generateSwp` and `generateLd`.
        n: []const u8,
    };

    /// Returns the template fragments for this ordering.
    fn defines(self: @This()) Defines {
        return switch (self) {
            .relax => Defines{
                .suff = "_relax",
                .a = "",
                .l = "",
                .m = "0x000000",
                .n = "0x000000",
            },
            .acq => Defines{
                .suff = "_acq",
                .a = "a",
                .l = "",
                .m = "0x400000",
                .n = "0x800000",
            },
            .rel => Defines{
                .suff = "_rel",
                .a = "",
                .l = "l",
                .m = "0x008000",
                .n = "0x400000",
            },
            .acq_rel => Defines{
                .suff = "_acq_rel",
                .a = "a",
                .l = "l",
                .m = "0x408000",
                .n = "0xc00000",
            },
        };
    }
};
207
/// The read-modify-write operations that `generateLd` can emit.
const LdName = enum {
    ldadd,
    ldclr,
    ldeor,
    ldset,
};
209
/// Builds the assembly text for a compare-and-swap helper of width `n` with
/// memory ordering `order`.
///
/// Both variants begin with `cbz w16, 8f`: a zero `w16` branches to an
/// exclusive-load / compare / exclusive-store retry loop, otherwise a single
/// `.inst`-encoded instruction is executed and the helper returns. The
/// 16-byte variant uses the register-pair forms of the exclusive load/store.
///
/// The returned slice is allocated from `arena`; allocation failure is the
/// only error.
fn generateCas(arena: Allocator, n: N, order: Ordering) ![]const u8 {
    const size = n.defines();
    const ord = order.defines();

    // The 16-byte width is the only one needing the pair variant; deal with it first.
    if (n == .sixteen) {
        const pair_inst = try std.fmt.allocPrint(arena, ".inst 0x48207c82 + {s}", .{ord.m});
        const load_pair = try std.fmt.allocPrint(arena, "ld{s}xp", .{ord.a});
        const store_pair = try std.fmt.allocPrint(arena, "st{s}xp", .{ord.l});

        return std.fmt.allocPrint(arena,
            \\ cbz w16, 8f
            \\ {[casp]s}
            \\ ret
            \\8:
            \\ mov x16, x0
            \\ mov x17, x1
            \\0:
            \\ {[ldxp]s} x0, x1, [x4]
            \\ cmp x0, x16
            \\ ccmp x1, x17, #0, eq
            \\ bne 1f
            \\ {[stxp]s} w15, x2, x3, [x4]
            \\ cbnz w15, 0b
            \\1:
            \\ ret
        , .{
            .casp = pair_inst,
            .ldxp = load_pair,
            .stxp = store_pair,
        });
    }

    const reg_prefix = n.register();
    const single_inst = try std.fmt.allocPrint(
        arena,
        ".inst 0x08a07c41 + {s} + {s}",
        .{ size.b, ord.m },
    );
    const load_excl = try std.fmt.allocPrint(arena, "ld{s}xr{s}", .{ ord.a, size.s });
    const store_excl = try std.fmt.allocPrint(arena, "st{s}xr{s}", .{ ord.l, size.s });

    return std.fmt.allocPrint(arena,
        \\ cbz w16, 8f
        \\ {[cas]s}
        \\ ret
        \\8:
        \\ {[uxt]s} {[reg]s}16, {[reg]s}0
        \\0:
        \\ {[ldxr]s} {[reg]s}0, [x2]
        \\ cmp {[reg]s}0, {[reg]s}16
        \\ bne 1f
        \\ {[stxr]s} w17, {[reg]s}1, [x2]
        \\ cbnz w17, 0b
        \\1:
        \\ ret
    , .{
        .cas = single_inst,
        .uxt = size.uxt,
        .ldxr = load_excl,
        .stxr = store_excl,
        .reg = reg_prefix,
    });
}
270
/// Builds the assembly text for a swap helper of width `n` with memory
/// ordering `order`.
///
/// The text begins with `cbz w16, 8f`: a zero `w16` branches to an
/// exclusive-load / exclusive-store retry loop, otherwise a single
/// `.inst`-encoded instruction is executed and the helper returns.
///
/// The returned slice is allocated from `arena`; allocation failure is the
/// only error.
fn generateSwp(arena: Allocator, n: N, order: Ordering) ![]const u8 {
    const template =
        \\ cbz w16, 8f
        \\ .inst 0x38208020 + {[b]s} + {[n]s}
        \\ ret
        \\8:
        \\ mov {[reg]s}16, {[reg]s}0
        \\0:
        \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x1]
        \\ st{[l]s}xr{[s]s} w17, {[reg]s}16, [x1]
        \\ cbnz w17, 0b
        \\1:
        \\ ret
    ;

    const size = n.defines();
    const ord = order.defines();

    return std.fmt.allocPrint(arena, template, .{
        .b = size.b,
        .n = ord.n,
        .reg = n.register(),
        .s = size.s,
        .a = ord.a,
        .l = ord.l,
    });
}
297
/// Builds the assembly text for the read-modify-write helper `ld` of width
/// `n` with memory ordering `order`.
///
/// The text begins with `cbz w16, 8f`: a zero `w16` branches to a retry loop
/// that exclusive-loads the old value, combines it with the operand using the
/// ALU mnemonic chosen for `ld`, and exclusive-stores the result. Otherwise a
/// single `.inst`-encoded instruction is executed and the helper returns.
///
/// The returned slice is allocated from `arena`; allocation failure is the
/// only error.
fn generateLd(arena: Allocator, n: N, order: Ordering, ld: LdName) ![]const u8 {
    const template =
        \\ cbz w16, 8f
        \\ .inst 0x38200020 + {[op_n]s} + {[b]s} + {[n]s}
        \\ ret
        \\8:
        \\ mov {[reg]s}16, {[reg]s}0
        \\0:
        \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x1]
        \\ {[op]s} {[reg]s}17, {[reg]s}0, {[reg]s}16
        \\ st{[l]s}xr{[s]s} w15, {[reg]s}17, [x1]
        \\ cbnz w15, 0b
        \\1:
        \\ ret
    ;

    // Per-operation pieces: the fallback-loop ALU mnemonic and the constant
    // added to the base `.inst` word.
    const Variant = struct {
        mnemonic: []const u8,
        inst_offset: []const u8,
    };
    const variant = switch (ld) {
        .ldadd => Variant{ .mnemonic = "add", .inst_offset = "0x0000" },
        .ldclr => Variant{ .mnemonic = "bic", .inst_offset = "0x1000" },
        .ldeor => Variant{ .mnemonic = "eor", .inst_offset = "0x2000" },
        .ldset => Variant{ .mnemonic = "orr", .inst_offset = "0x3000" },
    };

    const size = n.defines();
    const ord = order.defines();

    return std.fmt.allocPrint(arena, template, .{
        .op_n = variant.inst_offset,
        .b = size.b,
        .n = ord.n,
        .s = size.s,
        .a = ord.a,
        .l = ord.l,
        .op = variant.mnemonic,
        .reg = n.register(),
    });
}