master
  1const std = @import("std");
  2const Allocator = std.mem.Allocator;
  3
  4const AtomicOp = enum {
  5    cas,
  6    swp,
  7    ldadd,
  8    ldclr,
  9    ldeor,
 10    ldset,
 11};
 12
 13pub fn main() !void {
 14    var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
 15    defer arena_instance.deinit();
 16    const arena = arena_instance.allocator();
 17
 18    //const args = try std.process.argsAlloc(arena);
 19
 20    var stdout_buffer: [2000]u8 = undefined;
 21    var stdout_writer = std.fs.File.stdout().writerStreaming(&stdout_buffer);
 22    const w = &stdout_writer.interface;
 23
 24    try w.writeAll(
 25        \\//! This file is generated by tools/gen_outline_atomics.zig.
 26        \\const builtin = @import("builtin");
 27        \\const std = @import("std");
 28        \\const common = @import("common.zig");
 29        \\const always_has_lse = builtin.cpu.has(.aarch64, .lse);
 30        \\
 31        \\/// This default is overridden at runtime after inspecting CPU properties.
 32        \\/// It is intentionally not exported in order to make the machine code that
 33        \\/// uses it a statically predicted direct branch rather than using the PLT,
 34        \\/// which ARM is concerned would have too much overhead.
 35        \\var __aarch64_have_lse_atomics: u8 = @intFromBool(always_has_lse);
 36        \\
 37        \\
 38    );
 39
 40    var footer = std.array_list.Managed(u8).init(arena);
 41    try footer.appendSlice("\ncomptime {\n");
 42
 43    for ([_]N{ .one, .two, .four, .eight, .sixteen }) |n| {
 44        for ([_]Ordering{ .relax, .acq, .rel, .acq_rel }) |order| {
 45            for ([_]AtomicOp{ .cas, .swp, .ldadd, .ldclr, .ldeor, .ldset }) |op| {
 46                if (n == .sixteen and op != .cas) continue;
 47
 48                const name = try std.fmt.allocPrint(arena, "__aarch64_{s}{d}_{s}", .{
 49                    @tagName(op), n.toBytes(), @tagName(order),
 50                });
 51                try writeFunction(arena, w, name, op, n, order);
 52                try footer.print("    @export(&{s}, .{{ .name = \"{s}\", .linkage = common.linkage, .visibility = common.visibility }});\n", .{
 53                    name, name,
 54                });
 55            }
 56        }
 57    }
 58
 59    try w.writeAll(footer.items);
 60    try w.writeAll("}\n");
 61    try w.flush();
 62}
 63
 64fn writeFunction(
 65    arena: Allocator,
 66    w: anytype,
 67    name: []const u8,
 68    op: AtomicOp,
 69    n: N,
 70    order: Ordering,
 71) !void {
 72    const body = switch (op) {
 73        .cas => try generateCas(arena, n, order),
 74        .swp => try generateSwp(arena, n, order),
 75        .ldadd => try generateLd(arena, n, order, .ldadd),
 76        .ldclr => try generateLd(arena, n, order, .ldclr),
 77        .ldeor => try generateLd(arena, n, order, .ldeor),
 78        .ldset => try generateLd(arena, n, order, .ldset),
 79    };
 80    const fn_sig = try std.fmt.allocPrint(
 81        arena,
 82        "fn {[name]s}() align(16) callconv(.naked) void {{",
 83        .{ .name = name },
 84    );
 85    try w.writeAll(fn_sig);
 86    try w.writeAll(
 87        \\
 88        \\    @setRuntimeSafety(false);
 89        \\    asm volatile (
 90        \\
 91    );
 92    var iter = std.mem.splitScalar(u8, body, '\n');
 93    while (iter.next()) |line| {
 94        try w.writeAll("        \\\\");
 95        try w.writeAll(line);
 96        try w.writeAll("\n");
 97    }
 98    try w.writeAll(
 99        \\        :
100        \\        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
101        \\        : "w15", "w16", "w17", "memory"
102        \\    );
103        \\    unreachable;
104        \\}
105        \\
106    );
107}
108
/// Operand size of a generated helper; each tag's integer value is the size
/// in bytes.
const N = enum(u8) {
    one = 1,
    two = 2,
    four = 4,
    eight = 8,
    sixteen = 16,

    /// Size-dependent text fragments substituted into the assembly templates.
    const Defines = struct {
        /// Suffix appended to the load/store-exclusive mnemonics.
        s: []const u8,
        /// Mnemonic used by the compare-and-swap fallback to copy the
        /// expected value into a scratch register.
        uxt: []const u8,
        /// Constant added to the `.inst` encodings.
        b: []const u8,
    };

    /// Returns the template fragments for this operand size.
    fn defines(n: N) Defines {
        return switch (n) {
            .one => .{ .s = "b", .uxt = "uxtb", .b = "0x00000000" },
            .two => .{ .s = "h", .uxt = "uxth", .b = "0x40000000" },
            .four => .{ .s = "", .uxt = "mov", .b = "0x80000000" },
            .eight => .{ .s = "", .uxt = "mov", .b = "0xc0000000" },
            .sixteen => .{ .s = "", .uxt = "mov", .b = "0x00000000" },
        };
    }

    /// Returns the register-name prefix for this size: `"w"` for sizes below
    /// eight bytes, `"x"` otherwise.
    fn register(n: N) []const u8 {
        return switch (n) {
            .one, .two, .four => "w",
            .eight, .sixteen => "x",
        };
    }

    /// Returns the operand size in bytes.
    fn toBytes(n: N) u8 {
        return @intFromEnum(n);
    }

    /// Returns the operand size in bits.
    fn toBits(n: N) u8 {
        return 8 * n.toBytes();
    }
};
159
/// Memory ordering variant of a generated helper. The tag name is used
/// verbatim as the suffix of the emitted symbol name.
const Ordering = enum {
    relax,
    acq,
    rel,
    acq_rel,

    /// Ordering-dependent text fragments substituted into the assembly
    /// templates.
    const Defines = struct {
        /// Underscore-prefixed ordering name.
        suff: []const u8,
        /// Inserted after `ld` in the load-exclusive mnemonics.
        a: []const u8,
        /// Inserted after `st` in the store-exclusive mnemonics.
        l: []const u8,
        /// Constant added to the compare-and-swap `.inst` encodings.
        m: []const u8,
        /// Constant added to the swap and load-op `.inst` encodings.
        n: []const u8,
    };

    /// Returns the template fragments for this memory ordering.
    fn defines(self: @This()) Defines {
        return switch (self) {
            .relax => .{
                .suff = "_relax",
                .a = "",
                .l = "",
                .m = "0x000000",
                .n = "0x000000",
            },
            .acq => .{
                .suff = "_acq",
                .a = "a",
                .l = "",
                .m = "0x400000",
                .n = "0x800000",
            },
            .rel => .{
                .suff = "_rel",
                .a = "",
                .l = "l",
                .m = "0x008000",
                .n = "0x400000",
            },
            .acq_rel => .{
                .suff = "_acq_rel",
                .a = "a",
                .l = "l",
                .m = "0x408000",
                .n = "0xc00000",
            },
        };
    }
};
207
/// The load-and-operate variants accepted by `generateLd`.
const LdName = enum {
    ldadd,
    ldclr,
    ldeor,
    ldset,
};
209
/// Returns the assembly text of a compare-and-swap helper for size `n` and
/// memory ordering `order`, allocated from `arena`.
///
/// The text starts with `cbz w16, 8f`: when `w16` is non-zero it falls through
/// to a raw `.inst` encoding followed by `ret`; otherwise it branches to an
/// exclusive load / compare / exclusive store retry loop. The 16-byte size
/// uses the pair forms (`ld*xp` / `st*xp`) with a two-register comparison.
fn generateCas(arena: Allocator, n: N, order: Ordering) ![]const u8 {
    const size = n.defines();
    const mem_order = order.defines();

    if (n == .sixteen) {
        return std.fmt.allocPrint(arena,
            \\        cbz     w16, 8f
            \\        .inst 0x48207c82 + {[m]s}
            \\        ret
            \\8:
            \\        mov    x16, x0
            \\        mov    x17, x1
            \\0:
            \\        ld{[a]s}xp   x0, x1, [x4]
            \\        cmp    x0, x16
            \\        ccmp   x1, x17, #0, eq
            \\        bne    1f
            \\        st{[l]s}xp   w15, x2, x3, [x4]
            \\        cbnz   w15, 0b
            \\1:
            \\        ret
        , .{
            .m = mem_order.m,
            .a = mem_order.a,
            .l = mem_order.l,
        });
    }

    return std.fmt.allocPrint(arena,
        \\        cbz     w16, 8f
        \\        .inst 0x08a07c41 + {[b]s} + {[m]s}
        \\        ret
        \\8:
        \\        {[uxt]s}    {[reg]s}16, {[reg]s}0
        \\0:
        \\        ld{[a]s}xr{[s]s}   {[reg]s}0, [x2]
        \\        cmp    {[reg]s}0, {[reg]s}16
        \\        bne    1f
        \\        st{[l]s}xr{[s]s}   w17, {[reg]s}1, [x2]
        \\        cbnz   w17, 0b
        \\1:
        \\        ret
    , .{
        .b = size.b,
        .m = mem_order.m,
        .uxt = size.uxt,
        .reg = n.register(),
        .a = mem_order.a,
        .l = mem_order.l,
        .s = size.s,
    });
}
270
/// Returns the assembly text of a swap helper for size `n` and memory
/// ordering `order`, allocated from `arena`.
///
/// The text starts with `cbz w16, 8f`: when `w16` is non-zero it falls through
/// to a raw `.inst` encoding followed by `ret`; otherwise it branches to an
/// exclusive load / exclusive store retry loop on the address in `x1`.
fn generateSwp(arena: Allocator, n: N, order: Ordering) ![]const u8 {
    const size = n.defines();
    const mem_order = order.defines();

    return std.fmt.allocPrint(arena,
        \\        cbz     w16, 8f
        \\        .inst 0x38208020 + {[size_bits]s} + {[order_bits]s}
        \\        ret
        \\8:
        \\        mov    {[r]s}16, {[r]s}0
        \\0:
        \\        ld{[acq]s}xr{[sz]s}   {[r]s}0, [x1]
        \\        st{[rel]s}xr{[sz]s}   w17, {[r]s}16, [x1]
        \\        cbnz   w17, 0b
        \\1:
        \\        ret
    , .{
        .size_bits = size.b,
        .order_bits = mem_order.n,
        .r = n.register(),
        .acq = mem_order.a,
        .rel = mem_order.l,
        .sz = size.s,
    });
}
297
/// Returns the assembly text of a load-and-operate helper (`ld`) for size `n`
/// and memory ordering `order`, allocated from `arena`.
///
/// The text starts with `cbz w16, 8f`: when `w16` is non-zero it falls through
/// to a raw `.inst` encoding followed by `ret`; otherwise it branches to a
/// retry loop that exclusively loads from `[x1]`, applies the ALU instruction
/// matching `ld`, and exclusively stores the result back.
fn generateLd(arena: Allocator, n: N, order: Ordering, ld: LdName) ![]const u8 {
    // Per-operation pieces: the ALU mnemonic used by the fallback loop and the
    // constant added to the `.inst` encoding.
    const Variant = struct {
        mnemonic: []const u8,
        inst_bits: []const u8,
    };
    const variant: Variant = switch (ld) {
        .ldadd => .{ .mnemonic = "add", .inst_bits = "0x0000" },
        .ldclr => .{ .mnemonic = "bic", .inst_bits = "0x1000" },
        .ldeor => .{ .mnemonic = "eor", .inst_bits = "0x2000" },
        .ldset => .{ .mnemonic = "orr", .inst_bits = "0x3000" },
    };

    const size = n.defines();
    const mem_order = order.defines();

    return std.fmt.allocPrint(arena,
        \\        cbz     w16, 8f
        \\        .inst 0x38200020 + {[inst_bits]s} + {[size_bits]s} + {[order_bits]s}
        \\        ret
        \\8:
        \\        mov    {[r]s}16, {[r]s}0
        \\0:
        \\        ld{[acq]s}xr{[sz]s}   {[r]s}0, [x1]
        \\        {[mnemonic]s}     {[r]s}17, {[r]s}0, {[r]s}16
        \\        st{[rel]s}xr{[sz]s}   w15, {[r]s}17, [x1]
        \\        cbnz   w15, 0b
        \\1:
        \\        ret
    , .{
        .inst_bits = variant.inst_bits,
        .size_bits = size.b,
        .order_bits = mem_order.n,
        .r = n.register(),
        .acq = mem_order.a,
        .rel = mem_order.l,
        .sz = size.s,
        .mnemonic = variant.mnemonic,
    });
}