Commit 2bbaf95ebe

Author: Andrew Kelley <andrew@ziglang.org>
Date: 2022-11-22 01:17:50
Merge pull request #11828 from devins2518/arm-atomics
compiler_rt: aarch64 outline atomics
Parent: a3232c6
lib/compiler_rt/aarch64_outline_atomics.zig
@@ -0,0 +1,2227 @@
+//! This file is generated by tools/gen_outline_atomics.zig.
+const builtin = @import("builtin");
+const std = @import("std");
+const linkage = @import("./common.zig").linkage;
+const always_has_lse = std.Target.aarch64.featureSetHas(builtin.cpu.features, .lse);
+
+/// This default is overridden at runtime after inspecting CPU properties.
+/// It is intentionally not exported in order to make the machine code that
+/// uses it a statically predicted direct branch rather than using the PLT,
+/// which ARM is concerned would have too much overhead.
+var __aarch64_have_lse_atomics: u8 = @boolToInt(always_has_lse);
+
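[editor's note] The runtime override described above is performed by startup
code outside this file. As a hedged illustration only (not part of this
commit), on Linux the probe could use getauxval, since HWCAP_ATOMICS is bit 8
of AT_HWCAP on aarch64; the function name below is otherwise hypothetical.

    // Illustrative sketch (assumption, not this commit's code):
    const HWCAP_ATOMICS: usize = 1 << 8; // Linux aarch64 hwcap bit for LSE
    fn detectLseAtomics() void {
        const hwcap = std.os.linux.getauxval(std.elf.AT_HWCAP);
        __aarch64_have_lse_atomics = @boolToInt((hwcap & HWCAP_ATOMICS) != 0);
    }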
+fn __aarch64_cas1_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0x00000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        uxtb    w16, w0
+        \\0:
+        \\        ldxrb   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stxrb   w17, w1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
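[editor's note] The .inst operand sums compose an LSE instruction encoding by
plain addition. Decoded against the ARMv8.1-A encoding tables (the same scheme
as compiler-rt's lse.S; an assumption, since this file is generated):

    // 0x08a07c41                             = casb w16, w1, [x2]
    // + 0x40000000 / 0x80000000 / 0xc0000000 selects the h/w/x size variants
    // + 0x400000 (L, bit 22)                 adds acquire -> casab ...
    // + 0x008000 (o0, bit 15)                adds release -> caslb ...

In the fallback path the same orderings come instead from the exclusive pair:
ldaxrb in the _acq variants and stlxrb in the _rel variants below.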
+fn __aarch64_swp1_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0x00000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrb   w0, [x1]
+        \\        stxrb   w17, w16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
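[editor's note] SWP composes the same way, but its acquire/release flags sit
one bit higher than CAS's (assumed per the same encoding tables):

    // 0x38208020              = swpb w16, w0, [x1]
    // + size bits as above
    // + 0x800000 (A, bit 23)  adds acquire
    // + 0x400000 (R, bit 22)  adds release (hence 0xc00000 in _acq_rel)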
+fn __aarch64_ldadd1_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrb   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
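[editor's note] The LD<op> family shares one base encoding with an opcode
field at bits 14:12, which is why only the second addend varies across the
next few functions (again assumed from the standard encoding tables):

    // 0x38200020 + 0x0000 = ldaddb  (fallback loop uses add)
    //            + 0x1000 = ldclrb  (fallback loop uses bic)
    //            + 0x2000 = ldeorb  (fallback loop uses eor)
    //            + 0x3000 = ldsetb  (fallback loop uses orr)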
+fn __aarch64_ldclr1_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrb   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor1_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrb   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset1_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrb   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas1_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0x00000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        uxtb    w16, w0
+        \\0:
+        \\        ldaxrb   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stxrb   w17, w1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp1_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0x00000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrb   w0, [x1]
+        \\        stxrb   w17, w16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd1_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrb   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr1_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrb   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor1_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrb   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset1_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrb   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas1_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0x00000000 + 0x008000
+        \\        ret
+        \\8:
+        \\        uxtb    w16, w0
+        \\0:
+        \\        ldxrb   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stlxrb   w17, w1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp1_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0x00000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrb   w0, [x1]
+        \\        stlxrb   w17, w16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd1_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrb   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stlxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr1_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrb   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stlxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor1_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrb   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stlxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset1_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrb   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stlxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas1_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0x00000000 + 0x408000
+        \\        ret
+        \\8:
+        \\        uxtb    w16, w0
+        \\0:
+        \\        ldaxrb   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stlxrb   w17, w1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp1_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0x00000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrb   w0, [x1]
+        \\        stlxrb   w17, w16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd1_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrb   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stlxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr1_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrb   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stlxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor1_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrb   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stlxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset1_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrb   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stlxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
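[editor's note] Everything below repeats the one-byte template for the wider
sizes: the 2-, 4-, and 8-byte families add 0x40000000, 0x80000000, and
0xc0000000 respectively, widen the expected CAS value with uxth/mov instead of
uxtb, and switch the fallback loops to the matching ldxrh/ldxr (and, in the
8-byte family, x-register) exclusive pairs.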
+fn __aarch64_cas2_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0x40000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        uxth    w16, w0
+        \\0:
+        \\        ldxrh   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stxrh   w17, w1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp2_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0x40000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrh   w0, [x1]
+        \\        stxrh   w17, w16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd2_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrh   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr2_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrh   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor2_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrh   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset2_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrh   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas2_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0x40000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        uxth    w16, w0
+        \\0:
+        \\        ldaxrh   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stxrh   w17, w1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp2_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0x40000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrh   w0, [x1]
+        \\        stxrh   w17, w16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd2_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrh   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr2_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrh   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor2_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrh   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset2_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrh   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas2_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0x40000000 + 0x008000
+        \\        ret
+        \\8:
+        \\        uxth    w16, w0
+        \\0:
+        \\        ldxrh   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stlxrh   w17, w1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp2_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0x40000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrh   w0, [x1]
+        \\        stlxrh   w17, w16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd2_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrh   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stlxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr2_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrh   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stlxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor2_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrh   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stlxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset2_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxrh   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stlxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas2_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0x40000000 + 0x408000
+        \\        ret
+        \\8:
+        \\        uxth    w16, w0
+        \\0:
+        \\        ldaxrh   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stlxrh   w17, w1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp2_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0x40000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrh   w0, [x1]
+        \\        stlxrh   w17, w16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd2_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrh   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stlxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr2_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrh   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stlxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor2_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrh   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stlxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset2_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxrh   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stlxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas4_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0x80000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxr   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stxr   w17, w1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp4_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0x80000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxr   w0, [x1]
+        \\        stxr   w17, w16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd4_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxr   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr4_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxr   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor4_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxr   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset4_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxr   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas4_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0x80000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxr   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stxr   w17, w1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp4_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0x80000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxr   w0, [x1]
+        \\        stxr   w17, w16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd4_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxr   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr4_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxr   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor4_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxr   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset4_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxr   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas4_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0x80000000 + 0x008000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxr   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stlxr   w17, w1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp4_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0x80000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxr   w0, [x1]
+        \\        stlxr   w17, w16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd4_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxr   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr4_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxr   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor4_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxr   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset4_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldxr   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas4_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0x80000000 + 0x408000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxr   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stlxr   w17, w1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp4_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0x80000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxr   w0, [x1]
+        \\        stlxr   w17, w16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd4_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxr   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr4_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxr   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor4_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxr   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset4_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    w16, w0
+        \\0:
+        \\        ldaxr   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas8_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0xc0000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldxr   x0, [x2]
+        \\        cmp    x0, x16
+        \\        bne    1f
+        \\        stxr   w17, x1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp8_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0xc0000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldxr   x0, [x1]
+        \\        stxr   w17, x16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd8_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldxr   x0, [x1]
+        \\        add     x17, x0, x16
+        \\        stxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr8_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldxr   x0, [x1]
+        \\        bic     x17, x0, x16
+        \\        stxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor8_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldxr   x0, [x1]
+        \\        eor     x17, x0, x16
+        \\        stxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset8_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldxr   x0, [x1]
+        \\        orr     x17, x0, x16
+        \\        stxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas8_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0xc0000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldaxr   x0, [x2]
+        \\        cmp    x0, x16
+        \\        bne    1f
+        \\        stxr   w17, x1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp8_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0xc0000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldaxr   x0, [x1]
+        \\        stxr   w17, x16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd8_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldaxr   x0, [x1]
+        \\        add     x17, x0, x16
+        \\        stxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr8_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldaxr   x0, [x1]
+        \\        bic     x17, x0, x16
+        \\        stxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor8_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldaxr   x0, [x1]
+        \\        eor     x17, x0, x16
+        \\        stxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset8_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x800000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldaxr   x0, [x1]
+        \\        orr     x17, x0, x16
+        \\        stxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas8_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0xc0000000 + 0x008000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldxr   x0, [x2]
+        \\        cmp    x0, x16
+        \\        bne    1f
+        \\        stlxr   w17, x1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp8_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0xc0000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldxr   x0, [x1]
+        \\        stlxr   w17, x16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd8_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldxr   x0, [x1]
+        \\        add     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr8_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldxr   x0, [x1]
+        \\        bic     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor8_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldxr   x0, [x1]
+        \\        eor     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset8_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldxr   x0, [x1]
+        \\        orr     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas8_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x08a07c41 + 0xc0000000 + 0x408000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldaxr   x0, [x2]
+        \\        cmp    x0, x16
+        \\        bne    1f
+        \\        stlxr   w17, x1, [x2]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_swp8_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + 0xc0000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldaxr   x0, [x1]
+        \\        stlxr   w17, x16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldadd8_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldaxr   x0, [x1]
+        \\        add     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldclr8_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldaxr   x0, [x1]
+        \\        bic     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldeor8_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldaxr   x0, [x1]
+        \\        eor     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_ldset8_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0xc00000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\0:
+        \\        ldaxr   x0, [x1]
+        \\        orr     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
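+// The 16-byte helpers exist only for compare-and-swap: the LSE fast path is
+// CASP (base encoding 0x48207c82 plus ordering bits), and the fallback uses
+// LDXP/STXP, comparing both doublewords with cmp + ccmp before attempting
+// the store.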
+fn __aarch64_cas16_relax() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x48207c82 + 0x000000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\        mov    x17, x1
+        \\0:
+        \\        ldxp   x0, x1, [x4]
+        \\        cmp    x0, x16
+        \\        ccmp   x1, x17, #0, eq
+        \\        bne    1f
+        \\        stxp   w15, x2, x3, [x4]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas16_acq() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x48207c82 + 0x400000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\        mov    x17, x1
+        \\0:
+        \\        ldaxp   x0, x1, [x4]
+        \\        cmp    x0, x16
+        \\        ccmp   x1, x17, #0, eq
+        \\        bne    1f
+        \\        stxp   w15, x2, x3, [x4]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas16_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x48207c82 + 0x008000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\        mov    x17, x1
+        \\0:
+        \\        ldxp   x0, x1, [x4]
+        \\        cmp    x0, x16
+        \\        ccmp   x1, x17, #0, eq
+        \\        bne    1f
+        \\        stlxp   w15, x2, x3, [x4]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+fn __aarch64_cas16_acq_rel() align(16) callconv(.Naked) void {
+    @setRuntimeSafety(false);
+    asm volatile (
+        \\        cbz     w16, 8f
+        \\        .inst 0x48207c82 + 0x408000
+        \\        ret
+        \\8:
+        \\        mov    x16, x0
+        \\        mov    x17, x1
+        \\0:
+        \\        ldaxp   x0, x1, [x4]
+        \\        cmp    x0, x16
+        \\        ccmp   x1, x17, #0, eq
+        \\        bne    1f
+        \\        stlxp   w15, x2, x3, [x4]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+        :
+        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        : "w15", "w16", "w17", "memory"
+    );
+    unreachable;
+}
+
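+// Export every helper under its C name so that code compiled with outline
+// atomics enabled (e.g. -moutline-atomics) resolves its calls here.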
+comptime {
+    @export(__aarch64_cas1_relax, .{ .name = "__aarch64_cas1_relax", .linkage = linkage });
+    @export(__aarch64_swp1_relax, .{ .name = "__aarch64_swp1_relax", .linkage = linkage });
+    @export(__aarch64_ldadd1_relax, .{ .name = "__aarch64_ldadd1_relax", .linkage = linkage });
+    @export(__aarch64_ldclr1_relax, .{ .name = "__aarch64_ldclr1_relax", .linkage = linkage });
+    @export(__aarch64_ldeor1_relax, .{ .name = "__aarch64_ldeor1_relax", .linkage = linkage });
+    @export(__aarch64_ldset1_relax, .{ .name = "__aarch64_ldset1_relax", .linkage = linkage });
+    @export(__aarch64_cas1_acq, .{ .name = "__aarch64_cas1_acq", .linkage = linkage });
+    @export(__aarch64_swp1_acq, .{ .name = "__aarch64_swp1_acq", .linkage = linkage });
+    @export(__aarch64_ldadd1_acq, .{ .name = "__aarch64_ldadd1_acq", .linkage = linkage });
+    @export(__aarch64_ldclr1_acq, .{ .name = "__aarch64_ldclr1_acq", .linkage = linkage });
+    @export(__aarch64_ldeor1_acq, .{ .name = "__aarch64_ldeor1_acq", .linkage = linkage });
+    @export(__aarch64_ldset1_acq, .{ .name = "__aarch64_ldset1_acq", .linkage = linkage });
+    @export(__aarch64_cas1_rel, .{ .name = "__aarch64_cas1_rel", .linkage = linkage });
+    @export(__aarch64_swp1_rel, .{ .name = "__aarch64_swp1_rel", .linkage = linkage });
+    @export(__aarch64_ldadd1_rel, .{ .name = "__aarch64_ldadd1_rel", .linkage = linkage });
+    @export(__aarch64_ldclr1_rel, .{ .name = "__aarch64_ldclr1_rel", .linkage = linkage });
+    @export(__aarch64_ldeor1_rel, .{ .name = "__aarch64_ldeor1_rel", .linkage = linkage });
+    @export(__aarch64_ldset1_rel, .{ .name = "__aarch64_ldset1_rel", .linkage = linkage });
+    @export(__aarch64_cas1_acq_rel, .{ .name = "__aarch64_cas1_acq_rel", .linkage = linkage });
+    @export(__aarch64_swp1_acq_rel, .{ .name = "__aarch64_swp1_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldadd1_acq_rel, .{ .name = "__aarch64_ldadd1_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldclr1_acq_rel, .{ .name = "__aarch64_ldclr1_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldeor1_acq_rel, .{ .name = "__aarch64_ldeor1_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldset1_acq_rel, .{ .name = "__aarch64_ldset1_acq_rel", .linkage = linkage });
+    @export(__aarch64_cas2_relax, .{ .name = "__aarch64_cas2_relax", .linkage = linkage });
+    @export(__aarch64_swp2_relax, .{ .name = "__aarch64_swp2_relax", .linkage = linkage });
+    @export(__aarch64_ldadd2_relax, .{ .name = "__aarch64_ldadd2_relax", .linkage = linkage });
+    @export(__aarch64_ldclr2_relax, .{ .name = "__aarch64_ldclr2_relax", .linkage = linkage });
+    @export(__aarch64_ldeor2_relax, .{ .name = "__aarch64_ldeor2_relax", .linkage = linkage });
+    @export(__aarch64_ldset2_relax, .{ .name = "__aarch64_ldset2_relax", .linkage = linkage });
+    @export(__aarch64_cas2_acq, .{ .name = "__aarch64_cas2_acq", .linkage = linkage });
+    @export(__aarch64_swp2_acq, .{ .name = "__aarch64_swp2_acq", .linkage = linkage });
+    @export(__aarch64_ldadd2_acq, .{ .name = "__aarch64_ldadd2_acq", .linkage = linkage });
+    @export(__aarch64_ldclr2_acq, .{ .name = "__aarch64_ldclr2_acq", .linkage = linkage });
+    @export(__aarch64_ldeor2_acq, .{ .name = "__aarch64_ldeor2_acq", .linkage = linkage });
+    @export(__aarch64_ldset2_acq, .{ .name = "__aarch64_ldset2_acq", .linkage = linkage });
+    @export(__aarch64_cas2_rel, .{ .name = "__aarch64_cas2_rel", .linkage = linkage });
+    @export(__aarch64_swp2_rel, .{ .name = "__aarch64_swp2_rel", .linkage = linkage });
+    @export(__aarch64_ldadd2_rel, .{ .name = "__aarch64_ldadd2_rel", .linkage = linkage });
+    @export(__aarch64_ldclr2_rel, .{ .name = "__aarch64_ldclr2_rel", .linkage = linkage });
+    @export(__aarch64_ldeor2_rel, .{ .name = "__aarch64_ldeor2_rel", .linkage = linkage });
+    @export(__aarch64_ldset2_rel, .{ .name = "__aarch64_ldset2_rel", .linkage = linkage });
+    @export(__aarch64_cas2_acq_rel, .{ .name = "__aarch64_cas2_acq_rel", .linkage = linkage });
+    @export(__aarch64_swp2_acq_rel, .{ .name = "__aarch64_swp2_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldadd2_acq_rel, .{ .name = "__aarch64_ldadd2_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldclr2_acq_rel, .{ .name = "__aarch64_ldclr2_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldeor2_acq_rel, .{ .name = "__aarch64_ldeor2_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldset2_acq_rel, .{ .name = "__aarch64_ldset2_acq_rel", .linkage = linkage });
+    @export(__aarch64_cas4_relax, .{ .name = "__aarch64_cas4_relax", .linkage = linkage });
+    @export(__aarch64_swp4_relax, .{ .name = "__aarch64_swp4_relax", .linkage = linkage });
+    @export(__aarch64_ldadd4_relax, .{ .name = "__aarch64_ldadd4_relax", .linkage = linkage });
+    @export(__aarch64_ldclr4_relax, .{ .name = "__aarch64_ldclr4_relax", .linkage = linkage });
+    @export(__aarch64_ldeor4_relax, .{ .name = "__aarch64_ldeor4_relax", .linkage = linkage });
+    @export(__aarch64_ldset4_relax, .{ .name = "__aarch64_ldset4_relax", .linkage = linkage });
+    @export(__aarch64_cas4_acq, .{ .name = "__aarch64_cas4_acq", .linkage = linkage });
+    @export(__aarch64_swp4_acq, .{ .name = "__aarch64_swp4_acq", .linkage = linkage });
+    @export(__aarch64_ldadd4_acq, .{ .name = "__aarch64_ldadd4_acq", .linkage = linkage });
+    @export(__aarch64_ldclr4_acq, .{ .name = "__aarch64_ldclr4_acq", .linkage = linkage });
+    @export(__aarch64_ldeor4_acq, .{ .name = "__aarch64_ldeor4_acq", .linkage = linkage });
+    @export(__aarch64_ldset4_acq, .{ .name = "__aarch64_ldset4_acq", .linkage = linkage });
+    @export(__aarch64_cas4_rel, .{ .name = "__aarch64_cas4_rel", .linkage = linkage });
+    @export(__aarch64_swp4_rel, .{ .name = "__aarch64_swp4_rel", .linkage = linkage });
+    @export(__aarch64_ldadd4_rel, .{ .name = "__aarch64_ldadd4_rel", .linkage = linkage });
+    @export(__aarch64_ldclr4_rel, .{ .name = "__aarch64_ldclr4_rel", .linkage = linkage });
+    @export(__aarch64_ldeor4_rel, .{ .name = "__aarch64_ldeor4_rel", .linkage = linkage });
+    @export(__aarch64_ldset4_rel, .{ .name = "__aarch64_ldset4_rel", .linkage = linkage });
+    @export(__aarch64_cas4_acq_rel, .{ .name = "__aarch64_cas4_acq_rel", .linkage = linkage });
+    @export(__aarch64_swp4_acq_rel, .{ .name = "__aarch64_swp4_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldadd4_acq_rel, .{ .name = "__aarch64_ldadd4_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldclr4_acq_rel, .{ .name = "__aarch64_ldclr4_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldeor4_acq_rel, .{ .name = "__aarch64_ldeor4_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldset4_acq_rel, .{ .name = "__aarch64_ldset4_acq_rel", .linkage = linkage });
+    @export(__aarch64_cas8_relax, .{ .name = "__aarch64_cas8_relax", .linkage = linkage });
+    @export(__aarch64_swp8_relax, .{ .name = "__aarch64_swp8_relax", .linkage = linkage });
+    @export(__aarch64_ldadd8_relax, .{ .name = "__aarch64_ldadd8_relax", .linkage = linkage });
+    @export(__aarch64_ldclr8_relax, .{ .name = "__aarch64_ldclr8_relax", .linkage = linkage });
+    @export(__aarch64_ldeor8_relax, .{ .name = "__aarch64_ldeor8_relax", .linkage = linkage });
+    @export(__aarch64_ldset8_relax, .{ .name = "__aarch64_ldset8_relax", .linkage = linkage });
+    @export(__aarch64_cas8_acq, .{ .name = "__aarch64_cas8_acq", .linkage = linkage });
+    @export(__aarch64_swp8_acq, .{ .name = "__aarch64_swp8_acq", .linkage = linkage });
+    @export(__aarch64_ldadd8_acq, .{ .name = "__aarch64_ldadd8_acq", .linkage = linkage });
+    @export(__aarch64_ldclr8_acq, .{ .name = "__aarch64_ldclr8_acq", .linkage = linkage });
+    @export(__aarch64_ldeor8_acq, .{ .name = "__aarch64_ldeor8_acq", .linkage = linkage });
+    @export(__aarch64_ldset8_acq, .{ .name = "__aarch64_ldset8_acq", .linkage = linkage });
+    @export(__aarch64_cas8_rel, .{ .name = "__aarch64_cas8_rel", .linkage = linkage });
+    @export(__aarch64_swp8_rel, .{ .name = "__aarch64_swp8_rel", .linkage = linkage });
+    @export(__aarch64_ldadd8_rel, .{ .name = "__aarch64_ldadd8_rel", .linkage = linkage });
+    @export(__aarch64_ldclr8_rel, .{ .name = "__aarch64_ldclr8_rel", .linkage = linkage });
+    @export(__aarch64_ldeor8_rel, .{ .name = "__aarch64_ldeor8_rel", .linkage = linkage });
+    @export(__aarch64_ldset8_rel, .{ .name = "__aarch64_ldset8_rel", .linkage = linkage });
+    @export(__aarch64_cas8_acq_rel, .{ .name = "__aarch64_cas8_acq_rel", .linkage = linkage });
+    @export(__aarch64_swp8_acq_rel, .{ .name = "__aarch64_swp8_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldadd8_acq_rel, .{ .name = "__aarch64_ldadd8_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldclr8_acq_rel, .{ .name = "__aarch64_ldclr8_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldeor8_acq_rel, .{ .name = "__aarch64_ldeor8_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldset8_acq_rel, .{ .name = "__aarch64_ldset8_acq_rel", .linkage = linkage });
+    @export(__aarch64_cas16_relax, .{ .name = "__aarch64_cas16_relax", .linkage = linkage });
+    @export(__aarch64_cas16_acq, .{ .name = "__aarch64_cas16_acq", .linkage = linkage });
+    @export(__aarch64_cas16_rel, .{ .name = "__aarch64_cas16_rel", .linkage = linkage });
+    @export(__aarch64_cas16_acq_rel, .{ .name = "__aarch64_cas16_acq_rel", .linkage = linkage });
+}
lib/compiler_rt.zig
@@ -1,8 +1,15 @@
+const builtin = @import("builtin");
+
 pub const panic = @import("compiler_rt/common.zig").panic;
 
 comptime {
     _ = @import("compiler_rt/atomics.zig");
 
+    // Darwin provides these functions in libSystem, so they are not redefined there.
+    if (builtin.cpu.arch.isAARCH64() and !builtin.os.tag.isDarwin()) {
+        _ = @import("compiler_rt/aarch64_outline_atomics.zig");
+    }
+
     _ = @import("compiler_rt/addf3.zig");
     _ = @import("compiler_rt/addhf3.zig");
     _ = @import("compiler_rt/addsf3.zig");
tools/gen_outline_atomics.zig
@@ -0,0 +1,338 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
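+/// The operation families for which helpers are generated: compare-and-swap,
+/// swap, and the fetch-and-op primitives (add, clear/AND-NOT, exclusive-or,
+/// set/OR).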
+const AtomicOp = enum {
+    cas,
+    swp,
+    ldadd,
+    ldclr,
+    ldeor,
+    ldset,
+};
+
+pub fn main() !void {
+    var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena_instance.deinit();
+    const arena = arena_instance.allocator();
+
+    var bw = std.io.bufferedWriter(std.io.getStdOut().writer());
+    const w = bw.writer();
+
+    try w.writeAll(
+        \\//! This file is generated by tools/gen_outline_atomics.zig.
+        \\const builtin = @import("builtin");
+        \\const std = @import("std");
+        \\const linkage = @import("./common.zig").linkage;
+        \\const always_has_lse = std.Target.aarch64.featureSetHas(builtin.cpu.features, .lse);
+        \\
+        \\/// This default is overridden at runtime after inspecting CPU properties.
+        \\/// It is intentionally not exported in order to make the machine code that
+        \\/// uses it a statically predicted direct branch rather than using the PLT,
+        \\/// which ARM is concerned would have too much overhead.
+        \\var __aarch64_have_lse_atomics: u8 = @boolToInt(always_has_lse);
+        \\
+        \\
+    );
+
+    var footer = std.ArrayList(u8).init(arena);
+    try footer.appendSlice("\ncomptime {\n");
+
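+    // Emit one helper per (size, ordering, operation) combination, skipping
+    // the non-cas operations at the 16-byte size, which only CASP supports:
+    // 4 sizes * 4 orderings * 6 ops plus the 4 cas16 variants, 100 functions
+    // in total. The matching @export lines accumulate in the footer.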
+    for ([_]N{ .one, .two, .four, .eight, .sixteen }) |n| {
+        for ([_]Ordering{ .relax, .acq, .rel, .acq_rel }) |order| {
+            for ([_]AtomicOp{ .cas, .swp, .ldadd, .ldclr, .ldeor, .ldset }) |op| {
+                if (n == .sixteen and op != .cas) continue;
+
+                const name = try std.fmt.allocPrint(arena, "__aarch64_{s}{d}_{s}", .{
+                    @tagName(op), n.toBytes(), @tagName(order),
+                });
+                try writeFunction(arena, w, name, op, n, order);
+                try footer.writer().print("    @export({s}, .{{ .name = \"{s}\", .linkage = linkage }});\n", .{
+                    name, name,
+                });
+            }
+        }
+    }
+
+    try w.writeAll(footer.items);
+    try w.writeAll("}\n");
+    try bw.flush();
+}
+
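+/// Writes one naked, align(16) function wrapping the given assembly body.
+/// The input constraint pins __aarch64_have_lse_atomics into w16, and w15-w17
+/// are declared clobbered because the fallback loops use them as scratch
+/// registers.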
+fn writeFunction(
+    arena: Allocator,
+    w: anytype,
+    name: []const u8,
+    op: AtomicOp,
+    n: N,
+    order: Ordering,
+) !void {
+    const body = switch (op) {
+        .cas => try generateCas(arena, n, order),
+        .swp => try generateSwp(arena, n, order),
+        .ldadd => try generateLd(arena, n, order, .ldadd),
+        .ldclr => try generateLd(arena, n, order, .ldclr),
+        .ldeor => try generateLd(arena, n, order, .ldeor),
+        .ldset => try generateLd(arena, n, order, .ldset),
+    };
+    const fn_sig = try std.fmt.allocPrint(
+        arena,
+        "fn {[name]s}() align(16) callconv(.Naked) void {{",
+        .{ .name = name },
+    );
+    try w.writeAll(fn_sig);
+    try w.writeAll(
+        \\
+        \\    @setRuntimeSafety(false);
+        \\    asm volatile (
+        \\
+    );
+    var iter = std.mem.split(u8, body, "\n");
+    while (iter.next()) |line| {
+        try w.writeAll("        \\\\");
+        try w.writeAll(line);
+        try w.writeAll("\n");
+    }
+    try w.writeAll(
+        \\        :
+        \\        : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
+        \\        : "w15", "w16", "w17", "memory"
+        \\    );
+        \\    unreachable;
+        \\}
+        \\
+    );
+}
+
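+/// Operand size in bytes. The size selects the register width (w below
+/// 8 bytes, x otherwise) and the size bits folded into the raw encodings.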
+const N = enum(u8) {
+    one = 1,
+    two = 2,
+    four = 4,
+    eight = 8,
+    sixteen = 16,
+
+    const Defines = struct {
+        s: []const u8,
+        uxt: []const u8,
+        b: []const u8,
+    };
+
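+    /// s is the size suffix for the exclusive load/store mnemonics (b, h, or
+    /// none), uxt widens the expected value for sub-word cas (plain mov for
+    /// word and larger), and b is the size field (bits 31:30) added to the
+    /// raw instruction encodings.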
+    fn defines(n: N) Defines {
+        const s = switch (n) {
+            .one => "b",
+            .two => "h",
+            else => "",
+        };
+        const uxt = switch (n) {
+            .one => "uxtb",
+            .two => "uxth",
+            .four, .eight, .sixteen => "mov",
+        };
+        const b = switch (n) {
+            .one => "0x00000000",
+            .two => "0x40000000",
+            .four => "0x80000000",
+            .eight => "0xc0000000",
+            else => "0x00000000",
+        };
+        return Defines{
+            .s = s,
+            .uxt = uxt,
+            .b = b,
+        };
+    }
+
+    fn register(n: N) []const u8 {
+        return if (@enumToInt(n) < 8) "w" else "x";
+    }
+
+    fn toBytes(n: N) u8 {
+        return @enumToInt(n);
+    }
+
+    fn toBits(n: N) u8 {
+        return n.toBytes() * 8;
+    }
+};
+
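+/// Memory ordering variant. suff is the name suffix ("_relax" etc.); a and l
+/// are the acquire/release letters spliced into the exclusive load/store
+/// mnemonics; m and n are the ordering bits added to the raw encodings
+/// (m for cas/casp: acquire = bit 22, release = bit 15; n for swp and the
+/// ld* ops: acquire = bit 23, release = bit 22).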
+const Ordering = enum {
+    relax,
+    acq,
+    rel,
+    acq_rel,
+
+    const Defines = struct {
+        suff: []const u8,
+        a: []const u8,
+        l: []const u8,
+        m: []const u8,
+        n: []const u8,
+    };
+    fn defines(self: @This()) Defines {
+        const suff = switch (self) {
+            .relax => "_relax",
+            .acq => "_acq",
+            .rel => "_rel",
+            .acq_rel => "_acq_rel",
+        };
+        const a = switch (self) {
+            .relax => "",
+            .acq => "a",
+            .rel => "",
+            .acq_rel => "a",
+        };
+        const l = switch (self) {
+            .relax => "",
+            .acq => "",
+            .rel => "l",
+            .acq_rel => "l",
+        };
+        const m = switch (self) {
+            .relax => "0x000000",
+            .acq => "0x400000",
+            .rel => "0x008000",
+            .acq_rel => "0x408000",
+        };
+        const n = switch (self) {
+            .relax => "0x000000",
+            .acq => "0x800000",
+            .rel => "0x400000",
+            .acq_rel => "0xc00000",
+        };
+        return .{ .suff = suff, .a = a, .l = l, .m = m, .n = n };
+    }
+};
+
+const LdName = enum { ldadd, ldclr, ldeor, ldset };
+
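+/// Generates the body of a cas helper: an LSE fast path (CAS, or CASP for
+/// the 16-byte variant) followed by a load/store-exclusive fallback loop.
+/// The 16-byte fallback compares both doublewords with cmp + ccmp.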
+fn generateCas(arena: Allocator, n: N, order: Ordering) ![]const u8 {
+    const s_def = n.defines();
+    const o_def = order.defines();
+
+    const reg = n.register();
+
+    if (@enumToInt(n) < 16) {
+        const cas = try std.fmt.allocPrint(arena, ".inst 0x08a07c41 + {s} + {s}", .{ s_def.b, o_def.m });
+        const ldxr = try std.fmt.allocPrint(arena, "ld{s}xr{s}", .{ o_def.a, s_def.s });
+        const stxr = try std.fmt.allocPrint(arena, "st{s}xr{s}", .{ o_def.l, s_def.s });
+
+        return try std.fmt.allocPrint(arena,
+            \\        cbz     w16, 8f
+            \\        {[cas]s}
+            \\        ret
+            \\8:
+            \\        {[uxt]s}    {[reg]s}16, {[reg]s}0
+            \\0:
+            \\        {[ldxr]s}   {[reg]s}0, [x2]
+            \\        cmp    {[reg]s}0, {[reg]s}16
+            \\        bne    1f
+            \\        {[stxr]s}   w17, {[reg]s}1, [x2]
+            \\        cbnz   w17, 0b
+            \\1:
+            \\        ret
+        , .{
+            .cas = cas,
+            .uxt = s_def.uxt,
+            .ldxr = ldxr,
+            .stxr = stxr,
+            .reg = reg,
+        });
+    } else {
+        const casp = try std.fmt.allocPrint(arena, ".inst 0x48207c82 + {s}", .{o_def.m});
+        const ldxp = try std.fmt.allocPrint(arena, "ld{s}xp", .{o_def.a});
+        const stxp = try std.fmt.allocPrint(arena, "st{s}xp", .{o_def.l});
+
+        return try std.fmt.allocPrint(arena,
+            \\        cbz     w16, 8f
+            \\        {[casp]s}
+            \\        ret
+            \\8:
+            \\        mov    x16, x0
+            \\        mov    x17, x1
+            \\0:
+            \\        {[ldxp]s}   x0, x1, [x4]
+            \\        cmp    x0, x16
+            \\        ccmp   x1, x17, #0, eq
+            \\        bne    1f
+            \\        {[stxp]s}   w15, x2, x3, [x4]
+            \\        cbnz   w15, 0b
+            \\1:
+            \\        ret
+        , .{
+            .casp = casp,
+            .ldxp = ldxp,
+            .stxp = stxp,
+        });
+    }
+}
+
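+/// Generates the body of a swp helper: LSE SWP on the fast path, otherwise
+/// an exclusive load/store loop that leaves the old value in w0/x0.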
+fn generateSwp(arena: Allocator, n: N, order: Ordering) ![]const u8 {
+    const s_def = n.defines();
+    const o_def = order.defines();
+    const reg = n.register();
+
+    return try std.fmt.allocPrint(arena,
+        \\        cbz     w16, 8f
+        \\        .inst 0x38208020 + {[b]s} + {[n]s}
+        \\        ret
+        \\8:
+        \\        mov    {[reg]s}16, {[reg]s}0
+        \\0:
+        \\        ld{[a]s}xr{[s]s}   {[reg]s}0, [x1]
+        \\        st{[l]s}xr{[s]s}   w17, {[reg]s}16, [x1]
+        \\        cbnz   w17, 0b
+        \\1:
+        \\        ret
+    , .{
+        .b = s_def.b,
+        .n = o_def.n,
+        .reg = reg,
+        .s = s_def.s,
+        .a = o_def.a,
+        .l = o_def.l,
+    });
+}
+
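+/// Generates the body of a fetch-and-op helper: an LSE ld<op> instruction on
+/// the fast path, otherwise an exclusive loop that computes the new value
+/// with add/bic/eor/orr. op_n carries the operation-select bits added to the
+/// base encoding.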
+fn generateLd(arena: Allocator, n: N, order: Ordering, ld: LdName) ![]const u8 {
+    const s_def = n.defines();
+    const o_def = order.defines();
+    const op = switch (ld) {
+        .ldadd => "add",
+        .ldclr => "bic",
+        .ldeor => "eor",
+        .ldset => "orr",
+    };
+    const op_n = switch (ld) {
+        .ldadd => "0x0000",
+        .ldclr => "0x1000",
+        .ldeor => "0x2000",
+        .ldset => "0x3000",
+    };
+
+    const reg = n.register();
+
+    return try std.fmt.allocPrint(arena,
+        \\        cbz     w16, 8f
+        \\        .inst 0x38200020 + {[op_n]s} + {[b]s} + {[n]s}
+        \\        ret
+        \\8:
+        \\        mov    {[reg]s}16, {[reg]s}0
+        \\0:
+        \\        ld{[a]s}xr{[s]s}   {[reg]s}0, [x1]
+        \\        {[op]s}     {[reg]s}17, {[reg]s}0, {[reg]s}16
+        \\        st{[l]s}xr{[s]s}   w15, {[reg]s}17, [x1]
+        \\        cbnz   w15, 0b
+        \\1:
+        \\        ret
+    , .{
+        .op_n = op_n,
+        .b = s_def.b,
+        .n = o_def.n,
+        .s = s_def.s,
+        .a = o_def.a,
+        .l = o_def.l,
+        .op = op,
+        .reg = reg,
+    });
+}