Commit 95ee8ab77d

Andrew Kelley <andrew@ziglang.org>
2022-11-21 20:36:26
simplify outline atomics
* Rely on libSystem when targeting macOS. * Make tools/gen_outline_atomics.zig more idiomatic. * Remove the CPU detection / auxval checking from compiler_rt. This functionality belongs in a different component. Zig's compiler_rt must not rely on constructors. Instead it will export a symbol for setting the value, and start code can detect and activate it. * Remove the separate logic for inline assembly when the target does or does not have lse support. `.inst` works in both cases.
1 parent a8f2d00
Changed files (3)
lib/compiler_rt/lse_atomics.zig
@@ -1,23 +1,20 @@
-const std = @import("std");
+//! This file is generated by tools/gen_outline_atomics.zig.
 const builtin = @import("builtin");
-const arch = builtin.cpu.arch;
-const is_test = builtin.is_test;
-const target = std.Target;
-const os_tag = builtin.os.tag;
-const is_darwin = target.Os.Tag.isDarwin(os_tag);
-const has_lse = target.aarch64.featureSetHas(builtin.target.cpu.features, .lse);
-const linkage = if (is_test)
-    std.builtin.GlobalLinkage.Internal
-else
-    std.builtin.GlobalLinkage.Strong;
+const std = @import("std");
+const linkage = @import("./common.zig").linkage;
+const always_has_lse = std.Target.aarch64.featureSetHas(builtin.cpu.features, .lse);
 
-fn cas1RelaxDarwinLse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
+/// This default is overridden at runtime after inspecting CPU properties.
+/// It is intentionally not exported in order to make the machine code that
+/// uses it a statically predicted direct branch rather than using the PLT,
+/// which ARM is concerned would have too much overhead.
+var __aarch64_have_lse_atomics: u8 = @boolToInt(always_has_lse);
 
+fn __aarch64_cas1_relax(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
+    @setRuntimeSafety(false);
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        casb w0, w1, [x2]
+        \\        .inst 0x08a07c41 + 0x00000000 + 0x000000
         \\
         \\        cbz     wzr, 1f
         \\8:
@@ -37,107 +34,81 @@ fn cas1RelaxDarwinLse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas1RelaxDarwinNolse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_swp1_relax(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x00000000 + 0x000000
-        \\
+        \\        .inst 0x38208020 + 0x00000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
-        \\        uxtb    w16, w0
+        \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrb   w17, w1, [x2]
+        \\        ldxrb   w0, [x1]
+        \\        stxrb   w17, w16, [x1]
         \\        cbnz   w17, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [val] "{w0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas1RelaxNondarwinLse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldadd1_relax(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        casb w0, w1, [x2]
-        \\
+        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
-        \\        uxtb    w16, w0
+        \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrb   w17, w1, [x2]
-        \\        cbnz   w17, 0b
+        \\        ldxrb   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [val] "{w0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas1RelaxNondarwinNolse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldclr1_relax(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x00000000 + 0x000000
-        \\
+        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
-        \\        uxtb    w16, w0
+        \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrb   w17, w1, [x2]
-        \\        cbnz   w17, 0b
+        \\        ldxrb   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [val] "{w0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_cas1_relax = if (is_darwin)
-    if (has_lse)
-        cas1RelaxDarwinLse
-    else
-        cas1RelaxDarwinNolse
-else if (has_lse)
-    cas1RelaxNondarwinLse
-else
-    cas1RelaxNondarwinNolse;
-fn swp1RelaxDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldeor1_relax(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        swpb  w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
         \\        ldxrb   w0, [x1]
-        \\        stxrb   w17, w16, [x1]
-        \\        cbnz   w17, 0b
+        \\        eor     w17, w0, w16
+        \\        stxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
         : [val] "{w0}" (val),
@@ -146,20 +117,19 @@ fn swp1RelaxDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn swp1RelaxDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldset1_relax(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x00000000 + 0x000000
+        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
         \\        ldxrb   w0, [x1]
-        \\        stxrb   w17, w16, [x1]
-        \\        cbnz   w17, 0b
+        \\        orr     w17, w0, w16
+        \\        stxrb   w15, w17, [x1]
+        \\        cbnz   w15, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
         : [val] "{w0}" (val),
@@ -168,40 +138,40 @@ fn swp1RelaxDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn swp1RelaxNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_cas1_acq(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        swpb  w0, w0, [x1]
+        \\        .inst 0x08a07c41 + 0x00000000 + 0x400000
+        \\
         \\        cbz     wzr, 1f
         \\8:
-        \\        mov    w16, w0
+        \\        uxtb    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        stxrb   w17, w16, [x1]
+        \\        ldaxrb   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stxrb   w17, w1, [x2]
         \\        cbnz   w17, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
+        : [expected] "{w0}" (expected),
+          [desired] "{w1}" (desired),
+          [ptr] "{x2}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn swp1RelaxNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_swp1_acq(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x00000000 + 0x000000
+        \\        .inst 0x38208020 + 0x00000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
+        \\        ldaxrb   w0, [x1]
         \\        stxrb   w17, w16, [x1]
         \\        cbnz   w17, 0b
         \\1:
@@ -212,27 +182,16 @@ fn swp1RelaxNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_swp1_relax = if (is_darwin)
-    if (has_lse)
-        swp1RelaxDarwinLse
-    else
-        swp1RelaxDarwinNolse
-else if (has_lse)
-    swp1RelaxNondarwinLse
-else
-    swp1RelaxNondarwinNolse;
-fn ldadd1RelaxDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldadd1_acq(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldaddb w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
+        \\        ldaxrb   w0, [x1]
         \\        add     w17, w0, w16
         \\        stxrb   w15, w17, [x1]
         \\        cbnz   w15, 0b
@@ -244,19 +203,17 @@ fn ldadd1RelaxDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldadd1RelaxDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldclr1_acq(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0x000000
+        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        add     w17, w0, w16
+        \\        ldaxrb   w0, [x1]
+        \\        bic     w17, w0, w16
         \\        stxrb   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
@@ -267,19 +224,17 @@ fn ldadd1RelaxDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldadd1RelaxNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldeor1_acq(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldaddb w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        add     w17, w0, w16
+        \\        ldaxrb   w0, [x1]
+        \\        eor     w17, w0, w16
         \\        stxrb   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
@@ -290,19 +245,17 @@ fn ldadd1RelaxNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldadd1RelaxNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldset1_acq(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0x000000
+        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        add     w17, w0, w16
+        \\        ldaxrb   w0, [x1]
+        \\        orr     w17, w0, w16
         \\        stxrb   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
@@ -313,53 +266,42 @@ fn ldadd1RelaxNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldadd1_relax = if (is_darwin)
-    if (has_lse)
-        ldadd1RelaxDarwinLse
-    else
-        ldadd1RelaxDarwinNolse
-else if (has_lse)
-    ldadd1RelaxNondarwinLse
-else
-    ldadd1RelaxNondarwinNolse;
-fn ldclr1RelaxDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_cas1_rel(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldclrb w0, w0, [x1]
+        \\        .inst 0x08a07c41 + 0x00000000 + 0x008000
+        \\
         \\        cbz     wzr, 1f
         \\8:
-        \\        mov    w16, w0
+        \\        uxtb    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
+        \\        ldxrb   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stlxrb   w17, w1, [x2]
+        \\        cbnz   w17, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
+        : [expected] "{w0}" (expected),
+          [desired] "{w1}" (desired),
+          [ptr] "{x2}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldclr1RelaxDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_swp1_rel(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0x000000
+        \\        .inst 0x38208020 + 0x00000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
         \\        ldxrb   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
+        \\        stlxrb   w17, w16, [x1]
+        \\        cbnz   w17, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
         : [val] "{w0}" (val),
@@ -368,20 +310,18 @@ fn ldclr1RelaxDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldclr1RelaxNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldadd1_rel(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldclrb w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
         \\        ldxrb   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        add     w17, w0, w16
+        \\        stlxrb   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
@@ -391,20 +331,18 @@ fn ldclr1RelaxNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldclr1RelaxNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldclr1_rel(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0x000000
+        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
         \\        ldxrb   w0, [x1]
         \\        bic     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        stlxrb   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
@@ -414,29 +352,18 @@ fn ldclr1RelaxNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldclr1_relax = if (is_darwin)
-    if (has_lse)
-        ldclr1RelaxDarwinLse
-    else
-        ldclr1RelaxDarwinNolse
-else if (has_lse)
-    ldclr1RelaxNondarwinLse
-else
-    ldclr1RelaxNondarwinNolse;
-fn ldeor1RelaxDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldeor1_rel(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldeorb w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
         \\        ldxrb   w0, [x1]
         \\        eor     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        stlxrb   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
@@ -446,20 +373,18 @@ fn ldeor1RelaxDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldeor1RelaxDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldset1_rel(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0x000000
+        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
         \\        ldxrb   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        orr     w17, w0, w16
+        \\        stlxrb   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
@@ -469,44 +394,42 @@ fn ldeor1RelaxDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldeor1RelaxNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_cas1_acq_rel(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldeorb w0, w0, [x1]
+        \\        .inst 0x08a07c41 + 0x00000000 + 0x408000
+        \\
         \\        cbz     wzr, 1f
         \\8:
-        \\        mov    w16, w0
+        \\        uxtb    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
+        \\        ldaxrb   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stlxrb   w17, w1, [x2]
+        \\        cbnz   w17, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
+        : [expected] "{w0}" (expected),
+          [desired] "{w1}" (desired),
+          [ptr] "{x2}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldeor1RelaxNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_swp1_acq_rel(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0x000000
+        \\        .inst 0x38208020 + 0x00000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
+        \\        ldaxrb   w0, [x1]
+        \\        stlxrb   w17, w16, [x1]
+        \\        cbnz   w17, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
         : [val] "{w0}" (val),
@@ -515,29 +438,18 @@ fn ldeor1RelaxNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldeor1_relax = if (is_darwin)
-    if (has_lse)
-        ldeor1RelaxDarwinLse
-    else
-        ldeor1RelaxDarwinNolse
-else if (has_lse)
-    ldeor1RelaxNondarwinLse
-else
-    ldeor1RelaxNondarwinNolse;
-fn ldset1RelaxDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldadd1_acq_rel(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldsetb w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        ldaxrb   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stlxrb   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
@@ -547,20 +459,18 @@ fn ldset1RelaxDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldset1RelaxDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldclr1_acq_rel(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0x000000
+        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        ldaxrb   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stlxrb   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
@@ -570,20 +480,18 @@ fn ldset1RelaxDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldset1RelaxNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldeor1_acq_rel(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldsetb w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        ldaxrb   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stlxrb   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
@@ -593,20 +501,18 @@ fn ldset1RelaxNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldset1RelaxNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldset1_acq_rel(val: u8, ptr: *u8) callconv(.C) u8 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0x000000
+        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
+        \\        ldaxrb   w0, [x1]
         \\        orr     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        stlxrb   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={w0}" (-> u8),
@@ -616,34 +522,23 @@ fn ldset1RelaxNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldset1_relax = if (is_darwin)
-    if (has_lse)
-        ldset1RelaxDarwinLse
-    else
-        ldset1RelaxDarwinNolse
-else if (has_lse)
-    ldset1RelaxNondarwinLse
-else
-    ldset1RelaxNondarwinNolse;
-fn cas1AcqDarwinLse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_cas2_relax(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        casab w0, w1, [x2]
+        \\        .inst 0x08a07c41 + 0x40000000 + 0x000000
         \\
         \\        cbz     wzr, 1f
         \\8:
-        \\        uxtb    w16, w0
+        \\        uxth    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x2]
+        \\        ldxrh   w0, [x2]
         \\        cmp    w0, w16
         \\        bne    1f
-        \\        stxrb   w17, w1, [x2]
+        \\        stxrh   w17, w1, [x2]
         \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [expected] "{w0}" (expected),
           [desired] "{w1}" (desired),
           [ptr] "{x2}" (ptr),
@@ -651,613 +546,511 @@ fn cas1AcqDarwinLse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas1AcqDarwinNolse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_swp2_relax(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x00000000 + 0x400000
-        \\
+        \\        .inst 0x38208020 + 0x40000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
-        \\        uxtb    w16, w0
+        \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrb   w17, w1, [x2]
+        \\        ldxrh   w0, [x1]
+        \\        stxrh   w17, w16, [x1]
         \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={w0}" (-> u16),
+        : [val] "{w0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas1AcqNondarwinLse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldadd2_relax(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        casab w0, w1, [x2]
-        \\
+        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
-        \\        uxtb    w16, w0
+        \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrb   w17, w1, [x2]
-        \\        cbnz   w17, 0b
+        \\        ldxrh   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={w0}" (-> u16),
+        : [val] "{w0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas1AcqNondarwinNolse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldclr2_relax(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x00000000 + 0x400000
-        \\
+        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
-        \\        uxtb    w16, w0
+        \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrb   w17, w1, [x2]
-        \\        cbnz   w17, 0b
+        \\        ldxrh   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={w0}" (-> u16),
+        : [val] "{w0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_cas1_acq = if (is_darwin)
-    if (has_lse)
-        cas1AcqDarwinLse
-    else
-        cas1AcqDarwinNolse
-else if (has_lse)
-    cas1AcqNondarwinLse
-else
-    cas1AcqNondarwinNolse;
-fn swp1AcqDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldeor2_relax(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        swpab  w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        stxrb   w17, w16, [x1]
-        \\        cbnz   w17, 0b
+        \\        ldxrh   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn swp1AcqDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldset2_relax(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x00000000 + 0x800000
+        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        stxrb   w17, w16, [x1]
-        \\        cbnz   w17, 0b
+        \\        ldxrh   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
+        \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn swp1AcqNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_cas2_acq(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        swpab  w0, w0, [x1]
+        \\        .inst 0x08a07c41 + 0x40000000 + 0x400000
+        \\
         \\        cbz     wzr, 1f
         \\8:
-        \\        mov    w16, w0
+        \\        uxth    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        stxrb   w17, w16, [x1]
+        \\        ldaxrh   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stxrh   w17, w1, [x2]
         \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
+        : [ret] "={w0}" (-> u16),
+        : [expected] "{w0}" (expected),
+          [desired] "{w1}" (desired),
+          [ptr] "{x2}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn swp1AcqNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_swp2_acq(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x00000000 + 0x800000
+        \\        .inst 0x38208020 + 0x40000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        stxrb   w17, w16, [x1]
+        \\        ldaxrh   w0, [x1]
+        \\        stxrh   w17, w16, [x1]
         \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_swp1_acq = if (is_darwin)
-    if (has_lse)
-        swp1AcqDarwinLse
-    else
-        swp1AcqDarwinNolse
-else if (has_lse)
-    swp1AcqNondarwinLse
-else
-    swp1AcqNondarwinNolse;
-fn ldadd1AcqDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldadd2_acq(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldaddab w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
+        \\        ldaxrh   w0, [x1]
         \\        add     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        stxrh   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldadd1AcqDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldclr2_acq(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0x800000
+        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        ldaxrh   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldadd1AcqNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldeor2_acq(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldaddab w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        ldaxrh   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldadd1AcqNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldset2_acq(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0x800000
+        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        ldaxrh   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stxrh   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldadd1_acq = if (is_darwin)
-    if (has_lse)
-        ldadd1AcqDarwinLse
-    else
-        ldadd1AcqDarwinNolse
-else if (has_lse)
-    ldadd1AcqNondarwinLse
-else
-    ldadd1AcqNondarwinNolse;
-fn ldclr1AcqDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_cas2_rel(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldclrab w0, w0, [x1]
+        \\        .inst 0x08a07c41 + 0x40000000 + 0x008000
+        \\
         \\        cbz     wzr, 1f
         \\8:
-        \\        mov    w16, w0
+        \\        uxth    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
+        \\        ldxrh   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stlxrh   w17, w1, [x2]
+        \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
+        : [ret] "={w0}" (-> u16),
+        : [expected] "{w0}" (expected),
+          [desired] "{w1}" (desired),
+          [ptr] "{x2}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldclr1AcqDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_swp2_rel(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0x800000
+        \\        .inst 0x38208020 + 0x40000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
+        \\        ldxrh   w0, [x1]
+        \\        stlxrh   w17, w16, [x1]
+        \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldclr1AcqNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldadd2_rel(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldclrab w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        ldxrh   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stlxrh   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldclr1AcqNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldclr2_rel(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0x800000
+        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
+        \\        ldxrh   w0, [x1]
         \\        bic     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        stlxrh   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldclr1_acq = if (is_darwin)
-    if (has_lse)
-        ldclr1AcqDarwinLse
-    else
-        ldclr1AcqDarwinNolse
-else if (has_lse)
-    ldclr1AcqNondarwinLse
-else
-    ldclr1AcqNondarwinNolse;
-fn ldeor1AcqDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldeor2_rel(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldeorab w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
+        \\        ldxrh   w0, [x1]
         \\        eor     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        stlxrh   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldeor1AcqDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldset2_rel(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0x800000
+        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        ldxrh   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stlxrh   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldeor1AcqNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_cas2_acq_rel(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldeorab w0, w0, [x1]
+        \\        .inst 0x08a07c41 + 0x40000000 + 0x408000
+        \\
         \\        cbz     wzr, 1f
         \\8:
-        \\        mov    w16, w0
+        \\        uxth    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
+        \\        ldaxrh   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stlxrh   w17, w1, [x2]
+        \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
+        : [ret] "={w0}" (-> u16),
+        : [expected] "{w0}" (expected),
+          [desired] "{w1}" (desired),
+          [ptr] "{x2}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldeor1AcqNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_swp2_acq_rel(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0x800000
+        \\        .inst 0x38208020 + 0x40000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
+        \\        ldaxrh   w0, [x1]
+        \\        stlxrh   w17, w16, [x1]
+        \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldeor1_acq = if (is_darwin)
-    if (has_lse)
-        ldeor1AcqDarwinLse
-    else
-        ldeor1AcqDarwinNolse
-else if (has_lse)
-    ldeor1AcqNondarwinLse
-else
-    ldeor1AcqNondarwinNolse;
-fn ldset1AcqDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldadd2_acq_rel(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldsetab w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        ldaxrh   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stlxrh   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldset1AcqDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldclr2_acq_rel(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0x800000
+        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        ldaxrh   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stlxrh   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldset1AcqNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldeor2_acq_rel(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldsetab w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        ldaxrh   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stlxrh   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldset1AcqNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldset2_acq_rel(val: u16, ptr: *u16) callconv(.C) u16 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0x800000
+        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldaxrb   w0, [x1]
+        \\        ldaxrh   w0, [x1]
         \\        orr     w17, w0, w16
-        \\        stxrb   w15, w17, [x1]
+        \\        stlxrh   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u16),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldset1_acq = if (is_darwin)
-    if (has_lse)
-        ldset1AcqDarwinLse
-    else
-        ldset1AcqDarwinNolse
-else if (has_lse)
-    ldset1AcqNondarwinLse
-else
-    ldset1AcqNondarwinNolse;
-fn cas1RelDarwinLse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_cas4_relax(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        caslb w0, w1, [x2]
+        \\        .inst 0x08a07c41 + 0x80000000 + 0x000000
         \\
         \\        cbz     wzr, 1f
         \\8:
-        \\        uxtb    w16, w0
+        \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x2]
+        \\        ldxr   w0, [x2]
         \\        cmp    w0, w16
         \\        bne    1f
-        \\        stlxrb   w17, w1, [x2]
+        \\        stxr   w17, w1, [x2]
         \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u32),
         : [expected] "{w0}" (expected),
           [desired] "{w1}" (desired),
           [ptr] "{x2}" (ptr),
@@ -1265,8311 +1058,530 @@ fn cas1RelDarwinLse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas1RelDarwinNolse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_swp4_relax(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x00000000 + 0x008000
-        \\
+        \\        .inst 0x38208020 + 0x80000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
-        \\        uxtb    w16, w0
+        \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrb   w17, w1, [x2]
+        \\        ldxr   w0, [x1]
+        \\        stxr   w17, w16, [x1]
         \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={w0}" (-> u32),
+        : [val] "{w0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas1RelNondarwinLse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldadd4_relax(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        caslb w0, w1, [x2]
-        \\
+        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
-        \\        uxtb    w16, w0
+        \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrb   w17, w1, [x2]
-        \\        cbnz   w17, 0b
+        \\        ldxr   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={w0}" (-> u32),
+        : [val] "{w0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas1RelNondarwinNolse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldclr4_relax(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x00000000 + 0x008000
-        \\
+        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
-        \\        uxtb    w16, w0
+        \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrb   w17, w1, [x2]
-        \\        cbnz   w17, 0b
+        \\        ldxr   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={w0}" (-> u32),
+        : [val] "{w0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_cas1_rel = if (is_darwin)
-    if (has_lse)
-        cas1RelDarwinLse
-    else
-        cas1RelDarwinNolse
-else if (has_lse)
-    cas1RelNondarwinLse
-else
-    cas1RelNondarwinNolse;
-fn swp1RelDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldeor4_relax(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        swplb  w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        stlxrb   w17, w16, [x1]
-        \\        cbnz   w17, 0b
+        \\        ldxr   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u32),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn swp1RelDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldset4_relax(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x00000000 + 0x400000
+        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        stlxrb   w17, w16, [x1]
-        \\        cbnz   w17, 0b
+        \\        ldxr   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
+        \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u32),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn swp1RelNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_cas4_acq(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        swplb  w0, w0, [x1]
+        \\        .inst 0x08a07c41 + 0x80000000 + 0x400000
+        \\
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        stlxrb   w17, w16, [x1]
+        \\        ldaxr   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stxr   w17, w1, [x2]
         \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
+        : [ret] "={w0}" (-> u32),
+        : [expected] "{w0}" (expected),
+          [desired] "{w1}" (desired),
+          [ptr] "{x2}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn swp1RelNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_swp4_acq(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x00000000 + 0x400000
+        \\        .inst 0x38208020 + 0x80000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        stlxrb   w17, w16, [x1]
+        \\        ldaxr   w0, [x1]
+        \\        stxr   w17, w16, [x1]
         \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u32),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_swp1_rel = if (is_darwin)
-    if (has_lse)
-        swp1RelDarwinLse
-    else
-        swp1RelDarwinNolse
-else if (has_lse)
-    swp1RelNondarwinLse
-else
-    swp1RelNondarwinNolse;
-fn ldadd1RelDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldadd4_acq(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldaddlb w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
+        \\        ldaxr   w0, [x1]
         \\        add     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
+        \\        stxr   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u32),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldadd1RelDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldclr4_acq(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0x400000
+        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
+        \\        ldaxr   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u32),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldadd1RelNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldeor4_acq(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldaddlb w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
+        \\        ldaxr   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u32),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldadd1RelNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldset4_acq(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0x400000
+        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
+        \\        ldaxr   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stxr   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u32),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldadd1_rel = if (is_darwin)
-    if (has_lse)
-        ldadd1RelDarwinLse
-    else
-        ldadd1RelDarwinNolse
-else if (has_lse)
-    ldadd1RelNondarwinLse
-else
-    ldadd1RelNondarwinNolse;
-fn ldclr1RelDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_cas4_rel(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldclrlb w0, w0, [x1]
+        \\        .inst 0x08a07c41 + 0x80000000 + 0x008000
+        \\
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
+        \\        ldxr   w0, [x2]
+        \\        cmp    w0, w16
+        \\        bne    1f
+        \\        stlxr   w17, w1, [x2]
+        \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
+        : [ret] "={w0}" (-> u32),
+        : [expected] "{w0}" (expected),
+          [desired] "{w1}" (desired),
+          [ptr] "{x2}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldclr1RelDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_swp4_rel(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0x400000
+        \\        .inst 0x38208020 + 0x80000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
+        \\        ldxr   w0, [x1]
+        \\        stlxr   w17, w16, [x1]
+        \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u32),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldclr1RelNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldadd4_rel(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldclrlb w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
+        \\        ldxr   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u32),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldclr1RelNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldclr4_rel(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0x400000
+        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
+        \\        ldxr   w0, [x1]
         \\        bic     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
+        \\        stlxr   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u32),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldclr1_rel = if (is_darwin)
-    if (has_lse)
-        ldclr1RelDarwinLse
-    else
-        ldclr1RelDarwinNolse
-else if (has_lse)
-    ldclr1RelNondarwinLse
-else
-    ldclr1RelNondarwinNolse;
-fn ldeor1RelDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldeor4_rel(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldeorlb w0, w0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
+        \\        ldxr   w0, [x1]
         \\        eor     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
+        \\        stlxr   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u32),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldeor1RelDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_ldset4_rel(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0x400000
+        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    w16, w0
         \\0:
-        \\        ldxrb   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
+        \\        ldxr   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={w0}" (-> u8),
+        : [ret] "={w0}" (-> u32),
         : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldeor1RelNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
+fn __aarch64_cas4_acq_rel(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldeorlb w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrb   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor1RelNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrb   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldeor1_rel = if (is_darwin)
-    if (has_lse)
-        ldeor1RelDarwinLse
-    else
-        ldeor1RelDarwinNolse
-else if (has_lse)
-    ldeor1RelNondarwinLse
-else
-    ldeor1RelNondarwinNolse;
-fn ldset1RelDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetlb w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset1RelDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset1RelNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetlb w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset1RelNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldset1_rel = if (is_darwin)
-    if (has_lse)
-        ldset1RelDarwinLse
-    else
-        ldset1RelDarwinNolse
-else if (has_lse)
-    ldset1RelNondarwinLse
-else
-    ldset1RelNondarwinNolse;
-fn cas1AcqRelDarwinLse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casalb w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxtb    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrb   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas1AcqRelDarwinNolse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x00000000 + 0x408000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxtb    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrb   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas1AcqRelNondarwinLse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casalb w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxtb    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrb   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas1AcqRelNondarwinNolse(expected: u8, desired: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x00000000 + 0x408000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxtb    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrb   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_cas1_acq_rel = if (is_darwin)
-    if (has_lse)
-        cas1AcqRelDarwinLse
-    else
-        cas1AcqRelDarwinNolse
-else if (has_lse)
-    cas1AcqRelNondarwinLse
-else
-    cas1AcqRelNondarwinNolse;
-fn swp1AcqRelDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpalb  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        stlxrb   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp1AcqRelDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x00000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        stlxrb   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp1AcqRelNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpalb  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        stlxrb   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp1AcqRelNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x00000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        stlxrb   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_swp1_acq_rel = if (is_darwin)
-    if (has_lse)
-        swp1AcqRelDarwinLse
-    else
-        swp1AcqRelDarwinNolse
-else if (has_lse)
-    swp1AcqRelNondarwinLse
-else
-    swp1AcqRelNondarwinNolse;
-fn ldadd1AcqRelDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddalb w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd1AcqRelDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd1AcqRelNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddalb w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd1AcqRelNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x00000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldadd1_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldadd1AcqRelDarwinLse
-    else
-        ldadd1AcqRelDarwinNolse
-else if (has_lse)
-    ldadd1AcqRelNondarwinLse
-else
-    ldadd1AcqRelNondarwinNolse;
-fn ldclr1AcqRelDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclralb w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr1AcqRelDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr1AcqRelNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclralb w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr1AcqRelNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x00000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldclr1_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldclr1AcqRelDarwinLse
-    else
-        ldclr1AcqRelDarwinNolse
-else if (has_lse)
-    ldclr1AcqRelNondarwinLse
-else
-    ldclr1AcqRelNondarwinNolse;
-fn ldeor1AcqRelDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeoralb w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor1AcqRelDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor1AcqRelNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeoralb w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor1AcqRelNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x00000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldeor1_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldeor1AcqRelDarwinLse
-    else
-        ldeor1AcqRelDarwinNolse
-else if (has_lse)
-    ldeor1AcqRelNondarwinLse
-else
-    ldeor1AcqRelNondarwinNolse;
-fn ldset1AcqRelDarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetalb w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset1AcqRelDarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset1AcqRelNondarwinLse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetalb w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset1AcqRelNondarwinNolse(val: u8, ptr: *u8) callconv(.C) u8 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x00000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrb   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrb   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u8),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldset1_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldset1AcqRelDarwinLse
-    else
-        ldset1AcqRelDarwinNolse
-else if (has_lse)
-    ldset1AcqRelNondarwinLse
-else
-    ldset1AcqRelNondarwinNolse;
-fn cas2RelaxDarwinLse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        cash w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas2RelaxDarwinNolse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x40000000 + 0x000000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas2RelaxNondarwinLse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        cash w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas2RelaxNondarwinNolse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x40000000 + 0x000000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_cas2_relax = if (is_darwin)
-    if (has_lse)
-        cas2RelaxDarwinLse
-    else
-        cas2RelaxDarwinNolse
-else if (has_lse)
-    cas2RelaxNondarwinLse
-else
-    cas2RelaxNondarwinNolse;
-fn swp2RelaxDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swph  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        stxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp2RelaxDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x40000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        stxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp2RelaxNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swph  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        stxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp2RelaxNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x40000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        stxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_swp2_relax = if (is_darwin)
-    if (has_lse)
-        swp2RelaxDarwinLse
-    else
-        swp2RelaxDarwinNolse
-else if (has_lse)
-    swp2RelaxNondarwinLse
-else
-    swp2RelaxNondarwinNolse;
-fn ldadd2RelaxDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd2RelaxDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd2RelaxNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd2RelaxNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldadd2_relax = if (is_darwin)
-    if (has_lse)
-        ldadd2RelaxDarwinLse
-    else
-        ldadd2RelaxDarwinNolse
-else if (has_lse)
-    ldadd2RelaxNondarwinLse
-else
-    ldadd2RelaxNondarwinNolse;
-fn ldclr2RelaxDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclrh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr2RelaxDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr2RelaxNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclrh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr2RelaxNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldclr2_relax = if (is_darwin)
-    if (has_lse)
-        ldclr2RelaxDarwinLse
-    else
-        ldclr2RelaxDarwinNolse
-else if (has_lse)
-    ldclr2RelaxNondarwinLse
-else
-    ldclr2RelaxNondarwinNolse;
-fn ldeor2RelaxDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeorh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor2RelaxDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor2RelaxNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeorh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor2RelaxNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldeor2_relax = if (is_darwin)
-    if (has_lse)
-        ldeor2RelaxDarwinLse
-    else
-        ldeor2RelaxDarwinNolse
-else if (has_lse)
-    ldeor2RelaxNondarwinLse
-else
-    ldeor2RelaxNondarwinNolse;
-fn ldset2RelaxDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldseth w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset2RelaxDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset2RelaxNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldseth w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset2RelaxNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldset2_relax = if (is_darwin)
-    if (has_lse)
-        ldset2RelaxDarwinLse
-    else
-        ldset2RelaxDarwinNolse
-else if (has_lse)
-    ldset2RelaxNondarwinLse
-else
-    ldset2RelaxNondarwinNolse;
-fn cas2AcqDarwinLse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casah w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas2AcqDarwinNolse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x40000000 + 0x400000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas2AcqNondarwinLse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casah w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas2AcqNondarwinNolse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x40000000 + 0x400000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_cas2_acq = if (is_darwin)
-    if (has_lse)
-        cas2AcqDarwinLse
-    else
-        cas2AcqDarwinNolse
-else if (has_lse)
-    cas2AcqNondarwinLse
-else
-    cas2AcqNondarwinNolse;
-fn swp2AcqDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpah  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        stxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp2AcqDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x40000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        stxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp2AcqNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpah  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        stxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp2AcqNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x40000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        stxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_swp2_acq = if (is_darwin)
-    if (has_lse)
-        swp2AcqDarwinLse
-    else
-        swp2AcqDarwinNolse
-else if (has_lse)
-    swp2AcqNondarwinLse
-else
-    swp2AcqNondarwinNolse;
-fn ldadd2AcqDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddah w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd2AcqDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd2AcqNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddah w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd2AcqNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldadd2_acq = if (is_darwin)
-    if (has_lse)
-        ldadd2AcqDarwinLse
-    else
-        ldadd2AcqDarwinNolse
-else if (has_lse)
-    ldadd2AcqNondarwinLse
-else
-    ldadd2AcqNondarwinNolse;
-fn ldclr2AcqDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclrah w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr2AcqDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr2AcqNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclrah w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr2AcqNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldclr2_acq = if (is_darwin)
-    if (has_lse)
-        ldclr2AcqDarwinLse
-    else
-        ldclr2AcqDarwinNolse
-else if (has_lse)
-    ldclr2AcqNondarwinLse
-else
-    ldclr2AcqNondarwinNolse;
-fn ldeor2AcqDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeorah w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor2AcqDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor2AcqNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeorah w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor2AcqNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldeor2_acq = if (is_darwin)
-    if (has_lse)
-        ldeor2AcqDarwinLse
-    else
-        ldeor2AcqDarwinNolse
-else if (has_lse)
-    ldeor2AcqNondarwinLse
-else
-    ldeor2AcqNondarwinNolse;
-fn ldset2AcqDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetah w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset2AcqDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset2AcqNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetah w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset2AcqNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldset2_acq = if (is_darwin)
-    if (has_lse)
-        ldset2AcqDarwinLse
-    else
-        ldset2AcqDarwinNolse
-else if (has_lse)
-    ldset2AcqNondarwinLse
-else
-    ldset2AcqNondarwinNolse;
-fn cas2RelDarwinLse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        caslh w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas2RelDarwinNolse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x40000000 + 0x008000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas2RelNondarwinLse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        caslh w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas2RelNondarwinNolse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x40000000 + 0x008000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_cas2_rel = if (is_darwin)
-    if (has_lse)
-        cas2RelDarwinLse
-    else
-        cas2RelDarwinNolse
-else if (has_lse)
-    cas2RelNondarwinLse
-else
-    cas2RelNondarwinNolse;
-fn swp2RelDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swplh  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        stlxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp2RelDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x40000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        stlxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp2RelNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swplh  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        stlxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp2RelNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x40000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        stlxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_swp2_rel = if (is_darwin)
-    if (has_lse)
-        swp2RelDarwinLse
-    else
-        swp2RelDarwinNolse
-else if (has_lse)
-    swp2RelNondarwinLse
-else
-    swp2RelNondarwinNolse;
-fn ldadd2RelDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddlh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd2RelDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd2RelNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddlh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd2RelNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldadd2_rel = if (is_darwin)
-    if (has_lse)
-        ldadd2RelDarwinLse
-    else
-        ldadd2RelDarwinNolse
-else if (has_lse)
-    ldadd2RelNondarwinLse
-else
-    ldadd2RelNondarwinNolse;
-fn ldclr2RelDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclrlh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr2RelDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr2RelNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclrlh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr2RelNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldclr2_rel = if (is_darwin)
-    if (has_lse)
-        ldclr2RelDarwinLse
-    else
-        ldclr2RelDarwinNolse
-else if (has_lse)
-    ldclr2RelNondarwinLse
-else
-    ldclr2RelNondarwinNolse;
-fn ldeor2RelDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeorlh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor2RelDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor2RelNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeorlh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor2RelNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldeor2_rel = if (is_darwin)
-    if (has_lse)
-        ldeor2RelDarwinLse
-    else
-        ldeor2RelDarwinNolse
-else if (has_lse)
-    ldeor2RelNondarwinLse
-else
-    ldeor2RelNondarwinNolse;
-fn ldset2RelDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetlh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset2RelDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset2RelNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetlh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset2RelNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldset2_rel = if (is_darwin)
-    if (has_lse)
-        ldset2RelDarwinLse
-    else
-        ldset2RelDarwinNolse
-else if (has_lse)
-    ldset2RelNondarwinLse
-else
-    ldset2RelNondarwinNolse;
-fn cas2AcqRelDarwinLse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casalh w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas2AcqRelDarwinNolse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x40000000 + 0x408000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas2AcqRelNondarwinLse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casalh w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas2AcqRelNondarwinNolse(expected: u16, desired: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x40000000 + 0x408000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        uxth    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxrh   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_cas2_acq_rel = if (is_darwin)
-    if (has_lse)
-        cas2AcqRelDarwinLse
-    else
-        cas2AcqRelDarwinNolse
-else if (has_lse)
-    cas2AcqRelNondarwinLse
-else
-    cas2AcqRelNondarwinNolse;
-fn swp2AcqRelDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpalh  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        stlxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp2AcqRelDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x40000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        stlxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp2AcqRelNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpalh  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        stlxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp2AcqRelNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x40000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        stlxrh   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_swp2_acq_rel = if (is_darwin)
-    if (has_lse)
-        swp2AcqRelDarwinLse
-    else
-        swp2AcqRelDarwinNolse
-else if (has_lse)
-    swp2AcqRelNondarwinLse
-else
-    swp2AcqRelNondarwinNolse;
-fn ldadd2AcqRelDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddalh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd2AcqRelDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd2AcqRelNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddalh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd2AcqRelNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x40000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldadd2_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldadd2AcqRelDarwinLse
-    else
-        ldadd2AcqRelDarwinNolse
-else if (has_lse)
-    ldadd2AcqRelNondarwinLse
-else
-    ldadd2AcqRelNondarwinNolse;
-fn ldclr2AcqRelDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclralh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr2AcqRelDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr2AcqRelNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclralh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr2AcqRelNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x40000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldclr2_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldclr2AcqRelDarwinLse
-    else
-        ldclr2AcqRelDarwinNolse
-else if (has_lse)
-    ldclr2AcqRelNondarwinLse
-else
-    ldclr2AcqRelNondarwinNolse;
-fn ldeor2AcqRelDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeoralh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor2AcqRelDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor2AcqRelNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeoralh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor2AcqRelNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x40000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldeor2_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldeor2AcqRelDarwinLse
-    else
-        ldeor2AcqRelDarwinNolse
-else if (has_lse)
-    ldeor2AcqRelNondarwinLse
-else
-    ldeor2AcqRelNondarwinNolse;
-fn ldset2AcqRelDarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetalh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset2AcqRelDarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset2AcqRelNondarwinLse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetalh w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset2AcqRelNondarwinNolse(val: u16, ptr: *u16) callconv(.C) u16 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x40000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxrh   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxrh   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u16),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldset2_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldset2AcqRelDarwinLse
-    else
-        ldset2AcqRelDarwinNolse
-else if (has_lse)
-    ldset2AcqRelNondarwinLse
-else
-    ldset2AcqRelNondarwinNolse;
-fn cas4RelaxDarwinLse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        cas w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas4RelaxDarwinNolse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x80000000 + 0x000000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas4RelaxNondarwinLse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        cas w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas4RelaxNondarwinNolse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x80000000 + 0x000000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_cas4_relax = if (is_darwin)
-    if (has_lse)
-        cas4RelaxDarwinLse
-    else
-        cas4RelaxDarwinNolse
-else if (has_lse)
-    cas4RelaxNondarwinLse
-else
-    cas4RelaxNondarwinNolse;
-fn swp4RelaxDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swp  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        stxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp4RelaxDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x80000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        stxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp4RelaxNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swp  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        stxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp4RelaxNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x80000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        stxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_swp4_relax = if (is_darwin)
-    if (has_lse)
-        swp4RelaxDarwinLse
-    else
-        swp4RelaxDarwinNolse
-else if (has_lse)
-    swp4RelaxNondarwinLse
-else
-    swp4RelaxNondarwinNolse;
-fn ldadd4RelaxDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldadd w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd4RelaxDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd4RelaxNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldadd w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd4RelaxNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldadd4_relax = if (is_darwin)
-    if (has_lse)
-        ldadd4RelaxDarwinLse
-    else
-        ldadd4RelaxDarwinNolse
-else if (has_lse)
-    ldadd4RelaxNondarwinLse
-else
-    ldadd4RelaxNondarwinNolse;
-fn ldclr4RelaxDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclr w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr4RelaxDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr4RelaxNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclr w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr4RelaxNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldclr4_relax = if (is_darwin)
-    if (has_lse)
-        ldclr4RelaxDarwinLse
-    else
-        ldclr4RelaxDarwinNolse
-else if (has_lse)
-    ldclr4RelaxNondarwinLse
-else
-    ldclr4RelaxNondarwinNolse;
-fn ldeor4RelaxDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeor w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor4RelaxDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor4RelaxNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeor w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor4RelaxNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldeor4_relax = if (is_darwin)
-    if (has_lse)
-        ldeor4RelaxDarwinLse
-    else
-        ldeor4RelaxDarwinNolse
-else if (has_lse)
-    ldeor4RelaxNondarwinLse
-else
-    ldeor4RelaxNondarwinNolse;
-fn ldset4RelaxDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldset w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset4RelaxDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset4RelaxNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldset w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset4RelaxNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldset4_relax = if (is_darwin)
-    if (has_lse)
-        ldset4RelaxDarwinLse
-    else
-        ldset4RelaxDarwinNolse
-else if (has_lse)
-    ldset4RelaxNondarwinLse
-else
-    ldset4RelaxNondarwinNolse;
-fn cas4AcqDarwinLse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casa w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas4AcqDarwinNolse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x80000000 + 0x400000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas4AcqNondarwinLse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casa w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas4AcqNondarwinNolse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x80000000 + 0x400000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_cas4_acq = if (is_darwin)
-    if (has_lse)
-        cas4AcqDarwinLse
-    else
-        cas4AcqDarwinNolse
-else if (has_lse)
-    cas4AcqNondarwinLse
-else
-    cas4AcqNondarwinNolse;
-fn swp4AcqDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpa  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        stxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp4AcqDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x80000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        stxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp4AcqNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpa  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        stxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp4AcqNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x80000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        stxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_swp4_acq = if (is_darwin)
-    if (has_lse)
-        swp4AcqDarwinLse
-    else
-        swp4AcqDarwinNolse
-else if (has_lse)
-    swp4AcqNondarwinLse
-else
-    swp4AcqNondarwinNolse;
-fn ldadd4AcqDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldadda w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd4AcqDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd4AcqNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldadda w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd4AcqNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldadd4_acq = if (is_darwin)
-    if (has_lse)
-        ldadd4AcqDarwinLse
-    else
-        ldadd4AcqDarwinNolse
-else if (has_lse)
-    ldadd4AcqNondarwinLse
-else
-    ldadd4AcqNondarwinNolse;
-fn ldclr4AcqDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclra w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr4AcqDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr4AcqNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclra w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr4AcqNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldclr4_acq = if (is_darwin)
-    if (has_lse)
-        ldclr4AcqDarwinLse
-    else
-        ldclr4AcqDarwinNolse
-else if (has_lse)
-    ldclr4AcqNondarwinLse
-else
-    ldclr4AcqNondarwinNolse;
-fn ldeor4AcqDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeora w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor4AcqDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor4AcqNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeora w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor4AcqNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldeor4_acq = if (is_darwin)
-    if (has_lse)
-        ldeor4AcqDarwinLse
-    else
-        ldeor4AcqDarwinNolse
-else if (has_lse)
-    ldeor4AcqNondarwinLse
-else
-    ldeor4AcqNondarwinNolse;
-fn ldset4AcqDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldseta w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset4AcqDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset4AcqNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldseta w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset4AcqNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldset4_acq = if (is_darwin)
-    if (has_lse)
-        ldset4AcqDarwinLse
-    else
-        ldset4AcqDarwinNolse
-else if (has_lse)
-    ldset4AcqNondarwinLse
-else
-    ldset4AcqNondarwinNolse;
-fn cas4RelDarwinLse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casl w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas4RelDarwinNolse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x80000000 + 0x008000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas4RelNondarwinLse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casl w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas4RelNondarwinNolse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x80000000 + 0x008000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_cas4_rel = if (is_darwin)
-    if (has_lse)
-        cas4RelDarwinLse
-    else
-        cas4RelDarwinNolse
-else if (has_lse)
-    cas4RelNondarwinLse
-else
-    cas4RelNondarwinNolse;
-fn swp4RelDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpl  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        stlxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp4RelDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x80000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        stlxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp4RelNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpl  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        stlxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp4RelNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x80000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        stlxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_swp4_rel = if (is_darwin)
-    if (has_lse)
-        swp4RelDarwinLse
-    else
-        swp4RelDarwinNolse
-else if (has_lse)
-    swp4RelNondarwinLse
-else
-    swp4RelNondarwinNolse;
-fn ldadd4RelDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddl w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd4RelDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd4RelNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddl w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd4RelNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldadd4_rel = if (is_darwin)
-    if (has_lse)
-        ldadd4RelDarwinLse
-    else
-        ldadd4RelDarwinNolse
-else if (has_lse)
-    ldadd4RelNondarwinLse
-else
-    ldadd4RelNondarwinNolse;
-fn ldclr4RelDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclrl w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr4RelDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr4RelNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclrl w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr4RelNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldclr4_rel = if (is_darwin)
-    if (has_lse)
-        ldclr4RelDarwinLse
-    else
-        ldclr4RelDarwinNolse
-else if (has_lse)
-    ldclr4RelNondarwinLse
-else
-    ldclr4RelNondarwinNolse;
-fn ldeor4RelDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeorl w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor4RelDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor4RelNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeorl w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor4RelNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldeor4_rel = if (is_darwin)
-    if (has_lse)
-        ldeor4RelDarwinLse
-    else
-        ldeor4RelDarwinNolse
-else if (has_lse)
-    ldeor4RelNondarwinLse
-else
-    ldeor4RelNondarwinNolse;
-fn ldset4RelDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetl w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset4RelDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset4RelNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetl w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset4RelNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldset4_rel = if (is_darwin)
-    if (has_lse)
-        ldset4RelDarwinLse
-    else
-        ldset4RelDarwinNolse
-else if (has_lse)
-    ldset4RelNondarwinLse
-else
-    ldset4RelNondarwinNolse;
-fn cas4AcqRelDarwinLse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casal w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas4AcqRelDarwinNolse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x80000000 + 0x408000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas4AcqRelNondarwinLse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casal w0, w1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas4AcqRelNondarwinNolse(expected: u32, desired: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0x80000000 + 0x408000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x2]
-        \\        cmp    w0, w16
-        \\        bne    1f
-        \\        stlxr   w17, w1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [expected] "{w0}" (expected),
-          [desired] "{w1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_cas4_acq_rel = if (is_darwin)
-    if (has_lse)
-        cas4AcqRelDarwinLse
-    else
-        cas4AcqRelDarwinNolse
-else if (has_lse)
-    cas4AcqRelNondarwinLse
-else
-    cas4AcqRelNondarwinNolse;
-fn swp4AcqRelDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpal  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        stlxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp4AcqRelDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x80000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        stlxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp4AcqRelNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpal  w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        stlxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp4AcqRelNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0x80000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        stlxr   w17, w16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_swp4_acq_rel = if (is_darwin)
-    if (has_lse)
-        swp4AcqRelDarwinLse
-    else
-        swp4AcqRelDarwinNolse
-else if (has_lse)
-    swp4AcqRelNondarwinLse
-else
-    swp4AcqRelNondarwinNolse;
-fn ldadd4AcqRelDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddal w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd4AcqRelDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd4AcqRelNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddal w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd4AcqRelNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        add     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldadd4_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldadd4AcqRelDarwinLse
-    else
-        ldadd4AcqRelDarwinNolse
-else if (has_lse)
-    ldadd4AcqRelNondarwinLse
-else
-    ldadd4AcqRelNondarwinNolse;
-fn ldclr4AcqRelDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclral w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr4AcqRelDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr4AcqRelNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclral w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr4AcqRelNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        bic     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldclr4_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldclr4AcqRelDarwinLse
-    else
-        ldclr4AcqRelDarwinNolse
-else if (has_lse)
-    ldclr4AcqRelNondarwinLse
-else
-    ldclr4AcqRelNondarwinNolse;
-fn ldeor4AcqRelDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeoral w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor4AcqRelDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor4AcqRelNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeoral w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor4AcqRelNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        eor     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldeor4_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldeor4AcqRelDarwinLse
-    else
-        ldeor4AcqRelDarwinNolse
-else if (has_lse)
-    ldeor4AcqRelNondarwinLse
-else
-    ldeor4AcqRelNondarwinNolse;
-fn ldset4AcqRelDarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetal w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset4AcqRelDarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset4AcqRelNondarwinLse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetal w0, w0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset4AcqRelNondarwinNolse(val: u32, ptr: *u32) callconv(.C) u32 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    w16, w0
-        \\0:
-        \\        ldaxr   w0, [x1]
-        \\        orr     w17, w0, w16
-        \\        stlxr   w15, w17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={w0}" (-> u32),
-        : [val] "{w0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldset4_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldset4AcqRelDarwinLse
-    else
-        ldset4AcqRelDarwinNolse
-else if (has_lse)
-    ldset4AcqRelNondarwinLse
-else
-    ldset4AcqRelNondarwinNolse;
-fn cas8RelaxDarwinLse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        cas x0, x1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas8RelaxDarwinNolse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0xc0000000 + 0x000000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas8RelaxNondarwinLse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        cas x0, x1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas8RelaxNondarwinNolse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0xc0000000 + 0x000000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_cas8_relax = if (is_darwin)
-    if (has_lse)
-        cas8RelaxDarwinLse
-    else
-        cas8RelaxDarwinNolse
-else if (has_lse)
-    cas8RelaxNondarwinLse
-else
-    cas8RelaxNondarwinNolse;
-fn swp8RelaxDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swp  x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        stxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp8RelaxDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0xc0000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        stxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp8RelaxNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swp  x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        stxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp8RelaxNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0xc0000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        stxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_swp8_relax = if (is_darwin)
-    if (has_lse)
-        swp8RelaxDarwinLse
-    else
-        swp8RelaxDarwinNolse
-else if (has_lse)
-    swp8RelaxNondarwinLse
-else
-    swp8RelaxNondarwinNolse;
-fn ldadd8RelaxDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldadd x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd8RelaxDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd8RelaxNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldadd x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd8RelaxNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldadd8_relax = if (is_darwin)
-    if (has_lse)
-        ldadd8RelaxDarwinLse
-    else
-        ldadd8RelaxDarwinNolse
-else if (has_lse)
-    ldadd8RelaxNondarwinLse
-else
-    ldadd8RelaxNondarwinNolse;
-fn ldclr8RelaxDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclr x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr8RelaxDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr8RelaxNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclr x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr8RelaxNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldclr8_relax = if (is_darwin)
-    if (has_lse)
-        ldclr8RelaxDarwinLse
-    else
-        ldclr8RelaxDarwinNolse
-else if (has_lse)
-    ldclr8RelaxNondarwinLse
-else
-    ldclr8RelaxNondarwinNolse;
-fn ldeor8RelaxDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeor x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor8RelaxDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor8RelaxNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeor x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor8RelaxNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldeor8_relax = if (is_darwin)
-    if (has_lse)
-        ldeor8RelaxDarwinLse
-    else
-        ldeor8RelaxDarwinNolse
-else if (has_lse)
-    ldeor8RelaxNondarwinLse
-else
-    ldeor8RelaxNondarwinNolse;
-fn ldset8RelaxDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldset x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset8RelaxDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset8RelaxNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldset x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset8RelaxNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x000000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldset8_relax = if (is_darwin)
-    if (has_lse)
-        ldset8RelaxDarwinLse
-    else
-        ldset8RelaxDarwinNolse
-else if (has_lse)
-    ldset8RelaxNondarwinLse
-else
-    ldset8RelaxNondarwinNolse;
-fn cas8AcqDarwinLse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casa x0, x1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas8AcqDarwinNolse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0xc0000000 + 0x400000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas8AcqNondarwinLse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casa x0, x1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas8AcqNondarwinNolse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0xc0000000 + 0x400000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_cas8_acq = if (is_darwin)
-    if (has_lse)
-        cas8AcqDarwinLse
-    else
-        cas8AcqDarwinNolse
-else if (has_lse)
-    cas8AcqNondarwinLse
-else
-    cas8AcqNondarwinNolse;
-fn swp8AcqDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpa  x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        stxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp8AcqDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0xc0000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        stxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp8AcqNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpa  x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        stxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp8AcqNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0xc0000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        stxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_swp8_acq = if (is_darwin)
-    if (has_lse)
-        swp8AcqDarwinLse
-    else
-        swp8AcqDarwinNolse
-else if (has_lse)
-    swp8AcqNondarwinLse
-else
-    swp8AcqNondarwinNolse;
-fn ldadd8AcqDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldadda x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd8AcqDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd8AcqNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldadda x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd8AcqNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldadd8_acq = if (is_darwin)
-    if (has_lse)
-        ldadd8AcqDarwinLse
-    else
-        ldadd8AcqDarwinNolse
-else if (has_lse)
-    ldadd8AcqNondarwinLse
-else
-    ldadd8AcqNondarwinNolse;
-fn ldclr8AcqDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclra x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr8AcqDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr8AcqNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclra x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr8AcqNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldclr8_acq = if (is_darwin)
-    if (has_lse)
-        ldclr8AcqDarwinLse
-    else
-        ldclr8AcqDarwinNolse
-else if (has_lse)
-    ldclr8AcqNondarwinLse
-else
-    ldclr8AcqNondarwinNolse;
-fn ldeor8AcqDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeora x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor8AcqDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor8AcqNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeora x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor8AcqNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldeor8_acq = if (is_darwin)
-    if (has_lse)
-        ldeor8AcqDarwinLse
-    else
-        ldeor8AcqDarwinNolse
-else if (has_lse)
-    ldeor8AcqNondarwinLse
-else
-    ldeor8AcqNondarwinNolse;
-fn ldset8AcqDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldseta x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset8AcqDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset8AcqNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldseta x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset8AcqNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x800000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldset8_acq = if (is_darwin)
-    if (has_lse)
-        ldset8AcqDarwinLse
-    else
-        ldset8AcqDarwinNolse
-else if (has_lse)
-    ldset8AcqNondarwinLse
-else
-    ldset8AcqNondarwinNolse;
-fn cas8RelDarwinLse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casl x0, x1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stlxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas8RelDarwinNolse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0xc0000000 + 0x008000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stlxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas8RelNondarwinLse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casl x0, x1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stlxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas8RelNondarwinNolse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0xc0000000 + 0x008000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stlxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_cas8_rel = if (is_darwin)
-    if (has_lse)
-        cas8RelDarwinLse
-    else
-        cas8RelDarwinNolse
-else if (has_lse)
-    cas8RelNondarwinLse
-else
-    cas8RelNondarwinNolse;
-fn swp8RelDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpl  x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        stlxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp8RelDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0xc0000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        stlxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp8RelNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpl  x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        stlxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp8RelNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0xc0000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        stlxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_swp8_rel = if (is_darwin)
-    if (has_lse)
-        swp8RelDarwinLse
-    else
-        swp8RelDarwinNolse
-else if (has_lse)
-    swp8RelNondarwinLse
-else
-    swp8RelNondarwinNolse;
-fn ldadd8RelDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddl x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd8RelDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd8RelNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldaddl x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldadd8RelNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldadd8_rel = if (is_darwin)
-    if (has_lse)
-        ldadd8RelDarwinLse
-    else
-        ldadd8RelDarwinNolse
-else if (has_lse)
-    ldadd8RelNondarwinLse
-else
-    ldadd8RelNondarwinNolse;
-fn ldclr8RelDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclrl x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr8RelDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr8RelNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldclrl x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldclr8RelNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldclr8_rel = if (is_darwin)
-    if (has_lse)
-        ldclr8RelDarwinLse
-    else
-        ldclr8RelDarwinNolse
-else if (has_lse)
-    ldclr8RelNondarwinLse
-else
-    ldclr8RelNondarwinNolse;
-fn ldeor8RelDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeorl x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor8RelDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor8RelNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldeorl x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldeor8RelNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldeor8_rel = if (is_darwin)
-    if (has_lse)
-        ldeor8RelDarwinLse
-    else
-        ldeor8RelDarwinNolse
-else if (has_lse)
-    ldeor8RelNondarwinLse
-else
-    ldeor8RelNondarwinNolse;
-fn ldset8RelDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetl x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset8RelDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset8RelNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        ldsetl x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn ldset8RelNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x400000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-const __aarch64_ldset8_rel = if (is_darwin)
-    if (has_lse)
-        ldset8RelDarwinLse
-    else
-        ldset8RelDarwinNolse
-else if (has_lse)
-    ldset8RelNondarwinLse
-else
-    ldset8RelNondarwinNolse;
-fn cas8AcqRelDarwinLse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casal x0, x1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stlxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas8AcqRelDarwinNolse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0xc0000000 + 0x408000
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stlxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas8AcqRelNondarwinLse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        casal x0, x1, [x2]
-        \\
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x2]
-        \\        cmp    x0, x16
-        \\        bne    1f
-        \\        stlxr   w17, x1, [x2]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn cas8AcqRelNondarwinNolse(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x08a07c41 + 0xc0000000 + 0x408000
+        \\        .inst 0x08a07c41 + 0x80000000 + 0x408000
         \\
         \\        cbz     wzr, 1f
         \\8:
-        \\        mov    x16, x0
+        \\        mov    w16, w0
         \\0:
-        \\        ldaxr   x0, [x2]
-        \\        cmp    x0, x16
+        \\        ldaxr   w0, [x2]
+        \\        cmp    w0, w16
         \\        bne    1f
-        \\        stlxr   w17, x1, [x2]
+        \\        stlxr   w17, w1, [x2]
         \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={x0}" (-> u64),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
+        : [ret] "={w0}" (-> u32),
+        : [expected] "{w0}" (expected),
+          [desired] "{w1}" (desired),
           [ptr] "{x2}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_cas8_acq_rel = if (is_darwin)
-    if (has_lse)
-        cas8AcqRelDarwinLse
-    else
-        cas8AcqRelDarwinNolse
-else if (has_lse)
-    cas8AcqRelNondarwinLse
-else
-    cas8AcqRelNondarwinNolse;
-fn swp8AcqRelDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpal  x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        stlxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp8AcqRelDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0xc0000000 + 0xc00000
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        stlxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp8AcqRelNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
-    @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
-    return asm volatile (
-        \\        cbz     w16, 8f
-        \\        swpal  x0, x0, [x1]
-        \\        cbz     wzr, 1f
-        \\8:
-        \\        mov    x16, x0
-        \\0:
-        \\        ldaxr   x0, [x1]
-        \\        stlxr   w17, x16, [x1]
-        \\        cbnz   w17, 0b
-        \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
-          [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
-        : "w15", "w16", "w17", "memory"
-    );
-}
-fn swp8AcqRelNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_swp4_acq_rel(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38208020 + 0xc0000000 + 0xc00000
+        \\        .inst 0x38208020 + 0x80000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
-        \\        mov    x16, x0
+        \\        mov    w16, w0
         \\0:
-        \\        ldaxr   x0, [x1]
-        \\        stlxr   w17, x16, [x1]
+        \\        ldaxr   w0, [x1]
+        \\        stlxr   w17, w16, [x1]
         \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
+        : [ret] "={w0}" (-> u32),
+        : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_swp8_acq_rel = if (is_darwin)
-    if (has_lse)
-        swp8AcqRelDarwinLse
-    else
-        swp8AcqRelDarwinNolse
-else if (has_lse)
-    swp8AcqRelNondarwinLse
-else
-    swp8AcqRelNondarwinNolse;
-fn ldadd8AcqRelDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_ldadd4_acq_rel(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldaddal x0, x0, [x1]
+        \\        .inst 0x38200020 + 0x0000 + 0x80000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
-        \\        mov    x16, x0
+        \\        mov    w16, w0
         \\0:
-        \\        ldaxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
+        \\        ldaxr   w0, [x1]
+        \\        add     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
+        : [ret] "={w0}" (-> u32),
+        : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldadd8AcqRelDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_ldclr4_acq_rel(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0xc00000
+        \\        .inst 0x38200020 + 0x1000 + 0x80000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
-        \\        mov    x16, x0
+        \\        mov    w16, w0
         \\0:
-        \\        ldaxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
+        \\        ldaxr   w0, [x1]
+        \\        bic     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
+        : [ret] "={w0}" (-> u32),
+        : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldadd8AcqRelNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_ldeor4_acq_rel(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldaddal x0, x0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0x80000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
-        \\        mov    x16, x0
+        \\        mov    w16, w0
         \\0:
-        \\        ldaxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
+        \\        ldaxr   w0, [x1]
+        \\        eor     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
+        : [ret] "={w0}" (-> u32),
+        : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldadd8AcqRelNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_ldset4_acq_rel(val: u32, ptr: *u32) callconv(.C) u32 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0xc00000
+        \\        .inst 0x38200020 + 0x3000 + 0x80000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
-        \\        mov    x16, x0
+        \\        mov    w16, w0
         \\0:
-        \\        ldaxr   x0, [x1]
-        \\        add     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
+        \\        ldaxr   w0, [x1]
+        \\        orr     w17, w0, w16
+        \\        stlxr   w15, w17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
+        : [ret] "={w0}" (-> u32),
+        : [val] "{w0}" (val),
           [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldadd8_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldadd8AcqRelDarwinLse
-    else
-        ldadd8AcqRelDarwinNolse
-else if (has_lse)
-    ldadd8AcqRelNondarwinLse
-else
-    ldadd8AcqRelNondarwinNolse;
-fn ldclr8AcqRelDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_cas8_relax(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldclral x0, x0, [x1]
+        \\        .inst 0x08a07c41 + 0xc0000000 + 0x000000
+        \\
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\0:
-        \\        ldaxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
+        \\        ldxr   x0, [x2]
+        \\        cmp    x0, x16
+        \\        bne    1f
+        \\        stxr   w17, x1, [x2]
+        \\        cbnz   w17, 0b
         \\1:
         : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
+        : [expected] "{x0}" (expected),
+          [desired] "{x1}" (desired),
+          [ptr] "{x2}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldclr8AcqRelDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_swp8_relax(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0xc00000
+        \\        .inst 0x38208020 + 0xc0000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\0:
-        \\        ldaxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
+        \\        ldxr   x0, [x1]
+        \\        stxr   w17, x16, [x1]
+        \\        cbnz   w17, 0b
         \\1:
         : [ret] "={x0}" (-> u64),
         : [val] "{x0}" (val),
@@ -9578,20 +1590,18 @@ fn ldclr8AcqRelDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldclr8AcqRelNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_ldadd8_relax(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldclral x0, x0, [x1]
+        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\0:
-        \\        ldaxr   x0, [x1]
-        \\        bic     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
+        \\        ldxr   x0, [x1]
+        \\        add     x17, x0, x16
+        \\        stxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={x0}" (-> u64),
@@ -9601,20 +1611,18 @@ fn ldclr8AcqRelNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldclr8AcqRelNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_ldclr8_relax(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0xc00000
+        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\0:
-        \\        ldaxr   x0, [x1]
+        \\        ldxr   x0, [x1]
         \\        bic     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
+        \\        stxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={x0}" (-> u64),
@@ -9624,29 +1632,18 @@ fn ldclr8AcqRelNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldclr8_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldclr8AcqRelDarwinLse
-    else
-        ldclr8AcqRelDarwinNolse
-else if (has_lse)
-    ldclr8AcqRelNondarwinLse
-else
-    ldclr8AcqRelNondarwinNolse;
-fn ldeor8AcqRelDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_ldeor8_relax(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldeoral x0, x0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\0:
-        \\        ldaxr   x0, [x1]
+        \\        ldxr   x0, [x1]
         \\        eor     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
+        \\        stxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={x0}" (-> u64),
@@ -9656,20 +1653,18 @@ fn ldeor8AcqRelDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldeor8AcqRelDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_ldset8_relax(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0xc00000
+        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x000000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\0:
-        \\        ldaxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
+        \\        ldxr   x0, [x1]
+        \\        orr     x17, x0, x16
+        \\        stxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={x0}" (-> u64),
@@ -9679,44 +1674,42 @@ fn ldeor8AcqRelDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldeor8AcqRelNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_cas8_acq(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldeoral x0, x0, [x1]
+        \\        .inst 0x08a07c41 + 0xc0000000 + 0x400000
+        \\
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\0:
-        \\        ldaxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
+        \\        ldaxr   x0, [x2]
+        \\        cmp    x0, x16
+        \\        bne    1f
+        \\        stxr   w17, x1, [x2]
+        \\        cbnz   w17, 0b
         \\1:
         : [ret] "={x0}" (-> u64),
-        : [val] "{x0}" (val),
-          [ptr] "{x1}" (ptr),
+        : [expected] "{x0}" (expected),
+          [desired] "{x1}" (desired),
+          [ptr] "{x2}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldeor8AcqRelNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_swp8_acq(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0xc00000
+        \\        .inst 0x38208020 + 0xc0000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\0:
         \\        ldaxr   x0, [x1]
-        \\        eor     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
-        \\        cbnz   w15, 0b
+        \\        stxr   w17, x16, [x1]
+        \\        cbnz   w17, 0b
         \\1:
         : [ret] "={x0}" (-> u64),
         : [val] "{x0}" (val),
@@ -9725,29 +1718,18 @@ fn ldeor8AcqRelNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldeor8_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldeor8AcqRelDarwinLse
-    else
-        ldeor8AcqRelDarwinNolse
-else if (has_lse)
-    ldeor8AcqRelNondarwinLse
-else
-    ldeor8AcqRelNondarwinNolse;
-fn ldset8AcqRelDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_ldadd8_acq(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldsetal x0, x0, [x1]
+        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\0:
         \\        ldaxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
+        \\        add     x17, x0, x16
+        \\        stxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={x0}" (-> u64),
@@ -9757,20 +1739,18 @@ fn ldset8AcqRelDarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldset8AcqRelDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_ldclr8_acq(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0xc00000
+        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\0:
         \\        ldaxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
+        \\        bic     x17, x0, x16
+        \\        stxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={x0}" (-> u64),
@@ -9780,20 +1760,18 @@ fn ldset8AcqRelDarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldset8AcqRelNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_ldeor8_acq(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        ldsetal x0, x0, [x1]
+        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\0:
         \\        ldaxr   x0, [x1]
-        \\        orr     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
+        \\        eor     x17, x0, x16
+        \\        stxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={x0}" (-> u64),
@@ -9803,20 +1781,18 @@ fn ldset8AcqRelNondarwinLse(val: u64, ptr: *u64) callconv(.C) u64 {
         : "w15", "w16", "w17", "memory"
     );
 }
-fn ldset8AcqRelNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
+fn __aarch64_ldset8_acq(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0xc00000
+        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x800000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\0:
         \\        ldaxr   x0, [x1]
         \\        orr     x17, x0, x16
-        \\        stlxr   w15, x17, [x1]
+        \\        stxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={x0}" (-> u64),
@@ -9826,36 +1802,23 @@ fn ldset8AcqRelNondarwinNolse(val: u64, ptr: *u64) callconv(.C) u64 {
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_ldset8_acq_rel = if (is_darwin)
-    if (has_lse)
-        ldset8AcqRelDarwinLse
-    else
-        ldset8AcqRelDarwinNolse
-else if (has_lse)
-    ldset8AcqRelNondarwinLse
-else
-    ldset8AcqRelNondarwinNolse;
-fn cas16RelaxDarwinLse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_cas8_rel(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        casp  x0, x1, x2, x3, [x4]
+        \\        .inst 0x08a07c41 + 0xc0000000 + 0x008000
         \\
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
-        \\        mov    x17, x1
         \\0:
-        \\        ldxp   x0, x1, [x4]
+        \\        ldxr   x0, [x2]
         \\        cmp    x0, x16
-        \\        ccmp   x1, x17, #0, eq
         \\        bne    1f
-        \\        stxp   w15, x2, x3, [x4]
-        \\        cbnz   w15, 0b
+        \\        stlxr   w17, x1, [x2]
+        \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={x0}" (-> u128),
+        : [ret] "={x0}" (-> u64),
         : [expected] "{x0}" (expected),
           [desired] "{x1}" (desired),
           [ptr] "{x2}" (ptr),
@@ -9863,176 +1826,127 @@ fn cas16RelaxDarwinLse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas16RelaxDarwinNolse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_swp8_rel(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x48207c82 + 0x000000
-        \\
+        \\        .inst 0x38208020 + 0xc0000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
-        \\        mov    x17, x1
         \\0:
-        \\        ldxp   x0, x1, [x4]
-        \\        cmp    x0, x16
-        \\        ccmp   x1, x17, #0, eq
-        \\        bne    1f
-        \\        stxp   w15, x2, x3, [x4]
-        \\        cbnz   w15, 0b
+        \\        ldxr   x0, [x1]
+        \\        stlxr   w17, x16, [x1]
+        \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={x0}" (-> u128),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={x0}" (-> u64),
+        : [val] "{x0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas16RelaxNondarwinLse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_ldadd8_rel(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        casp  x0, x1, x2, x3, [x4]
-        \\
+        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
-        \\        mov    x17, x1
         \\0:
-        \\        ldxp   x0, x1, [x4]
-        \\        cmp    x0, x16
-        \\        ccmp   x1, x17, #0, eq
-        \\        bne    1f
-        \\        stxp   w15, x2, x3, [x4]
+        \\        ldxr   x0, [x1]
+        \\        add     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={x0}" (-> u128),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={x0}" (-> u64),
+        : [val] "{x0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas16RelaxNondarwinNolse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_ldclr8_rel(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x48207c82 + 0x000000
-        \\
+        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
-        \\        mov    x17, x1
         \\0:
-        \\        ldxp   x0, x1, [x4]
-        \\        cmp    x0, x16
-        \\        ccmp   x1, x17, #0, eq
-        \\        bne    1f
-        \\        stxp   w15, x2, x3, [x4]
+        \\        ldxr   x0, [x1]
+        \\        bic     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={x0}" (-> u128),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={x0}" (-> u64),
+        : [val] "{x0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_cas16_relax = if (is_darwin)
-    if (has_lse)
-        cas16RelaxDarwinLse
-    else
-        cas16RelaxDarwinNolse
-else if (has_lse)
-    cas16RelaxNondarwinLse
-else
-    cas16RelaxNondarwinNolse;
-fn cas16AcqDarwinLse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_ldeor8_rel(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        caspa  x0, x1, x2, x3, [x4]
-        \\
+        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
-        \\        mov    x17, x1
         \\0:
-        \\        ldaxp   x0, x1, [x4]
-        \\        cmp    x0, x16
-        \\        ccmp   x1, x17, #0, eq
-        \\        bne    1f
-        \\        stxp   w15, x2, x3, [x4]
+        \\        ldxr   x0, [x1]
+        \\        eor     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={x0}" (-> u128),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={x0}" (-> u64),
+        : [val] "{x0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas16AcqDarwinNolse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_ldset8_rel(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x48207c82 + 0x400000
-        \\
+        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0x400000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
-        \\        mov    x17, x1
         \\0:
-        \\        ldaxp   x0, x1, [x4]
-        \\        cmp    x0, x16
-        \\        ccmp   x1, x17, #0, eq
-        \\        bne    1f
-        \\        stxp   w15, x2, x3, [x4]
+        \\        ldxr   x0, [x1]
+        \\        orr     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={x0}" (-> u128),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={x0}" (-> u64),
+        : [val] "{x0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas16AcqNondarwinLse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_cas8_acq_rel(expected: u64, desired: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        caspa  x0, x1, x2, x3, [x4]
+        \\        .inst 0x08a07c41 + 0xc0000000 + 0x408000
         \\
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
-        \\        mov    x17, x1
         \\0:
-        \\        ldaxp   x0, x1, [x4]
+        \\        ldaxr   x0, [x2]
         \\        cmp    x0, x16
-        \\        ccmp   x1, x17, #0, eq
         \\        bne    1f
-        \\        stxp   w15, x2, x3, [x4]
-        \\        cbnz   w15, 0b
+        \\        stlxr   w17, x1, [x2]
+        \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={x0}" (-> u128),
+        : [ret] "={x0}" (-> u64),
         : [expected] "{x0}" (expected),
           [desired] "{x1}" (desired),
           [ptr] "{x2}" (ptr),
@@ -10040,182 +1954,126 @@ fn cas16AcqNondarwinLse(expected: u128, desired: u128, ptr: *u128) callconv(.C)
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas16AcqNondarwinNolse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_swp8_acq_rel(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x48207c82 + 0x400000
-        \\
+        \\        .inst 0x38208020 + 0xc0000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
-        \\        mov    x17, x1
         \\0:
-        \\        ldaxp   x0, x1, [x4]
-        \\        cmp    x0, x16
-        \\        ccmp   x1, x17, #0, eq
-        \\        bne    1f
-        \\        stxp   w15, x2, x3, [x4]
-        \\        cbnz   w15, 0b
+        \\        ldaxr   x0, [x1]
+        \\        stlxr   w17, x16, [x1]
+        \\        cbnz   w17, 0b
         \\1:
-        : [ret] "={x0}" (-> u128),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={x0}" (-> u64),
+        : [val] "{x0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_cas16_acq = if (is_darwin)
-    if (has_lse)
-        cas16AcqDarwinLse
-    else
-        cas16AcqDarwinNolse
-else if (has_lse)
-    cas16AcqNondarwinLse
-else
-    cas16AcqNondarwinNolse;
-fn cas16RelDarwinLse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_ldadd8_acq_rel(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        caspl  x0, x1, x2, x3, [x4]
-        \\
+        \\        .inst 0x38200020 + 0x0000 + 0xc0000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
-        \\        mov    x17, x1
         \\0:
-        \\        ldxp   x0, x1, [x4]
-        \\        cmp    x0, x16
-        \\        ccmp   x1, x17, #0, eq
-        \\        bne    1f
-        \\        stlxp   w15, x2, x3, [x4]
+        \\        ldaxr   x0, [x1]
+        \\        add     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={x0}" (-> u128),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={x0}" (-> u64),
+        : [val] "{x0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas16RelDarwinNolse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_ldclr8_acq_rel(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x48207c82 + 0x008000
-        \\
+        \\        .inst 0x38200020 + 0x1000 + 0xc0000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
-        \\        mov    x17, x1
         \\0:
-        \\        ldxp   x0, x1, [x4]
-        \\        cmp    x0, x16
-        \\        ccmp   x1, x17, #0, eq
-        \\        bne    1f
-        \\        stlxp   w15, x2, x3, [x4]
+        \\        ldaxr   x0, [x1]
+        \\        bic     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={x0}" (-> u128),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={x0}" (-> u64),
+        : [val] "{x0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas16RelNondarwinLse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_ldeor8_acq_rel(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        caspl  x0, x1, x2, x3, [x4]
-        \\
+        \\        .inst 0x38200020 + 0x2000 + 0xc0000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
-        \\        mov    x17, x1
         \\0:
-        \\        ldxp   x0, x1, [x4]
-        \\        cmp    x0, x16
-        \\        ccmp   x1, x17, #0, eq
-        \\        bne    1f
-        \\        stlxp   w15, x2, x3, [x4]
+        \\        ldaxr   x0, [x1]
+        \\        eor     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={x0}" (-> u128),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={x0}" (-> u64),
+        : [val] "{x0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas16RelNondarwinNolse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_ldset8_acq_rel(val: u64, ptr: *u64) callconv(.C) u64 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x48207c82 + 0x008000
-        \\
+        \\        .inst 0x38200020 + 0x3000 + 0xc0000000 + 0xc00000
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
-        \\        mov    x17, x1
         \\0:
-        \\        ldxp   x0, x1, [x4]
-        \\        cmp    x0, x16
-        \\        ccmp   x1, x17, #0, eq
-        \\        bne    1f
-        \\        stlxp   w15, x2, x3, [x4]
+        \\        ldaxr   x0, [x1]
+        \\        orr     x17, x0, x16
+        \\        stlxr   w15, x17, [x1]
         \\        cbnz   w15, 0b
         \\1:
-        : [ret] "={x0}" (-> u128),
-        : [expected] "{x0}" (expected),
-          [desired] "{x1}" (desired),
-          [ptr] "{x2}" (ptr),
+        : [ret] "={x0}" (-> u64),
+        : [val] "{x0}" (val),
+          [ptr] "{x1}" (ptr),
           [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_cas16_rel = if (is_darwin)
-    if (has_lse)
-        cas16RelDarwinLse
-    else
-        cas16RelDarwinNolse
-else if (has_lse)
-    cas16RelNondarwinLse
-else
-    cas16RelNondarwinNolse;
-fn cas16AcqRelDarwinLse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_cas16_relax(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        caspal  x0, x1, x2, x3, [x4]
+        \\        .inst 0x48207c82 + 0x000000
         \\
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\        mov    x17, x1
         \\0:
-        \\        ldaxp   x0, x1, [x4]
+        \\        ldxp   x0, x1, [x4]
         \\        cmp    x0, x16
         \\        ccmp   x1, x17, #0, eq
         \\        bne    1f
-        \\        stlxp   w15, x2, x3, [x4]
+        \\        stxp   w15, x2, x3, [x4]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={x0}" (-> u128),
@@ -10226,13 +2084,11 @@ fn cas16AcqRelDarwinLse(expected: u128, desired: u128, ptr: *u128) callconv(.C)
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas16AcqRelDarwinNolse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_cas16_acq(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        .inst 0x48207c82 + 0x408000
+        \\        .inst 0x48207c82 + 0x400000
         \\
         \\        cbz     wzr, 1f
         \\8:
@@ -10243,7 +2099,7 @@ fn cas16AcqRelDarwinNolse(expected: u128, desired: u128, ptr: *u128) callconv(.C
         \\        cmp    x0, x16
         \\        ccmp   x1, x17, #0, eq
         \\        bne    1f
-        \\        stlxp   w15, x2, x3, [x4]
+        \\        stxp   w15, x2, x3, [x4]
         \\        cbnz   w15, 0b
         \\1:
         : [ret] "={x0}" (-> u128),
@@ -10254,20 +2110,18 @@ fn cas16AcqRelDarwinNolse(expected: u128, desired: u128, ptr: *u128) callconv(.C
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas16AcqRelNondarwinLse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_cas16_rel(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
-        \\        caspal  x0, x1, x2, x3, [x4]
+        \\        .inst 0x48207c82 + 0x008000
         \\
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    x16, x0
         \\        mov    x17, x1
         \\0:
-        \\        ldaxp   x0, x1, [x4]
+        \\        ldxp   x0, x1, [x4]
         \\        cmp    x0, x16
         \\        ccmp   x1, x17, #0, eq
         \\        bne    1f
@@ -10282,10 +2136,8 @@ fn cas16AcqRelNondarwinLse(expected: u128, desired: u128, ptr: *u128) callconv(.
         : "w15", "w16", "w17", "memory"
     );
 }
-fn cas16AcqRelNondarwinNolse(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
+fn __aarch64_cas16_acq_rel(expected: u128, desired: u128, ptr: *u128) callconv(.C) u128 {
     @setRuntimeSafety(false);
-    __init_aarch64_have_lse_atomics();
-
     return asm volatile (
         \\        cbz     w16, 8f
         \\        .inst 0x48207c82 + 0x408000
@@ -10310,126 +2162,106 @@ fn cas16AcqRelNondarwinNolse(expected: u128, desired: u128, ptr: *u128) callconv
         : "w15", "w16", "w17", "memory"
     );
 }
-const __aarch64_cas16_acq_rel = if (is_darwin)
-    if (has_lse)
-        cas16AcqRelDarwinLse
-    else
-        cas16AcqRelDarwinNolse
-else if (has_lse)
-    cas16AcqRelNondarwinLse
-else
-    cas16AcqRelNondarwinNolse;
-//TODO: Add linksection once implemented and remove init at writeFunction
-fn __init_aarch64_have_lse_atomics() callconv(.C) void {
-    const AT_HWCAP = 16;
-    const HWCAP_ATOMICS = 1 << 8;
-    const hwcap = std.os.linux.getauxval(AT_HWCAP);
-    __aarch64_have_lse_atomics = @boolToInt((hwcap & HWCAP_ATOMICS) != 0);
-}
-
-var __aarch64_have_lse_atomics: u8 = @boolToInt(has_lse);
 
 comptime {
-    if (arch.isAARCH64()) {
-        @export(__aarch64_cas1_relax, .{ .name = "__aarch64_cas1_relax", .linkage = linkage });
-        @export(__aarch64_cas1_acq, .{ .name = "__aarch64_cas1_acq", .linkage = linkage });
-        @export(__aarch64_cas1_rel, .{ .name = "__aarch64_cas1_rel", .linkage = linkage });
-        @export(__aarch64_cas1_acq_rel, .{ .name = "__aarch64_cas1_acq_rel", .linkage = linkage });
-        @export(__aarch64_cas2_relax, .{ .name = "__aarch64_cas2_relax", .linkage = linkage });
-        @export(__aarch64_cas2_acq, .{ .name = "__aarch64_cas2_acq", .linkage = linkage });
-        @export(__aarch64_cas2_rel, .{ .name = "__aarch64_cas2_rel", .linkage = linkage });
-        @export(__aarch64_cas2_acq_rel, .{ .name = "__aarch64_cas2_acq_rel", .linkage = linkage });
-        @export(__aarch64_cas4_relax, .{ .name = "__aarch64_cas4_relax", .linkage = linkage });
-        @export(__aarch64_cas4_acq, .{ .name = "__aarch64_cas4_acq", .linkage = linkage });
-        @export(__aarch64_cas4_rel, .{ .name = "__aarch64_cas4_rel", .linkage = linkage });
-        @export(__aarch64_cas4_acq_rel, .{ .name = "__aarch64_cas4_acq_rel", .linkage = linkage });
-        @export(__aarch64_cas8_relax, .{ .name = "__aarch64_cas8_relax", .linkage = linkage });
-        @export(__aarch64_cas8_acq, .{ .name = "__aarch64_cas8_acq", .linkage = linkage });
-        @export(__aarch64_cas8_rel, .{ .name = "__aarch64_cas8_rel", .linkage = linkage });
-        @export(__aarch64_cas8_acq_rel, .{ .name = "__aarch64_cas8_acq_rel", .linkage = linkage });
-        @export(__aarch64_cas16_relax, .{ .name = "__aarch64_cas16_relax", .linkage = linkage });
-        @export(__aarch64_cas16_acq, .{ .name = "__aarch64_cas16_acq", .linkage = linkage });
-        @export(__aarch64_cas16_rel, .{ .name = "__aarch64_cas16_rel", .linkage = linkage });
-        @export(__aarch64_cas16_acq_rel, .{ .name = "__aarch64_cas16_acq_rel", .linkage = linkage });
-        @export(__aarch64_swp1_relax, .{ .name = "__aarch64_swp1_relax", .linkage = linkage });
-        @export(__aarch64_swp1_acq, .{ .name = "__aarch64_swp1_acq", .linkage = linkage });
-        @export(__aarch64_swp1_rel, .{ .name = "__aarch64_swp1_rel", .linkage = linkage });
-        @export(__aarch64_swp1_acq_rel, .{ .name = "__aarch64_swp1_acq_rel", .linkage = linkage });
-        @export(__aarch64_swp2_relax, .{ .name = "__aarch64_swp2_relax", .linkage = linkage });
-        @export(__aarch64_swp2_acq, .{ .name = "__aarch64_swp2_acq", .linkage = linkage });
-        @export(__aarch64_swp2_rel, .{ .name = "__aarch64_swp2_rel", .linkage = linkage });
-        @export(__aarch64_swp2_acq_rel, .{ .name = "__aarch64_swp2_acq_rel", .linkage = linkage });
-        @export(__aarch64_swp4_relax, .{ .name = "__aarch64_swp4_relax", .linkage = linkage });
-        @export(__aarch64_swp4_acq, .{ .name = "__aarch64_swp4_acq", .linkage = linkage });
-        @export(__aarch64_swp4_rel, .{ .name = "__aarch64_swp4_rel", .linkage = linkage });
-        @export(__aarch64_swp4_acq_rel, .{ .name = "__aarch64_swp4_acq_rel", .linkage = linkage });
-        @export(__aarch64_swp8_relax, .{ .name = "__aarch64_swp8_relax", .linkage = linkage });
-        @export(__aarch64_swp8_acq, .{ .name = "__aarch64_swp8_acq", .linkage = linkage });
-        @export(__aarch64_swp8_rel, .{ .name = "__aarch64_swp8_rel", .linkage = linkage });
-        @export(__aarch64_swp8_acq_rel, .{ .name = "__aarch64_swp8_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldadd1_relax, .{ .name = "__aarch64_ldadd1_relax", .linkage = linkage });
-        @export(__aarch64_ldadd1_acq, .{ .name = "__aarch64_ldadd1_acq", .linkage = linkage });
-        @export(__aarch64_ldadd1_rel, .{ .name = "__aarch64_ldadd1_rel", .linkage = linkage });
-        @export(__aarch64_ldadd1_acq_rel, .{ .name = "__aarch64_ldadd1_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldadd2_relax, .{ .name = "__aarch64_ldadd2_relax", .linkage = linkage });
-        @export(__aarch64_ldadd2_acq, .{ .name = "__aarch64_ldadd2_acq", .linkage = linkage });
-        @export(__aarch64_ldadd2_rel, .{ .name = "__aarch64_ldadd2_rel", .linkage = linkage });
-        @export(__aarch64_ldadd2_acq_rel, .{ .name = "__aarch64_ldadd2_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldadd4_relax, .{ .name = "__aarch64_ldadd4_relax", .linkage = linkage });
-        @export(__aarch64_ldadd4_acq, .{ .name = "__aarch64_ldadd4_acq", .linkage = linkage });
-        @export(__aarch64_ldadd4_rel, .{ .name = "__aarch64_ldadd4_rel", .linkage = linkage });
-        @export(__aarch64_ldadd4_acq_rel, .{ .name = "__aarch64_ldadd4_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldadd8_relax, .{ .name = "__aarch64_ldadd8_relax", .linkage = linkage });
-        @export(__aarch64_ldadd8_acq, .{ .name = "__aarch64_ldadd8_acq", .linkage = linkage });
-        @export(__aarch64_ldadd8_rel, .{ .name = "__aarch64_ldadd8_rel", .linkage = linkage });
-        @export(__aarch64_ldadd8_acq_rel, .{ .name = "__aarch64_ldadd8_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldclr1_relax, .{ .name = "__aarch64_ldclr1_relax", .linkage = linkage });
-        @export(__aarch64_ldclr1_acq, .{ .name = "__aarch64_ldclr1_acq", .linkage = linkage });
-        @export(__aarch64_ldclr1_rel, .{ .name = "__aarch64_ldclr1_rel", .linkage = linkage });
-        @export(__aarch64_ldclr1_acq_rel, .{ .name = "__aarch64_ldclr1_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldclr2_relax, .{ .name = "__aarch64_ldclr2_relax", .linkage = linkage });
-        @export(__aarch64_ldclr2_acq, .{ .name = "__aarch64_ldclr2_acq", .linkage = linkage });
-        @export(__aarch64_ldclr2_rel, .{ .name = "__aarch64_ldclr2_rel", .linkage = linkage });
-        @export(__aarch64_ldclr2_acq_rel, .{ .name = "__aarch64_ldclr2_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldclr4_relax, .{ .name = "__aarch64_ldclr4_relax", .linkage = linkage });
-        @export(__aarch64_ldclr4_acq, .{ .name = "__aarch64_ldclr4_acq", .linkage = linkage });
-        @export(__aarch64_ldclr4_rel, .{ .name = "__aarch64_ldclr4_rel", .linkage = linkage });
-        @export(__aarch64_ldclr4_acq_rel, .{ .name = "__aarch64_ldclr4_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldclr8_relax, .{ .name = "__aarch64_ldclr8_relax", .linkage = linkage });
-        @export(__aarch64_ldclr8_acq, .{ .name = "__aarch64_ldclr8_acq", .linkage = linkage });
-        @export(__aarch64_ldclr8_rel, .{ .name = "__aarch64_ldclr8_rel", .linkage = linkage });
-        @export(__aarch64_ldclr8_acq_rel, .{ .name = "__aarch64_ldclr8_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldeor1_relax, .{ .name = "__aarch64_ldeor1_relax", .linkage = linkage });
-        @export(__aarch64_ldeor1_acq, .{ .name = "__aarch64_ldeor1_acq", .linkage = linkage });
-        @export(__aarch64_ldeor1_rel, .{ .name = "__aarch64_ldeor1_rel", .linkage = linkage });
-        @export(__aarch64_ldeor1_acq_rel, .{ .name = "__aarch64_ldeor1_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldeor2_relax, .{ .name = "__aarch64_ldeor2_relax", .linkage = linkage });
-        @export(__aarch64_ldeor2_acq, .{ .name = "__aarch64_ldeor2_acq", .linkage = linkage });
-        @export(__aarch64_ldeor2_rel, .{ .name = "__aarch64_ldeor2_rel", .linkage = linkage });
-        @export(__aarch64_ldeor2_acq_rel, .{ .name = "__aarch64_ldeor2_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldeor4_relax, .{ .name = "__aarch64_ldeor4_relax", .linkage = linkage });
-        @export(__aarch64_ldeor4_acq, .{ .name = "__aarch64_ldeor4_acq", .linkage = linkage });
-        @export(__aarch64_ldeor4_rel, .{ .name = "__aarch64_ldeor4_rel", .linkage = linkage });
-        @export(__aarch64_ldeor4_acq_rel, .{ .name = "__aarch64_ldeor4_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldeor8_relax, .{ .name = "__aarch64_ldeor8_relax", .linkage = linkage });
-        @export(__aarch64_ldeor8_acq, .{ .name = "__aarch64_ldeor8_acq", .linkage = linkage });
-        @export(__aarch64_ldeor8_rel, .{ .name = "__aarch64_ldeor8_rel", .linkage = linkage });
-        @export(__aarch64_ldeor8_acq_rel, .{ .name = "__aarch64_ldeor8_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldset1_relax, .{ .name = "__aarch64_ldset1_relax", .linkage = linkage });
-        @export(__aarch64_ldset1_acq, .{ .name = "__aarch64_ldset1_acq", .linkage = linkage });
-        @export(__aarch64_ldset1_rel, .{ .name = "__aarch64_ldset1_rel", .linkage = linkage });
-        @export(__aarch64_ldset1_acq_rel, .{ .name = "__aarch64_ldset1_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldset2_relax, .{ .name = "__aarch64_ldset2_relax", .linkage = linkage });
-        @export(__aarch64_ldset2_acq, .{ .name = "__aarch64_ldset2_acq", .linkage = linkage });
-        @export(__aarch64_ldset2_rel, .{ .name = "__aarch64_ldset2_rel", .linkage = linkage });
-        @export(__aarch64_ldset2_acq_rel, .{ .name = "__aarch64_ldset2_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldset4_relax, .{ .name = "__aarch64_ldset4_relax", .linkage = linkage });
-        @export(__aarch64_ldset4_acq, .{ .name = "__aarch64_ldset4_acq", .linkage = linkage });
-        @export(__aarch64_ldset4_rel, .{ .name = "__aarch64_ldset4_rel", .linkage = linkage });
-        @export(__aarch64_ldset4_acq_rel, .{ .name = "__aarch64_ldset4_acq_rel", .linkage = linkage });
-        @export(__aarch64_ldset8_relax, .{ .name = "__aarch64_ldset8_relax", .linkage = linkage });
-        @export(__aarch64_ldset8_acq, .{ .name = "__aarch64_ldset8_acq", .linkage = linkage });
-        @export(__aarch64_ldset8_rel, .{ .name = "__aarch64_ldset8_rel", .linkage = linkage });
-        @export(__aarch64_ldset8_acq_rel, .{ .name = "__aarch64_ldset8_acq_rel", .linkage = linkage });
-    }
+    @export(__aarch64_cas1_relax, .{ .name = "__aarch64_cas1_relax", .linkage = linkage });
+    @export(__aarch64_swp1_relax, .{ .name = "__aarch64_swp1_relax", .linkage = linkage });
+    @export(__aarch64_ldadd1_relax, .{ .name = "__aarch64_ldadd1_relax", .linkage = linkage });
+    @export(__aarch64_ldclr1_relax, .{ .name = "__aarch64_ldclr1_relax", .linkage = linkage });
+    @export(__aarch64_ldeor1_relax, .{ .name = "__aarch64_ldeor1_relax", .linkage = linkage });
+    @export(__aarch64_ldset1_relax, .{ .name = "__aarch64_ldset1_relax", .linkage = linkage });
+    @export(__aarch64_cas1_acq, .{ .name = "__aarch64_cas1_acq", .linkage = linkage });
+    @export(__aarch64_swp1_acq, .{ .name = "__aarch64_swp1_acq", .linkage = linkage });
+    @export(__aarch64_ldadd1_acq, .{ .name = "__aarch64_ldadd1_acq", .linkage = linkage });
+    @export(__aarch64_ldclr1_acq, .{ .name = "__aarch64_ldclr1_acq", .linkage = linkage });
+    @export(__aarch64_ldeor1_acq, .{ .name = "__aarch64_ldeor1_acq", .linkage = linkage });
+    @export(__aarch64_ldset1_acq, .{ .name = "__aarch64_ldset1_acq", .linkage = linkage });
+    @export(__aarch64_cas1_rel, .{ .name = "__aarch64_cas1_rel", .linkage = linkage });
+    @export(__aarch64_swp1_rel, .{ .name = "__aarch64_swp1_rel", .linkage = linkage });
+    @export(__aarch64_ldadd1_rel, .{ .name = "__aarch64_ldadd1_rel", .linkage = linkage });
+    @export(__aarch64_ldclr1_rel, .{ .name = "__aarch64_ldclr1_rel", .linkage = linkage });
+    @export(__aarch64_ldeor1_rel, .{ .name = "__aarch64_ldeor1_rel", .linkage = linkage });
+    @export(__aarch64_ldset1_rel, .{ .name = "__aarch64_ldset1_rel", .linkage = linkage });
+    @export(__aarch64_cas1_acq_rel, .{ .name = "__aarch64_cas1_acq_rel", .linkage = linkage });
+    @export(__aarch64_swp1_acq_rel, .{ .name = "__aarch64_swp1_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldadd1_acq_rel, .{ .name = "__aarch64_ldadd1_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldclr1_acq_rel, .{ .name = "__aarch64_ldclr1_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldeor1_acq_rel, .{ .name = "__aarch64_ldeor1_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldset1_acq_rel, .{ .name = "__aarch64_ldset1_acq_rel", .linkage = linkage });
+    @export(__aarch64_cas2_relax, .{ .name = "__aarch64_cas2_relax", .linkage = linkage });
+    @export(__aarch64_swp2_relax, .{ .name = "__aarch64_swp2_relax", .linkage = linkage });
+    @export(__aarch64_ldadd2_relax, .{ .name = "__aarch64_ldadd2_relax", .linkage = linkage });
+    @export(__aarch64_ldclr2_relax, .{ .name = "__aarch64_ldclr2_relax", .linkage = linkage });
+    @export(__aarch64_ldeor2_relax, .{ .name = "__aarch64_ldeor2_relax", .linkage = linkage });
+    @export(__aarch64_ldset2_relax, .{ .name = "__aarch64_ldset2_relax", .linkage = linkage });
+    @export(__aarch64_cas2_acq, .{ .name = "__aarch64_cas2_acq", .linkage = linkage });
+    @export(__aarch64_swp2_acq, .{ .name = "__aarch64_swp2_acq", .linkage = linkage });
+    @export(__aarch64_ldadd2_acq, .{ .name = "__aarch64_ldadd2_acq", .linkage = linkage });
+    @export(__aarch64_ldclr2_acq, .{ .name = "__aarch64_ldclr2_acq", .linkage = linkage });
+    @export(__aarch64_ldeor2_acq, .{ .name = "__aarch64_ldeor2_acq", .linkage = linkage });
+    @export(__aarch64_ldset2_acq, .{ .name = "__aarch64_ldset2_acq", .linkage = linkage });
+    @export(__aarch64_cas2_rel, .{ .name = "__aarch64_cas2_rel", .linkage = linkage });
+    @export(__aarch64_swp2_rel, .{ .name = "__aarch64_swp2_rel", .linkage = linkage });
+    @export(__aarch64_ldadd2_rel, .{ .name = "__aarch64_ldadd2_rel", .linkage = linkage });
+    @export(__aarch64_ldclr2_rel, .{ .name = "__aarch64_ldclr2_rel", .linkage = linkage });
+    @export(__aarch64_ldeor2_rel, .{ .name = "__aarch64_ldeor2_rel", .linkage = linkage });
+    @export(__aarch64_ldset2_rel, .{ .name = "__aarch64_ldset2_rel", .linkage = linkage });
+    @export(__aarch64_cas2_acq_rel, .{ .name = "__aarch64_cas2_acq_rel", .linkage = linkage });
+    @export(__aarch64_swp2_acq_rel, .{ .name = "__aarch64_swp2_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldadd2_acq_rel, .{ .name = "__aarch64_ldadd2_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldclr2_acq_rel, .{ .name = "__aarch64_ldclr2_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldeor2_acq_rel, .{ .name = "__aarch64_ldeor2_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldset2_acq_rel, .{ .name = "__aarch64_ldset2_acq_rel", .linkage = linkage });
+    @export(__aarch64_cas4_relax, .{ .name = "__aarch64_cas4_relax", .linkage = linkage });
+    @export(__aarch64_swp4_relax, .{ .name = "__aarch64_swp4_relax", .linkage = linkage });
+    @export(__aarch64_ldadd4_relax, .{ .name = "__aarch64_ldadd4_relax", .linkage = linkage });
+    @export(__aarch64_ldclr4_relax, .{ .name = "__aarch64_ldclr4_relax", .linkage = linkage });
+    @export(__aarch64_ldeor4_relax, .{ .name = "__aarch64_ldeor4_relax", .linkage = linkage });
+    @export(__aarch64_ldset4_relax, .{ .name = "__aarch64_ldset4_relax", .linkage = linkage });
+    @export(__aarch64_cas4_acq, .{ .name = "__aarch64_cas4_acq", .linkage = linkage });
+    @export(__aarch64_swp4_acq, .{ .name = "__aarch64_swp4_acq", .linkage = linkage });
+    @export(__aarch64_ldadd4_acq, .{ .name = "__aarch64_ldadd4_acq", .linkage = linkage });
+    @export(__aarch64_ldclr4_acq, .{ .name = "__aarch64_ldclr4_acq", .linkage = linkage });
+    @export(__aarch64_ldeor4_acq, .{ .name = "__aarch64_ldeor4_acq", .linkage = linkage });
+    @export(__aarch64_ldset4_acq, .{ .name = "__aarch64_ldset4_acq", .linkage = linkage });
+    @export(__aarch64_cas4_rel, .{ .name = "__aarch64_cas4_rel", .linkage = linkage });
+    @export(__aarch64_swp4_rel, .{ .name = "__aarch64_swp4_rel", .linkage = linkage });
+    @export(__aarch64_ldadd4_rel, .{ .name = "__aarch64_ldadd4_rel", .linkage = linkage });
+    @export(__aarch64_ldclr4_rel, .{ .name = "__aarch64_ldclr4_rel", .linkage = linkage });
+    @export(__aarch64_ldeor4_rel, .{ .name = "__aarch64_ldeor4_rel", .linkage = linkage });
+    @export(__aarch64_ldset4_rel, .{ .name = "__aarch64_ldset4_rel", .linkage = linkage });
+    @export(__aarch64_cas4_acq_rel, .{ .name = "__aarch64_cas4_acq_rel", .linkage = linkage });
+    @export(__aarch64_swp4_acq_rel, .{ .name = "__aarch64_swp4_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldadd4_acq_rel, .{ .name = "__aarch64_ldadd4_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldclr4_acq_rel, .{ .name = "__aarch64_ldclr4_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldeor4_acq_rel, .{ .name = "__aarch64_ldeor4_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldset4_acq_rel, .{ .name = "__aarch64_ldset4_acq_rel", .linkage = linkage });
+    @export(__aarch64_cas8_relax, .{ .name = "__aarch64_cas8_relax", .linkage = linkage });
+    @export(__aarch64_swp8_relax, .{ .name = "__aarch64_swp8_relax", .linkage = linkage });
+    @export(__aarch64_ldadd8_relax, .{ .name = "__aarch64_ldadd8_relax", .linkage = linkage });
+    @export(__aarch64_ldclr8_relax, .{ .name = "__aarch64_ldclr8_relax", .linkage = linkage });
+    @export(__aarch64_ldeor8_relax, .{ .name = "__aarch64_ldeor8_relax", .linkage = linkage });
+    @export(__aarch64_ldset8_relax, .{ .name = "__aarch64_ldset8_relax", .linkage = linkage });
+    @export(__aarch64_cas8_acq, .{ .name = "__aarch64_cas8_acq", .linkage = linkage });
+    @export(__aarch64_swp8_acq, .{ .name = "__aarch64_swp8_acq", .linkage = linkage });
+    @export(__aarch64_ldadd8_acq, .{ .name = "__aarch64_ldadd8_acq", .linkage = linkage });
+    @export(__aarch64_ldclr8_acq, .{ .name = "__aarch64_ldclr8_acq", .linkage = linkage });
+    @export(__aarch64_ldeor8_acq, .{ .name = "__aarch64_ldeor8_acq", .linkage = linkage });
+    @export(__aarch64_ldset8_acq, .{ .name = "__aarch64_ldset8_acq", .linkage = linkage });
+    @export(__aarch64_cas8_rel, .{ .name = "__aarch64_cas8_rel", .linkage = linkage });
+    @export(__aarch64_swp8_rel, .{ .name = "__aarch64_swp8_rel", .linkage = linkage });
+    @export(__aarch64_ldadd8_rel, .{ .name = "__aarch64_ldadd8_rel", .linkage = linkage });
+    @export(__aarch64_ldclr8_rel, .{ .name = "__aarch64_ldclr8_rel", .linkage = linkage });
+    @export(__aarch64_ldeor8_rel, .{ .name = "__aarch64_ldeor8_rel", .linkage = linkage });
+    @export(__aarch64_ldset8_rel, .{ .name = "__aarch64_ldset8_rel", .linkage = linkage });
+    @export(__aarch64_cas8_acq_rel, .{ .name = "__aarch64_cas8_acq_rel", .linkage = linkage });
+    @export(__aarch64_swp8_acq_rel, .{ .name = "__aarch64_swp8_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldadd8_acq_rel, .{ .name = "__aarch64_ldadd8_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldclr8_acq_rel, .{ .name = "__aarch64_ldclr8_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldeor8_acq_rel, .{ .name = "__aarch64_ldeor8_acq_rel", .linkage = linkage });
+    @export(__aarch64_ldset8_acq_rel, .{ .name = "__aarch64_ldset8_acq_rel", .linkage = linkage });
+    @export(__aarch64_cas16_relax, .{ .name = "__aarch64_cas16_relax", .linkage = linkage });
+    @export(__aarch64_cas16_acq, .{ .name = "__aarch64_cas16_acq", .linkage = linkage });
+    @export(__aarch64_cas16_rel, .{ .name = "__aarch64_cas16_rel", .linkage = linkage });
+    @export(__aarch64_cas16_acq_rel, .{ .name = "__aarch64_cas16_acq_rel", .linkage = linkage });
 }
lib/compiler_rt.zig
@@ -1,8 +1,14 @@
+const builtin = @import("builtin");
+
 pub const panic = @import("compiler_rt/common.zig").panic;
 
 comptime {
     _ = @import("compiler_rt/atomics.zig");
-    _ = @import("compiler_rt/lse_atomics.zig");
+
+    // macOS has these functions inside libSystem.
+    if (builtin.cpu.arch.isAARCH64() and !builtin.os.tag.isDarwin()) {
+        _ = @import("compiler_rt/lse_atomics.zig");
+    }
 
     _ = @import("compiler_rt/addf3.zig");
     _ = @import("compiler_rt/addhf3.zig");
tools/gen_outline_atomics.zig
@@ -11,217 +11,97 @@ const AtomicOp = enum {
 };
 
 pub fn main() !void {
-    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
-    defer std.debug.assert(!gpa.deinit());
-    var allocator = gpa.allocator();
+    var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena_instance.deinit();
+    const arena = arena_instance.allocator();
 
-    const args = try std.process.argsAlloc(allocator);
-    defer std.process.argsFree(allocator, args);
-    if (args.len != 2) {
-        usageAndExit(std.io.getStdErr(), args[0], 1);
-    }
+    //const args = try std.process.argsAlloc(arena);
 
-    var file = try std.fs.cwd().createFile(args[1], .{ .truncate = true });
+    var bw = std.io.bufferedWriter(std.io.getStdOut().writer());
+    const w = bw.writer();
 
-    try file.writeAll(
-        \\const std = @import("std");
+    try w.writeAll(
+        \\//! This file is generated by tools/gen_outline_atomics.zig.
         \\const builtin = @import("builtin");
-        \\const arch = builtin.cpu.arch;
-        \\const is_test = builtin.is_test;
-        \\const target = std.Target;
-        \\const os_tag = builtin.os.tag;
-        \\const is_darwin = target.Os.Tag.isDarwin(os_tag);
-        \\const has_lse = target.aarch64.featureSetHas(builtin.target.cpu.features, .lse);
-        \\const linkage = if (is_test)
-        \\    std.builtin.GlobalLinkage.Internal
-        \\else
-        \\    std.builtin.GlobalLinkage.Strong;
+        \\const std = @import("std");
+        \\const linkage = @import("./common.zig").linkage;
+        \\const always_has_lse = std.Target.aarch64.featureSetHas(builtin.cpu.features, .lse);
+        \\
+        \\/// This default is overridden at runtime after inspecting CPU properties.
+        \\/// It is intentionally not exported in order to make the machine code that
+        \\/// uses it a statically predicted direct branch rather than using the PLT,
+        \\/// which ARM is concerned would have too much overhead.
+        \\var __aarch64_have_lse_atomics: u8 = @boolToInt(always_has_lse);
         \\
         \\
     );
 
+    var footer = std.ArrayList(u8).init(arena);
+    try footer.appendSlice("\ncomptime {\n");
+
     for ([_]N{ .one, .two, .four, .eight, .sixteen }) |n| {
         for ([_]Ordering{ .relax, .acq, .rel, .acq_rel }) |order| {
-            for ([_]AtomicOp{ .cas, .swp, .ldadd, .ldclr, .ldeor, .ldset }) |pat| {
-                if (pat == .cas or n != .sixteen) {
-                    for ([_]bool{ true, false }) |darwin| {
-                        for ([_]bool{ true, false }) |lse| {
-                            const darwin_name = if (darwin) "Darwin" else "Nondarwin";
-                            const lse_name = if (lse) "Lse" else "Nolse";
-                            var buf: [100:0]u8 = undefined;
-                            const name = try std.fmt.bufPrintZ(&buf, "{s}{s}{s}{s}{s}", .{ @tagName(pat), n.toBytes(), order.capName(), darwin_name, lse_name });
-                            const body = switch (pat) {
-                                .cas => try generateCas(&allocator, n, order, lse),
-                                .swp => try generateSwp(&allocator, n, order, lse),
-                                .ldadd => try generateLd(&allocator, n, order, .ldadd, lse),
-                                .ldclr => try generateLd(&allocator, n, order, .ldclr, lse),
-                                .ldeor => try generateLd(&allocator, n, order, .ldeor, lse),
-                                .ldset => try generateLd(&allocator, n, order, .ldset, lse),
-                            };
-                            defer allocator.destroy(body.ptr);
-                            try writeFunction(&file, name, pat, n, body);
-                        }
-                    }
-                    try writeExport(&file, @tagName(pat), n.toBytes(), order);
-                }
+            for ([_]AtomicOp{ .cas, .swp, .ldadd, .ldclr, .ldeor, .ldset }) |op| {
+                if (n == .sixteen and op != .cas) continue;
+
+                const name = try std.fmt.allocPrint(arena, "__aarch64_{s}{d}_{s}", .{
+                    @tagName(op), n.toBytes(), @tagName(order),
+                });
+                try writeFunction(arena, w, name, op, n, order);
+                try footer.writer().print("    @export({s}, .{{ .name = \"{s}\", .linkage = linkage }});\n", .{
+                    name, name,
+                });
             }
         }
     }
 
-    try file.writeAll(
-        \\//TODO: Add linksection once implemented and remove init at writeFunction
-        \\fn __init_aarch64_have_lse_atomics() callconv(.C) void {
-        \\    const AT_HWCAP = 16;
-        \\    const HWCAP_ATOMICS = 1 << 8;
-        \\    const hwcap = std.os.linux.getauxval(AT_HWCAP);
-        \\    __aarch64_have_lse_atomics = @boolToInt((hwcap & HWCAP_ATOMICS) != 0);
-        \\}
-        \\
-        \\var __aarch64_have_lse_atomics: u8 = @boolToInt(has_lse);
-        \\
-        \\comptime {
-        \\    if (arch.isAARCH64()) {
-        \\        @export(__aarch64_cas1_relax, .{ .name = "__aarch64_cas1_relax", .linkage = linkage });
-        \\        @export(__aarch64_cas1_acq, .{ .name = "__aarch64_cas1_acq", .linkage = linkage });
-        \\        @export(__aarch64_cas1_rel, .{ .name = "__aarch64_cas1_rel", .linkage = linkage });
-        \\        @export(__aarch64_cas1_acq_rel, .{ .name = "__aarch64_cas1_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_cas2_relax, .{ .name = "__aarch64_cas2_relax", .linkage = linkage });
-        \\        @export(__aarch64_cas2_acq, .{ .name = "__aarch64_cas2_acq", .linkage = linkage });
-        \\        @export(__aarch64_cas2_rel, .{ .name = "__aarch64_cas2_rel", .linkage = linkage });
-        \\        @export(__aarch64_cas2_acq_rel, .{ .name = "__aarch64_cas2_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_cas4_relax, .{ .name = "__aarch64_cas4_relax", .linkage = linkage });
-        \\        @export(__aarch64_cas4_acq, .{ .name = "__aarch64_cas4_acq", .linkage = linkage });
-        \\        @export(__aarch64_cas4_rel, .{ .name = "__aarch64_cas4_rel", .linkage = linkage });
-        \\        @export(__aarch64_cas4_acq_rel, .{ .name = "__aarch64_cas4_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_cas8_relax, .{ .name = "__aarch64_cas8_relax", .linkage = linkage });
-        \\        @export(__aarch64_cas8_acq, .{ .name = "__aarch64_cas8_acq", .linkage = linkage });
-        \\        @export(__aarch64_cas8_rel, .{ .name = "__aarch64_cas8_rel", .linkage = linkage });
-        \\        @export(__aarch64_cas8_acq_rel, .{ .name = "__aarch64_cas8_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_cas16_relax, .{ .name = "__aarch64_cas16_relax", .linkage = linkage });
-        \\        @export(__aarch64_cas16_acq, .{ .name = "__aarch64_cas16_acq", .linkage = linkage });
-        \\        @export(__aarch64_cas16_rel, .{ .name = "__aarch64_cas16_rel", .linkage = linkage });
-        \\        @export(__aarch64_cas16_acq_rel, .{ .name = "__aarch64_cas16_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_swp1_relax, .{ .name = "__aarch64_swp1_relax", .linkage = linkage });
-        \\        @export(__aarch64_swp1_acq, .{ .name = "__aarch64_swp1_acq", .linkage = linkage });
-        \\        @export(__aarch64_swp1_rel, .{ .name = "__aarch64_swp1_rel", .linkage = linkage });
-        \\        @export(__aarch64_swp1_acq_rel, .{ .name = "__aarch64_swp1_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_swp2_relax, .{ .name = "__aarch64_swp2_relax", .linkage = linkage });
-        \\        @export(__aarch64_swp2_acq, .{ .name = "__aarch64_swp2_acq", .linkage = linkage });
-        \\        @export(__aarch64_swp2_rel, .{ .name = "__aarch64_swp2_rel", .linkage = linkage });
-        \\        @export(__aarch64_swp2_acq_rel, .{ .name = "__aarch64_swp2_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_swp4_relax, .{ .name = "__aarch64_swp4_relax", .linkage = linkage });
-        \\        @export(__aarch64_swp4_acq, .{ .name = "__aarch64_swp4_acq", .linkage = linkage });
-        \\        @export(__aarch64_swp4_rel, .{ .name = "__aarch64_swp4_rel", .linkage = linkage });
-        \\        @export(__aarch64_swp4_acq_rel, .{ .name = "__aarch64_swp4_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_swp8_relax, .{ .name = "__aarch64_swp8_relax", .linkage = linkage });
-        \\        @export(__aarch64_swp8_acq, .{ .name = "__aarch64_swp8_acq", .linkage = linkage });
-        \\        @export(__aarch64_swp8_rel, .{ .name = "__aarch64_swp8_rel", .linkage = linkage });
-        \\        @export(__aarch64_swp8_acq_rel, .{ .name = "__aarch64_swp8_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldadd1_relax, .{ .name = "__aarch64_ldadd1_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldadd1_acq, .{ .name = "__aarch64_ldadd1_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldadd1_rel, .{ .name = "__aarch64_ldadd1_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldadd1_acq_rel, .{ .name = "__aarch64_ldadd1_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldadd2_relax, .{ .name = "__aarch64_ldadd2_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldadd2_acq, .{ .name = "__aarch64_ldadd2_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldadd2_rel, .{ .name = "__aarch64_ldadd2_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldadd2_acq_rel, .{ .name = "__aarch64_ldadd2_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldadd4_relax, .{ .name = "__aarch64_ldadd4_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldadd4_acq, .{ .name = "__aarch64_ldadd4_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldadd4_rel, .{ .name = "__aarch64_ldadd4_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldadd4_acq_rel, .{ .name = "__aarch64_ldadd4_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldadd8_relax, .{ .name = "__aarch64_ldadd8_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldadd8_acq, .{ .name = "__aarch64_ldadd8_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldadd8_rel, .{ .name = "__aarch64_ldadd8_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldadd8_acq_rel, .{ .name = "__aarch64_ldadd8_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldclr1_relax, .{ .name = "__aarch64_ldclr1_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldclr1_acq, .{ .name = "__aarch64_ldclr1_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldclr1_rel, .{ .name = "__aarch64_ldclr1_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldclr1_acq_rel, .{ .name = "__aarch64_ldclr1_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldclr2_relax, .{ .name = "__aarch64_ldclr2_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldclr2_acq, .{ .name = "__aarch64_ldclr2_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldclr2_rel, .{ .name = "__aarch64_ldclr2_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldclr2_acq_rel, .{ .name = "__aarch64_ldclr2_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldclr4_relax, .{ .name = "__aarch64_ldclr4_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldclr4_acq, .{ .name = "__aarch64_ldclr4_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldclr4_rel, .{ .name = "__aarch64_ldclr4_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldclr4_acq_rel, .{ .name = "__aarch64_ldclr4_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldclr8_relax, .{ .name = "__aarch64_ldclr8_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldclr8_acq, .{ .name = "__aarch64_ldclr8_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldclr8_rel, .{ .name = "__aarch64_ldclr8_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldclr8_acq_rel, .{ .name = "__aarch64_ldclr8_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldeor1_relax, .{ .name = "__aarch64_ldeor1_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldeor1_acq, .{ .name = "__aarch64_ldeor1_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldeor1_rel, .{ .name = "__aarch64_ldeor1_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldeor1_acq_rel, .{ .name = "__aarch64_ldeor1_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldeor2_relax, .{ .name = "__aarch64_ldeor2_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldeor2_acq, .{ .name = "__aarch64_ldeor2_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldeor2_rel, .{ .name = "__aarch64_ldeor2_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldeor2_acq_rel, .{ .name = "__aarch64_ldeor2_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldeor4_relax, .{ .name = "__aarch64_ldeor4_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldeor4_acq, .{ .name = "__aarch64_ldeor4_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldeor4_rel, .{ .name = "__aarch64_ldeor4_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldeor4_acq_rel, .{ .name = "__aarch64_ldeor4_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldeor8_relax, .{ .name = "__aarch64_ldeor8_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldeor8_acq, .{ .name = "__aarch64_ldeor8_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldeor8_rel, .{ .name = "__aarch64_ldeor8_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldeor8_acq_rel, .{ .name = "__aarch64_ldeor8_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldset1_relax, .{ .name = "__aarch64_ldset1_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldset1_acq, .{ .name = "__aarch64_ldset1_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldset1_rel, .{ .name = "__aarch64_ldset1_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldset1_acq_rel, .{ .name = "__aarch64_ldset1_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldset2_relax, .{ .name = "__aarch64_ldset2_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldset2_acq, .{ .name = "__aarch64_ldset2_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldset2_rel, .{ .name = "__aarch64_ldset2_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldset2_acq_rel, .{ .name = "__aarch64_ldset2_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldset4_relax, .{ .name = "__aarch64_ldset4_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldset4_acq, .{ .name = "__aarch64_ldset4_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldset4_rel, .{ .name = "__aarch64_ldset4_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldset4_acq_rel, .{ .name = "__aarch64_ldset4_acq_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldset8_relax, .{ .name = "__aarch64_ldset8_relax", .linkage = linkage });
-        \\        @export(__aarch64_ldset8_acq, .{ .name = "__aarch64_ldset8_acq", .linkage = linkage });
-        \\        @export(__aarch64_ldset8_rel, .{ .name = "__aarch64_ldset8_rel", .linkage = linkage });
-        \\        @export(__aarch64_ldset8_acq_rel, .{ .name = "__aarch64_ldset8_acq_rel", .linkage = linkage });
-        \\    }
-        \\}
-        \\
-    );
+    try w.writeAll(footer.items);
+    try w.writeAll("}\n");
+    try bw.flush();
 }
 
-fn usageAndExit(file: std.fs.File, arg0: []const u8, code: u8) noreturn {
-    file.writer().print(
-        \\Usage: {s} /path/to/lib/compiler_rt/lse_atomics.zig
-        \\
-        \\Generates outline atomics for compiler-rt.
-        \\
-    , .{arg0}) catch std.process.exit(1);
-    std.process.exit(code);
-}
-
-fn writeFunction(file: *std.fs.File, name: [:0]const u8, op: AtomicOp, n: N, body: [:0]const u8) !void {
-    var fn_buf: [100:0]u8 = undefined;
+fn writeFunction(
+    arena: Allocator,
+    w: anytype,
+    name: []const u8,
+    op: AtomicOp,
+    n: N,
+    order: Ordering,
+) !void {
+    const body = switch (op) {
+        .cas => try generateCas(arena, n, order),
+        .swp => try generateSwp(arena, n, order),
+        .ldadd => try generateLd(arena, n, order, .ldadd),
+        .ldclr => try generateLd(arena, n, order, .ldclr),
+        .ldeor => try generateLd(arena, n, order, .ldeor),
+        .ldset => try generateLd(arena, n, order, .ldset),
+    };
     const fn_sig = if (op != .cas)
-        try std.fmt.bufPrintZ(&fn_buf, "fn {[name]s}(val: u{[n]s}, ptr: *u{[n]s}) callconv(.C) u{[n]s} {{", .{ .name = name, .n = n.toBits() })
+        try std.fmt.allocPrint(arena, "fn {[name]s}(val: u{[n]d}, ptr: *u{[n]d}) callconv(.C) u{[n]d} {{", .{
+            .name = name,
+            .n = n.toBits(),
+        })
     else
-        try std.fmt.bufPrintZ(&fn_buf, "fn {[name]s}(expected: u{[n]s}, desired: u{[n]s}, ptr: *u{[n]s}) callconv(.C) u{[n]s} {{", .{ .name = name, .n = n.toBits() });
-    try file.writeAll(fn_sig);
-    try file.writeAll(
+        try std.fmt.allocPrint(arena, "fn {[name]s}(expected: u{[n]d}, desired: u{[n]d}, ptr: *u{[n]d}) callconv(.C) u{[n]d} {{", .{
+            .name = name,
+            .n = n.toBits(),
+        });
+    try w.writeAll(fn_sig);
+    try w.writeAll(
         \\
         \\    @setRuntimeSafety(false);
-        \\    __init_aarch64_have_lse_atomics();
-        \\
         \\    return asm volatile (
         \\
     );
     var iter = std.mem.split(u8, body, "\n");
     while (iter.next()) |line| {
-        try file.writeAll("        \\\\");
-        try file.writeAll(line);
-        try file.writeAll("\n");
+        try w.writeAll("        \\\\");
+        try w.writeAll(line);
+        try w.writeAll("\n");
     }
-    var constraint_buf: [500:0]u8 = undefined;
     const constraints = if (op != .cas)
-        try std.fmt.bufPrintZ(&constraint_buf,
-            \\        : [ret] "={{{[reg]s}0}}" (-> u{[ty]s}),
+        try std.fmt.allocPrint(arena,
+            \\        : [ret] "={{{[reg]s}0}}" (-> u{[ty]d}),
             \\        : [val] "{{{[reg]s}0}}" (val),
             \\          [ptr] "{{x1}}" (ptr),
             \\          [__aarch64_have_lse_atomics] "{{w16}}" (__aarch64_have_lse_atomics),
@@ -229,8 +109,8 @@ fn writeFunction(file: *std.fs.File, name: [:0]const u8, op: AtomicOp, n: N, bod
             \\
         , .{ .reg = n.register(), .ty = n.toBits() })
     else
-        try std.fmt.bufPrintZ(&constraint_buf,
-            \\        : [ret] "={{{[reg]s}0}}" (-> u{[ty]s}),
+        try std.fmt.allocPrint(arena,
+            \\        : [ret] "={{{[reg]s}0}}" (-> u{[ty]d}),
             \\        : [expected] "{{{[reg]s}0}}" (expected),
             \\          [desired] "{{{[reg]s}1}}" (desired),
             \\          [ptr] "{{x2}}" (ptr),
@@ -239,55 +119,12 @@ fn writeFunction(file: *std.fs.File, name: [:0]const u8, op: AtomicOp, n: N, bod
             \\
         , .{ .reg = n.register(), .ty = n.toBits() });
 
-    try file.writeAll(constraints);
-    try file.writeAll(
+    try w.writeAll(constraints);
+    try w.writeAll(
         \\    );
         \\
     );
-    try file.writeAll("}\n");
-}
-
-fn writeExport(file: *std.fs.File, pat: [:0]const u8, n: [:0]const u8, order: Ordering) !void {
-    var darwin_lse_buf: [100:0]u8 = undefined;
-    var darwin_nolse_buf: [100:0]u8 = undefined;
-    var nodarwin_lse_buf: [100:0]u8 = undefined;
-    var nodarwin_nolse_buf: [100:0]u8 = undefined;
-    var name_buf: [100:0]u8 = undefined;
-    const darwin_lse = try std.fmt.bufPrintZ(&darwin_lse_buf, "{s}{s}{s}DarwinLse", .{ pat, n, order.capName() });
-    const darwin_nolse = try std.fmt.bufPrintZ(&darwin_nolse_buf, "{s}{s}{s}DarwinNolse", .{ pat, n, order.capName() });
-    const nodarwin_lse = try std.fmt.bufPrintZ(&nodarwin_lse_buf, "{s}{s}{s}NondarwinLse", .{ pat, n, order.capName() });
-    const nodarwin_nolse = try std.fmt.bufPrintZ(&nodarwin_nolse_buf, "{s}{s}{s}NondarwinNolse", .{ pat, n, order.capName() });
-    const name = try std.fmt.bufPrintZ(&name_buf, "__aarch64_{s}{s}_{s}", .{ pat, n, @tagName(order) });
-    try file.writeAll("const ");
-    try file.writeAll(name);
-    try file.writeAll(
-        \\ = if (is_darwin)
-        \\    if (has_lse)
-        \\        
-    );
-    try file.writeAll(darwin_lse);
-    try file.writeAll(
-        \\
-        \\    else
-        \\        
-    );
-    try file.writeAll(darwin_nolse);
-    try file.writeAll(
-        \\
-        \\else if (has_lse)
-        \\    
-    );
-    try file.writeAll(nodarwin_lse);
-    try file.writeAll(
-        \\
-        \\else
-        \\    
-    );
-    try file.writeAll(nodarwin_nolse);
-    try file.writeAll(
-        \\;
-        \\
-    );
+    try w.writeAll("}\n");
 }
 
 const N = enum(u8) {
@@ -298,22 +135,23 @@ const N = enum(u8) {
     sixteen = 16,
 
     const Defines = struct {
-        s: [:0]const u8,
-        uxt: [:0]const u8,
-        b: [:0]const u8,
+        s: []const u8,
+        uxt: []const u8,
+        b: []const u8,
     };
-    fn defines(self: @This()) Defines {
-        const s = switch (self) {
+
+    fn defines(n: N) Defines {
+        const s = switch (n) {
             .one => "b",
             .two => "h",
             else => "",
         };
-        const uxt = switch (self) {
+        const uxt = switch (n) {
             .one => "uxtb",
             .two => "uxth",
             .four, .eight, .sixteen => "mov",
         };
-        const b = switch (self) {
+        const b = switch (n) {
             .one => "0x00000000",
             .two => "0x40000000",
             .four => "0x80000000",
@@ -327,28 +165,16 @@ const N = enum(u8) {
         };
     }
 
-    fn register(self: @This()) [:0]const u8 {
-        return if (@enumToInt(self) < 8) "w" else "x";
+    fn register(n: N) []const u8 {
+        return if (@enumToInt(n) < 8) "w" else "x";
     }
 
-    fn toBytes(self: @This()) [:0]const u8 {
-        return switch (self) {
-            .one => "1",
-            .two => "2",
-            .four => "4",
-            .eight => "8",
-            .sixteen => "16",
-        };
+    fn toBytes(n: N) u8 {
+        return @enumToInt(n);
     }
 
-    fn toBits(self: @This()) [:0]const u8 {
-        return switch (self) {
-            .one => "8",
-            .two => "16",
-            .four => "32",
-            .eight => "64",
-            .sixteen => "128",
-        };
+    fn toBits(n: N) u8 {
+        return n.toBytes() * 8;
     }
 };
 
@@ -411,31 +237,18 @@ const Ordering = enum {
 
 const LdName = enum { ldadd, ldclr, ldeor, ldset };
 
-fn generateCas(alloc: *Allocator, n: N, order: Ordering, lse: bool) ![:0]const u8 {
+fn generateCas(arena: Allocator, n: N, order: Ordering) ![]const u8 {
     const s_def = n.defines();
     const o_def = order.defines();
-    var cas_buf = try alloc.create([200:0]u8);
-    var ldxr_buf = try alloc.create([200:0]u8);
-    var stxr_buf = try alloc.create([200:0]u8);
-    defer alloc.destroy(cas_buf);
-    defer alloc.destroy(ldxr_buf);
-    defer alloc.destroy(stxr_buf);
-    var instr_buf = try alloc.create([1000:0]u8);
-    errdefer alloc.destroy(instr_buf);
 
     const reg = n.register();
 
     if (@enumToInt(n) < 16) {
-        const cas = if (lse) blk: {
-            break :blk try std.fmt.bufPrintZ(cas_buf,
-                \\cas{[a]s}{[l]s}{[s]s} {[reg]s}0, {[reg]s}1, [x2]
-                \\
-            , .{ .a = o_def.a, .l = o_def.l, .s = s_def.s, .reg = reg });
-        } else try std.fmt.bufPrintZ(cas_buf, ".inst 0x08a07c41 + {s} + {s}\n", .{ s_def.b, o_def.m });
-        const ldxr = try std.fmt.bufPrintZ(ldxr_buf, "ld{s}xr{s}", .{ o_def.a, s_def.s });
-        const stxr = try std.fmt.bufPrintZ(stxr_buf, "st{s}xr{s}", .{ o_def.l, s_def.s });
+        const cas = try std.fmt.allocPrint(arena, ".inst 0x08a07c41 + {s} + {s}\n", .{ s_def.b, o_def.m });
+        const ldxr = try std.fmt.allocPrint(arena, "ld{s}xr{s}", .{ o_def.a, s_def.s });
+        const stxr = try std.fmt.allocPrint(arena, "st{s}xr{s}", .{ o_def.l, s_def.s });
 
-        return try std.fmt.bufPrintZ(instr_buf,
+        return try std.fmt.allocPrint(arena,
             \\        cbz     w16, 8f
             \\        {[cas]s}
             \\        cbz     wzr, 1f
@@ -456,15 +269,11 @@ fn generateCas(alloc: *Allocator, n: N, order: Ordering, lse: bool) ![:0]const u
             .reg = reg,
         });
     } else {
-        const casp = if (lse)
-            try std.fmt.bufPrintZ(cas_buf, "casp{s}{s}  x0, x1, x2, x3, [x4]\n", .{ o_def.a, o_def.l })
-        else
-            try std.fmt.bufPrintZ(cas_buf, ".inst 0x48207c82 + {s}\n", .{o_def.m});
-
-        const ldxp = try std.fmt.bufPrintZ(ldxr_buf, "ld{s}xp", .{o_def.a});
-        const stxp = try std.fmt.bufPrintZ(stxr_buf, "st{s}xp", .{o_def.l});
+        const casp = try std.fmt.allocPrint(arena, ".inst 0x48207c82 + {s}\n", .{o_def.m});
+        const ldxp = try std.fmt.allocPrint(arena, "ld{s}xp", .{o_def.a});
+        const stxp = try std.fmt.allocPrint(arena, "st{s}xp", .{o_def.l});
 
-        return try std.fmt.bufPrintZ(instr_buf,
+        return try std.fmt.allocPrint(arena,
             \\        cbz     w16, 8f
             \\        {[casp]s}
             \\        cbz     wzr, 1f
@@ -487,53 +296,35 @@ fn generateCas(alloc: *Allocator, n: N, order: Ordering, lse: bool) ![:0]const u
     }
 }
 
-fn generateSwp(alloc: *Allocator, n: N, order: Ordering, lse: bool) ![:0]const u8 {
+fn generateSwp(arena: Allocator, n: N, order: Ordering) ![]const u8 {
     const s_def = n.defines();
     const o_def = order.defines();
-
-    var swp_buf = try alloc.create([200:0]u8);
-    var ldxr_buf = try alloc.create([200:0]u8);
-    var stxr_buf = try alloc.create([200:0]u8);
-    defer alloc.destroy(swp_buf);
-    defer alloc.destroy(ldxr_buf);
-    defer alloc.destroy(stxr_buf);
-
     const reg = n.register();
 
-    const swp = if (lse) blk: {
-        break :blk try std.fmt.bufPrintZ(swp_buf,
-            \\swp{[a]s}{[l]s}{[s]s}  {[reg]s}0, {[reg]s}0, [x1]
-        , .{ .a = o_def.a, .l = o_def.l, .s = s_def.s, .reg = reg });
-    } else std.fmt.bufPrintZ(swp_buf, ".inst 0x38208020 + {s} + {s}", .{ .b = s_def.b, .n = o_def.n });
-
-    const ldxr = try std.fmt.bufPrintZ(ldxr_buf, "ld{s}xr{s}", .{ o_def.a, s_def.s });
-    const stxr = try std.fmt.bufPrintZ(stxr_buf, "st{s}xr{s}", .{ o_def.l, s_def.s });
-
-    var instr_buf = try alloc.create([1000:0]u8);
-    errdefer alloc.destroy(instr_buf);
-    return try std.fmt.bufPrintZ(instr_buf,
+    return try std.fmt.allocPrint(arena,
         \\        cbz     w16, 8f
-        \\        {[swp]s}
+        \\        .inst 0x38208020 + {[b]s} + {[n]s}
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    {[reg]s}16, {[reg]s}0
         \\0:
-        \\        {[ldxr]s}   {[reg]s}0, [x1]
-        \\        {[stxr]s}   w17, {[reg]s}16, [x1]
+        \\        ld{[a]s}xr{[s]s}   {[reg]s}0, [x1]
+        \\        st{[l]s}xr{[s]s}   w17, {[reg]s}16, [x1]
         \\        cbnz   w17, 0b
         \\1:
     , .{
-        .swp = swp,
-        .ldxr = ldxr,
-        .stxr = stxr,
+        .b = s_def.b,
+        .n = o_def.n,
         .reg = reg,
+        .s = s_def.s,
+        .a = o_def.a,
+        .l = o_def.l,
     });
 }
 
-fn generateLd(alloc: *Allocator, n: N, order: Ordering, ld: LdName, lse: bool) ![:0]const u8 {
+fn generateLd(arena: Allocator, n: N, order: Ordering, ld: LdName) ![]const u8 {
     const s_def = n.defines();
     const o_def = order.defines();
-    const ldname = @tagName(ld);
     const op = switch (ld) {
         .ldadd => "add",
         .ldclr => "bic",
@@ -547,47 +338,27 @@ fn generateLd(alloc: *Allocator, n: N, order: Ordering, ld: LdName, lse: bool) !
         .ldset => "0x3000",
     };
 
-    var swp_buf = try alloc.create([200:0]u8);
-    var ldop_buf = try alloc.create([200:0]u8);
-    var ldxr_buf = try alloc.create([200:0]u8);
-    var stxr_buf = try alloc.create([200:0]u8);
-    defer alloc.destroy(swp_buf);
-    defer alloc.destroy(ldop_buf);
-    defer alloc.destroy(ldxr_buf);
-    defer alloc.destroy(stxr_buf);
-
     const reg = n.register();
 
-    const ldop = if (lse)
-        std.fmt.bufPrintZ(ldop_buf,
-            \\{[ldnm]s}{[a]s}{[l]s}{[s]s} {[reg]s}0, {[reg]s}0, [x1]
-        , .{ .ldnm = ldname, .a = o_def.a, .l = o_def.l, .s = s_def.s, .reg = reg })
-    else
-        std.fmt.bufPrintZ(ldop_buf,
-            \\.inst 0x38200020 + {[op_n]s} + {[b]s} + {[n]s}
-        , .{ .op_n = op_n, .b = s_def.b, .n = o_def.n });
-
-    const ldxr = try std.fmt.bufPrintZ(ldxr_buf, "ld{s}xr{s}", .{ o_def.a, s_def.s });
-    const stxr = try std.fmt.bufPrintZ(stxr_buf, "st{s}xr{s}", .{ o_def.l, s_def.s });
-
-    var instr_buf = try alloc.create([1000:0]u8);
-    errdefer alloc.destroy(instr_buf);
-    return try std.fmt.bufPrintZ(instr_buf,
+    return try std.fmt.allocPrint(arena,
         \\        cbz     w16, 8f
-        \\        {[ldop]s}
+        \\        .inst 0x38200020 + {[op_n]s} + {[b]s} + {[n]s}
         \\        cbz     wzr, 1f
         \\8:
         \\        mov    {[reg]s}16, {[reg]s}0
         \\0:
-        \\        {[ldxr]s}   {[reg]s}0, [x1]
+        \\        ld{[a]s}xr{[s]s}   {[reg]s}0, [x1]
         \\        {[op]s}     {[reg]s}17, {[reg]s}0, {[reg]s}16
-        \\        {[stxr]s}   w15, {[reg]s}17, [x1]
+        \\        st{[l]s}xr{[s]s}   w15, {[reg]s}17, [x1]
         \\        cbnz   w15, 0b
         \\1:
     , .{
-        .ldop = ldop,
-        .ldxr = ldxr,
-        .stxr = stxr,
+        .op_n = op_n,
+        .b = s_def.b,
+        .n = o_def.n,
+        .s = s_def.s,
+        .a = o_def.a,
+        .l = o_def.l,
         .op = op,
         .reg = reg,
     });