Commit 70c85b1bf1
Changed files (103)
lib
include
cuda_wrappers
openmp_wrappers
ppc_wrappers
lib/include/cuda_wrappers/bits/basic_string.h
@@ -0,0 +1,9 @@
+// CUDA headers define __noinline__ which interferes with libstdc++'s use of
+// `__attribute((__noinline__))`. In order to avoid compilation error,
+// temporarily unset __noinline__ when we include affected libstdc++ header.
+
+#pragma push_macro("__noinline__")
+#undef __noinline__
+#include_next "bits/basic_string.h"
+
+#pragma pop_macro("__noinline__")
lib/include/cuda_wrappers/bits/basic_string.tcc
@@ -0,0 +1,9 @@
+// CUDA headers define __noinline__ which interferes with libstdc++'s use of
+// `__attribute((__noinline__))`. In order to avoid compilation error,
+// temporarily unset __noinline__ when we include affected libstdc++ header.
+
+#pragma push_macro("__noinline__")
+#undef __noinline__
+#include_next "bits/basic_string.tcc"
+
+#pragma pop_macro("__noinline__")
lib/include/llvm_libc_wrappers/assert.h
@@ -0,0 +1,34 @@
+//===-- Wrapper for C standard assert.h declarations on the GPU ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
+#define __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
+
+#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
+#error "This file is for GPU offloading compilation only"
+#endif
+
+#include_next <assert.h>
+
+#if __has_include(<llvm-libc-decls/assert.h>)
+
+#if defined(__HIP__) || defined(__CUDA__)
+#define __LIBC_ATTRS __attribute__((device))
+#endif
+
+#pragma omp begin declare target
+
+#include <llvm-libc-decls/assert.h>
+
+#pragma omp end declare target
+
+#undef __LIBC_ATTRS
+
+#endif
+
+#endif // __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
lib/include/llvm_libc_wrappers/ctype.h
@@ -13,8 +13,19 @@
#error "This file is for GPU offloading compilation only"
#endif
+// The GNU headers like to define 'toupper' and 'tolower' redundantly. This is
+// necessary to prevent it from doing that and remapping our implementation.
+#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__)
+#pragma push_macro("__USE_EXTERN_INLINES")
+#undef __USE_EXTERN_INLINES
+#endif
+
#include_next <ctype.h>
+#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__)
+#pragma pop_macro("__USE_EXTERN_INLINES")
+#endif
+
#if __has_include(<llvm-libc-decls/ctype.h>)
#if defined(__HIP__) || defined(__CUDA__)
@@ -26,6 +37,7 @@
#pragma push_macro("isalnum")
#pragma push_macro("isalpha")
+#pragma push_macro("isascii")
#pragma push_macro("isblank")
#pragma push_macro("iscntrl")
#pragma push_macro("isdigit")
@@ -36,11 +48,13 @@
#pragma push_macro("isspace")
#pragma push_macro("isupper")
#pragma push_macro("isxdigit")
+#pragma push_macro("toascii")
#pragma push_macro("tolower")
#pragma push_macro("toupper")
#undef isalnum
#undef isalpha
+#undef isascii
#undef iscntrl
#undef isdigit
#undef islower
@@ -51,6 +65,7 @@
#undef isupper
#undef isblank
#undef isxdigit
+#undef toascii
#undef tolower
#undef toupper
@@ -64,6 +79,7 @@
#if !defined(__NVPTX__) && !defined(__AMDGPU__)
#pragma pop_macro("isalnum")
#pragma pop_macro("isalpha")
+#pragma pop_macro("isascii")
#pragma pop_macro("isblank")
#pragma pop_macro("iscntrl")
#pragma pop_macro("isdigit")
@@ -74,6 +90,7 @@
#pragma pop_macro("isspace")
#pragma pop_macro("isupper")
#pragma pop_macro("isxdigit")
+#pragma pop_macro("toascii")
#pragma pop_macro("tolower")
#pragma pop_macro("toupper")
#endif
lib/include/llvm_libc_wrappers/stdio.h
@@ -6,21 +6,58 @@
//
//===----------------------------------------------------------------------===//
-#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
-#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
-
#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
#error "This file is for GPU offloading compilation only"
#endif
#include_next <stdio.h>
+// In some old versions of glibc, other standard headers sometimes define
+// special macros (e.g., __need_FILE) before including stdio.h to cause stdio.h
+// to produce special definitions. Future includes of stdio.h when those
+// special macros are undefined are expected to produce the normal definitions
+// from stdio.h.
+//
+// We do not apply our include guard (__CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__)
+// unconditionally to the above include_next. Otherwise, after an occurrence of
+// the first glibc stdio.h use case described above, the include_next would be
+// skipped for remaining includes of stdio.h, leaving required symbols
+// undefined.
+//
+// We make the following assumptions to handle all use cases:
+//
+// 1. If the above include_next produces special glibc definitions, then (a) it
+// does not produce the normal definitions that we must intercept below, (b)
+// the current file was included from a glibc header that already defined
+// __GLIBC__ (usually by including glibc's <features.h>), and (c) the above
+// include_next does not define _STDIO_H. In that case, we skip the rest of
+// the current file and don't guard against future includes.
+// 2. If the above include_next produces the normal stdio.h definitions, then
+// either (a) __GLIBC__ is not defined because C headers are from some other
+// libc implementation or (b) the above include_next defines _STDIO_H to
+// prevent the above include_next from having any effect in the future.
+#if !defined(__GLIBC__) || defined(_STDIO_H)
+
+#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
+#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
+
#if __has_include(<llvm-libc-decls/stdio.h>)
#if defined(__HIP__) || defined(__CUDA__)
#define __LIBC_ATTRS __attribute__((device))
#endif
+// Some headers provide these as macros. Temporarily undefine them so they do
+// not conflict with any definitions for the GPU.
+
+#pragma push_macro("stdout")
+#pragma push_macro("stdin")
+#pragma push_macro("stderr")
+
+#undef stdout
+#undef stderr
+#undef stdin
+
#pragma omp begin declare target
#include <llvm-libc-decls/stdio.h>
@@ -29,6 +66,15 @@
#undef __LIBC_ATTRS
+// Restore the original macros when compiling on the host.
+#if !defined(__NVPTX__) && !defined(__AMDGPU__)
+#pragma pop_macro("stdout")
+#pragma pop_macro("stderr")
+#pragma pop_macro("stdin")
+#endif
+
#endif
#endif // __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
+
+#endif
lib/include/llvm_libc_wrappers/stdlib.h
@@ -23,8 +23,11 @@
#pragma omp begin declare target
-// The LLVM C library uses this type so we forward declare it.
+// The LLVM C library uses these named types so we forward declare them.
typedef void (*__atexithandler_t)(void);
+typedef int (*__bsearchcompare_t)(const void *, const void *);
+typedef int (*__qsortcompare_t)(const void *, const void *);
+typedef int (*__qsortrcompare_t)(const void *, const void *, void *);
// Enforce ABI compatibility with the structs used by the LLVM C library.
_Static_assert(__builtin_offsetof(div_t, quot) == 0, "ABI mismatch!");
lib/include/llvm_libc_wrappers/string.h
@@ -13,9 +13,6 @@
#error "This file is for GPU offloading compilation only"
#endif
-// FIXME: The GNU headers provide C++ standard compliant headers when in C++
-// mode and the LLVM libc does not. We cannot enable memchr, strchr, strchrnul,
-// strpbrk, strrchr, strstr, or strcasestr until this is addressed.
#include_next <string.h>
#if __has_include(<llvm-libc-decls/string.h>)
@@ -26,8 +23,70 @@
#pragma omp begin declare target
+// The GNU headers provide C++ standard compliant headers when in C++ mode and
+// the LLVM libc does not. We need to manually provide the definitions using the
+// same prototypes.
+#if defined(__cplusplus) && defined(__GLIBC__) && \
+ defined(__CORRECT_ISO_CPP_STRING_H_PROTO)
+
+#ifndef __LIBC_ATTRS
+#define __LIBC_ATTRS
+#endif
+
+extern "C" {
+void *memccpy(void *__restrict, const void *__restrict, int,
+ size_t) __LIBC_ATTRS;
+int memcmp(const void *, const void *, size_t) __LIBC_ATTRS;
+void *memcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS;
+void *memmem(const void *, size_t, const void *, size_t) __LIBC_ATTRS;
+void *memmove(void *, const void *, size_t) __LIBC_ATTRS;
+void *mempcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS;
+void *memset(void *, int, size_t) __LIBC_ATTRS;
+char *stpcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS;
+char *stpncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
+char *strcat(char *__restrict, const char *__restrict) __LIBC_ATTRS;
+int strcmp(const char *, const char *) __LIBC_ATTRS;
+int strcoll(const char *, const char *) __LIBC_ATTRS;
+char *strcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS;
+size_t strcspn(const char *, const char *) __LIBC_ATTRS;
+char *strdup(const char *) __LIBC_ATTRS;
+size_t strlen(const char *) __LIBC_ATTRS;
+char *strncat(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
+int strncmp(const char *, const char *, size_t) __LIBC_ATTRS;
+char *strncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
+char *strndup(const char *, size_t) __LIBC_ATTRS;
+size_t strnlen(const char *, size_t) __LIBC_ATTRS;
+size_t strspn(const char *, const char *) __LIBC_ATTRS;
+char *strtok(char *__restrict, const char *__restrict) __LIBC_ATTRS;
+char *strtok_r(char *__restrict, const char *__restrict,
+ char **__restrict) __LIBC_ATTRS;
+size_t strxfrm(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
+}
+
+extern "C++" {
+char *strstr(char *, const char *) noexcept __LIBC_ATTRS;
+const char *strstr(const char *, const char *) noexcept __LIBC_ATTRS;
+char *strpbrk(char *, const char *) noexcept __LIBC_ATTRS;
+const char *strpbrk(const char *, const char *) noexcept __LIBC_ATTRS;
+char *strrchr(char *, int) noexcept __LIBC_ATTRS;
+const char *strrchr(const char *, int) noexcept __LIBC_ATTRS;
+char *strchr(char *, int) noexcept __LIBC_ATTRS;
+const char *strchr(const char *, int) noexcept __LIBC_ATTRS;
+char *strchrnul(char *, int) noexcept __LIBC_ATTRS;
+const char *strchrnul(const char *, int) noexcept __LIBC_ATTRS;
+char *strcasestr(char *, const char *) noexcept __LIBC_ATTRS;
+const char *strcasestr(const char *, const char *) noexcept __LIBC_ATTRS;
+void *memrchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
+const void *memrchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
+void *memchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
+const void *memchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
+}
+
+#else
#include <llvm-libc-decls/string.h>
+#endif
+
#pragma omp end declare target
#undef __LIBC_ATTRS
lib/include/llvm_libc_wrappers/time.h
@@ -0,0 +1,34 @@
+//===-- Wrapper for C standard time.h declarations on the GPU -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
+#define __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
+
+#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
+#error "This file is for GPU offloading compilation only"
+#endif
+
+#include_next <time.h>
+
+#if __has_include(<llvm-libc-decls/time.h>)
+
+#if defined(__HIP__) || defined(__CUDA__)
+#define __LIBC_ATTRS __attribute__((device))
+#endif
+
+#pragma omp begin declare target
+
+_Static_assert(sizeof(clock_t) == sizeof(long), "ABI mismatch!");
+
+#include <llvm-libc-decls/time.h>
+
+#pragma omp end declare target
+
+#endif
+
+#endif // __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
lib/include/openmp_wrappers/cmath
@@ -1,4 +1,4 @@
-/*===-- __clang_openmp_device_functions.h - OpenMP math declares ------ c++ -===
+/*===-- __clang_openmp_device_functions.h - OpenMP math declares -*- c++ -*-===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
lib/include/ppc_wrappers/nmmintrin.h
@@ -0,0 +1,26 @@
+/*===---- nmmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef NO_WARN_X86_INTRINSICS
+/* This header is distributed to simplify porting x86_64 code that
+ makes explicit use of Intel intrinsics to powerpc64le.
+ It is the user's responsibility to determine if the results are
+ acceptable and make additional changes as necessary.
+ Note that much code that uses Intel intrinsics can be rewritten in
+ standard C or GNU C extensions, which are more portable and better
+ optimized across multiple targets. */
+#endif
+
+#ifndef NMMINTRIN_H_
+#define NMMINTRIN_H_
+
+/* We just include SSE4.1 header file. */
+#include <smmintrin.h>
+
+#endif /* NMMINTRIN_H_ */
lib/include/ppc_wrappers/smmintrin.h
@@ -14,7 +14,7 @@
#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
- makes explicit use of Intel intrinsics to powerp64/powerpc64le.
+ makes explicit use of Intel intrinsics to powerpc64/powerpc64le.
It is the user's responsibility to determine if the results are
acceptable and make additional changes as necessary.
@@ -68,10 +68,10 @@ extern __inline __m128d
__asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
- __fpscr_save.__fr = __builtin_mffs();
+ __fpscr_save.__fr = __builtin_ppc_mffs();
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
__fpscr_save.__fpscr &= ~0xf8;
- __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
+ __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
/* Insert an artificial "read/write" reference to the variable
read below, to ensure the compiler does not schedule
@@ -83,10 +83,15 @@ extern __inline __m128d
switch (__rounding) {
case _MM_FROUND_TO_NEAREST_INT:
- __fpscr_save.__fr = __builtin_mffsl();
+#ifdef _ARCH_PWR9
+ __fpscr_save.__fr = __builtin_ppc_mffsl();
+#else
+ __fpscr_save.__fr = __builtin_ppc_mffs();
+ __fpscr_save.__fpscr &= 0x70007f0ffL;
+#endif
__attribute__((fallthrough));
case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
- __builtin_set_fpscr_rn(0b00);
+ __builtin_ppc_set_fpscr_rn(0b00);
/* Insert an artificial "read/write" reference to the variable
read below, to ensure the compiler does not schedule
a read/use of the variable before the FPSCR is modified, above.
@@ -102,7 +107,7 @@ extern __inline __m128d
This can be removed if and when GCC PR102783 is fixed.
*/
__asm__("" : : "wa"(__r));
- __builtin_set_fpscr_rn(__fpscr_save.__fpscr);
+ __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
break;
case _MM_FROUND_TO_NEG_INF:
case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
@@ -128,9 +133,14 @@ extern __inline __m128d
*/
__asm__("" : : "wa"(__r));
/* Restore enabled exceptions. */
- __fpscr_save.__fr = __builtin_mffsl();
+#ifdef _ARCH_PWR9
+ __fpscr_save.__fr = __builtin_ppc_mffsl();
+#else
+ __fpscr_save.__fr = __builtin_ppc_mffs();
+ __fpscr_save.__fpscr &= 0x70007f0ffL;
+#endif
__fpscr_save.__fpscr |= __enables_save.__fpscr;
- __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
+ __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
}
return (__m128d)__r;
}
@@ -159,10 +169,10 @@ extern __inline __m128
__asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
- __fpscr_save.__fr = __builtin_mffs();
+ __fpscr_save.__fr = __builtin_ppc_mffs();
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
__fpscr_save.__fpscr &= ~0xf8;
- __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
+ __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
/* Insert an artificial "read/write" reference to the variable
read below, to ensure the compiler does not schedule
@@ -174,10 +184,15 @@ extern __inline __m128
switch (__rounding) {
case _MM_FROUND_TO_NEAREST_INT:
- __fpscr_save.__fr = __builtin_mffsl();
+#ifdef _ARCH_PWR9
+ __fpscr_save.__fr = __builtin_ppc_mffsl();
+#else
+ __fpscr_save.__fr = __builtin_ppc_mffs();
+ __fpscr_save.__fpscr &= 0x70007f0ffL;
+#endif
__attribute__((fallthrough));
case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
- __builtin_set_fpscr_rn(0b00);
+ __builtin_ppc_set_fpscr_rn(0b00);
/* Insert an artificial "read/write" reference to the variable
read below, to ensure the compiler does not schedule
a read/use of the variable before the FPSCR is modified, above.
@@ -193,7 +208,7 @@ extern __inline __m128
This can be removed if and when GCC PR102783 is fixed.
*/
__asm__("" : : "wa"(__r));
- __builtin_set_fpscr_rn(__fpscr_save.__fpscr);
+ __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
break;
case _MM_FROUND_TO_NEG_INF:
case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
@@ -219,9 +234,14 @@ extern __inline __m128
*/
__asm__("" : : "wa"(__r));
/* Restore enabled exceptions. */
- __fpscr_save.__fr = __builtin_mffsl();
+#ifdef _ARCH_PWR9
+ __fpscr_save.__fr = __builtin_ppc_mffsl();
+#else
+ __fpscr_save.__fr = __builtin_ppc_mffs();
+ __fpscr_save.__fpscr &= 0x70007f0ffL;
+#endif
__fpscr_save.__fpscr |= __enables_save.__fpscr;
- __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
+ __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
}
return (__m128)__r;
}
lib/include/__clang_cuda_device_functions.h
@@ -502,8 +502,8 @@ __DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); }
__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); }
__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); }
__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); }
-__DEVICE__ int __popc(int __a) { return __nv_popc(__a); }
-__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); }
+__DEVICE__ int __popc(unsigned int __a) { return __nv_popc(__a); }
+__DEVICE__ int __popcll(unsigned long long __a) { return __nv_popcll(__a); }
__DEVICE__ float __powf(float __a, float __b) {
return __nv_fast_powf(__a, __b);
}
lib/include/__clang_cuda_libdevice_declares.h
@@ -285,8 +285,8 @@ __DEVICE__ double __nv_normcdfinv(double __a);
__DEVICE__ float __nv_normcdfinvf(float __a);
__DEVICE__ float __nv_normf(int __a, const float *__b);
__DEVICE__ double __nv_norm(int __a, const double *__b);
-__DEVICE__ int __nv_popc(int __a);
-__DEVICE__ int __nv_popcll(long long __a);
+__DEVICE__ int __nv_popc(unsigned int __a);
+__DEVICE__ int __nv_popcll(unsigned long long __a);
__DEVICE__ double __nv_pow(double __a, double __b);
__DEVICE__ float __nv_powf(float __a, float __b);
__DEVICE__ double __nv_powi(double __a, int __b);
lib/include/__clang_cuda_math.h
@@ -36,7 +36,7 @@
// because the OpenMP overlay requires constexpr functions here but prior to
// c++14 void return functions could not be constexpr.
#pragma push_macro("__DEVICE_VOID__")
-#ifdef __OPENMP_NVPTX__ && defined(__cplusplus) && __cplusplus < 201402L
+#if defined(__OPENMP_NVPTX__) && defined(__cplusplus) && __cplusplus < 201402L
#define __DEVICE_VOID__ static __attribute__((always_inline, nothrow))
#else
#define __DEVICE_VOID__ __DEVICE__
@@ -45,9 +45,9 @@
 // libdevice provides fast low precision and slow full-precision implementations
// for some functions. Which one gets selected depends on
// __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if
-// -ffast-math or -fcuda-approx-transcendentals are in effect.
+// -ffast-math or -fgpu-approx-transcendentals are in effect.
#pragma push_macro("__FAST_OR_SLOW")
-#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
+#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
#define __FAST_OR_SLOW(fast, slow) fast
#else
#define __FAST_OR_SLOW(fast, slow) slow
lib/include/__clang_cuda_runtime_wrapper.h
@@ -196,12 +196,12 @@ inline __host__ double __signbitd(double x) {
// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we
// get the slow-but-accurate or fast-but-inaccurate versions of functions like
-// sin and exp. This is controlled in clang by -fcuda-approx-transcendentals.
+// sin and exp. This is controlled in clang by -fgpu-approx-transcendentals.
//
// device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs.
// slow divides), so we need to scope our define carefully here.
#pragma push_macro("__USE_FAST_MATH__")
-#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
+#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
#define __USE_FAST_MATH__ 1
#endif
lib/include/__clang_hip_math.h
@@ -14,9 +14,6 @@
#endif
#if !defined(__HIPCC_RTC__)
-#if defined(__cplusplus)
-#include <algorithm>
-#endif
#include <limits.h>
#include <stdint.h>
#ifdef __OPENMP_AMDGCN__
@@ -32,6 +29,17 @@
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
#endif
+// Device library provides fast low precision and slow full-precision
+// implementations for some functions. Which one gets selected depends on
+// __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if
+// -ffast-math or -fgpu-approx-transcendentals are in effect.
+#pragma push_macro("__FAST_OR_SLOW")
+#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
+#define __FAST_OR_SLOW(fast, slow) fast
+#else
+#define __FAST_OR_SLOW(fast, slow) slow
+#endif
+
// A few functions return bool type starting only in C++11.
#pragma push_macro("__RETURN_TYPE")
#ifdef __OPENMP_AMDGCN__
@@ -139,21 +147,180 @@ uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
}
// BEGIN FLOAT
+
+// BEGIN INTRINSICS
+
+__DEVICE__
+float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
+
+__DEVICE__
+float __exp10f(float __x) {
+ const float __log2_10 = 0x1.a934f0p+1f;
+ return __builtin_amdgcn_exp2f(__log2_10 * __x);
+}
+
+__DEVICE__
+float __expf(float __x) {
+ const float __log2_e = 0x1.715476p+0;
+ return __builtin_amdgcn_exp2f(__log2_e * __x);
+}
+
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+__DEVICE__
+float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
+__DEVICE__
+float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
+__DEVICE__
+float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
+__DEVICE__
+float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
+#else
+__DEVICE__
+float __fadd_rn(float __x, float __y) { return __x + __y; }
+#endif
+
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+__DEVICE__
+float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
+__DEVICE__
+float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
+__DEVICE__
+float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
+__DEVICE__
+float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
+#else
+__DEVICE__
+float __fdiv_rn(float __x, float __y) { return __x / __y; }
+#endif
+
+__DEVICE__
+float __fdividef(float __x, float __y) { return __x / __y; }
+
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+__DEVICE__
+float __fmaf_rd(float __x, float __y, float __z) {
+ return __ocml_fma_rtn_f32(__x, __y, __z);
+}
+__DEVICE__
+float __fmaf_rn(float __x, float __y, float __z) {
+ return __ocml_fma_rte_f32(__x, __y, __z);
+}
+__DEVICE__
+float __fmaf_ru(float __x, float __y, float __z) {
+ return __ocml_fma_rtp_f32(__x, __y, __z);
+}
+__DEVICE__
+float __fmaf_rz(float __x, float __y, float __z) {
+ return __ocml_fma_rtz_f32(__x, __y, __z);
+}
+#else
+__DEVICE__
+float __fmaf_rn(float __x, float __y, float __z) {
+ return __builtin_fmaf(__x, __y, __z);
+}
+#endif
+
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+__DEVICE__
+float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
+__DEVICE__
+float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
+__DEVICE__
+float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
+__DEVICE__
+float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
+#else
+__DEVICE__
+float __fmul_rn(float __x, float __y) { return __x * __y; }
+#endif
+
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+__DEVICE__
+float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
+__DEVICE__
+float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
+__DEVICE__
+float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
+__DEVICE__
+float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
+#else
+__DEVICE__
+float __frcp_rn(float __x) { return 1.0f / __x; }
+#endif
+
+__DEVICE__
+float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); }
+
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+__DEVICE__
+float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
+__DEVICE__
+float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
+__DEVICE__
+float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
+__DEVICE__
+float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
+#else
+__DEVICE__
+float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
+#endif
+
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+__DEVICE__
+float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
+__DEVICE__
+float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
+__DEVICE__
+float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
+__DEVICE__
+float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
+#else
+__DEVICE__
+float __fsub_rn(float __x, float __y) { return __x - __y; }
+#endif
+
+__DEVICE__
+float __log10f(float __x) { return __builtin_log10f(__x); }
+
+__DEVICE__
+float __log2f(float __x) { return __builtin_amdgcn_logf(__x); }
+
+__DEVICE__
+float __logf(float __x) { return __builtin_logf(__x); }
+
+__DEVICE__
+float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
+
+__DEVICE__
+float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
+
+__DEVICE__
+void __sincosf(float __x, float *__sinptr, float *__cosptr) {
+ *__sinptr = __ocml_native_sin_f32(__x);
+ *__cosptr = __ocml_native_cos_f32(__x);
+}
+
+__DEVICE__
+float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
+
+__DEVICE__
+float __tanf(float __x) {
+ return __sinf(__x) * __builtin_amdgcn_rcpf(__cosf(__x));
+}
+// END INTRINSICS
+
#if defined(__cplusplus)
__DEVICE__
int abs(int __x) {
- int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
- return (__x ^ __sgn) - __sgn;
+ return __builtin_abs(__x);
}
__DEVICE__
long labs(long __x) {
- long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
- return (__x ^ __sgn) - __sgn;
+ return __builtin_labs(__x);
}
__DEVICE__
long long llabs(long long __x) {
- long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
- return (__x ^ __sgn) - __sgn;
+ return __builtin_llabs(__x);
}
#endif
@@ -188,7 +355,7 @@ __DEVICE__
float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); }
__DEVICE__
-float cosf(float __x) { return __ocml_cos_f32(__x); }
+float cosf(float __x) { return __FAST_OR_SLOW(__cosf, __ocml_cos_f32)(__x); }
__DEVICE__
float coshf(float __x) { return __ocml_cosh_f32(__x); }
@@ -321,13 +488,13 @@ __DEVICE__
float log1pf(float __x) { return __ocml_log1p_f32(__x); }
__DEVICE__
-float log2f(float __x) { return __builtin_log2f(__x); }
+float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __ocml_log2_f32)(__x); }
__DEVICE__
float logbf(float __x) { return __ocml_logb_f32(__x); }
__DEVICE__
-float logf(float __x) { return __builtin_logf(__x); }
+float logf(float __x) { return __FAST_OR_SLOW(__logf, __ocml_log_f32)(__x); }
__DEVICE__
long int lrintf(float __x) { return __builtin_rintf(__x); }
@@ -401,7 +568,7 @@ float normf(int __dim,
++__a;
}
- return __ocml_sqrt_f32(__r);
+ return __builtin_sqrtf(__r);
}
__DEVICE__
@@ -483,9 +650,13 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) {
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
+#ifdef __CLANG_GPU_APPROX_TRANSCENDENTALS__
+ __sincosf(__x, __sinptr, __cosptr);
+#else
*__sinptr =
__ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
*__cosptr = __tmp;
+#endif
}
__DEVICE__
@@ -500,7 +671,7 @@ void sincospif(float __x, float *__sinptr, float *__cosptr) {
}
__DEVICE__
-float sinf(float __x) { return __ocml_sin_f32(__x); }
+float sinf(float __x) { return __FAST_OR_SLOW(__sinf, __ocml_sin_f32)(__x); }
__DEVICE__
float sinhf(float __x) { return __ocml_sinh_f32(__x); }
@@ -509,7 +680,7 @@ __DEVICE__
float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
__DEVICE__
-float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
+float sqrtf(float __x) { return __builtin_sqrtf(__x); }
__DEVICE__
float tanf(float __x) { return __ocml_tan_f32(__x); }
@@ -551,158 +722,7 @@ float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
return __x1;
}
-// BEGIN INTRINSICS
-
-__DEVICE__
-float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
-
-__DEVICE__
-float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
-
-__DEVICE__
-float __expf(float __x) { return __ocml_native_exp_f32(__x); }
-
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
-__DEVICE__
-float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
-__DEVICE__
-float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
-__DEVICE__
-float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
-#else
-__DEVICE__
-float __fadd_rn(float __x, float __y) { return __x + __y; }
-#endif
-
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
-__DEVICE__
-float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
-__DEVICE__
-float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
-__DEVICE__
-float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
-#else
-__DEVICE__
-float __fdiv_rn(float __x, float __y) { return __x / __y; }
-#endif
-
-__DEVICE__
-float __fdividef(float __x, float __y) { return __x / __y; }
-
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-float __fmaf_rd(float __x, float __y, float __z) {
- return __ocml_fma_rtn_f32(__x, __y, __z);
-}
-__DEVICE__
-float __fmaf_rn(float __x, float __y, float __z) {
- return __ocml_fma_rte_f32(__x, __y, __z);
-}
-__DEVICE__
-float __fmaf_ru(float __x, float __y, float __z) {
- return __ocml_fma_rtp_f32(__x, __y, __z);
-}
-__DEVICE__
-float __fmaf_rz(float __x, float __y, float __z) {
- return __ocml_fma_rtz_f32(__x, __y, __z);
-}
-#else
-__DEVICE__
-float __fmaf_rn(float __x, float __y, float __z) {
- return __builtin_fmaf(__x, __y, __z);
-}
-#endif
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
-__DEVICE__
-float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
-__DEVICE__
-float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
-__DEVICE__
-float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
-#else
-__DEVICE__
-float __fmul_rn(float __x, float __y) { return __x * __y; }
-#endif
-
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
-__DEVICE__
-float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
-__DEVICE__
-float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
-__DEVICE__
-float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
-#else
-__DEVICE__
-float __frcp_rn(float __x) { return 1.0f / __x; }
-#endif
-
-__DEVICE__
-float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); }
-
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
-__DEVICE__
-float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
-__DEVICE__
-float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
-__DEVICE__
-float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
-#else
-__DEVICE__
-float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
-#endif
-
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
-__DEVICE__
-float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
-__DEVICE__
-float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
-__DEVICE__
-float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
-#else
-__DEVICE__
-float __fsub_rn(float __x, float __y) { return __x - __y; }
-#endif
-
-__DEVICE__
-float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
-
-__DEVICE__
-float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
-
-__DEVICE__
-float __logf(float __x) { return __ocml_native_log_f32(__x); }
-
-__DEVICE__
-float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
-
-__DEVICE__
-float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
-
-__DEVICE__
-void __sincosf(float __x, float *__sinptr, float *__cosptr) {
- *__sinptr = __ocml_native_sin_f32(__x);
- *__cosptr = __ocml_native_cos_f32(__x);
-}
-
-__DEVICE__
-float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
-
-__DEVICE__
-float __tanf(float __x) { return __ocml_tan_f32(__x); }
-// END INTRINSICS
// END FLOAT
// BEGIN DOUBLE
@@ -941,7 +961,7 @@ double norm(int __dim,
++__a;
}
- return __ocml_sqrt_f64(__r);
+ return __builtin_sqrt(__r);
}
__DEVICE__
@@ -1064,7 +1084,7 @@ __DEVICE__
double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
__DEVICE__
-double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
+double sqrt(double __x) { return __builtin_sqrt(__x); }
__DEVICE__
double tan(double __x) { return __ocml_tan_f64(__x); }
@@ -1198,7 +1218,7 @@ __DEVICE__
double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
#else
__DEVICE__
-double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
+double __dsqrt_rn(double __x) { return __builtin_sqrt(__x); }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
@@ -1288,16 +1308,17 @@ double min(double __x, double __y) { return __builtin_fmin(__x, __y); }
#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
__host__ inline static int min(int __arg1, int __arg2) {
- return std::min(__arg1, __arg2);
+ return __arg1 < __arg2 ? __arg1 : __arg2;
}
__host__ inline static int max(int __arg1, int __arg2) {
- return std::max(__arg1, __arg2);
+ return __arg1 > __arg2 ? __arg1 : __arg2;
}
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
#endif
#pragma pop_macro("__DEVICE__")
#pragma pop_macro("__RETURN_TYPE")
+#pragma pop_macro("__FAST_OR_SLOW")
#endif // __CLANG_HIP_MATH_H__
lib/include/__clang_hip_runtime_wrapper.h
@@ -47,28 +47,9 @@ extern "C" {
#endif //__cplusplus
#if !defined(__HIPCC_RTC__)
-#include <cmath>
-#include <cstdlib>
-#include <stdlib.h>
#if __has_include("hip/hip_version.h")
#include "hip/hip_version.h"
#endif // __has_include("hip/hip_version.h")
-#else
-typedef __SIZE_TYPE__ size_t;
-// Define macros which are needed to declare HIP device API's without standard
-// C/C++ headers. This is for readability so that these API's can be written
-// the same way as non-hipRTC use case. These macros need to be popped so that
-// they do not pollute users' name space.
-#pragma push_macro("NULL")
-#pragma push_macro("uint32_t")
-#pragma push_macro("uint64_t")
-#pragma push_macro("CHAR_BIT")
-#pragma push_macro("INT_MAX")
-#define NULL (void *)0
-#define uint32_t __UINT32_TYPE__
-#define uint64_t __UINT64_TYPE__
-#define CHAR_BIT __CHAR_BIT__
-#define INT_MAX __INTMAX_MAX__
#endif // __HIPCC_RTC__
typedef __SIZE_TYPE__ __hip_size_t;
@@ -78,11 +59,13 @@ extern "C" {
#endif //__cplusplus
#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
-extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
-extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr);
+__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
+__device__ void __ockl_dm_dealloc(unsigned long long __addr);
#if __has_feature(address_sanitizer)
-extern "C" __device__ unsigned long long __asan_malloc_impl(unsigned long long __size, unsigned long long __pc);
-extern "C" __device__ void __asan_free_impl(unsigned long long __addr, unsigned long long __pc);
+__device__ unsigned long long __asan_malloc_impl(unsigned long long __size,
+ unsigned long long __pc);
+__device__ void __asan_free_impl(unsigned long long __addr,
+ unsigned long long __pc);
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
return (void *)__asan_malloc_impl(__size, __pc);
@@ -91,7 +74,7 @@ __attribute__((noinline, weak)) __device__ void free(void *__ptr) {
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
__asan_free_impl((unsigned long long)__ptr, __pc);
}
-#else
+#else // __has_feature(address_sanitizer)
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
return (void *) __ockl_dm_alloc(__size);
}
@@ -109,7 +92,7 @@ __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__hip_free(__ptr);
}
-#else
+#else // __HIP_ENABLE_DEVICE_MALLOC__
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
__builtin_trap();
return (void *)0;
@@ -117,13 +100,38 @@ __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__builtin_trap();
}
-#endif
+#endif // __HIP_ENABLE_DEVICE_MALLOC__
#endif // HIP version check
#ifdef __cplusplus
} // extern "C"
#endif //__cplusplus
+#if !defined(__HIPCC_RTC__)
+#include <cmath>
+#include <cstdlib>
+#include <stdlib.h>
+#if __has_include("hip/hip_version.h")
+#include "hip/hip_version.h"
+#endif // __has_include("hip/hip_version.h")
+#else
+typedef __SIZE_TYPE__ size_t;
+// Define macros which are needed to declare HIP device API's without standard
+// C/C++ headers. This is for readability so that these API's can be written
+// the same way as non-hipRTC use case. These macros need to be popped so that
+// they do not pollute users' name space.
+#pragma push_macro("NULL")
+#pragma push_macro("uint32_t")
+#pragma push_macro("uint64_t")
+#pragma push_macro("CHAR_BIT")
+#pragma push_macro("INT_MAX")
+#define NULL (void *)0
+#define uint32_t __UINT32_TYPE__
+#define uint64_t __UINT64_TYPE__
+#define CHAR_BIT __CHAR_BIT__
+#define INT_MAX __INT_MAX__
+#endif // __HIPCC_RTC__
+
#include <__clang_hip_libdevice_declares.h>
#include <__clang_hip_math.h>
#include <__clang_hip_stdlib.h>
lib/include/__stdarg___gnuc_va_list.h
@@ -0,0 +1,13 @@
+/*===---- __stdarg___gnuc_va_list.h - Definition of __gnuc_va_list ---------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __GNUC_VA_LIST
+#define __GNUC_VA_LIST
+typedef __builtin_va_list __gnuc_va_list;
+#endif
lib/include/__stdarg___va_copy.h
@@ -0,0 +1,12 @@
+/*===---- __stdarg___va_copy.h - Definition of __va_copy -------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __va_copy
+#define __va_copy(d, s) __builtin_va_copy(d, s)
+#endif
lib/include/__stdarg_va_arg.h
@@ -0,0 +1,22 @@
+/*===---- __stdarg_va_arg.h - Definitions of va_start, va_arg, va_end-------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef va_arg
+
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
+/* C23 does not require the second parameter for va_start. */
+#define va_start(ap, ...) __builtin_va_start(ap, 0)
+#else
+/* Versions before C23 do require the second parameter. */
+#define va_start(ap, param) __builtin_va_start(ap, param)
+#endif
+#define va_end(ap) __builtin_va_end(ap)
+#define va_arg(ap, type) __builtin_va_arg(ap, type)
+
+#endif
lib/include/__stdarg_va_copy.h
@@ -0,0 +1,12 @@
+/*===---- __stdarg_va_copy.h - Definition of va_copy------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef va_copy
+#define va_copy(dest, src) __builtin_va_copy(dest, src)
+#endif
lib/include/__stdarg_va_list.h
@@ -0,0 +1,13 @@
+/*===---- __stdarg_va_list.h - Definition of va_list -----------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _VA_LIST
+#define _VA_LIST
+typedef __builtin_va_list va_list;
+#endif
lib/include/__stddef_max_align_t.h
@@ -1,4 +1,4 @@
-/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---===
+/*===---- __stddef_max_align_t.h - Definition of max_align_t ---------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
lib/include/__stddef_null.h
@@ -0,0 +1,29 @@
+/*===---- __stddef_null.h - Definition of NULL -----------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#if !defined(NULL) || !__building_module(_Builtin_stddef)
+
+/* linux/stddef.h will define NULL to 0. glibc (and other) headers then define
+ * __need_NULL and rely on stddef.h to redefine NULL to the correct value again.
+ * Modules don't support redefining macros like that, but support that pattern
+ * in the non-modules case.
+ */
+#undef NULL
+
+#ifdef __cplusplus
+#if !defined(__MINGW32__) && !defined(_MSC_VER)
+#define NULL __null
+#else
+#define NULL 0
+#endif
+#else
+#define NULL ((void*)0)
+#endif
+
+#endif
lib/include/__stddef_nullptr_t.h
@@ -0,0 +1,29 @@
+/*===---- __stddef_nullptr_t.h - Definition of nullptr_t -------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+/*
+ * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
+ * and needs to behave as if it was textual.
+ */
+#if !defined(_NULLPTR_T) || \
+ (__has_feature(modules) && !__building_module(_Builtin_stddef))
+#define _NULLPTR_T
+
+#ifdef __cplusplus
+#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
+namespace std {
+typedef decltype(nullptr) nullptr_t;
+}
+using ::std::nullptr_t;
+#endif
+#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
+typedef typeof(nullptr) nullptr_t;
+#endif
+
+#endif
lib/include/__stddef_offsetof.h
@@ -0,0 +1,17 @@
+/*===---- __stddef_offsetof.h - Definition of offsetof ---------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+/*
+ * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
+ * and needs to behave as if it was textual.
+ */
+#if !defined(offsetof) || \
+ (__has_feature(modules) && !__building_module(_Builtin_stddef))
+#define offsetof(t, d) __builtin_offsetof(t, d)
+#endif
lib/include/__stddef_ptrdiff_t.h
@@ -0,0 +1,20 @@
+/*===---- __stddef_ptrdiff_t.h - Definition of ptrdiff_t -------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+/*
+ * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
+ * and needs to behave as if it was textual.
+ */
+#if !defined(_PTRDIFF_T) || \
+ (__has_feature(modules) && !__building_module(_Builtin_stddef))
+#define _PTRDIFF_T
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+
+#endif
lib/include/__stddef_rsize_t.h
@@ -0,0 +1,20 @@
+/*===---- __stddef_rsize_t.h - Definition of rsize_t -----------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+/*
+ * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
+ * and needs to behave as if it was textual.
+ */
+#if !defined(_RSIZE_T) || \
+ (__has_feature(modules) && !__building_module(_Builtin_stddef))
+#define _RSIZE_T
+
+typedef __SIZE_TYPE__ rsize_t;
+
+#endif
lib/include/__stddef_size_t.h
@@ -0,0 +1,20 @@
+/*===---- __stddef_size_t.h - Definition of size_t -------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+/*
+ * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
+ * and needs to behave as if it was textual.
+ */
+#if !defined(_SIZE_T) || \
+ (__has_feature(modules) && !__building_module(_Builtin_stddef))
+#define _SIZE_T
+
+typedef __SIZE_TYPE__ size_t;
+
+#endif
lib/include/__stddef_unreachable.h
@@ -0,0 +1,21 @@
+/*===---- __stddef_unreachable.h - Definition of unreachable ---------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __cplusplus
+
+/*
+ * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
+ * and needs to behave as if it was textual.
+ */
+#if !defined(unreachable) || \
+ (__has_feature(modules) && !__building_module(_Builtin_stddef))
+#define unreachable() __builtin_unreachable()
+#endif
+
+#endif
lib/include/__stddef_wchar_t.h
@@ -0,0 +1,28 @@
+/*===---- __stddef_wchar.h - Definition of wchar_t -------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED)
+
+/*
+ * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
+ * and needs to behave as if it was textual.
+ */
+#if !defined(_WCHAR_T) || \
+ (__has_feature(modules) && !__building_module(_Builtin_stddef))
+#define _WCHAR_T
+
+#ifdef _MSC_EXTENSIONS
+#define _WCHAR_T_DEFINED
+#endif
+
+typedef __WCHAR_TYPE__ wchar_t;
+
+#endif
+
+#endif
lib/include/__stddef_wint_t.h
@@ -0,0 +1,15 @@
+/*===---- __stddef_wint.h - Definition of wint_t ---------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _WINT_T
+#define _WINT_T
+
+typedef __WINT_TYPE__ wint_t;
+
+#endif
lib/include/adcintrin.h
@@ -0,0 +1,160 @@
+/*===---- adcintrin.h - ADC intrinsics -------------------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __ADCINTRIN_H
+#define __ADCINTRIN_H
+
+#if !defined(__i386__) && !defined(__x86_64__)
+#error "This header is only meant to be used on x86 and x64 architecture"
+#endif
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+/* Use C++ inline semantics in C++, GNU inline for C mode. */
+#if defined(__cplusplus)
+#define __INLINE __inline
+#else
+#define __INLINE static __inline
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
+/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
+/// at \a __p, and returns the 8-bit carry-out (carry flag).
+///
+/// \code{.operation}
+/// temp := (__cf == 0) ? 0 : 1
+/// Store32(__p, __x + __y + temp)
+/// result := CF
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c ADC instruction.
+///
+/// \param __cf
+/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
+/// \param __x
+/// A 32-bit unsigned addend.
+/// \param __y
+/// A 32-bit unsigned addend.
+/// \param __p
+/// Pointer to memory for storing the sum.
+/// \returns The 8-bit unsigned carry-out value.
+__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
+ unsigned int __x,
+ unsigned int __y,
+ unsigned int *__p) {
+ return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
+}
+
+/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry
+/// flag \a __cf, and subtracts the result from unsigned 32-bit integer
+/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p,
+/// and returns the 8-bit carry-out (carry or overflow flag).
+///
+/// \code{.operation}
+/// temp := (__cf == 0) ? 0 : 1
+/// Store32(__p, __x - (__y + temp))
+/// result := CF
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c SBB instruction.
+///
+/// \param __cf
+/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
+/// \param __x
+/// The 32-bit unsigned minuend.
+/// \param __y
+/// The 32-bit unsigned subtrahend.
+/// \param __p
+/// Pointer to memory for storing the difference.
+/// \returns The 8-bit unsigned carry-out value.
+__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
+ unsigned int __x,
+ unsigned int __y,
+ unsigned int *__p) {
+ return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);
+}
+
+#ifdef __x86_64__
+/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
+/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
+/// at \a __p, and returns the 8-bit carry-out (carry flag).
+///
+/// \code{.operation}
+/// temp := (__cf == 0) ? 0 : 1
+/// Store64(__p, __x + __y + temp)
+/// result := CF
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c ADC instruction.
+///
+/// \param __cf
+/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
+/// \param __x
+/// A 64-bit unsigned addend.
+/// \param __y
+/// A 64-bit unsigned addend.
+/// \param __p
+/// Pointer to memory for storing the sum.
+/// \returns The 8-bit unsigned carry-out value.
+__INLINE unsigned char __DEFAULT_FN_ATTRS
+_addcarry_u64(unsigned char __cf, unsigned long long __x,
+ unsigned long long __y, unsigned long long *__p) {
+ return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
+}
+
+/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry
+/// flag \a __cf, and subtracts the result from unsigned 64-bit integer
+/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p,
+/// and returns the 8-bit carry-out (carry or overflow flag).
+///
+/// \code{.operation}
+/// temp := (__cf == 0) ? 0 : 1
+/// Store64(__p, __x - (__y + temp))
+/// result := CF
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c SBB instruction.
+///
+/// \param __cf
+/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
+/// \param __x
+/// The 64-bit unsigned minuend.
+/// \param __y
+/// The 64-bit unsigned subtrahend.
+/// \param __p
+/// Pointer to memory for storing the difference.
+/// \returns The 8-bit unsigned carry-out value.
+__INLINE unsigned char __DEFAULT_FN_ATTRS
+_subborrow_u64(unsigned char __cf, unsigned long long __x,
+ unsigned long long __y, unsigned long long *__p) {
+ return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);
+}
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#undef __INLINE
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __ADCINTRIN_H */
lib/include/adxintrin.h
@@ -15,7 +15,8 @@
#define __ADXINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("adx")))
/* Use C++ inline semantics in C++, GNU inline for C mode. */
#if defined(__cplusplus)
@@ -53,10 +54,10 @@ extern "C" {
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
-__INLINE unsigned char
- __attribute__((__always_inline__, __nodebug__, __target__("adx")))
- _addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
- unsigned int *__p) {
+__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarryx_u32(unsigned char __cf,
+ unsigned int __x,
+ unsigned int __y,
+ unsigned int *__p) {
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
}
@@ -84,137 +85,10 @@ __INLINE unsigned char
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
-__INLINE unsigned char
- __attribute__((__always_inline__, __nodebug__, __target__("adx")))
- _addcarryx_u64(unsigned char __cf, unsigned long long __x,
- unsigned long long __y, unsigned long long *__p) {
- return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
-}
-#endif
-
-/* Intrinsics that are also available if __ADX__ is undefined. */
-
-/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
-/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
-/// at \a __p, and returns the 8-bit carry-out (carry flag).
-///
-/// \code{.operation}
-/// temp := (__cf == 0) ? 0 : 1
-/// Store32(__p, __x + __y + temp)
-/// result := CF
-/// \endcode
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the \c ADC instruction.
-///
-/// \param __cf
-/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
-/// \param __x
-/// A 32-bit unsigned addend.
-/// \param __y
-/// A 32-bit unsigned addend.
-/// \param __p
-/// Pointer to memory for storing the sum.
-/// \returns The 8-bit unsigned carry-out value.
-__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
- unsigned int __x,
- unsigned int __y,
- unsigned int *__p) {
- return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
-}
-
-#ifdef __x86_64__
-/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
-/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
-/// at \a __p, and returns the 8-bit carry-out (carry flag).
-///
-/// \code{.operation}
-/// temp := (__cf == 0) ? 0 : 1
-/// Store64(__p, __x + __y + temp)
-/// result := CF
-/// \endcode
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the \c ADC instruction.
-///
-/// \param __cf
-/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
-/// \param __x
-/// A 64-bit unsigned addend.
-/// \param __y
-/// A 64-bit unsigned addend.
-/// \param __p
-/// Pointer to memory for storing the sum.
-/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
-_addcarry_u64(unsigned char __cf, unsigned long long __x,
- unsigned long long __y, unsigned long long *__p) {
- return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
-}
-#endif
-
-/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry
-/// flag \a __cf, and subtracts the result from unsigned 32-bit integer
-/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p,
-/// and returns the 8-bit carry-out (carry or overflow flag).
-///
-/// \code{.operation}
-/// temp := (__cf == 0) ? 0 : 1
-/// Store32(__p, __x - (__y + temp))
-/// result := CF
-/// \endcode
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the \c SBB instruction.
-///
-/// \param __cf
-/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
-/// \param __x
-/// The 32-bit unsigned minuend.
-/// \param __y
-/// The 32-bit unsigned subtrahend.
-/// \param __p
-/// Pointer to memory for storing the difference.
-/// \returns The 8-bit unsigned carry-out value.
-__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
- unsigned int __x,
- unsigned int __y,
- unsigned int *__p) {
- return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);
-}
-
-#ifdef __x86_64__
-/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry
-/// flag \a __cf, and subtracts the result from unsigned 64-bit integer
-/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p,
-/// and returns the 8-bit carry-out (carry or overflow flag).
-///
-/// \code{.operation}
-/// temp := (__cf == 0) ? 0 : 1
-/// Store64(__p, __x - (__y + temp))
-/// result := CF
-/// \endcode
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the \c ADC instruction.
-///
-/// \param __cf
-/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
-/// \param __x
-/// The 64-bit unsigned minuend.
-/// \param __y
-/// The 64-bit unsigned subtrahend.
-/// \param __p
-/// Pointer to memory for storing the difference.
-/// \returns The 8-bit unsigned carry-out value.
-__INLINE unsigned char __DEFAULT_FN_ATTRS
-_subborrow_u64(unsigned char __cf, unsigned long long __x,
+_addcarryx_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p) {
- return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);
+ return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
}
#endif
@@ -222,6 +96,7 @@ _subborrow_u64(unsigned char __cf, unsigned long long __x,
}
#endif
+#undef __INLINE
#undef __DEFAULT_FN_ATTRS
#endif /* __ADXINTRIN_H */
lib/include/altivec.h
@@ -14647,67 +14647,86 @@ static __inline__ void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b,
static __inline__ vector signed char __ATTRS_o_ai vec_promote(signed char __a,
int __b) {
- vector signed char __res = (vector signed char)(0);
- __res[__b & 0x7] = __a;
+ const vector signed char __zero = (vector signed char)0;
+ vector signed char __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1);
+ __res[__b & 0xf] = __a;
return __res;
}
static __inline__ vector unsigned char __ATTRS_o_ai
vec_promote(unsigned char __a, int __b) {
- vector unsigned char __res = (vector unsigned char)(0);
- __res[__b & 0x7] = __a;
+ const vector unsigned char __zero = (vector unsigned char)(0);
+ vector unsigned char __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1);
+ __res[__b & 0xf] = __a;
return __res;
}
static __inline__ vector short __ATTRS_o_ai vec_promote(short __a, int __b) {
- vector short __res = (vector short)(0);
+ const vector short __zero = (vector short)(0);
+ vector short __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0x7] = __a;
return __res;
}
static __inline__ vector unsigned short __ATTRS_o_ai
vec_promote(unsigned short __a, int __b) {
- vector unsigned short __res = (vector unsigned short)(0);
+ const vector unsigned short __zero = (vector unsigned short)(0);
+ vector unsigned short __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0x7] = __a;
return __res;
}
static __inline__ vector int __ATTRS_o_ai vec_promote(int __a, int __b) {
- vector int __res = (vector int)(0);
+ const vector int __zero = (vector int)(0);
+ vector int __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
__res[__b & 0x3] = __a;
return __res;
}
static __inline__ vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a,
int __b) {
- vector unsigned int __res = (vector unsigned int)(0);
+ const vector unsigned int __zero = (vector unsigned int)(0);
+ vector unsigned int __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
__res[__b & 0x3] = __a;
return __res;
}
static __inline__ vector float __ATTRS_o_ai vec_promote(float __a, int __b) {
- vector float __res = (vector float)(0);
+ const vector float __zero = (vector float)(0);
+ vector float __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
__res[__b & 0x3] = __a;
return __res;
}
#ifdef __VSX__
static __inline__ vector double __ATTRS_o_ai vec_promote(double __a, int __b) {
- vector double __res = (vector double)(0);
+ const vector double __zero = (vector double)(0);
+ vector double __res = __builtin_shufflevector(__zero, __zero, -1, -1);
__res[__b & 0x1] = __a;
return __res;
}
static __inline__ vector signed long long __ATTRS_o_ai
vec_promote(signed long long __a, int __b) {
- vector signed long long __res = (vector signed long long)(0);
+ const vector signed long long __zero = (vector signed long long)(0);
+ vector signed long long __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1);
__res[__b & 0x1] = __a;
return __res;
}
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_promote(unsigned long long __a, int __b) {
- vector unsigned long long __res = (vector unsigned long long)(0);
+ const vector unsigned long long __zero = (vector unsigned long long)(0);
+ vector unsigned long long __res =
+ __builtin_shufflevector(__zero, __zero, -1, -1);
__res[__b & 0x1] = __a;
return __res;
}
lib/include/ammintrin.h
@@ -155,9 +155,9 @@ _mm_insert_si64(__m128i __x, __m128i __y)
/// \param __a
/// The 64-bit double-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
-_mm_stream_sd(double *__p, __m128d __a)
+_mm_stream_sd(void *__p, __m128d __a)
{
- __builtin_ia32_movntsd(__p, (__v2df)__a);
+ __builtin_ia32_movntsd((double *)__p, (__v2df)__a);
}
/// Stores a 32-bit single-precision floating-point value in a 32-bit
@@ -173,9 +173,9 @@ _mm_stream_sd(double *__p, __m128d __a)
/// \param __a
/// The 32-bit single-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
-_mm_stream_ss(float *__p, __m128 __a)
+_mm_stream_ss(void *__p, __m128 __a)
{
- __builtin_ia32_movntss(__p, (__v4sf)__a);
+ __builtin_ia32_movntss((float *)__p, (__v4sf)__a);
}
#undef __DEFAULT_FN_ATTRS
lib/include/arm_acle.h
@@ -4,6 +4,13 @@
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
+ * The Arm C Language Extensions specifications can be found in the following
+ * link: https://github.com/ARM-software/acle/releases
+ *
+ * The ACLE section numbers are subject to change. When consulting the
+ * specifications, it is recommended to search using section titles if
+ * the section numbers look outdated.
+ *
*===-----------------------------------------------------------------------===
*/
@@ -20,8 +27,8 @@
extern "C" {
#endif
-/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
-/* 8.3 Memory barriers */
+/* 7 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
+/* 7.3 Memory barriers */
#if !__has_builtin(__dmb)
#define __dmb(i) __builtin_arm_dmb(i)
#endif
@@ -32,7 +39,7 @@ extern "C" {
#define __isb(i) __builtin_arm_isb(i)
#endif
-/* 8.4 Hints */
+/* 7.4 Hints */
#if !__has_builtin(__wfi)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {
@@ -68,7 +75,7 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(v
#define __dbg(t) __builtin_arm_dbg(t)
#endif
-/* 8.5 Swap */
+/* 7.5 Swap */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
uint32_t v;
@@ -78,8 +85,8 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
return v;
}
-/* 8.6 Memory prefetch intrinsics */
-/* 8.6.1 Data prefetch */
+/* 7.6 Memory prefetch intrinsics */
+/* 7.6.1 Data prefetch */
#define __pld(addr) __pldx(0, 0, 0, addr)
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
@@ -90,7 +97,7 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
__builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
#endif
-/* 8.6.2 Instruction prefetch */
+/* 7.6.2 Instruction prefetch */
#define __pli(addr) __plix(0, 0, addr)
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
@@ -101,15 +108,15 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
__builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
#endif
-/* 8.7 NOP */
+/* 7.7 NOP */
#if !defined(_MSC_VER) || !defined(__aarch64__)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
__builtin_arm_nop();
}
#endif
-/* 9 DATA-PROCESSING INTRINSICS */
-/* 9.2 Miscellaneous data-processing intrinsics */
+/* 8 DATA-PROCESSING INTRINSICS */
+/* 8.2 Miscellaneous data-processing intrinsics */
/* ROR */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t __x, uint32_t __y) {
@@ -248,9 +255,7 @@ __rbitl(unsigned long __t) {
#endif
}
-/*
- * 9.3 16-bit multiplications
- */
+/* 8.3 16-bit multiplications */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbb(int32_t __a, int32_t __b) {
@@ -279,18 +284,18 @@ __smulwt(int32_t __a, int32_t __b) {
#endif
/*
- * 9.4 Saturating intrinsics
+ * 8.4 Saturating intrinsics
*
* FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag
* intrinsics are implemented and the flag is enabled.
*/
-/* 9.4.1 Width-specified saturation intrinsics */
+/* 8.4.1 Width-specified saturation intrinsics */
#if defined(__ARM_FEATURE_SAT) && __ARM_FEATURE_SAT
#define __ssat(x, y) __builtin_arm_ssat(x, y)
#define __usat(x, y) __builtin_arm_usat(x, y)
#endif
-/* 9.4.2 Saturating addition and subtraction intrinsics */
+/* 8.4.2 Saturating addition and subtraction intrinsics */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qadd(int32_t __t, int32_t __v) {
@@ -308,7 +313,7 @@ __qdbl(int32_t __t) {
}
#endif
-/* 9.4.3 Accumultating multiplications */
+/* 8.4.3 Accumulating multiplications */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
@@ -337,13 +342,13 @@ __smlawt(int32_t __a, int32_t __b, int32_t __c) {
#endif
-/* 9.5.4 Parallel 16-bit saturation */
+/* 8.5.4 Parallel 16-bit saturation */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
#define __usat16(x, y) __builtin_arm_usat16(x, y)
#endif
-/* 9.5.5 Packing and unpacking */
+/* 8.5.5 Packing and unpacking */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
typedef int32_t int8x4_t;
typedef int32_t int16x2_t;
@@ -368,7 +373,7 @@ __uxtb16(int8x4_t __a) {
}
#endif
-/* 9.5.6 Parallel selection */
+/* 8.5.6 Parallel selection */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__sel(uint8x4_t __a, uint8x4_t __b) {
@@ -376,7 +381,7 @@ __sel(uint8x4_t __a, uint8x4_t __b) {
}
#endif
-/* 9.5.7 Parallel 8-bit addition and subtraction */
+/* 8.5.7 Parallel 8-bit addition and subtraction */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qadd8(int8x4_t __a, int8x4_t __b) {
@@ -428,7 +433,7 @@ __usub8(uint8x4_t __a, uint8x4_t __b) {
}
#endif
-/* 9.5.8 Sum of 8-bit absolute differences */
+/* 8.5.8 Sum of 8-bit absolute differences */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usad8(uint8x4_t __a, uint8x4_t __b) {
@@ -440,7 +445,7 @@ __usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
}
#endif
-/* 9.5.9 Parallel 16-bit addition and subtraction */
+/* 8.5.9 Parallel 16-bit addition and subtraction */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qadd16(int16x2_t __a, int16x2_t __b) {
@@ -540,7 +545,7 @@ __usub16(uint16x2_t __a, uint16x2_t __b) {
}
#endif
-/* 9.5.10 Parallel 16-bit multiplications */
+/* 8.5.10 Parallel 16-bit multiplications */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
@@ -592,7 +597,22 @@ __smusdx(int16x2_t __a, int16x2_t __b) {
}
#endif
-/* 9.7 CRC32 intrinsics */
+/* 8.6 Floating-point data-processing intrinsics */
+#if (defined(__ARM_FEATURE_DIRECTED_ROUNDING) && \
+ (__ARM_FEATURE_DIRECTED_ROUNDING)) && \
+ (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
+static __inline__ double __attribute__((__always_inline__, __nodebug__))
+__rintn(double __a) {
+ return __builtin_roundeven(__a);
+}
+
+static __inline__ float __attribute__((__always_inline__, __nodebug__))
+__rintnf(float __a) {
+ return __builtin_roundevenf(__a);
+}
+#endif
+
+/* 8.8 CRC32 intrinsics */
#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \
(defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
@@ -636,6 +656,7 @@ __crc32cd(uint32_t __a, uint64_t __b) {
}
#endif
+/* 8.6 Floating-point data-processing intrinsics */
/* Armv8.3-A Javascript conversion intrinsic */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a")))
@@ -687,7 +708,7 @@ __rint64x(double __a) {
}
#endif
-/* Armv8.7-A load/store 64-byte intrinsics */
+/* 8.9 Armv8.7-A load/store 64-byte intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
typedef struct {
uint64_t val[8];
@@ -713,7 +734,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
}
#endif
-/* 10.1 Special register intrinsics */
+/* 11.1 Special register intrinsics */
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
#define __arm_rsr128(sysreg) __builtin_arm_rsr128(sysreg)
@@ -727,7 +748,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v))
#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v))
-/* Memory Tagging Extensions (MTE) Intrinsics */
+/* 10.3 Memory Tagging Extensions (MTE) Intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
#define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask)
#define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset)
@@ -736,12 +757,71 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
-/* Memory Operations Intrinsics */
+/* 18 Memory Operations Intrinsics */
#define __arm_mops_memset_tag(__tagged_address, __value, __size) \
__builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
#endif
-/* Transactional Memory Extension (TME) Intrinsics */
+/* 11.3 Coprocessor Intrinsics */
+#if defined(__ARM_FEATURE_COPROC)
+
+#if (__ARM_FEATURE_COPROC & 0x1)
+
+#if (__ARM_ARCH < 8)
+#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \
+ __builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
+#endif /* __ARM_ARCH < 8 */
+
+#define __arm_ldc(coproc, CRd, p) __builtin_arm_ldc(coproc, CRd, p)
+#define __arm_stc(coproc, CRd, p) __builtin_arm_stc(coproc, CRd, p)
+
+#define __arm_mcr(coproc, opc1, value, CRn, CRm, opc2) \
+ __builtin_arm_mcr(coproc, opc1, value, CRn, CRm, opc2)
+#define __arm_mrc(coproc, opc1, CRn, CRm, opc2) \
+ __builtin_arm_mrc(coproc, opc1, CRn, CRm, opc2)
+
+#if (__ARM_ARCH != 4) && (__ARM_ARCH < 8)
+#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
+#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
+#endif /* (__ARM_ARCH != 4) && (__ARM_ARCH < 8) */
+
+#if (__ARM_ARCH_8M_MAIN__) || (__ARM_ARCH_8_1M_MAIN__)
+#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \
+ __builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
+#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
+#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
+#endif /* __ARM_ARCH_8M_MAIN__ || __ARM_ARCH_8_1M_MAIN__ */
+
+#endif /* __ARM_FEATURE_COPROC & 0x1 */
+
+#if (__ARM_FEATURE_COPROC & 0x2)
+#define __arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2) \
+ __builtin_arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2)
+#define __arm_ldc2(coproc, CRd, p) __builtin_arm_ldc2(coproc, CRd, p)
+#define __arm_stc2(coproc, CRd, p) __builtin_arm_stc2(coproc, CRd, p)
+#define __arm_ldc2l(coproc, CRd, p) __builtin_arm_ldc2l(coproc, CRd, p)
+#define __arm_stc2l(coproc, CRd, p) __builtin_arm_stc2l(coproc, CRd, p)
+#define __arm_mcr2(coproc, opc1, value, CRn, CRm, opc2) \
+ __builtin_arm_mcr2(coproc, opc1, value, CRn, CRm, opc2)
+#define __arm_mrc2(coproc, opc1, CRn, CRm, opc2) \
+ __builtin_arm_mrc2(coproc, opc1, CRn, CRm, opc2)
+#endif
+
+#if (__ARM_FEATURE_COPROC & 0x4)
+#define __arm_mcrr(coproc, opc1, value, CRm) \
+ __builtin_arm_mcrr(coproc, opc1, value, CRm)
+#define __arm_mrrc(coproc, opc1, CRm) __builtin_arm_mrrc(coproc, opc1, CRm)
+#endif
+
+#if (__ARM_FEATURE_COPROC & 0x8)
+#define __arm_mcrr2(coproc, opc1, value, CRm) \
+ __builtin_arm_mcrr2(coproc, opc1, value, CRm)
+#define __arm_mrrc2(coproc, opc1, CRm) __builtin_arm_mrrc2(coproc, opc1, CRm)
+#endif
+
+#endif // __ARM_FEATURE_COPROC
+
+/* 17 Transactional Memory Extension (TME) Intrinsics */
#if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME
#define _TMFAILURE_REASON 0x00007fffu
@@ -763,7 +843,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#endif /* __ARM_FEATURE_TME */
-/* Armv8.5-A Random number generation intrinsics */
+/* 8.7 Armv8.5-A Random number generation intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
__rndr(uint64_t *__p) {
lib/include/arm_neon.h
@@ -35,12 +35,7 @@
#include <stdint.h>
#include <arm_bf16.h>
-typedef float float32_t;
-typedef __fp16 float16_t;
-#ifdef __aarch64__
-typedef double float64_t;
-#endif
-
+#include <arm_vector_types.h>
#ifdef __aarch64__
typedef uint8_t poly8_t;
typedef uint16_t poly16_t;
@@ -51,30 +46,6 @@ typedef int8_t poly8_t;
typedef int16_t poly16_t;
typedef int64_t poly64_t;
#endif
-typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
-typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
-typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
-typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
-typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
-typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
-typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
-typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
-typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
-typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
-typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
-typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
-typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
-typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
-typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
-typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
-typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
-typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
-typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
-typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
-#ifdef __aarch64__
-typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
-typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
-#endif
typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t;
typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t;
@@ -82,96 +53,6 @@ typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
typedef __attribute__((neon_polyvector_type(1))) poly64_t poly64x1_t;
typedef __attribute__((neon_polyvector_type(2))) poly64_t poly64x2_t;
-typedef struct int8x8x2_t {
- int8x8_t val[2];
-} int8x8x2_t;
-
-typedef struct int8x16x2_t {
- int8x16_t val[2];
-} int8x16x2_t;
-
-typedef struct int16x4x2_t {
- int16x4_t val[2];
-} int16x4x2_t;
-
-typedef struct int16x8x2_t {
- int16x8_t val[2];
-} int16x8x2_t;
-
-typedef struct int32x2x2_t {
- int32x2_t val[2];
-} int32x2x2_t;
-
-typedef struct int32x4x2_t {
- int32x4_t val[2];
-} int32x4x2_t;
-
-typedef struct int64x1x2_t {
- int64x1_t val[2];
-} int64x1x2_t;
-
-typedef struct int64x2x2_t {
- int64x2_t val[2];
-} int64x2x2_t;
-
-typedef struct uint8x8x2_t {
- uint8x8_t val[2];
-} uint8x8x2_t;
-
-typedef struct uint8x16x2_t {
- uint8x16_t val[2];
-} uint8x16x2_t;
-
-typedef struct uint16x4x2_t {
- uint16x4_t val[2];
-} uint16x4x2_t;
-
-typedef struct uint16x8x2_t {
- uint16x8_t val[2];
-} uint16x8x2_t;
-
-typedef struct uint32x2x2_t {
- uint32x2_t val[2];
-} uint32x2x2_t;
-
-typedef struct uint32x4x2_t {
- uint32x4_t val[2];
-} uint32x4x2_t;
-
-typedef struct uint64x1x2_t {
- uint64x1_t val[2];
-} uint64x1x2_t;
-
-typedef struct uint64x2x2_t {
- uint64x2_t val[2];
-} uint64x2x2_t;
-
-typedef struct float16x4x2_t {
- float16x4_t val[2];
-} float16x4x2_t;
-
-typedef struct float16x8x2_t {
- float16x8_t val[2];
-} float16x8x2_t;
-
-typedef struct float32x2x2_t {
- float32x2_t val[2];
-} float32x2x2_t;
-
-typedef struct float32x4x2_t {
- float32x4_t val[2];
-} float32x4x2_t;
-
-#ifdef __aarch64__
-typedef struct float64x1x2_t {
- float64x1_t val[2];
-} float64x1x2_t;
-
-typedef struct float64x2x2_t {
- float64x2_t val[2];
-} float64x2x2_t;
-
-#endif
typedef struct poly8x8x2_t {
poly8x8_t val[2];
} poly8x8x2_t;
@@ -196,96 +77,6 @@ typedef struct poly64x2x2_t {
poly64x2_t val[2];
} poly64x2x2_t;
-typedef struct int8x8x3_t {
- int8x8_t val[3];
-} int8x8x3_t;
-
-typedef struct int8x16x3_t {
- int8x16_t val[3];
-} int8x16x3_t;
-
-typedef struct int16x4x3_t {
- int16x4_t val[3];
-} int16x4x3_t;
-
-typedef struct int16x8x3_t {
- int16x8_t val[3];
-} int16x8x3_t;
-
-typedef struct int32x2x3_t {
- int32x2_t val[3];
-} int32x2x3_t;
-
-typedef struct int32x4x3_t {
- int32x4_t val[3];
-} int32x4x3_t;
-
-typedef struct int64x1x3_t {
- int64x1_t val[3];
-} int64x1x3_t;
-
-typedef struct int64x2x3_t {
- int64x2_t val[3];
-} int64x2x3_t;
-
-typedef struct uint8x8x3_t {
- uint8x8_t val[3];
-} uint8x8x3_t;
-
-typedef struct uint8x16x3_t {
- uint8x16_t val[3];
-} uint8x16x3_t;
-
-typedef struct uint16x4x3_t {
- uint16x4_t val[3];
-} uint16x4x3_t;
-
-typedef struct uint16x8x3_t {
- uint16x8_t val[3];
-} uint16x8x3_t;
-
-typedef struct uint32x2x3_t {
- uint32x2_t val[3];
-} uint32x2x3_t;
-
-typedef struct uint32x4x3_t {
- uint32x4_t val[3];
-} uint32x4x3_t;
-
-typedef struct uint64x1x3_t {
- uint64x1_t val[3];
-} uint64x1x3_t;
-
-typedef struct uint64x2x3_t {
- uint64x2_t val[3];
-} uint64x2x3_t;
-
-typedef struct float16x4x3_t {
- float16x4_t val[3];
-} float16x4x3_t;
-
-typedef struct float16x8x3_t {
- float16x8_t val[3];
-} float16x8x3_t;
-
-typedef struct float32x2x3_t {
- float32x2_t val[3];
-} float32x2x3_t;
-
-typedef struct float32x4x3_t {
- float32x4_t val[3];
-} float32x4x3_t;
-
-#ifdef __aarch64__
-typedef struct float64x1x3_t {
- float64x1_t val[3];
-} float64x1x3_t;
-
-typedef struct float64x2x3_t {
- float64x2_t val[3];
-} float64x2x3_t;
-
-#endif
typedef struct poly8x8x3_t {
poly8x8_t val[3];
} poly8x8x3_t;
@@ -310,96 +101,6 @@ typedef struct poly64x2x3_t {
poly64x2_t val[3];
} poly64x2x3_t;
-typedef struct int8x8x4_t {
- int8x8_t val[4];
-} int8x8x4_t;
-
-typedef struct int8x16x4_t {
- int8x16_t val[4];
-} int8x16x4_t;
-
-typedef struct int16x4x4_t {
- int16x4_t val[4];
-} int16x4x4_t;
-
-typedef struct int16x8x4_t {
- int16x8_t val[4];
-} int16x8x4_t;
-
-typedef struct int32x2x4_t {
- int32x2_t val[4];
-} int32x2x4_t;
-
-typedef struct int32x4x4_t {
- int32x4_t val[4];
-} int32x4x4_t;
-
-typedef struct int64x1x4_t {
- int64x1_t val[4];
-} int64x1x4_t;
-
-typedef struct int64x2x4_t {
- int64x2_t val[4];
-} int64x2x4_t;
-
-typedef struct uint8x8x4_t {
- uint8x8_t val[4];
-} uint8x8x4_t;
-
-typedef struct uint8x16x4_t {
- uint8x16_t val[4];
-} uint8x16x4_t;
-
-typedef struct uint16x4x4_t {
- uint16x4_t val[4];
-} uint16x4x4_t;
-
-typedef struct uint16x8x4_t {
- uint16x8_t val[4];
-} uint16x8x4_t;
-
-typedef struct uint32x2x4_t {
- uint32x2_t val[4];
-} uint32x2x4_t;
-
-typedef struct uint32x4x4_t {
- uint32x4_t val[4];
-} uint32x4x4_t;
-
-typedef struct uint64x1x4_t {
- uint64x1_t val[4];
-} uint64x1x4_t;
-
-typedef struct uint64x2x4_t {
- uint64x2_t val[4];
-} uint64x2x4_t;
-
-typedef struct float16x4x4_t {
- float16x4_t val[4];
-} float16x4x4_t;
-
-typedef struct float16x8x4_t {
- float16x8_t val[4];
-} float16x8x4_t;
-
-typedef struct float32x2x4_t {
- float32x2_t val[4];
-} float32x2x4_t;
-
-typedef struct float32x4x4_t {
- float32x4_t val[4];
-} float32x4x4_t;
-
-#ifdef __aarch64__
-typedef struct float64x1x4_t {
- float64x1_t val[4];
-} float64x1x4_t;
-
-typedef struct float64x2x4_t {
- float64x2_t val[4];
-} float64x2x4_t;
-
-#endif
typedef struct poly8x8x4_t {
poly8x8_t val[4];
} poly8x8x4_t;
@@ -424,33 +125,6 @@ typedef struct poly64x2x4_t {
poly64x2_t val[4];
} poly64x2x4_t;
-typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t;
-typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t;
-
-typedef struct bfloat16x4x2_t {
- bfloat16x4_t val[2];
-} bfloat16x4x2_t;
-
-typedef struct bfloat16x8x2_t {
- bfloat16x8_t val[2];
-} bfloat16x8x2_t;
-
-typedef struct bfloat16x4x3_t {
- bfloat16x4_t val[3];
-} bfloat16x4x3_t;
-
-typedef struct bfloat16x8x3_t {
- bfloat16x8_t val[3];
-} bfloat16x8x3_t;
-
-typedef struct bfloat16x4x4_t {
- bfloat16x4_t val[4];
-} bfloat16x4x4_t;
-
-typedef struct bfloat16x8x4_t {
- bfloat16x8_t val[4];
-} bfloat16x8x4_t;
-
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
#ifdef __LITTLE_ENDIAN__
@@ -66600,6 +66274,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32x_f32(float32x2_t __p0)
}
#endif
+#ifdef __LITTLE_ENDIAN__
+__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) {
+ float64x2_t __ret;
+ __ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__p0, 42);
+ return __ret;
+}
+#else
+__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) {
+ float64x2_t __ret;
+ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ __ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__rev0, 42);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+__ai __attribute__((target("v8.5a"))) float64x1_t vrnd32x_f64(float64x1_t __p0) {
+ float64x1_t __ret;
+ __ret = (float64x1_t) __builtin_neon_vrnd32x_f64((int8x8_t)__p0, 10);
+ return __ret;
+}
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd32zq_f32(float32x4_t __p0) {
float32x4_t __ret;
@@ -66632,6 +66327,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32z_f32(float32x2_t __p0)
}
#endif
+#ifdef __LITTLE_ENDIAN__
+__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) {
+ float64x2_t __ret;
+ __ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__p0, 42);
+ return __ret;
+}
+#else
+__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) {
+ float64x2_t __ret;
+ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ __ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__rev0, 42);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+__ai __attribute__((target("v8.5a"))) float64x1_t vrnd32z_f64(float64x1_t __p0) {
+ float64x1_t __ret;
+ __ret = (float64x1_t) __builtin_neon_vrnd32z_f64((int8x8_t)__p0, 10);
+ return __ret;
+}
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd64xq_f32(float32x4_t __p0) {
float32x4_t __ret;
@@ -66664,6 +66380,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64x_f32(float32x2_t __p0)
}
#endif
+#ifdef __LITTLE_ENDIAN__
+__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) {
+ float64x2_t __ret;
+ __ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__p0, 42);
+ return __ret;
+}
+#else
+__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) {
+ float64x2_t __ret;
+ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ __ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__rev0, 42);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+__ai __attribute__((target("v8.5a"))) float64x1_t vrnd64x_f64(float64x1_t __p0) {
+ float64x1_t __ret;
+ __ret = (float64x1_t) __builtin_neon_vrnd64x_f64((int8x8_t)__p0, 10);
+ return __ret;
+}
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd64zq_f32(float32x4_t __p0) {
float32x4_t __ret;
@@ -66696,6 +66433,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64z_f32(float32x2_t __p0)
}
#endif
+#ifdef __LITTLE_ENDIAN__
+__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) {
+ float64x2_t __ret;
+ __ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__p0, 42);
+ return __ret;
+}
+#else
+__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) {
+ float64x2_t __ret;
+ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ __ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__rev0, 42);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+__ai __attribute__((target("v8.5a"))) float64x1_t vrnd64z_f64(float64x1_t __p0) {
+ float64x1_t __ret;
+ __ret = (float64x1_t) __builtin_neon_vrnd64z_f64((int8x8_t)__p0, 10);
+ return __ret;
+}
#endif
#if defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)
#ifdef __LITTLE_ENDIAN__
lib/include/arm_sme.h
@@ -0,0 +1,2412 @@
+/*===---- arm_sme.h - ARM SME intrinsics ------===
+ *
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __ARM_SME_H
+#define __ARM_SME_H
+
+#if !defined(__LITTLE_ENDIAN__)
+#error "Big endian is currently not supported for arm_sme.h"
+#endif
+#include <arm_sve.h>
+
+/* Function attributes */
+#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
+
+#define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void __arm_za_disable(void) __arm_streaming_compatible;
+
+__ai bool __arm_has_sme(void) __arm_streaming_compatible {
+ uint64_t x0, x1;
+ __builtin_arm_get_sme_state(&x0, &x1);
+ return x0 & (1ULL << 63);
+}
+
+__ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible {
+ uint64_t x0, x1;
+ __builtin_arm_get_sme_state(&x0, &x1);
+ return x0 & 1;
+}
+
+__ai __attribute__((target("sme"))) void svundef_za(void) __arm_streaming_compatible __arm_out("za") { }
+
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m)))
+void svaddha_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m)))
+void svaddha_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m)))
+void svaddva_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m)))
+void svaddva_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsb)))
+uint64_t svcntsb(void);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsd)))
+uint64_t svcntsd(void);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsh)))
+uint64_t svcntsh(void);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsw)))
+uint64_t svcntsw(void);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za128)))
+void svld1_hor_vnum_za128(uint64_t, uint32_t, svbool_t, void const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za16)))
+void svld1_hor_vnum_za16(uint64_t, uint32_t, svbool_t, void const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za32)))
+void svld1_hor_vnum_za32(uint64_t, uint32_t, svbool_t, void const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za64)))
+void svld1_hor_vnum_za64(uint64_t, uint32_t, svbool_t, void const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za8)))
+void svld1_hor_vnum_za8(uint64_t, uint32_t, svbool_t, void const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za128)))
+void svld1_hor_za128(uint64_t, uint32_t, svbool_t, void const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za16)))
+void svld1_hor_za16(uint64_t, uint32_t, svbool_t, void const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za32)))
+void svld1_hor_za32(uint64_t, uint32_t, svbool_t, void const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za64)))
+void svld1_hor_za64(uint64_t, uint32_t, svbool_t, void const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za8)))
+void svld1_hor_za8(uint64_t, uint32_t, svbool_t, void const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za128)))
+void svld1_ver_vnum_za128(uint64_t, uint32_t, svbool_t, void const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za16)))
+void svld1_ver_vnum_za16(uint64_t, uint32_t, svbool_t, void const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za32)))
+void svld1_ver_vnum_za32(uint64_t, uint32_t, svbool_t, void const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za64)))
+void svld1_ver_vnum_za64(uint64_t, uint32_t, svbool_t, void const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za8)))
+void svld1_ver_vnum_za8(uint64_t, uint32_t, svbool_t, void const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za128)))
+void svld1_ver_za128(uint64_t, uint32_t, svbool_t, void const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za16)))
+void svld1_ver_za16(uint64_t, uint32_t, svbool_t, void const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za32)))
+void svld1_ver_za32(uint64_t, uint32_t, svbool_t, void const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za64)))
+void svld1_ver_za64(uint64_t, uint32_t, svbool_t, void const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za8)))
+void svld1_ver_za8(uint64_t, uint32_t, svbool_t, void const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_vnum_za)))
+void svldr_vnum_za(uint32_t, void const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_za)))
+void svldr_za(uint32_t, void const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m)))
+void svmopa_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m)))
+void svmopa_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m)))
+void svmopa_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m)))
+void svmopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m)))
+void svmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m)))
+void svmops_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m)))
+void svmops_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m)))
+void svmops_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m)))
+void svmops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m)))
+void svmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m)))
+svuint8_t svread_hor_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m)))
+svuint32_t svread_hor_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m)))
+svuint64_t svread_hor_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m)))
+svuint16_t svread_hor_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m)))
+svbfloat16_t svread_hor_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m)))
+svint8_t svread_hor_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m)))
+svfloat64_t svread_hor_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m)))
+svfloat32_t svread_hor_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m)))
+svfloat16_t svread_hor_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m)))
+svint32_t svread_hor_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m)))
+svint64_t svread_hor_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m)))
+svint16_t svread_hor_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m)))
+svuint16_t svread_hor_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m)))
+svbfloat16_t svread_hor_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m)))
+svfloat16_t svread_hor_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m)))
+svint16_t svread_hor_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m)))
+svuint32_t svread_hor_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m)))
+svfloat32_t svread_hor_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m)))
+svint32_t svread_hor_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m)))
+svuint64_t svread_hor_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m)))
+svfloat64_t svread_hor_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m)))
+svint64_t svread_hor_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m)))
+svuint8_t svread_hor_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m)))
+svint8_t svread_hor_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m)))
+svuint8_t svread_ver_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m)))
+svuint32_t svread_ver_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m)))
+svuint64_t svread_ver_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m)))
+svuint16_t svread_ver_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m)))
+svbfloat16_t svread_ver_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m)))
+svint8_t svread_ver_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m)))
+svfloat64_t svread_ver_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m)))
+svfloat32_t svread_ver_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m)))
+svfloat16_t svread_ver_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m)))
+svint32_t svread_ver_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m)))
+svint64_t svread_ver_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m)))
+svint16_t svread_ver_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m)))
+svuint16_t svread_ver_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m)))
+svbfloat16_t svread_ver_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m)))
+svfloat16_t svread_ver_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m)))
+svint16_t svread_ver_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m)))
+svuint32_t svread_ver_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m)))
+svfloat32_t svread_ver_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m)))
+svint32_t svread_ver_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m)))
+svuint64_t svread_ver_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m)))
+svfloat64_t svread_ver_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m)))
+svint64_t svread_ver_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m)))
+svuint8_t svread_ver_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m)))
+svint8_t svread_ver_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za128)))
+void svst1_hor_vnum_za128(uint64_t, uint32_t, svbool_t, void *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za16)))
+void svst1_hor_vnum_za16(uint64_t, uint32_t, svbool_t, void *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za32)))
+void svst1_hor_vnum_za32(uint64_t, uint32_t, svbool_t, void *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za64)))
+void svst1_hor_vnum_za64(uint64_t, uint32_t, svbool_t, void *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za8)))
+void svst1_hor_vnum_za8(uint64_t, uint32_t, svbool_t, void *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za128)))
+void svst1_hor_za128(uint64_t, uint32_t, svbool_t, void *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za16)))
+void svst1_hor_za16(uint64_t, uint32_t, svbool_t, void *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za32)))
+void svst1_hor_za32(uint64_t, uint32_t, svbool_t, void *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za64)))
+void svst1_hor_za64(uint64_t, uint32_t, svbool_t, void *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za8)))
+void svst1_hor_za8(uint64_t, uint32_t, svbool_t, void *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za128)))
+void svst1_ver_vnum_za128(uint64_t, uint32_t, svbool_t, void *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za16)))
+void svst1_ver_vnum_za16(uint64_t, uint32_t, svbool_t, void *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za32)))
+void svst1_ver_vnum_za32(uint64_t, uint32_t, svbool_t, void *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za64)))
+void svst1_ver_vnum_za64(uint64_t, uint32_t, svbool_t, void *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za8)))
+void svst1_ver_vnum_za8(uint64_t, uint32_t, svbool_t, void *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za128)))
+void svst1_ver_za128(uint64_t, uint32_t, svbool_t, void *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za16)))
+void svst1_ver_za16(uint64_t, uint32_t, svbool_t, void *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za32)))
+void svst1_ver_za32(uint64_t, uint32_t, svbool_t, void *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za64)))
+void svst1_ver_za64(uint64_t, uint32_t, svbool_t, void *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za8)))
+void svst1_ver_za8(uint64_t, uint32_t, svbool_t, void *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_vnum_za)))
+void svstr_vnum_za(uint32_t, void *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_za)))
+void svstr_za(uint32_t, void *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m)))
+void svsumopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m)))
+void svsumops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m)))
+void svusmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m)))
+void svusmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m)))
+void svwrite_hor_za128_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m)))
+void svwrite_hor_za128_u32_m(uint64_t, uint32_t, svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m)))
+void svwrite_hor_za128_u64_m(uint64_t, uint32_t, svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m)))
+void svwrite_hor_za128_u16_m(uint64_t, uint32_t, svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m)))
+void svwrite_hor_za128_bf16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m)))
+void svwrite_hor_za128_s8_m(uint64_t, uint32_t, svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m)))
+void svwrite_hor_za128_f64_m(uint64_t, uint32_t, svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m)))
+void svwrite_hor_za128_f32_m(uint64_t, uint32_t, svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m)))
+void svwrite_hor_za128_f16_m(uint64_t, uint32_t, svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m)))
+void svwrite_hor_za128_s32_m(uint64_t, uint32_t, svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m)))
+void svwrite_hor_za128_s64_m(uint64_t, uint32_t, svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m)))
+void svwrite_hor_za128_s16_m(uint64_t, uint32_t, svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m)))
+void svwrite_hor_za16_u16_m(uint64_t, uint32_t, svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m)))
+void svwrite_hor_za16_bf16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m)))
+void svwrite_hor_za16_f16_m(uint64_t, uint32_t, svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m)))
+void svwrite_hor_za16_s16_m(uint64_t, uint32_t, svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m)))
+void svwrite_hor_za32_u32_m(uint64_t, uint32_t, svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m)))
+void svwrite_hor_za32_f32_m(uint64_t, uint32_t, svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m)))
+void svwrite_hor_za32_s32_m(uint64_t, uint32_t, svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m)))
+void svwrite_hor_za64_u64_m(uint64_t, uint32_t, svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m)))
+void svwrite_hor_za64_f64_m(uint64_t, uint32_t, svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m)))
+void svwrite_hor_za64_s64_m(uint64_t, uint32_t, svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m)))
+void svwrite_hor_za8_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m)))
+void svwrite_hor_za8_s8_m(uint64_t, uint32_t, svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m)))
+void svwrite_ver_za128_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m)))
+void svwrite_ver_za128_u32_m(uint64_t, uint32_t, svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m)))
+void svwrite_ver_za128_u64_m(uint64_t, uint32_t, svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m)))
+void svwrite_ver_za128_u16_m(uint64_t, uint32_t, svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m)))
+void svwrite_ver_za128_bf16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m)))
+void svwrite_ver_za128_s8_m(uint64_t, uint32_t, svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m)))
+void svwrite_ver_za128_f64_m(uint64_t, uint32_t, svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m)))
+void svwrite_ver_za128_f32_m(uint64_t, uint32_t, svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m)))
+void svwrite_ver_za128_f16_m(uint64_t, uint32_t, svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m)))
+void svwrite_ver_za128_s32_m(uint64_t, uint32_t, svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m)))
+void svwrite_ver_za128_s64_m(uint64_t, uint32_t, svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m)))
+void svwrite_ver_za128_s16_m(uint64_t, uint32_t, svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m)))
+void svwrite_ver_za16_u16_m(uint64_t, uint32_t, svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m)))
+void svwrite_ver_za16_bf16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m)))
+void svwrite_ver_za16_f16_m(uint64_t, uint32_t, svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m)))
+void svwrite_ver_za16_s16_m(uint64_t, uint32_t, svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m)))
+void svwrite_ver_za32_u32_m(uint64_t, uint32_t, svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m)))
+void svwrite_ver_za32_f32_m(uint64_t, uint32_t, svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m)))
+void svwrite_ver_za32_s32_m(uint64_t, uint32_t, svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m)))
+void svwrite_ver_za64_u64_m(uint64_t, uint32_t, svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m)))
+void svwrite_ver_za64_f64_m(uint64_t, uint32_t, svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m)))
+void svwrite_ver_za64_s64_m(uint64_t, uint32_t, svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m)))
+void svwrite_ver_za8_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m)))
+void svwrite_ver_za8_s8_m(uint64_t, uint32_t, svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_mask_za)))
+void svzero_mask_za(uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za)))
+void svzero_za();
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m)))
+void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m)))
+void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m)))
+void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m)))
+void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m)))
+void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m)))
+void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m)))
+void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m)))
+void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m)))
+void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m)))
+void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m)))
+void svmops_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m)))
+void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m)))
+void svmops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m)))
+void svmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m)))
+svuint8_t svread_hor_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m)))
+svuint32_t svread_hor_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m)))
+svuint64_t svread_hor_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m)))
+svuint16_t svread_hor_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m)))
+svbfloat16_t svread_hor_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m)))
+svint8_t svread_hor_za128_m(svint8_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m)))
+svfloat64_t svread_hor_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m)))
+svfloat32_t svread_hor_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m)))
+svfloat16_t svread_hor_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m)))
+svint32_t svread_hor_za128_m(svint32_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m)))
+svint64_t svread_hor_za128_m(svint64_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m)))
+svint16_t svread_hor_za128_m(svint16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m)))
+svuint16_t svread_hor_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m)))
+svbfloat16_t svread_hor_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m)))
+svfloat16_t svread_hor_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m)))
+svint16_t svread_hor_za16_m(svint16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m)))
+svuint32_t svread_hor_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m)))
+svfloat32_t svread_hor_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m)))
+svint32_t svread_hor_za32_m(svint32_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m)))
+svuint64_t svread_hor_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m)))
+svfloat64_t svread_hor_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m)))
+svint64_t svread_hor_za64_m(svint64_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m)))
+svuint8_t svread_hor_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m)))
+svint8_t svread_hor_za8_m(svint8_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m)))
+svuint8_t svread_ver_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m)))
+svuint32_t svread_ver_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m)))
+svuint64_t svread_ver_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m)))
+svuint16_t svread_ver_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m)))
+svbfloat16_t svread_ver_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m)))
+svint8_t svread_ver_za128_m(svint8_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m)))
+svfloat64_t svread_ver_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m)))
+svfloat32_t svread_ver_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m)))
+svfloat16_t svread_ver_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m)))
+svint32_t svread_ver_za128_m(svint32_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m)))
+svint64_t svread_ver_za128_m(svint64_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m)))
+svint16_t svread_ver_za128_m(svint16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m)))
+svuint16_t svread_ver_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m)))
+svbfloat16_t svread_ver_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m)))
+svfloat16_t svread_ver_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m)))
+svint16_t svread_ver_za16_m(svint16_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m)))
+svuint32_t svread_ver_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m)))
+svfloat32_t svread_ver_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m)))
+svint32_t svread_ver_za32_m(svint32_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m)))
+svuint64_t svread_ver_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m)))
+svfloat64_t svread_ver_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m)))
+svint64_t svread_ver_za64_m(svint64_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m)))
+svuint8_t svread_ver_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m)))
+svint8_t svread_ver_za8_m(svint8_t, svbool_t, uint64_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m)))
+void svsumopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m)))
+void svsumops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m)))
+void svusmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m)))
+void svusmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m)))
+void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m)))
+void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m)))
+void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m)))
+void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m)))
+void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m)))
+void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m)))
+void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m)))
+void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m)))
+void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m)))
+void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m)))
+void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m)))
+void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m)))
+void svwrite_hor_za16_m(uint64_t, uint32_t, svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m)))
+void svwrite_hor_za16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m)))
+void svwrite_hor_za16_m(uint64_t, uint32_t, svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m)))
+void svwrite_hor_za16_m(uint64_t, uint32_t, svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m)))
+void svwrite_hor_za32_m(uint64_t, uint32_t, svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m)))
+void svwrite_hor_za32_m(uint64_t, uint32_t, svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m)))
+void svwrite_hor_za32_m(uint64_t, uint32_t, svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m)))
+void svwrite_hor_za64_m(uint64_t, uint32_t, svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m)))
+void svwrite_hor_za64_m(uint64_t, uint32_t, svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m)))
+void svwrite_hor_za64_m(uint64_t, uint32_t, svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m)))
+void svwrite_hor_za8_m(uint64_t, uint32_t, svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m)))
+void svwrite_hor_za8_m(uint64_t, uint32_t, svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m)))
+void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m)))
+void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m)))
+void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m)))
+void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m)))
+void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m)))
+void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m)))
+void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m)))
+void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m)))
+void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m)))
+void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m)))
+void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m)))
+void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m)))
+void svwrite_ver_za16_m(uint64_t, uint32_t, svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m)))
+void svwrite_ver_za16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m)))
+void svwrite_ver_za16_m(uint64_t, uint32_t, svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m)))
+void svwrite_ver_za16_m(uint64_t, uint32_t, svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m)))
+void svwrite_ver_za32_m(uint64_t, uint32_t, svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m)))
+void svwrite_ver_za32_m(uint64_t, uint32_t, svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m)))
+void svwrite_ver_za32_m(uint64_t, uint32_t, svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m)))
+void svwrite_ver_za64_m(uint64_t, uint32_t, svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m)))
+void svwrite_ver_za64_m(uint64_t, uint32_t, svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m)))
+void svwrite_ver_za64_m(uint64_t, uint32_t, svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m)))
+void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m)))
+void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m)))
+void svmopa_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m)))
+void svmops_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m)))
+void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m)))
+void svmops_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m)))
+void svaddha_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m)))
+void svaddha_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m)))
+void svaddva_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m)))
+void svaddva_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m)))
+void svmopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m)))
+void svmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m)))
+void svmops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m)))
+void svmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m)))
+void svsumopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m)))
+void svsumops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m)))
+void svusmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m)))
+void svusmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m)))
+void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m)))
+void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m)))
+void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m)))
+void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m)))
+void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m)))
+void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m)))
+void svmops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m)))
+void svmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m)))
+void svsumopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m)))
+void svsumops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m)))
+void svusmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m)))
+void svusmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x2)))
+void svadd_write_single_za32_u32_vg1x2(uint32_t, svuint32x2_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_s32_vg1x2)))
+void svadd_write_single_za32_s32_vg1x2(uint32_t, svint32x2_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x4)))
+void svadd_write_single_za32_u32_vg1x4(uint32_t, svuint32x4_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_s32_vg1x4)))
+void svadd_write_single_za32_s32_vg1x4(uint32_t, svint32x4_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_u32_vg1x2)))
+void svadd_write_za32_u32_vg1x2(uint32_t, svuint32x2_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_s32_vg1x2)))
+void svadd_write_za32_s32_vg1x2(uint32_t, svint32x2_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_u32_vg1x4)))
+void svadd_write_za32_u32_vg1x4(uint32_t, svuint32x4_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_s32_vg1x4)))
+void svadd_write_za32_s32_vg1x4(uint32_t, svint32x4_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_u32_vg1x2)))
+void svadd_za32_u32_vg1x2(uint32_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_f32_vg1x2)))
+void svadd_za32_f32_vg1x2(uint32_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_s32_vg1x2)))
+void svadd_za32_s32_vg1x2(uint32_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_u32_vg1x4)))
+void svadd_za32_u32_vg1x4(uint32_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_f32_vg1x4)))
+void svadd_za32_f32_vg1x4(uint32_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_s32_vg1x4)))
+void svadd_za32_s32_vg1x4(uint32_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmopa_za32_u32_m)))
+void svbmopa_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmopa_za32_s32_m)))
+void svbmopa_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmops_za32_u32_m)))
+void svbmops_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmops_za32_s32_m)))
+void svbmops_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_bf16_vg1x2)))
+void svdot_single_za32_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_f16_vg1x2)))
+void svdot_single_za32_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s8_vg1x2)))
+void svdot_single_za32_s8_vg1x2(uint32_t, svint8x2_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s16_vg1x2)))
+void svdot_single_za32_s16_vg1x2(uint32_t, svint16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u8_vg1x2)))
+void svdot_single_za32_u8_vg1x2(uint32_t, svuint8x2_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u16_vg1x2)))
+void svdot_single_za32_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_bf16_vg1x4)))
+void svdot_single_za32_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_f16_vg1x4)))
+void svdot_single_za32_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s8_vg1x4)))
+void svdot_single_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s16_vg1x4)))
+void svdot_single_za32_s16_vg1x4(uint32_t, svint16x4_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u8_vg1x4)))
+void svdot_single_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u16_vg1x4)))
+void svdot_single_za32_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_bf16_vg1x2)))
+void svdot_lane_za32_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_f16_vg1x2)))
+void svdot_lane_za32_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s8_vg1x2)))
+void svdot_lane_za32_s8_vg1x2(uint32_t, svint8x2_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s16_vg1x2)))
+void svdot_lane_za32_s16_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u8_vg1x2)))
+void svdot_lane_za32_u8_vg1x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u16_vg1x2)))
+void svdot_lane_za32_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_bf16_vg1x4)))
+void svdot_lane_za32_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_f16_vg1x4)))
+void svdot_lane_za32_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s8_vg1x4)))
+void svdot_lane_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s16_vg1x4)))
+void svdot_lane_za32_s16_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u8_vg1x4)))
+void svdot_lane_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u16_vg1x4)))
+void svdot_lane_za32_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_bf16_vg1x2)))
+void svdot_za32_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_f16_vg1x2)))
+void svdot_za32_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s8_vg1x2)))
+void svdot_za32_s8_vg1x2(uint32_t, svint8x2_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s16_vg1x2)))
+void svdot_za32_s16_vg1x2(uint32_t, svint16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u8_vg1x2)))
+void svdot_za32_u8_vg1x2(uint32_t, svuint8x2_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u16_vg1x2)))
+void svdot_za32_u16_vg1x2(uint32_t, svuint16x2_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_bf16_vg1x4)))
+void svdot_za32_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_f16_vg1x4)))
+void svdot_za32_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s8_vg1x4)))
+void svdot_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s16_vg1x4)))
+void svdot_za32_s16_vg1x4(uint32_t, svint16x4_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u8_vg1x4)))
+void svdot_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u16_vg1x4)))
+void svdot_za32_u16_vg1x4(uint32_t, svuint16x4_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_zt)))
+void svldr_zt(uint64_t, void const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u8)))
+svuint8_t svluti2_lane_zt_u8(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u32)))
+svuint32_t svluti2_lane_zt_u32(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u16)))
+svuint16_t svluti2_lane_zt_u16(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_bf16)))
+svbfloat16_t svluti2_lane_zt_bf16(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s8)))
+svint8_t svluti2_lane_zt_s8(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f32)))
+svfloat32_t svluti2_lane_zt_f32(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f16)))
+svfloat16_t svluti2_lane_zt_f16(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s32)))
+svint32_t svluti2_lane_zt_s32(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s16)))
+svint16_t svluti2_lane_zt_s16(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u8_x2)))
+svuint8x2_t svluti2_lane_zt_u8_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u32_x2)))
+svuint32x2_t svluti2_lane_zt_u32_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u16_x2)))
+svuint16x2_t svluti2_lane_zt_u16_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_bf16_x2)))
+svbfloat16x2_t svluti2_lane_zt_bf16_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s8_x2)))
+svint8x2_t svluti2_lane_zt_s8_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f32_x2)))
+svfloat32x2_t svluti2_lane_zt_f32_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f16_x2)))
+svfloat16x2_t svluti2_lane_zt_f16_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s32_x2)))
+svint32x2_t svluti2_lane_zt_s32_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s16_x2)))
+svint16x2_t svluti2_lane_zt_s16_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u8_x4)))
+svuint8x4_t svluti2_lane_zt_u8_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u32_x4)))
+svuint32x4_t svluti2_lane_zt_u32_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u16_x4)))
+svuint16x4_t svluti2_lane_zt_u16_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_bf16_x4)))
+svbfloat16x4_t svluti2_lane_zt_bf16_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s8_x4)))
+svint8x4_t svluti2_lane_zt_s8_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f32_x4)))
+svfloat32x4_t svluti2_lane_zt_f32_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f16_x4)))
+svfloat16x4_t svluti2_lane_zt_f16_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s32_x4)))
+svint32x4_t svluti2_lane_zt_s32_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s16_x4)))
+svint16x4_t svluti2_lane_zt_s16_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u8)))
+svuint8_t svluti4_lane_zt_u8(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u32)))
+svuint32_t svluti4_lane_zt_u32(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u16)))
+svuint16_t svluti4_lane_zt_u16(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_bf16)))
+svbfloat16_t svluti4_lane_zt_bf16(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s8)))
+svint8_t svluti4_lane_zt_s8(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f32)))
+svfloat32_t svluti4_lane_zt_f32(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f16)))
+svfloat16_t svluti4_lane_zt_f16(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s32)))
+svint32_t svluti4_lane_zt_s32(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s16)))
+svint16_t svluti4_lane_zt_s16(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u8_x2)))
+svuint8x2_t svluti4_lane_zt_u8_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u32_x2)))
+svuint32x2_t svluti4_lane_zt_u32_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u16_x2)))
+svuint16x2_t svluti4_lane_zt_u16_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_bf16_x2)))
+svbfloat16x2_t svluti4_lane_zt_bf16_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s8_x2)))
+svint8x2_t svluti4_lane_zt_s8_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f32_x2)))
+svfloat32x2_t svluti4_lane_zt_f32_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f16_x2)))
+svfloat16x2_t svluti4_lane_zt_f16_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s32_x2)))
+svint32x2_t svluti4_lane_zt_s32_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s16_x2)))
+svint16x2_t svluti4_lane_zt_s16_x2(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u32_x4)))
+svuint32x4_t svluti4_lane_zt_u32_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u16_x4)))
+svuint16x4_t svluti4_lane_zt_u16_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_bf16_x4)))
+svbfloat16x4_t svluti4_lane_zt_bf16_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f32_x4)))
+svfloat32x4_t svluti4_lane_zt_f32_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f16_x4)))
+svfloat16x4_t svluti4_lane_zt_f16_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s32_x4)))
+svint32x4_t svluti4_lane_zt_s32_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s16_x4)))
+svint16x4_t svluti4_lane_zt_s16_x4(uint64_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f32_vg1x2)))
+void svmla_single_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f32_vg1x4)))
+void svmla_single_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_bf16_vg2x2)))
+void svmla_single_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f16_vg2x2)))
+void svmla_single_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s16_vg2x2)))
+void svmla_single_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u16_vg2x2)))
+void svmla_single_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_bf16_vg2x4)))
+void svmla_single_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f16_vg2x4)))
+void svmla_single_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s16_vg2x4)))
+void svmla_single_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u16_vg2x4)))
+void svmla_single_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s8_vg4x2)))
+void svmla_single_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u8_vg4x2)))
+void svmla_single_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s8_vg4x4)))
+void svmla_single_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u8_vg4x4)))
+void svmla_single_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f32_vg1x2)))
+void svmla_lane_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f32_vg1x4)))
+void svmla_lane_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x1)))
+void svmla_lane_za32_bf16_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x1)))
+void svmla_lane_za32_f16_vg2x1(uint32_t, svfloat16_t, svfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x1)))
+void svmla_lane_za32_s16_vg2x1(uint32_t, svint16_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x1)))
+void svmla_lane_za32_u16_vg2x1(uint32_t, svuint16_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x2)))
+void svmla_lane_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x2)))
+void svmla_lane_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x2)))
+void svmla_lane_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x2)))
+void svmla_lane_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x4)))
+void svmla_lane_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x4)))
+void svmla_lane_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x4)))
+void svmla_lane_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x4)))
+void svmla_lane_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x1)))
+void svmla_lane_za32_s8_vg4x1(uint32_t, svint8_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x1)))
+void svmla_lane_za32_u8_vg4x1(uint32_t, svuint8_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x2)))
+void svmla_lane_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x2)))
+void svmla_lane_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x4)))
+void svmla_lane_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x4)))
+void svmla_lane_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f32_vg1x2)))
+void svmla_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f32_vg1x4)))
+void svmla_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x1)))
+void svmla_za32_bf16_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x1)))
+void svmla_za32_f16_vg2x1(uint32_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x1)))
+void svmla_za32_s16_vg2x1(uint32_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x1)))
+void svmla_za32_u16_vg2x1(uint32_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x2)))
+void svmla_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x2)))
+void svmla_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x2)))
+void svmla_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x2)))
+void svmla_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x4)))
+void svmla_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x4)))
+void svmla_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x4)))
+void svmla_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x4)))
+void svmla_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x1)))
+void svmla_za32_s8_vg4x1(uint32_t, svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x1)))
+void svmla_za32_u8_vg4x1(uint32_t, svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x2)))
+void svmla_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x2)))
+void svmla_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x4)))
+void svmla_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x4)))
+void svmla_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f32_vg1x2)))
+void svmls_single_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f32_vg1x4)))
+void svmls_single_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_bf16_vg2x2)))
+void svmls_single_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f16_vg2x2)))
+void svmls_single_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s16_vg2x2)))
+void svmls_single_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u16_vg2x2)))
+void svmls_single_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_bf16_vg2x4)))
+void svmls_single_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f16_vg2x4)))
+void svmls_single_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s16_vg2x4)))
+void svmls_single_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u16_vg2x4)))
+void svmls_single_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s8_vg4x2)))
+void svmls_single_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u8_vg4x2)))
+void svmls_single_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s8_vg4x4)))
+void svmls_single_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u8_vg4x4)))
+void svmls_single_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f32_vg1x2)))
+void svmls_lane_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f32_vg1x4)))
+void svmls_lane_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x1)))
+void svmls_lane_za32_bf16_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x1)))
+void svmls_lane_za32_f16_vg2x1(uint32_t, svfloat16_t, svfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x1)))
+void svmls_lane_za32_s16_vg2x1(uint32_t, svint16_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x1)))
+void svmls_lane_za32_u16_vg2x1(uint32_t, svuint16_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x2)))
+void svmls_lane_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x2)))
+void svmls_lane_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x2)))
+void svmls_lane_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x2)))
+void svmls_lane_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x4)))
+void svmls_lane_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x4)))
+void svmls_lane_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x4)))
+void svmls_lane_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x4)))
+void svmls_lane_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x1)))
+void svmls_lane_za32_s8_vg4x1(uint32_t, svint8_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x1)))
+void svmls_lane_za32_u8_vg4x1(uint32_t, svuint8_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x2)))
+void svmls_lane_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x2)))
+void svmls_lane_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x4)))
+void svmls_lane_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x4)))
+void svmls_lane_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f32_vg1x2)))
+void svmls_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f32_vg1x4)))
+void svmls_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x1)))
+void svmls_za32_bf16_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x1)))
+void svmls_za32_f16_vg2x1(uint32_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x1)))
+void svmls_za32_s16_vg2x1(uint32_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x1)))
+void svmls_za32_u16_vg2x1(uint32_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x2)))
+void svmls_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x2)))
+void svmls_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x2)))
+void svmls_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x2)))
+void svmls_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x4)))
+void svmls_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x4)))
+void svmls_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x4)))
+void svmls_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x4)))
+void svmls_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x1)))
+void svmls_za32_s8_vg4x1(uint32_t, svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x1)))
+void svmls_za32_u8_vg4x1(uint32_t, svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x2)))
+void svmls_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x2)))
+void svmls_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x4)))
+void svmls_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x4)))
+void svmls_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s16_m)))
+void svmopa_za32_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u16_m)))
+void svmopa_za32_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s16_m)))
+void svmops_za32_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u16_m)))
+void svmops_za32_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_vg2)))
+svuint16x2_t svread_hor_za16_u16_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_vg2)))
+svbfloat16x2_t svread_hor_za16_bf16_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_vg2)))
+svfloat16x2_t svread_hor_za16_f16_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_vg2)))
+svint16x2_t svread_hor_za16_s16_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_vg4)))
+svuint16x4_t svread_hor_za16_u16_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_vg4)))
+svbfloat16x4_t svread_hor_za16_bf16_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_vg4)))
+svfloat16x4_t svread_hor_za16_f16_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_vg4)))
+svint16x4_t svread_hor_za16_s16_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_vg2)))
+svuint32x2_t svread_hor_za32_u32_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_vg2)))
+svfloat32x2_t svread_hor_za32_f32_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_vg2)))
+svint32x2_t svread_hor_za32_s32_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_vg4)))
+svuint32x4_t svread_hor_za32_u32_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_vg4)))
+svfloat32x4_t svread_hor_za32_f32_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_vg4)))
+svint32x4_t svread_hor_za32_s32_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_vg2)))
+svuint64x2_t svread_hor_za64_u64_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_vg2)))
+svfloat64x2_t svread_hor_za64_f64_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_vg2)))
+svint64x2_t svread_hor_za64_s64_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_vg4)))
+svuint64x4_t svread_hor_za64_u64_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_vg4)))
+svfloat64x4_t svread_hor_za64_f64_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_vg4)))
+svint64x4_t svread_hor_za64_s64_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_vg2)))
+svuint8x2_t svread_hor_za8_u8_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_vg2)))
+svint8x2_t svread_hor_za8_s8_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_vg4)))
+svuint8x4_t svread_hor_za8_u8_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_vg4)))
+svint8x4_t svread_hor_za8_s8_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_vg2)))
+svuint16x2_t svread_ver_za16_u16_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_vg2)))
+svbfloat16x2_t svread_ver_za16_bf16_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_vg2)))
+svfloat16x2_t svread_ver_za16_f16_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_vg2)))
+svint16x2_t svread_ver_za16_s16_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_vg4)))
+svuint16x4_t svread_ver_za16_u16_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_vg4)))
+svbfloat16x4_t svread_ver_za16_bf16_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_vg4)))
+svfloat16x4_t svread_ver_za16_f16_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_vg4)))
+svint16x4_t svread_ver_za16_s16_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_vg2)))
+svuint32x2_t svread_ver_za32_u32_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_vg2)))
+svfloat32x2_t svread_ver_za32_f32_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_vg2)))
+svint32x2_t svread_ver_za32_s32_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_vg4)))
+svuint32x4_t svread_ver_za32_u32_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_vg4)))
+svfloat32x4_t svread_ver_za32_f32_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_vg4)))
+svint32x4_t svread_ver_za32_s32_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_vg2)))
+svuint64x2_t svread_ver_za64_u64_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_vg2)))
+svfloat64x2_t svread_ver_za64_f64_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_vg2)))
+svint64x2_t svread_ver_za64_s64_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_vg4)))
+svuint64x4_t svread_ver_za64_u64_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_vg4)))
+svfloat64x4_t svread_ver_za64_f64_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_vg4)))
+svint64x4_t svread_ver_za64_s64_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_vg2)))
+svuint8x2_t svread_ver_za8_u8_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_vg2)))
+svint8x2_t svread_ver_za8_s8_vg2(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_vg4)))
+svuint8x4_t svread_ver_za8_u8_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_vg4)))
+svint8x4_t svread_ver_za8_s8_vg4(uint64_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_u16_vg1x2)))
+svuint16x2_t svread_za16_u16_vg1x2(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_bf16_vg1x2)))
+svbfloat16x2_t svread_za16_bf16_vg1x2(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_f16_vg1x2)))
+svfloat16x2_t svread_za16_f16_vg1x2(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_s16_vg1x2)))
+svint16x2_t svread_za16_s16_vg1x2(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_u16_vg1x4)))
+svuint16x4_t svread_za16_u16_vg1x4(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_bf16_vg1x4)))
+svbfloat16x4_t svread_za16_bf16_vg1x4(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_f16_vg1x4)))
+svfloat16x4_t svread_za16_f16_vg1x4(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_s16_vg1x4)))
+svint16x4_t svread_za16_s16_vg1x4(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_u32_vg1x2)))
+svuint32x2_t svread_za32_u32_vg1x2(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_f32_vg1x2)))
+svfloat32x2_t svread_za32_f32_vg1x2(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_s32_vg1x2)))
+svint32x2_t svread_za32_s32_vg1x2(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_u32_vg1x4)))
+svuint32x4_t svread_za32_u32_vg1x4(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_f32_vg1x4)))
+svfloat32x4_t svread_za32_f32_vg1x4(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_s32_vg1x4)))
+svint32x4_t svread_za32_s32_vg1x4(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_u64_vg1x2)))
+svuint64x2_t svread_za64_u64_vg1x2(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_f64_vg1x2)))
+svfloat64x2_t svread_za64_f64_vg1x2(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_s64_vg1x2)))
+svint64x2_t svread_za64_s64_vg1x2(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_u64_vg1x4)))
+svuint64x4_t svread_za64_u64_vg1x4(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_f64_vg1x4)))
+svfloat64x4_t svread_za64_f64_vg1x4(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_s64_vg1x4)))
+svint64x4_t svread_za64_s64_vg1x4(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_u8_vg1x2)))
+svuint8x2_t svread_za8_u8_vg1x2(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_s8_vg1x2)))
+svint8x2_t svread_za8_s8_vg1x2(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_u8_vg1x4)))
+svuint8x4_t svread_za8_u8_vg1x4(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_s8_vg1x4)))
+svint8x4_t svread_za8_s8_vg1x4(uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_zt)))
+void svstr_zt(uint64_t, void *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_u32_vg1x2)))
+void svsub_write_single_za32_u32_vg1x2(uint32_t, svuint32x2_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_s32_vg1x2)))
+void svsub_write_single_za32_s32_vg1x2(uint32_t, svint32x2_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_u32_vg1x4)))
+void svsub_write_single_za32_u32_vg1x4(uint32_t, svuint32x4_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_s32_vg1x4)))
+void svsub_write_single_za32_s32_vg1x4(uint32_t, svint32x4_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_u32_vg1x2)))
+void svsub_write_za32_u32_vg1x2(uint32_t, svuint32x2_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_s32_vg1x2)))
+void svsub_write_za32_s32_vg1x2(uint32_t, svint32x2_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_u32_vg1x4)))
+void svsub_write_za32_u32_vg1x4(uint32_t, svuint32x4_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_s32_vg1x4)))
+void svsub_write_za32_s32_vg1x4(uint32_t, svint32x4_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_u32_vg1x2)))
+void svsub_za32_u32_vg1x2(uint32_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_f32_vg1x2)))
+void svsub_za32_f32_vg1x2(uint32_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_s32_vg1x2)))
+void svsub_za32_s32_vg1x2(uint32_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_u32_vg1x4)))
+void svsub_za32_u32_vg1x4(uint32_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_f32_vg1x4)))
+void svsub_za32_f32_vg1x4(uint32_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_s32_vg1x4)))
+void svsub_za32_s32_vg1x4(uint32_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_single_za32_s8_vg1x2)))
+void svsudot_single_za32_s8_vg1x2(uint32_t, svint8x2_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_single_za32_s8_vg1x4)))
+void svsudot_single_za32_s8_vg1x4(uint32_t, svint8x4_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_lane_za32_s8_vg1x2)))
+void svsudot_lane_za32_s8_vg1x2(uint32_t, svint8x2_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_lane_za32_s8_vg1x4)))
+void svsudot_lane_za32_s8_vg1x4(uint32_t, svint8x4_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_za32_s8_vg1x2)))
+void svsudot_za32_s8_vg1x2(uint32_t, svint8x2_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_za32_s8_vg1x4)))
+void svsudot_za32_s8_vg1x4(uint32_t, svint8x4_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_single_za32_s8_vg4x2)))
+void svsumla_single_za32_s8_vg4x2(uint32_t, svint8x2_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_single_za32_s8_vg4x4)))
+void svsumla_single_za32_s8_vg4x4(uint32_t, svint8x4_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x1)))
+void svsumla_lane_za32_s8_vg4x1(uint32_t, svint8_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x2)))
+void svsumla_lane_za32_s8_vg4x2(uint32_t, svint8x2_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x4)))
+void svsumla_lane_za32_s8_vg4x4(uint32_t, svint8x4_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x1)))
+void svsumla_za32_s8_vg4x1(uint32_t, svint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x2)))
+void svsumla_za32_s8_vg4x2(uint32_t, svint8x2_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x4)))
+void svsumla_za32_s8_vg4x4(uint32_t, svint8x4_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsuvdot_lane_za32_s8_vg1x4)))
+void svsuvdot_lane_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_single_za32_u8_vg1x2)))
+void svusdot_single_za32_u8_vg1x2(uint32_t, svuint8x2_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_single_za32_u8_vg1x4)))
+void svusdot_single_za32_u8_vg1x4(uint32_t, svuint8x4_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_lane_za32_u8_vg1x2)))
+void svusdot_lane_za32_u8_vg1x2(uint32_t, svuint8x2_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_lane_za32_u8_vg1x4)))
+void svusdot_lane_za32_u8_vg1x4(uint32_t, svuint8x4_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_za32_u8_vg1x2)))
+void svusdot_za32_u8_vg1x2(uint32_t, svuint8x2_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_za32_u8_vg1x4)))
+void svusdot_za32_u8_vg1x4(uint32_t, svuint8x4_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_single_za32_u8_vg4x2)))
+void svusmla_single_za32_u8_vg4x2(uint32_t, svuint8x2_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_single_za32_u8_vg4x4)))
+void svusmla_single_za32_u8_vg4x4(uint32_t, svuint8x4_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x1)))
+void svusmla_lane_za32_u8_vg4x1(uint32_t, svuint8_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x2)))
+void svusmla_lane_za32_u8_vg4x2(uint32_t, svuint8x2_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x4)))
+void svusmla_lane_za32_u8_vg4x4(uint32_t, svuint8x4_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x1)))
+void svusmla_za32_u8_vg4x1(uint32_t, svuint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x2)))
+void svusmla_za32_u8_vg4x2(uint32_t, svuint8x2_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x4)))
+void svusmla_za32_u8_vg4x4(uint32_t, svuint8x4_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusvdot_lane_za32_u8_vg1x4)))
+void svusvdot_lane_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_bf16_vg1x2)))
+void svvdot_lane_za32_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_f16_vg1x2)))
+void svvdot_lane_za32_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_s16_vg1x2)))
+void svvdot_lane_za32_s16_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_u16_vg1x2)))
+void svvdot_lane_za32_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_s8_vg1x4)))
+void svvdot_lane_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_u8_vg1x4)))
+void svvdot_lane_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_vg2)))
+void svwrite_hor_za16_u16_vg2(uint64_t, uint32_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_vg2)))
+void svwrite_hor_za16_bf16_vg2(uint64_t, uint32_t, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_vg2)))
+void svwrite_hor_za16_f16_vg2(uint64_t, uint32_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_vg2)))
+void svwrite_hor_za16_s16_vg2(uint64_t, uint32_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_vg4)))
+void svwrite_hor_za16_u16_vg4(uint64_t, uint32_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_vg4)))
+void svwrite_hor_za16_bf16_vg4(uint64_t, uint32_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_vg4)))
+void svwrite_hor_za16_f16_vg4(uint64_t, uint32_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_vg4)))
+void svwrite_hor_za16_s16_vg4(uint64_t, uint32_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_vg2)))
+void svwrite_hor_za32_u32_vg2(uint64_t, uint32_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_vg2)))
+void svwrite_hor_za32_f32_vg2(uint64_t, uint32_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_vg2)))
+void svwrite_hor_za32_s32_vg2(uint64_t, uint32_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_vg4)))
+void svwrite_hor_za32_u32_vg4(uint64_t, uint32_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_vg4)))
+void svwrite_hor_za32_f32_vg4(uint64_t, uint32_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_vg4)))
+void svwrite_hor_za32_s32_vg4(uint64_t, uint32_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_vg2)))
+void svwrite_hor_za64_u64_vg2(uint64_t, uint32_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_vg2)))
+void svwrite_hor_za64_f64_vg2(uint64_t, uint32_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_vg2)))
+void svwrite_hor_za64_s64_vg2(uint64_t, uint32_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_vg4)))
+void svwrite_hor_za64_u64_vg4(uint64_t, uint32_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_vg4)))
+void svwrite_hor_za64_f64_vg4(uint64_t, uint32_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_vg4)))
+void svwrite_hor_za64_s64_vg4(uint64_t, uint32_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg2)))
+void svwrite_hor_za8_u8_vg2(uint64_t, uint32_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg2)))
+void svwrite_hor_za8_s8_vg2(uint64_t, uint32_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg4)))
+void svwrite_hor_za8_u8_vg4(uint64_t, uint32_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg4)))
+void svwrite_hor_za8_s8_vg4(uint64_t, uint32_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg2)))
+void svwrite_ver_za16_u16_vg2(uint64_t, uint32_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg2)))
+void svwrite_ver_za16_bf16_vg2(uint64_t, uint32_t, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_vg2)))
+void svwrite_ver_za16_f16_vg2(uint64_t, uint32_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_vg2)))
+void svwrite_ver_za16_s16_vg2(uint64_t, uint32_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg4)))
+void svwrite_ver_za16_u16_vg4(uint64_t, uint32_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg4)))
+void svwrite_ver_za16_bf16_vg4(uint64_t, uint32_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_vg4)))
+void svwrite_ver_za16_f16_vg4(uint64_t, uint32_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_vg4)))
+void svwrite_ver_za16_s16_vg4(uint64_t, uint32_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_vg2)))
+void svwrite_ver_za32_u32_vg2(uint64_t, uint32_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_vg2)))
+void svwrite_ver_za32_f32_vg2(uint64_t, uint32_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_vg2)))
+void svwrite_ver_za32_s32_vg2(uint64_t, uint32_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_vg4)))
+void svwrite_ver_za32_u32_vg4(uint64_t, uint32_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_vg4)))
+void svwrite_ver_za32_f32_vg4(uint64_t, uint32_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_vg4)))
+void svwrite_ver_za32_s32_vg4(uint64_t, uint32_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_vg2)))
+void svwrite_ver_za64_u64_vg2(uint64_t, uint32_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_vg2)))
+void svwrite_ver_za64_f64_vg2(uint64_t, uint32_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_vg2)))
+void svwrite_ver_za64_s64_vg2(uint64_t, uint32_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_vg4)))
+void svwrite_ver_za64_u64_vg4(uint64_t, uint32_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_vg4)))
+void svwrite_ver_za64_f64_vg4(uint64_t, uint32_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_vg4)))
+void svwrite_ver_za64_s64_vg4(uint64_t, uint32_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg2)))
+void svwrite_ver_za8_u8_vg2(uint64_t, uint32_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg2)))
+void svwrite_ver_za8_s8_vg2(uint64_t, uint32_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg4)))
+void svwrite_ver_za8_u8_vg4(uint64_t, uint32_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg4)))
+void svwrite_ver_za8_s8_vg4(uint64_t, uint32_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x2)))
+void svwrite_za16_u16_vg1x2(uint32_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x2)))
+void svwrite_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_f16_vg1x2)))
+void svwrite_za16_f16_vg1x2(uint32_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_s16_vg1x2)))
+void svwrite_za16_s16_vg1x2(uint32_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x4)))
+void svwrite_za16_u16_vg1x4(uint32_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x4)))
+void svwrite_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_f16_vg1x4)))
+void svwrite_za16_f16_vg1x4(uint32_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_s16_vg1x4)))
+void svwrite_za16_s16_vg1x4(uint32_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_u32_vg1x2)))
+void svwrite_za32_u32_vg1x2(uint32_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_f32_vg1x2)))
+void svwrite_za32_f32_vg1x2(uint32_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_s32_vg1x2)))
+void svwrite_za32_s32_vg1x2(uint32_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_u32_vg1x4)))
+void svwrite_za32_u32_vg1x4(uint32_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_f32_vg1x4)))
+void svwrite_za32_f32_vg1x4(uint32_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_s32_vg1x4)))
+void svwrite_za32_s32_vg1x4(uint32_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_u64_vg1x2)))
+void svwrite_za64_u64_vg1x2(uint32_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_f64_vg1x2)))
+void svwrite_za64_f64_vg1x2(uint32_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_s64_vg1x2)))
+void svwrite_za64_s64_vg1x2(uint32_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_u64_vg1x4)))
+void svwrite_za64_u64_vg1x4(uint32_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_f64_vg1x4)))
+void svwrite_za64_f64_vg1x4(uint32_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_s64_vg1x4)))
+void svwrite_za64_s64_vg1x4(uint32_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x2)))
+void svwrite_za8_u8_vg1x2(uint32_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x2)))
+void svwrite_za8_s8_vg1x2(uint32_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x4)))
+void svwrite_za8_u8_vg1x4(uint32_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x4)))
+void svwrite_za8_s8_vg1x4(uint32_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_zt)))
+void svzero_zt(uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x2)))
+void svadd_write_za32_vg1x2(uint32_t, svuint32x2_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_s32_vg1x2)))
+void svadd_write_za32_vg1x2(uint32_t, svint32x2_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x4)))
+void svadd_write_za32_vg1x4(uint32_t, svuint32x4_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_s32_vg1x4)))
+void svadd_write_za32_vg1x4(uint32_t, svint32x4_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_u32_vg1x2)))
+void svadd_write_za32_vg1x2(uint32_t, svuint32x2_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_s32_vg1x2)))
+void svadd_write_za32_vg1x2(uint32_t, svint32x2_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_u32_vg1x4)))
+void svadd_write_za32_vg1x4(uint32_t, svuint32x4_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_s32_vg1x4)))
+void svadd_write_za32_vg1x4(uint32_t, svint32x4_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_u32_vg1x2)))
+void svadd_za32_vg1x2(uint32_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_f32_vg1x2)))
+void svadd_za32_vg1x2(uint32_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_s32_vg1x2)))
+void svadd_za32_vg1x2(uint32_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_u32_vg1x4)))
+void svadd_za32_vg1x4(uint32_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_f32_vg1x4)))
+void svadd_za32_vg1x4(uint32_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_s32_vg1x4)))
+void svadd_za32_vg1x4(uint32_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmopa_za32_u32_m)))
+void svbmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmopa_za32_s32_m)))
+void svbmopa_za32_m(uint64_t, svbool_t, svbool_t, svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmops_za32_u32_m)))
+void svbmops_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmops_za32_s32_m)))
+void svbmops_za32_m(uint64_t, svbool_t, svbool_t, svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_bf16_vg1x2)))
+void svdot_za32_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_f16_vg1x2)))
+void svdot_za32_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s8_vg1x2)))
+void svdot_za32_vg1x2(uint32_t, svint8x2_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s16_vg1x2)))
+void svdot_za32_vg1x2(uint32_t, svint16x2_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u8_vg1x2)))
+void svdot_za32_vg1x2(uint32_t, svuint8x2_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u16_vg1x2)))
+void svdot_za32_vg1x2(uint32_t, svuint16x2_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_bf16_vg1x4)))
+void svdot_za32_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_f16_vg1x4)))
+void svdot_za32_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s8_vg1x4)))
+void svdot_za32_vg1x4(uint32_t, svint8x4_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s16_vg1x4)))
+void svdot_za32_vg1x4(uint32_t, svint16x4_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u8_vg1x4)))
+void svdot_za32_vg1x4(uint32_t, svuint8x4_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u16_vg1x4)))
+void svdot_za32_vg1x4(uint32_t, svuint16x4_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_bf16_vg1x2)))
+void svdot_lane_za32_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_f16_vg1x2)))
+void svdot_lane_za32_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s8_vg1x2)))
+void svdot_lane_za32_vg1x2(uint32_t, svint8x2_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s16_vg1x2)))
+void svdot_lane_za32_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u8_vg1x2)))
+void svdot_lane_za32_vg1x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u16_vg1x2)))
+void svdot_lane_za32_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_bf16_vg1x4)))
+void svdot_lane_za32_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_f16_vg1x4)))
+void svdot_lane_za32_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s8_vg1x4)))
+void svdot_lane_za32_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s16_vg1x4)))
+void svdot_lane_za32_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u8_vg1x4)))
+void svdot_lane_za32_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u16_vg1x4)))
+void svdot_lane_za32_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_bf16_vg1x2)))
+void svdot_za32_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_f16_vg1x2)))
+void svdot_za32_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s8_vg1x2)))
+void svdot_za32_vg1x2(uint32_t, svint8x2_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s16_vg1x2)))
+void svdot_za32_vg1x2(uint32_t, svint16x2_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u8_vg1x2)))
+void svdot_za32_vg1x2(uint32_t, svuint8x2_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u16_vg1x2)))
+void svdot_za32_vg1x2(uint32_t, svuint16x2_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_bf16_vg1x4)))
+void svdot_za32_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_f16_vg1x4)))
+void svdot_za32_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s8_vg1x4)))
+void svdot_za32_vg1x4(uint32_t, svint8x4_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s16_vg1x4)))
+void svdot_za32_vg1x4(uint32_t, svint16x4_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u8_vg1x4)))
+void svdot_za32_vg1x4(uint32_t, svuint8x4_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u16_vg1x4)))
+void svdot_za32_vg1x4(uint32_t, svuint16x4_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f32_vg1x2)))
+void svmla_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f32_vg1x4)))
+void svmla_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_bf16_vg2x2)))
+void svmla_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f16_vg2x2)))
+void svmla_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s16_vg2x2)))
+void svmla_za32_vg2x2(uint32_t, svint16x2_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u16_vg2x2)))
+void svmla_za32_vg2x2(uint32_t, svuint16x2_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_bf16_vg2x4)))
+void svmla_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f16_vg2x4)))
+void svmla_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s16_vg2x4)))
+void svmla_za32_vg2x4(uint32_t, svint16x4_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u16_vg2x4)))
+void svmla_za32_vg2x4(uint32_t, svuint16x4_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s8_vg4x2)))
+void svmla_za32_vg4x2(uint32_t, svint8x2_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u8_vg4x2)))
+void svmla_za32_vg4x2(uint32_t, svuint8x2_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s8_vg4x4)))
+void svmla_za32_vg4x4(uint32_t, svint8x4_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u8_vg4x4)))
+void svmla_za32_vg4x4(uint32_t, svuint8x4_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f32_vg1x2)))
+void svmla_lane_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f32_vg1x4)))
+void svmla_lane_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x1)))
+void svmla_lane_za32_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x1)))
+void svmla_lane_za32_vg2x1(uint32_t, svfloat16_t, svfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x1)))
+void svmla_lane_za32_vg2x1(uint32_t, svint16_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x1)))
+void svmla_lane_za32_vg2x1(uint32_t, svuint16_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x2)))
+void svmla_lane_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x2)))
+void svmla_lane_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x2)))
+void svmla_lane_za32_vg2x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x2)))
+void svmla_lane_za32_vg2x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x4)))
+void svmla_lane_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x4)))
+void svmla_lane_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x4)))
+void svmla_lane_za32_vg2x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x4)))
+void svmla_lane_za32_vg2x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x1)))
+void svmla_lane_za32_vg4x1(uint32_t, svint8_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x1)))
+void svmla_lane_za32_vg4x1(uint32_t, svuint8_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x2)))
+void svmla_lane_za32_vg4x2(uint32_t, svint8x2_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x2)))
+void svmla_lane_za32_vg4x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x4)))
+void svmla_lane_za32_vg4x4(uint32_t, svint8x4_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x4)))
+void svmla_lane_za32_vg4x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f32_vg1x2)))
+void svmla_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f32_vg1x4)))
+void svmla_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x1)))
+void svmla_za32_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x1)))
+void svmla_za32_vg2x1(uint32_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x1)))
+void svmla_za32_vg2x1(uint32_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x1)))
+void svmla_za32_vg2x1(uint32_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x2)))
+void svmla_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x2)))
+void svmla_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x2)))
+void svmla_za32_vg2x2(uint32_t, svint16x2_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x2)))
+void svmla_za32_vg2x2(uint32_t, svuint16x2_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x4)))
+void svmla_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x4)))
+void svmla_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x4)))
+void svmla_za32_vg2x4(uint32_t, svint16x4_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x4)))
+void svmla_za32_vg2x4(uint32_t, svuint16x4_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x1)))
+void svmla_za32_vg4x1(uint32_t, svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x1)))
+void svmla_za32_vg4x1(uint32_t, svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x2)))
+void svmla_za32_vg4x2(uint32_t, svint8x2_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x2)))
+void svmla_za32_vg4x2(uint32_t, svuint8x2_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x4)))
+void svmla_za32_vg4x4(uint32_t, svint8x4_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x4)))
+void svmla_za32_vg4x4(uint32_t, svuint8x4_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f32_vg1x2)))
+void svmls_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f32_vg1x4)))
+void svmls_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_bf16_vg2x2)))
+void svmls_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f16_vg2x2)))
+void svmls_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s16_vg2x2)))
+void svmls_za32_vg2x2(uint32_t, svint16x2_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u16_vg2x2)))
+void svmls_za32_vg2x2(uint32_t, svuint16x2_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_bf16_vg2x4)))
+void svmls_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f16_vg2x4)))
+void svmls_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s16_vg2x4)))
+void svmls_za32_vg2x4(uint32_t, svint16x4_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u16_vg2x4)))
+void svmls_za32_vg2x4(uint32_t, svuint16x4_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s8_vg4x2)))
+void svmls_za32_vg4x2(uint32_t, svint8x2_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u8_vg4x2)))
+void svmls_za32_vg4x2(uint32_t, svuint8x2_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s8_vg4x4)))
+void svmls_za32_vg4x4(uint32_t, svint8x4_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u8_vg4x4)))
+void svmls_za32_vg4x4(uint32_t, svuint8x4_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f32_vg1x2)))
+void svmls_lane_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f32_vg1x4)))
+void svmls_lane_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x1)))
+void svmls_lane_za32_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x1)))
+void svmls_lane_za32_vg2x1(uint32_t, svfloat16_t, svfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x1)))
+void svmls_lane_za32_vg2x1(uint32_t, svint16_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x1)))
+void svmls_lane_za32_vg2x1(uint32_t, svuint16_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x2)))
+void svmls_lane_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x2)))
+void svmls_lane_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x2)))
+void svmls_lane_za32_vg2x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x2)))
+void svmls_lane_za32_vg2x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x4)))
+void svmls_lane_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x4)))
+void svmls_lane_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x4)))
+void svmls_lane_za32_vg2x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x4)))
+void svmls_lane_za32_vg2x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x1)))
+void svmls_lane_za32_vg4x1(uint32_t, svint8_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x1)))
+void svmls_lane_za32_vg4x1(uint32_t, svuint8_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x2)))
+void svmls_lane_za32_vg4x2(uint32_t, svint8x2_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x2)))
+void svmls_lane_za32_vg4x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x4)))
+void svmls_lane_za32_vg4x4(uint32_t, svint8x4_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x4)))
+void svmls_lane_za32_vg4x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f32_vg1x2)))
+void svmls_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f32_vg1x4)))
+void svmls_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x1)))
+void svmls_za32_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x1)))
+void svmls_za32_vg2x1(uint32_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x1)))
+void svmls_za32_vg2x1(uint32_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x1)))
+void svmls_za32_vg2x1(uint32_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x2)))
+void svmls_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x2)))
+void svmls_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x2)))
+void svmls_za32_vg2x2(uint32_t, svint16x2_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x2)))
+void svmls_za32_vg2x2(uint32_t, svuint16x2_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x4)))
+void svmls_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x4)))
+void svmls_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x4)))
+void svmls_za32_vg2x4(uint32_t, svint16x4_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x4)))
+void svmls_za32_vg2x4(uint32_t, svuint16x4_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x1)))
+void svmls_za32_vg4x1(uint32_t, svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x1)))
+void svmls_za32_vg4x1(uint32_t, svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x2)))
+void svmls_za32_vg4x2(uint32_t, svint8x2_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x2)))
+void svmls_za32_vg4x2(uint32_t, svuint8x2_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x4)))
+void svmls_za32_vg4x4(uint32_t, svint8x4_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x4)))
+void svmls_za32_vg4x4(uint32_t, svuint8x4_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s16_m)))
+void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u16_m)))
+void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s16_m)))
+void svmops_za32_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u16_m)))
+void svmops_za32_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_u32_vg1x2)))
+void svsub_write_za32_vg1x2(uint32_t, svuint32x2_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_s32_vg1x2)))
+void svsub_write_za32_vg1x2(uint32_t, svint32x2_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_u32_vg1x4)))
+void svsub_write_za32_vg1x4(uint32_t, svuint32x4_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_s32_vg1x4)))
+void svsub_write_za32_vg1x4(uint32_t, svint32x4_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_u32_vg1x2)))
+void svsub_write_za32_vg1x2(uint32_t, svuint32x2_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_s32_vg1x2)))
+void svsub_write_za32_vg1x2(uint32_t, svint32x2_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_u32_vg1x4)))
+void svsub_write_za32_vg1x4(uint32_t, svuint32x4_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_s32_vg1x4)))
+void svsub_write_za32_vg1x4(uint32_t, svint32x4_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_u32_vg1x2)))
+void svsub_za32_vg1x2(uint32_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_f32_vg1x2)))
+void svsub_za32_vg1x2(uint32_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_s32_vg1x2)))
+void svsub_za32_vg1x2(uint32_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_u32_vg1x4)))
+void svsub_za32_vg1x4(uint32_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_f32_vg1x4)))
+void svsub_za32_vg1x4(uint32_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_s32_vg1x4)))
+void svsub_za32_vg1x4(uint32_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_single_za32_s8_vg1x2)))
+void svsudot_za32_vg1x2(uint32_t, svint8x2_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_single_za32_s8_vg1x4)))
+void svsudot_za32_vg1x4(uint32_t, svint8x4_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_lane_za32_s8_vg1x2)))
+void svsudot_lane_za32_vg1x2(uint32_t, svint8x2_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_lane_za32_s8_vg1x4)))
+void svsudot_lane_za32_vg1x4(uint32_t, svint8x4_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_za32_s8_vg1x2)))
+void svsudot_za32_vg1x2(uint32_t, svint8x2_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_za32_s8_vg1x4)))
+void svsudot_za32_vg1x4(uint32_t, svint8x4_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_single_za32_s8_vg4x2)))
+void svsumla_za32_vg4x2(uint32_t, svint8x2_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_single_za32_s8_vg4x4)))
+void svsumla_za32_vg4x4(uint32_t, svint8x4_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x1)))
+void svsumla_lane_za32_vg4x1(uint32_t, svint8_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x2)))
+void svsumla_lane_za32_vg4x2(uint32_t, svint8x2_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x4)))
+void svsumla_lane_za32_vg4x4(uint32_t, svint8x4_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x1)))
+void svsumla_za32_vg4x1(uint32_t, svint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x2)))
+void svsumla_za32_vg4x2(uint32_t, svint8x2_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x4)))
+void svsumla_za32_vg4x4(uint32_t, svint8x4_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsuvdot_lane_za32_s8_vg1x4)))
+void svsuvdot_lane_za32_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_single_za32_u8_vg1x2)))
+void svusdot_za32_vg1x2(uint32_t, svuint8x2_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_single_za32_u8_vg1x4)))
+void svusdot_za32_vg1x4(uint32_t, svuint8x4_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_lane_za32_u8_vg1x2)))
+void svusdot_lane_za32_vg1x2(uint32_t, svuint8x2_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_lane_za32_u8_vg1x4)))
+void svusdot_lane_za32_vg1x4(uint32_t, svuint8x4_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_za32_u8_vg1x2)))
+void svusdot_za32_vg1x2(uint32_t, svuint8x2_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_za32_u8_vg1x4)))
+void svusdot_za32_vg1x4(uint32_t, svuint8x4_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_single_za32_u8_vg4x2)))
+void svusmla_za32_vg4x2(uint32_t, svuint8x2_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_single_za32_u8_vg4x4)))
+void svusmla_za32_vg4x4(uint32_t, svuint8x4_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x1)))
+void svusmla_lane_za32_vg4x1(uint32_t, svuint8_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x2)))
+void svusmla_lane_za32_vg4x2(uint32_t, svuint8x2_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x4)))
+void svusmla_lane_za32_vg4x4(uint32_t, svuint8x4_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x1)))
+void svusmla_za32_vg4x1(uint32_t, svuint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x2)))
+void svusmla_za32_vg4x2(uint32_t, svuint8x2_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x4)))
+void svusmla_za32_vg4x4(uint32_t, svuint8x4_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusvdot_lane_za32_u8_vg1x4)))
+void svusvdot_lane_za32_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_bf16_vg1x2)))
+void svvdot_lane_za32_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_f16_vg1x2)))
+void svvdot_lane_za32_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_s16_vg1x2)))
+void svvdot_lane_za32_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_u16_vg1x2)))
+void svvdot_lane_za32_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_s8_vg1x4)))
+void svvdot_lane_za32_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_u8_vg1x4)))
+void svvdot_lane_za32_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_vg2)))
+void svwrite_hor_za16_vg2(uint64_t, uint32_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_vg2)))
+void svwrite_hor_za16_vg2(uint64_t, uint32_t, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_vg2)))
+void svwrite_hor_za16_vg2(uint64_t, uint32_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_vg2)))
+void svwrite_hor_za16_vg2(uint64_t, uint32_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_vg4)))
+void svwrite_hor_za16_vg4(uint64_t, uint32_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_vg4)))
+void svwrite_hor_za16_vg4(uint64_t, uint32_t, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_vg4)))
+void svwrite_hor_za16_vg4(uint64_t, uint32_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_vg4)))
+void svwrite_hor_za16_vg4(uint64_t, uint32_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_vg2)))
+void svwrite_hor_za32_vg2(uint64_t, uint32_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_vg2)))
+void svwrite_hor_za32_vg2(uint64_t, uint32_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_vg2)))
+void svwrite_hor_za32_vg2(uint64_t, uint32_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_vg4)))
+void svwrite_hor_za32_vg4(uint64_t, uint32_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_vg4)))
+void svwrite_hor_za32_vg4(uint64_t, uint32_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_vg4)))
+void svwrite_hor_za32_vg4(uint64_t, uint32_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_vg2)))
+void svwrite_hor_za64_vg2(uint64_t, uint32_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_vg2)))
+void svwrite_hor_za64_vg2(uint64_t, uint32_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_vg2)))
+void svwrite_hor_za64_vg2(uint64_t, uint32_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_vg4)))
+void svwrite_hor_za64_vg4(uint64_t, uint32_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_vg4)))
+void svwrite_hor_za64_vg4(uint64_t, uint32_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_vg4)))
+void svwrite_hor_za64_vg4(uint64_t, uint32_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg2)))
+void svwrite_hor_za8_vg2(uint64_t, uint32_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg2)))
+void svwrite_hor_za8_vg2(uint64_t, uint32_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg4)))
+void svwrite_hor_za8_vg4(uint64_t, uint32_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg4)))
+void svwrite_hor_za8_vg4(uint64_t, uint32_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg2)))
+void svwrite_ver_za16_vg2(uint64_t, uint32_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg2)))
+void svwrite_ver_za16_vg2(uint64_t, uint32_t, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_vg2)))
+void svwrite_ver_za16_vg2(uint64_t, uint32_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_vg2)))
+void svwrite_ver_za16_vg2(uint64_t, uint32_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg4)))
+void svwrite_ver_za16_vg4(uint64_t, uint32_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg4)))
+void svwrite_ver_za16_vg4(uint64_t, uint32_t, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_vg4)))
+void svwrite_ver_za16_vg4(uint64_t, uint32_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_vg4)))
+void svwrite_ver_za16_vg4(uint64_t, uint32_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_vg2)))
+void svwrite_ver_za32_vg2(uint64_t, uint32_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_vg2)))
+void svwrite_ver_za32_vg2(uint64_t, uint32_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_vg2)))
+void svwrite_ver_za32_vg2(uint64_t, uint32_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_vg4)))
+void svwrite_ver_za32_vg4(uint64_t, uint32_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_vg4)))
+void svwrite_ver_za32_vg4(uint64_t, uint32_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_vg4)))
+void svwrite_ver_za32_vg4(uint64_t, uint32_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_vg2)))
+void svwrite_ver_za64_vg2(uint64_t, uint32_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_vg2)))
+void svwrite_ver_za64_vg2(uint64_t, uint32_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_vg2)))
+void svwrite_ver_za64_vg2(uint64_t, uint32_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_vg4)))
+void svwrite_ver_za64_vg4(uint64_t, uint32_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_vg4)))
+void svwrite_ver_za64_vg4(uint64_t, uint32_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_vg4)))
+void svwrite_ver_za64_vg4(uint64_t, uint32_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg2)))
+void svwrite_ver_za8_vg2(uint64_t, uint32_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg2)))
+void svwrite_ver_za8_vg2(uint64_t, uint32_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg4)))
+void svwrite_ver_za8_vg4(uint64_t, uint32_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg4)))
+void svwrite_ver_za8_vg4(uint64_t, uint32_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x2)))
+void svwrite_za16_vg1x2(uint32_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x2)))
+void svwrite_za16_vg1x2(uint32_t, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_f16_vg1x2)))
+void svwrite_za16_vg1x2(uint32_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_s16_vg1x2)))
+void svwrite_za16_vg1x2(uint32_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x4)))
+void svwrite_za16_vg1x4(uint32_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x4)))
+void svwrite_za16_vg1x4(uint32_t, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_f16_vg1x4)))
+void svwrite_za16_vg1x4(uint32_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_s16_vg1x4)))
+void svwrite_za16_vg1x4(uint32_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_u32_vg1x2)))
+void svwrite_za32_vg1x2(uint32_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_f32_vg1x2)))
+void svwrite_za32_vg1x2(uint32_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_s32_vg1x2)))
+void svwrite_za32_vg1x2(uint32_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_u32_vg1x4)))
+void svwrite_za32_vg1x4(uint32_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_f32_vg1x4)))
+void svwrite_za32_vg1x4(uint32_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_s32_vg1x4)))
+void svwrite_za32_vg1x4(uint32_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_u64_vg1x2)))
+void svwrite_za64_vg1x2(uint32_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_f64_vg1x2)))
+void svwrite_za64_vg1x2(uint32_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_s64_vg1x2)))
+void svwrite_za64_vg1x2(uint32_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_u64_vg1x4)))
+void svwrite_za64_vg1x4(uint32_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_f64_vg1x4)))
+void svwrite_za64_vg1x4(uint32_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_s64_vg1x4)))
+void svwrite_za64_vg1x4(uint32_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x2)))
+void svwrite_za8_vg1x2(uint32_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x2)))
+void svwrite_za8_vg1x2(uint32_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x4)))
+void svwrite_za8_vg1x4(uint32_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x4)))
+void svwrite_za8_vg1x4(uint32_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x2)))
+void svadd_za64_f64_vg1x2(uint32_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x4)))
+void svadd_za64_f64_vg1x4(uint32_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_f64_vg1x2)))
+void svmla_single_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_f64_vg1x4)))
+void svmla_single_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_f64_vg1x2)))
+void svmla_lane_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_f64_vg1x4)))
+void svmla_lane_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_f64_vg1x2)))
+void svmla_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_f64_vg1x4)))
+void svmla_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_f64_vg1x2)))
+void svmls_single_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_f64_vg1x4)))
+void svmls_single_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_f64_vg1x2)))
+void svmls_lane_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_f64_vg1x4)))
+void svmls_lane_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_f64_vg1x2)))
+void svmls_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_f64_vg1x4)))
+void svmls_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_f64_vg1x2)))
+void svsub_za64_f64_vg1x2(uint32_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_f64_vg1x4)))
+void svsub_za64_f64_vg1x4(uint32_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x2)))
+void svadd_za64_vg1x2(uint32_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x4)))
+void svadd_za64_vg1x4(uint32_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_f64_vg1x2)))
+void svmla_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_f64_vg1x4)))
+void svmla_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_f64_vg1x2)))
+void svmla_lane_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_f64_vg1x4)))
+void svmla_lane_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_f64_vg1x2)))
+void svmla_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_f64_vg1x4)))
+void svmla_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_f64_vg1x2)))
+void svmls_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_f64_vg1x4)))
+void svmls_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_f64_vg1x2)))
+void svmls_lane_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_f64_vg1x4)))
+void svmls_lane_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_f64_vg1x2)))
+void svmls_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_f64_vg1x4)))
+void svmls_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_f64_vg1x2)))
+void svsub_za64_vg1x2(uint32_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_f64_vg1x4)))
+void svsub_za64_vg1x4(uint32_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_u64_vg1x2)))
+void svadd_write_single_za64_u64_vg1x2(uint32_t, svuint64x2_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_s64_vg1x2)))
+void svadd_write_single_za64_s64_vg1x2(uint32_t, svint64x2_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_u64_vg1x4)))
+void svadd_write_single_za64_u64_vg1x4(uint32_t, svuint64x4_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_s64_vg1x4)))
+void svadd_write_single_za64_s64_vg1x4(uint32_t, svint64x4_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_u64_vg1x2)))
+void svadd_write_za64_u64_vg1x2(uint32_t, svuint64x2_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_s64_vg1x2)))
+void svadd_write_za64_s64_vg1x2(uint32_t, svint64x2_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_u64_vg1x4)))
+void svadd_write_za64_u64_vg1x4(uint32_t, svuint64x4_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_s64_vg1x4)))
+void svadd_write_za64_s64_vg1x4(uint32_t, svint64x4_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_u64_vg1x2)))
+void svadd_za64_u64_vg1x2(uint32_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_s64_vg1x2)))
+void svadd_za64_s64_vg1x2(uint32_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_u64_vg1x4)))
+void svadd_za64_u64_vg1x4(uint32_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_s64_vg1x4)))
+void svadd_za64_s64_vg1x4(uint32_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_s16_vg1x2)))
+void svdot_single_za64_s16_vg1x2(uint32_t, svint16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_u16_vg1x2)))
+void svdot_single_za64_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_s16_vg1x4)))
+void svdot_single_za64_s16_vg1x4(uint32_t, svint16x4_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_u16_vg1x4)))
+void svdot_single_za64_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_s16_vg1x2)))
+void svdot_lane_za64_s16_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_u16_vg1x2)))
+void svdot_lane_za64_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_s16_vg1x4)))
+void svdot_lane_za64_s16_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_u16_vg1x4)))
+void svdot_lane_za64_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_s16_vg1x2)))
+void svdot_za64_s16_vg1x2(uint32_t, svint16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_u16_vg1x2)))
+void svdot_za64_u16_vg1x2(uint32_t, svuint16x2_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_s16_vg1x4)))
+void svdot_za64_s16_vg1x4(uint32_t, svint16x4_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_u16_vg1x4)))
+void svdot_za64_u16_vg1x4(uint32_t, svuint16x4_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_s16_vg4x2)))
+void svmla_single_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_u16_vg4x2)))
+void svmla_single_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_s16_vg4x4)))
+void svmla_single_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_u16_vg4x4)))
+void svmla_single_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x1)))
+void svmla_lane_za64_s16_vg4x1(uint32_t, svint16_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x1)))
+void svmla_lane_za64_u16_vg4x1(uint32_t, svuint16_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x2)))
+void svmla_lane_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x2)))
+void svmla_lane_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x4)))
+void svmla_lane_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x4)))
+void svmla_lane_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x1)))
+void svmla_za64_s16_vg4x1(uint32_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x1)))
+void svmla_za64_u16_vg4x1(uint32_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x2)))
+void svmla_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x2)))
+void svmla_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x4)))
+void svmla_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x4)))
+void svmla_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_s16_vg4x2)))
+void svmls_single_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_u16_vg4x2)))
+void svmls_single_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_s16_vg4x4)))
+void svmls_single_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_u16_vg4x4)))
+void svmls_single_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x1)))
+void svmls_lane_za64_s16_vg4x1(uint32_t, svint16_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x1)))
+void svmls_lane_za64_u16_vg4x1(uint32_t, svuint16_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x2)))
+void svmls_lane_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x2)))
+void svmls_lane_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x4)))
+void svmls_lane_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x4)))
+void svmls_lane_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x1)))
+void svmls_za64_s16_vg4x1(uint32_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x1)))
+void svmls_za64_u16_vg4x1(uint32_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x2)))
+void svmls_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x2)))
+void svmls_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x4)))
+void svmls_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x4)))
+void svmls_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_u64_vg1x2)))
+void svsub_write_single_za64_u64_vg1x2(uint32_t, svuint64x2_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_s64_vg1x2)))
+void svsub_write_single_za64_s64_vg1x2(uint32_t, svint64x2_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_u64_vg1x4)))
+void svsub_write_single_za64_u64_vg1x4(uint32_t, svuint64x4_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_s64_vg1x4)))
+void svsub_write_single_za64_s64_vg1x4(uint32_t, svint64x4_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_u64_vg1x2)))
+void svsub_write_za64_u64_vg1x2(uint32_t, svuint64x2_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_s64_vg1x2)))
+void svsub_write_za64_s64_vg1x2(uint32_t, svint64x2_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_u64_vg1x4)))
+void svsub_write_za64_u64_vg1x4(uint32_t, svuint64x4_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_s64_vg1x4)))
+void svsub_write_za64_s64_vg1x4(uint32_t, svint64x4_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_u64_vg1x2)))
+void svsub_za64_u64_vg1x2(uint32_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_s64_vg1x2)))
+void svsub_za64_s64_vg1x2(uint32_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_u64_vg1x4)))
+void svsub_za64_u64_vg1x4(uint32_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_s64_vg1x4)))
+void svsub_za64_s64_vg1x4(uint32_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_s16_vg1x4)))
+void svvdot_lane_za64_s16_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_u16_vg1x4)))
+void svvdot_lane_za64_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_u64_vg1x2)))
+void svadd_write_za64_vg1x2(uint32_t, svuint64x2_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_s64_vg1x2)))
+void svadd_write_za64_vg1x2(uint32_t, svint64x2_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_u64_vg1x4)))
+void svadd_write_za64_vg1x4(uint32_t, svuint64x4_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_s64_vg1x4)))
+void svadd_write_za64_vg1x4(uint32_t, svint64x4_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_u64_vg1x2)))
+void svadd_write_za64_vg1x2(uint32_t, svuint64x2_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_s64_vg1x2)))
+void svadd_write_za64_vg1x2(uint32_t, svint64x2_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_u64_vg1x4)))
+void svadd_write_za64_vg1x4(uint32_t, svuint64x4_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_s64_vg1x4)))
+void svadd_write_za64_vg1x4(uint32_t, svint64x4_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_u64_vg1x2)))
+void svadd_za64_vg1x2(uint32_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_s64_vg1x2)))
+void svadd_za64_vg1x2(uint32_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_u64_vg1x4)))
+void svadd_za64_vg1x4(uint32_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_s64_vg1x4)))
+void svadd_za64_vg1x4(uint32_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_s16_vg1x2)))
+void svdot_za64_vg1x2(uint32_t, svint16x2_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_u16_vg1x2)))
+void svdot_za64_vg1x2(uint32_t, svuint16x2_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_s16_vg1x4)))
+void svdot_za64_vg1x4(uint32_t, svint16x4_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_u16_vg1x4)))
+void svdot_za64_vg1x4(uint32_t, svuint16x4_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_s16_vg1x2)))
+void svdot_lane_za64_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_u16_vg1x2)))
+void svdot_lane_za64_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_s16_vg1x4)))
+void svdot_lane_za64_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_u16_vg1x4)))
+void svdot_lane_za64_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_s16_vg1x2)))
+void svdot_za64_vg1x2(uint32_t, svint16x2_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_u16_vg1x2)))
+void svdot_za64_vg1x2(uint32_t, svuint16x2_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_s16_vg1x4)))
+void svdot_za64_vg1x4(uint32_t, svint16x4_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_u16_vg1x4)))
+void svdot_za64_vg1x4(uint32_t, svuint16x4_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_s16_vg4x2)))
+void svmla_za64_vg4x2(uint32_t, svint16x2_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_u16_vg4x2)))
+void svmla_za64_vg4x2(uint32_t, svuint16x2_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_s16_vg4x4)))
+void svmla_za64_vg4x4(uint32_t, svint16x4_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_u16_vg4x4)))
+void svmla_za64_vg4x4(uint32_t, svuint16x4_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x1)))
+void svmla_lane_za64_vg4x1(uint32_t, svint16_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x1)))
+void svmla_lane_za64_vg4x1(uint32_t, svuint16_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x2)))
+void svmla_lane_za64_vg4x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x2)))
+void svmla_lane_za64_vg4x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x4)))
+void svmla_lane_za64_vg4x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x4)))
+void svmla_lane_za64_vg4x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x1)))
+void svmla_za64_vg4x1(uint32_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x1)))
+void svmla_za64_vg4x1(uint32_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x2)))
+void svmla_za64_vg4x2(uint32_t, svint16x2_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x2)))
+void svmla_za64_vg4x2(uint32_t, svuint16x2_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x4)))
+void svmla_za64_vg4x4(uint32_t, svint16x4_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x4)))
+void svmla_za64_vg4x4(uint32_t, svuint16x4_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_s16_vg4x2)))
+void svmls_za64_vg4x2(uint32_t, svint16x2_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_u16_vg4x2)))
+void svmls_za64_vg4x2(uint32_t, svuint16x2_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_s16_vg4x4)))
+void svmls_za64_vg4x4(uint32_t, svint16x4_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_u16_vg4x4)))
+void svmls_za64_vg4x4(uint32_t, svuint16x4_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x1)))
+void svmls_lane_za64_vg4x1(uint32_t, svint16_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x1)))
+void svmls_lane_za64_vg4x1(uint32_t, svuint16_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x2)))
+void svmls_lane_za64_vg4x2(uint32_t, svint16x2_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x2)))
+void svmls_lane_za64_vg4x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x4)))
+void svmls_lane_za64_vg4x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x4)))
+void svmls_lane_za64_vg4x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x1)))
+void svmls_za64_vg4x1(uint32_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x1)))
+void svmls_za64_vg4x1(uint32_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x2)))
+void svmls_za64_vg4x2(uint32_t, svint16x2_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x2)))
+void svmls_za64_vg4x2(uint32_t, svuint16x2_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x4)))
+void svmls_za64_vg4x4(uint32_t, svint16x4_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x4)))
+void svmls_za64_vg4x4(uint32_t, svuint16x4_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_u64_vg1x2)))
+void svsub_write_za64_vg1x2(uint32_t, svuint64x2_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_s64_vg1x2)))
+void svsub_write_za64_vg1x2(uint32_t, svint64x2_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_u64_vg1x4)))
+void svsub_write_za64_vg1x4(uint32_t, svuint64x4_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_s64_vg1x4)))
+void svsub_write_za64_vg1x4(uint32_t, svint64x4_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_u64_vg1x2)))
+void svsub_write_za64_vg1x2(uint32_t, svuint64x2_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_s64_vg1x2)))
+void svsub_write_za64_vg1x2(uint32_t, svint64x2_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_u64_vg1x4)))
+void svsub_write_za64_vg1x4(uint32_t, svuint64x4_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_s64_vg1x4)))
+void svsub_write_za64_vg1x4(uint32_t, svint64x4_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_u64_vg1x2)))
+void svsub_za64_vg1x2(uint32_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_s64_vg1x2)))
+void svsub_za64_vg1x2(uint32_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_u64_vg1x4)))
+void svsub_za64_vg1x4(uint32_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_s64_vg1x4)))
+void svsub_za64_vg1x4(uint32_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_s16_vg1x4)))
+void svvdot_lane_za64_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_u16_vg1x4)))
+void svvdot_lane_za64_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#undef __ai
+
+#endif /* __ARM_SME_H */
lib/include/arm_sme_draft_spec_subject_to_change.h
@@ -1,642 +0,0 @@
-/*===---- arm_sme_draft_spec_subject_to_change.h - ARM SME intrinsics ------===
- *
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-
-#ifndef __ARM_SME_H
-#define __ARM_SME_H
-
-#if !defined(__LITTLE_ENDIAN__)
-#error "Big endian is currently not supported for arm_sme_draft_spec_subject_to_change.h"
-#endif
-#include <arm_sve.h>
-
-/* Function attributes */
-#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
-
-#define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__))
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za))
-void svaddha_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za))
-void svaddha_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za))
-void svaddva_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za))
-void svaddva_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsb), arm_streaming_compatible, arm_preserves_za))
-uint64_t svcntsb(void);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsd), arm_streaming_compatible, arm_preserves_za))
-uint64_t svcntsd(void);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsh), arm_streaming_compatible, arm_preserves_za))
-uint64_t svcntsh(void);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsw), arm_streaming_compatible, arm_preserves_za))
-uint64_t svcntsw(void);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za128), arm_streaming, arm_shared_za))
-void svld1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za16), arm_streaming, arm_shared_za))
-void svld1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za32), arm_streaming, arm_shared_za))
-void svld1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za64), arm_streaming, arm_shared_za))
-void svld1_hor_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za8), arm_streaming, arm_shared_za))
-void svld1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za128), arm_streaming, arm_shared_za))
-void svld1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za16), arm_streaming, arm_shared_za))
-void svld1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za32), arm_streaming, arm_shared_za))
-void svld1_hor_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za64), arm_streaming, arm_shared_za))
-void svld1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za8), arm_streaming, arm_shared_za))
-void svld1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za128), arm_streaming, arm_shared_za))
-void svld1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za16), arm_streaming, arm_shared_za))
-void svld1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za32), arm_streaming, arm_shared_za))
-void svld1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za64), arm_streaming, arm_shared_za))
-void svld1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za8), arm_streaming, arm_shared_za))
-void svld1_ver_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za128), arm_streaming, arm_shared_za))
-void svld1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za16), arm_streaming, arm_shared_za))
-void svld1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za32), arm_streaming, arm_shared_za))
-void svld1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za64), arm_streaming, arm_shared_za))
-void svld1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za8), arm_streaming, arm_shared_za))
-void svld1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za))
-void svmopa_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za))
-void svmopa_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za))
-void svmopa_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, arm_shared_za))
-void svmopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za))
-void svmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za))
-void svmops_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za))
-void svmops_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za))
-void svmops_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za))
-void svmops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za))
-void svmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint8_t svread_hor_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint32_t svread_hor_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint64_t svread_hor_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint16_t svread_hor_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svbfloat16_t svread_hor_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint8_t svread_hor_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat64_t svread_hor_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat32_t svread_hor_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat16_t svread_hor_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint32_t svread_hor_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint64_t svread_hor_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint16_t svread_hor_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint16_t svread_hor_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svbfloat16_t svread_hor_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat16_t svread_hor_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint16_t svread_hor_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint32_t svread_hor_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat32_t svread_hor_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint32_t svread_hor_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint64_t svread_hor_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat64_t svread_hor_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint64_t svread_hor_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint8_t svread_hor_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint8_t svread_hor_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint8_t svread_ver_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint32_t svread_ver_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint64_t svread_ver_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint16_t svread_ver_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svbfloat16_t svread_ver_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint8_t svread_ver_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat64_t svread_ver_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat32_t svread_ver_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat16_t svread_ver_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint32_t svread_ver_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint64_t svread_ver_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint16_t svread_ver_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint16_t svread_ver_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svbfloat16_t svread_ver_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat16_t svread_ver_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint16_t svread_ver_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint32_t svread_ver_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat32_t svread_ver_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint32_t svread_ver_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint64_t svread_ver_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat64_t svread_ver_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint64_t svread_ver_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint8_t svread_ver_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint8_t svread_ver_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_hor_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za128), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za16), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za32), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_hor_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za64), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za8), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_ver_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za128), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za16), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za32), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za64), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za8), arm_streaming, arm_shared_za, arm_preserves_za))
-void svst1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za))
-void svsumopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za))
-void svsumops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za))
-void svusmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za))
-void svusmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za64_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za64_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_mask_za), arm_streaming_compatible, arm_shared_za))
-void svzero_mask_za(uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za), arm_streaming_compatible, arm_shared_za))
-void svzero_za();
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za))
-void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za))
-void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za))
-void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za))
-void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za))
-void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za))
-void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za))
-void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, arm_shared_za))
-void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za))
-void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za))
-void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za))
-void svmops_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za))
-void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za))
-void svmops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za))
-void svmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint8_t svread_hor_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint32_t svread_hor_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint64_t svread_hor_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint16_t svread_hor_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svbfloat16_t svread_hor_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint8_t svread_hor_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat64_t svread_hor_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat32_t svread_hor_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat16_t svread_hor_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint32_t svread_hor_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint64_t svread_hor_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint16_t svread_hor_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint16_t svread_hor_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svbfloat16_t svread_hor_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat16_t svread_hor_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint16_t svread_hor_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint32_t svread_hor_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat32_t svread_hor_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint32_t svread_hor_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint64_t svread_hor_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat64_t svread_hor_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint64_t svread_hor_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint8_t svread_hor_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint8_t svread_hor_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint8_t svread_ver_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint32_t svread_ver_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint64_t svread_ver_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint16_t svread_ver_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svbfloat16_t svread_ver_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint8_t svread_ver_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat64_t svread_ver_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat32_t svread_ver_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat16_t svread_ver_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint32_t svread_ver_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint64_t svread_ver_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint16_t svread_ver_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint16_t svread_ver_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svbfloat16_t svread_ver_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat16_t svread_ver_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint16_t svread_ver_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint32_t svread_ver_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat32_t svread_ver_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint32_t svread_ver_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint64_t svread_ver_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svfloat64_t svread_ver_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint64_t svread_ver_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svuint8_t svread_ver_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
-svint8_t svread_ver_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za))
-void svsumopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za))
-void svsumops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za))
-void svusmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za))
-void svusmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za))
-void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za))
-void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za))
-void svmopa_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za))
-void svmops_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za))
-void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za))
-void svmops_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), arm_streaming, arm_shared_za))
-void svaddha_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za))
-void svaddha_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za))
-void svaddva_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za))
-void svaddva_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za))
-void svmopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za))
-void svmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za))
-void svmops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za))
-void svmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za))
-void svsumopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za))
-void svsumops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za))
-void svusmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za))
-void svusmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), arm_streaming, arm_shared_za))
-void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za))
-void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za))
-void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za))
-void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za))
-void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za))
-void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za))
-void svmops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za))
-void svmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za))
-void svsumopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za))
-void svsumops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za))
-void svusmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za))
-void svusmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_vnum_za), arm_streaming_compatible, arm_shared_za))
-void svldr_vnum_za(uint32_t, uint64_t, void const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_vnum_za), arm_streaming_compatible, arm_shared_za, arm_preserves_za))
-void svstr_vnum_za(uint32_t, uint64_t, void *);
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#undef __ai
-
-#endif /* __ARM_SME_H */
lib/include/arm_sve.h
@@ -35,8 +35,9 @@ typedef __SVUint32_t svuint32_t;
typedef __SVUint64_t svuint64_t;
typedef __SVFloat16_t svfloat16_t;
-typedef __SVBFloat16_t svbfloat16_t;
+typedef __SVBfloat16_t svbfloat16_t;
#include <arm_bf16.h>
+#include <arm_vector_types.h>
typedef __SVFloat32_t svfloat32_t;
typedef __SVFloat64_t svfloat64_t;
typedef __clang_svint8x2_t svint8x2_t;
@@ -124,725 +125,4657 @@ enum svprfop
#define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__))
#define svreinterpret_s8_s8(...) __builtin_sve_reinterpret_s8_s8(__VA_ARGS__)
-#define svreinterpret_s8_s16(...) __builtin_sve_reinterpret_s8_s16(__VA_ARGS__)
-#define svreinterpret_s8_s32(...) __builtin_sve_reinterpret_s8_s32(__VA_ARGS__)
-#define svreinterpret_s8_s64(...) __builtin_sve_reinterpret_s8_s64(__VA_ARGS__)
#define svreinterpret_s8_u8(...) __builtin_sve_reinterpret_s8_u8(__VA_ARGS__)
+#define svreinterpret_s8_s16(...) __builtin_sve_reinterpret_s8_s16(__VA_ARGS__)
#define svreinterpret_s8_u16(...) __builtin_sve_reinterpret_s8_u16(__VA_ARGS__)
+#define svreinterpret_s8_s32(...) __builtin_sve_reinterpret_s8_s32(__VA_ARGS__)
#define svreinterpret_s8_u32(...) __builtin_sve_reinterpret_s8_u32(__VA_ARGS__)
+#define svreinterpret_s8_s64(...) __builtin_sve_reinterpret_s8_s64(__VA_ARGS__)
#define svreinterpret_s8_u64(...) __builtin_sve_reinterpret_s8_u64(__VA_ARGS__)
#define svreinterpret_s8_f16(...) __builtin_sve_reinterpret_s8_f16(__VA_ARGS__)
#define svreinterpret_s8_bf16(...) __builtin_sve_reinterpret_s8_bf16(__VA_ARGS__)
#define svreinterpret_s8_f32(...) __builtin_sve_reinterpret_s8_f32(__VA_ARGS__)
#define svreinterpret_s8_f64(...) __builtin_sve_reinterpret_s8_f64(__VA_ARGS__)
-#define svreinterpret_s16_s8(...) __builtin_sve_reinterpret_s16_s8(__VA_ARGS__)
-#define svreinterpret_s16_s16(...) __builtin_sve_reinterpret_s16_s16(__VA_ARGS__)
-#define svreinterpret_s16_s32(...) __builtin_sve_reinterpret_s16_s32(__VA_ARGS__)
-#define svreinterpret_s16_s64(...) __builtin_sve_reinterpret_s16_s64(__VA_ARGS__)
-#define svreinterpret_s16_u8(...) __builtin_sve_reinterpret_s16_u8(__VA_ARGS__)
-#define svreinterpret_s16_u16(...) __builtin_sve_reinterpret_s16_u16(__VA_ARGS__)
-#define svreinterpret_s16_u32(...) __builtin_sve_reinterpret_s16_u32(__VA_ARGS__)
-#define svreinterpret_s16_u64(...) __builtin_sve_reinterpret_s16_u64(__VA_ARGS__)
-#define svreinterpret_s16_f16(...) __builtin_sve_reinterpret_s16_f16(__VA_ARGS__)
-#define svreinterpret_s16_bf16(...) __builtin_sve_reinterpret_s16_bf16(__VA_ARGS__)
-#define svreinterpret_s16_f32(...) __builtin_sve_reinterpret_s16_f32(__VA_ARGS__)
-#define svreinterpret_s16_f64(...) __builtin_sve_reinterpret_s16_f64(__VA_ARGS__)
-#define svreinterpret_s32_s8(...) __builtin_sve_reinterpret_s32_s8(__VA_ARGS__)
-#define svreinterpret_s32_s16(...) __builtin_sve_reinterpret_s32_s16(__VA_ARGS__)
-#define svreinterpret_s32_s32(...) __builtin_sve_reinterpret_s32_s32(__VA_ARGS__)
-#define svreinterpret_s32_s64(...) __builtin_sve_reinterpret_s32_s64(__VA_ARGS__)
-#define svreinterpret_s32_u8(...) __builtin_sve_reinterpret_s32_u8(__VA_ARGS__)
-#define svreinterpret_s32_u16(...) __builtin_sve_reinterpret_s32_u16(__VA_ARGS__)
-#define svreinterpret_s32_u32(...) __builtin_sve_reinterpret_s32_u32(__VA_ARGS__)
-#define svreinterpret_s32_u64(...) __builtin_sve_reinterpret_s32_u64(__VA_ARGS__)
-#define svreinterpret_s32_f16(...) __builtin_sve_reinterpret_s32_f16(__VA_ARGS__)
-#define svreinterpret_s32_bf16(...) __builtin_sve_reinterpret_s32_bf16(__VA_ARGS__)
-#define svreinterpret_s32_f32(...) __builtin_sve_reinterpret_s32_f32(__VA_ARGS__)
-#define svreinterpret_s32_f64(...) __builtin_sve_reinterpret_s32_f64(__VA_ARGS__)
-#define svreinterpret_s64_s8(...) __builtin_sve_reinterpret_s64_s8(__VA_ARGS__)
-#define svreinterpret_s64_s16(...) __builtin_sve_reinterpret_s64_s16(__VA_ARGS__)
-#define svreinterpret_s64_s32(...) __builtin_sve_reinterpret_s64_s32(__VA_ARGS__)
-#define svreinterpret_s64_s64(...) __builtin_sve_reinterpret_s64_s64(__VA_ARGS__)
-#define svreinterpret_s64_u8(...) __builtin_sve_reinterpret_s64_u8(__VA_ARGS__)
-#define svreinterpret_s64_u16(...) __builtin_sve_reinterpret_s64_u16(__VA_ARGS__)
-#define svreinterpret_s64_u32(...) __builtin_sve_reinterpret_s64_u32(__VA_ARGS__)
-#define svreinterpret_s64_u64(...) __builtin_sve_reinterpret_s64_u64(__VA_ARGS__)
-#define svreinterpret_s64_f16(...) __builtin_sve_reinterpret_s64_f16(__VA_ARGS__)
-#define svreinterpret_s64_bf16(...) __builtin_sve_reinterpret_s64_bf16(__VA_ARGS__)
-#define svreinterpret_s64_f32(...) __builtin_sve_reinterpret_s64_f32(__VA_ARGS__)
-#define svreinterpret_s64_f64(...) __builtin_sve_reinterpret_s64_f64(__VA_ARGS__)
#define svreinterpret_u8_s8(...) __builtin_sve_reinterpret_u8_s8(__VA_ARGS__)
-#define svreinterpret_u8_s16(...) __builtin_sve_reinterpret_u8_s16(__VA_ARGS__)
-#define svreinterpret_u8_s32(...) __builtin_sve_reinterpret_u8_s32(__VA_ARGS__)
-#define svreinterpret_u8_s64(...) __builtin_sve_reinterpret_u8_s64(__VA_ARGS__)
#define svreinterpret_u8_u8(...) __builtin_sve_reinterpret_u8_u8(__VA_ARGS__)
+#define svreinterpret_u8_s16(...) __builtin_sve_reinterpret_u8_s16(__VA_ARGS__)
#define svreinterpret_u8_u16(...) __builtin_sve_reinterpret_u8_u16(__VA_ARGS__)
+#define svreinterpret_u8_s32(...) __builtin_sve_reinterpret_u8_s32(__VA_ARGS__)
#define svreinterpret_u8_u32(...) __builtin_sve_reinterpret_u8_u32(__VA_ARGS__)
+#define svreinterpret_u8_s64(...) __builtin_sve_reinterpret_u8_s64(__VA_ARGS__)
#define svreinterpret_u8_u64(...) __builtin_sve_reinterpret_u8_u64(__VA_ARGS__)
#define svreinterpret_u8_f16(...) __builtin_sve_reinterpret_u8_f16(__VA_ARGS__)
#define svreinterpret_u8_bf16(...) __builtin_sve_reinterpret_u8_bf16(__VA_ARGS__)
#define svreinterpret_u8_f32(...) __builtin_sve_reinterpret_u8_f32(__VA_ARGS__)
#define svreinterpret_u8_f64(...) __builtin_sve_reinterpret_u8_f64(__VA_ARGS__)
+#define svreinterpret_s16_s8(...) __builtin_sve_reinterpret_s16_s8(__VA_ARGS__)
+#define svreinterpret_s16_u8(...) __builtin_sve_reinterpret_s16_u8(__VA_ARGS__)
+#define svreinterpret_s16_s16(...) __builtin_sve_reinterpret_s16_s16(__VA_ARGS__)
+#define svreinterpret_s16_u16(...) __builtin_sve_reinterpret_s16_u16(__VA_ARGS__)
+#define svreinterpret_s16_s32(...) __builtin_sve_reinterpret_s16_s32(__VA_ARGS__)
+#define svreinterpret_s16_u32(...) __builtin_sve_reinterpret_s16_u32(__VA_ARGS__)
+#define svreinterpret_s16_s64(...) __builtin_sve_reinterpret_s16_s64(__VA_ARGS__)
+#define svreinterpret_s16_u64(...) __builtin_sve_reinterpret_s16_u64(__VA_ARGS__)
+#define svreinterpret_s16_f16(...) __builtin_sve_reinterpret_s16_f16(__VA_ARGS__)
+#define svreinterpret_s16_bf16(...) __builtin_sve_reinterpret_s16_bf16(__VA_ARGS__)
+#define svreinterpret_s16_f32(...) __builtin_sve_reinterpret_s16_f32(__VA_ARGS__)
+#define svreinterpret_s16_f64(...) __builtin_sve_reinterpret_s16_f64(__VA_ARGS__)
#define svreinterpret_u16_s8(...) __builtin_sve_reinterpret_u16_s8(__VA_ARGS__)
-#define svreinterpret_u16_s16(...) __builtin_sve_reinterpret_u16_s16(__VA_ARGS__)
-#define svreinterpret_u16_s32(...) __builtin_sve_reinterpret_u16_s32(__VA_ARGS__)
-#define svreinterpret_u16_s64(...) __builtin_sve_reinterpret_u16_s64(__VA_ARGS__)
#define svreinterpret_u16_u8(...) __builtin_sve_reinterpret_u16_u8(__VA_ARGS__)
+#define svreinterpret_u16_s16(...) __builtin_sve_reinterpret_u16_s16(__VA_ARGS__)
#define svreinterpret_u16_u16(...) __builtin_sve_reinterpret_u16_u16(__VA_ARGS__)
+#define svreinterpret_u16_s32(...) __builtin_sve_reinterpret_u16_s32(__VA_ARGS__)
#define svreinterpret_u16_u32(...) __builtin_sve_reinterpret_u16_u32(__VA_ARGS__)
+#define svreinterpret_u16_s64(...) __builtin_sve_reinterpret_u16_s64(__VA_ARGS__)
#define svreinterpret_u16_u64(...) __builtin_sve_reinterpret_u16_u64(__VA_ARGS__)
#define svreinterpret_u16_f16(...) __builtin_sve_reinterpret_u16_f16(__VA_ARGS__)
#define svreinterpret_u16_bf16(...) __builtin_sve_reinterpret_u16_bf16(__VA_ARGS__)
#define svreinterpret_u16_f32(...) __builtin_sve_reinterpret_u16_f32(__VA_ARGS__)
#define svreinterpret_u16_f64(...) __builtin_sve_reinterpret_u16_f64(__VA_ARGS__)
+#define svreinterpret_s32_s8(...) __builtin_sve_reinterpret_s32_s8(__VA_ARGS__)
+#define svreinterpret_s32_u8(...) __builtin_sve_reinterpret_s32_u8(__VA_ARGS__)
+#define svreinterpret_s32_s16(...) __builtin_sve_reinterpret_s32_s16(__VA_ARGS__)
+#define svreinterpret_s32_u16(...) __builtin_sve_reinterpret_s32_u16(__VA_ARGS__)
+#define svreinterpret_s32_s32(...) __builtin_sve_reinterpret_s32_s32(__VA_ARGS__)
+#define svreinterpret_s32_u32(...) __builtin_sve_reinterpret_s32_u32(__VA_ARGS__)
+#define svreinterpret_s32_s64(...) __builtin_sve_reinterpret_s32_s64(__VA_ARGS__)
+#define svreinterpret_s32_u64(...) __builtin_sve_reinterpret_s32_u64(__VA_ARGS__)
+#define svreinterpret_s32_f16(...) __builtin_sve_reinterpret_s32_f16(__VA_ARGS__)
+#define svreinterpret_s32_bf16(...) __builtin_sve_reinterpret_s32_bf16(__VA_ARGS__)
+#define svreinterpret_s32_f32(...) __builtin_sve_reinterpret_s32_f32(__VA_ARGS__)
+#define svreinterpret_s32_f64(...) __builtin_sve_reinterpret_s32_f64(__VA_ARGS__)
#define svreinterpret_u32_s8(...) __builtin_sve_reinterpret_u32_s8(__VA_ARGS__)
-#define svreinterpret_u32_s16(...) __builtin_sve_reinterpret_u32_s16(__VA_ARGS__)
-#define svreinterpret_u32_s32(...) __builtin_sve_reinterpret_u32_s32(__VA_ARGS__)
-#define svreinterpret_u32_s64(...) __builtin_sve_reinterpret_u32_s64(__VA_ARGS__)
#define svreinterpret_u32_u8(...) __builtin_sve_reinterpret_u32_u8(__VA_ARGS__)
+#define svreinterpret_u32_s16(...) __builtin_sve_reinterpret_u32_s16(__VA_ARGS__)
#define svreinterpret_u32_u16(...) __builtin_sve_reinterpret_u32_u16(__VA_ARGS__)
+#define svreinterpret_u32_s32(...) __builtin_sve_reinterpret_u32_s32(__VA_ARGS__)
#define svreinterpret_u32_u32(...) __builtin_sve_reinterpret_u32_u32(__VA_ARGS__)
+#define svreinterpret_u32_s64(...) __builtin_sve_reinterpret_u32_s64(__VA_ARGS__)
#define svreinterpret_u32_u64(...) __builtin_sve_reinterpret_u32_u64(__VA_ARGS__)
#define svreinterpret_u32_f16(...) __builtin_sve_reinterpret_u32_f16(__VA_ARGS__)
#define svreinterpret_u32_bf16(...) __builtin_sve_reinterpret_u32_bf16(__VA_ARGS__)
#define svreinterpret_u32_f32(...) __builtin_sve_reinterpret_u32_f32(__VA_ARGS__)
#define svreinterpret_u32_f64(...) __builtin_sve_reinterpret_u32_f64(__VA_ARGS__)
+#define svreinterpret_s64_s8(...) __builtin_sve_reinterpret_s64_s8(__VA_ARGS__)
+#define svreinterpret_s64_u8(...) __builtin_sve_reinterpret_s64_u8(__VA_ARGS__)
+#define svreinterpret_s64_s16(...) __builtin_sve_reinterpret_s64_s16(__VA_ARGS__)
+#define svreinterpret_s64_u16(...) __builtin_sve_reinterpret_s64_u16(__VA_ARGS__)
+#define svreinterpret_s64_s32(...) __builtin_sve_reinterpret_s64_s32(__VA_ARGS__)
+#define svreinterpret_s64_u32(...) __builtin_sve_reinterpret_s64_u32(__VA_ARGS__)
+#define svreinterpret_s64_s64(...) __builtin_sve_reinterpret_s64_s64(__VA_ARGS__)
+#define svreinterpret_s64_u64(...) __builtin_sve_reinterpret_s64_u64(__VA_ARGS__)
+#define svreinterpret_s64_f16(...) __builtin_sve_reinterpret_s64_f16(__VA_ARGS__)
+#define svreinterpret_s64_bf16(...) __builtin_sve_reinterpret_s64_bf16(__VA_ARGS__)
+#define svreinterpret_s64_f32(...) __builtin_sve_reinterpret_s64_f32(__VA_ARGS__)
+#define svreinterpret_s64_f64(...) __builtin_sve_reinterpret_s64_f64(__VA_ARGS__)
#define svreinterpret_u64_s8(...) __builtin_sve_reinterpret_u64_s8(__VA_ARGS__)
-#define svreinterpret_u64_s16(...) __builtin_sve_reinterpret_u64_s16(__VA_ARGS__)
-#define svreinterpret_u64_s32(...) __builtin_sve_reinterpret_u64_s32(__VA_ARGS__)
-#define svreinterpret_u64_s64(...) __builtin_sve_reinterpret_u64_s64(__VA_ARGS__)
#define svreinterpret_u64_u8(...) __builtin_sve_reinterpret_u64_u8(__VA_ARGS__)
+#define svreinterpret_u64_s16(...) __builtin_sve_reinterpret_u64_s16(__VA_ARGS__)
#define svreinterpret_u64_u16(...) __builtin_sve_reinterpret_u64_u16(__VA_ARGS__)
+#define svreinterpret_u64_s32(...) __builtin_sve_reinterpret_u64_s32(__VA_ARGS__)
#define svreinterpret_u64_u32(...) __builtin_sve_reinterpret_u64_u32(__VA_ARGS__)
+#define svreinterpret_u64_s64(...) __builtin_sve_reinterpret_u64_s64(__VA_ARGS__)
#define svreinterpret_u64_u64(...) __builtin_sve_reinterpret_u64_u64(__VA_ARGS__)
#define svreinterpret_u64_f16(...) __builtin_sve_reinterpret_u64_f16(__VA_ARGS__)
#define svreinterpret_u64_bf16(...) __builtin_sve_reinterpret_u64_bf16(__VA_ARGS__)
#define svreinterpret_u64_f32(...) __builtin_sve_reinterpret_u64_f32(__VA_ARGS__)
#define svreinterpret_u64_f64(...) __builtin_sve_reinterpret_u64_f64(__VA_ARGS__)
#define svreinterpret_f16_s8(...) __builtin_sve_reinterpret_f16_s8(__VA_ARGS__)
-#define svreinterpret_f16_s16(...) __builtin_sve_reinterpret_f16_s16(__VA_ARGS__)
-#define svreinterpret_f16_s32(...) __builtin_sve_reinterpret_f16_s32(__VA_ARGS__)
-#define svreinterpret_f16_s64(...) __builtin_sve_reinterpret_f16_s64(__VA_ARGS__)
#define svreinterpret_f16_u8(...) __builtin_sve_reinterpret_f16_u8(__VA_ARGS__)
+#define svreinterpret_f16_s16(...) __builtin_sve_reinterpret_f16_s16(__VA_ARGS__)
#define svreinterpret_f16_u16(...) __builtin_sve_reinterpret_f16_u16(__VA_ARGS__)
+#define svreinterpret_f16_s32(...) __builtin_sve_reinterpret_f16_s32(__VA_ARGS__)
#define svreinterpret_f16_u32(...) __builtin_sve_reinterpret_f16_u32(__VA_ARGS__)
+#define svreinterpret_f16_s64(...) __builtin_sve_reinterpret_f16_s64(__VA_ARGS__)
#define svreinterpret_f16_u64(...) __builtin_sve_reinterpret_f16_u64(__VA_ARGS__)
#define svreinterpret_f16_f16(...) __builtin_sve_reinterpret_f16_f16(__VA_ARGS__)
#define svreinterpret_f16_bf16(...) __builtin_sve_reinterpret_f16_bf16(__VA_ARGS__)
#define svreinterpret_f16_f32(...) __builtin_sve_reinterpret_f16_f32(__VA_ARGS__)
#define svreinterpret_f16_f64(...) __builtin_sve_reinterpret_f16_f64(__VA_ARGS__)
#define svreinterpret_bf16_s8(...) __builtin_sve_reinterpret_bf16_s8(__VA_ARGS__)
-#define svreinterpret_bf16_s16(...) __builtin_sve_reinterpret_bf16_s16(__VA_ARGS__)
-#define svreinterpret_bf16_s32(...) __builtin_sve_reinterpret_bf16_s32(__VA_ARGS__)
-#define svreinterpret_bf16_s64(...) __builtin_sve_reinterpret_bf16_s64(__VA_ARGS__)
#define svreinterpret_bf16_u8(...) __builtin_sve_reinterpret_bf16_u8(__VA_ARGS__)
+#define svreinterpret_bf16_s16(...) __builtin_sve_reinterpret_bf16_s16(__VA_ARGS__)
#define svreinterpret_bf16_u16(...) __builtin_sve_reinterpret_bf16_u16(__VA_ARGS__)
+#define svreinterpret_bf16_s32(...) __builtin_sve_reinterpret_bf16_s32(__VA_ARGS__)
#define svreinterpret_bf16_u32(...) __builtin_sve_reinterpret_bf16_u32(__VA_ARGS__)
+#define svreinterpret_bf16_s64(...) __builtin_sve_reinterpret_bf16_s64(__VA_ARGS__)
#define svreinterpret_bf16_u64(...) __builtin_sve_reinterpret_bf16_u64(__VA_ARGS__)
#define svreinterpret_bf16_f16(...) __builtin_sve_reinterpret_bf16_f16(__VA_ARGS__)
#define svreinterpret_bf16_bf16(...) __builtin_sve_reinterpret_bf16_bf16(__VA_ARGS__)
#define svreinterpret_bf16_f32(...) __builtin_sve_reinterpret_bf16_f32(__VA_ARGS__)
#define svreinterpret_bf16_f64(...) __builtin_sve_reinterpret_bf16_f64(__VA_ARGS__)
#define svreinterpret_f32_s8(...) __builtin_sve_reinterpret_f32_s8(__VA_ARGS__)
-#define svreinterpret_f32_s16(...) __builtin_sve_reinterpret_f32_s16(__VA_ARGS__)
-#define svreinterpret_f32_s32(...) __builtin_sve_reinterpret_f32_s32(__VA_ARGS__)
-#define svreinterpret_f32_s64(...) __builtin_sve_reinterpret_f32_s64(__VA_ARGS__)
#define svreinterpret_f32_u8(...) __builtin_sve_reinterpret_f32_u8(__VA_ARGS__)
+#define svreinterpret_f32_s16(...) __builtin_sve_reinterpret_f32_s16(__VA_ARGS__)
#define svreinterpret_f32_u16(...) __builtin_sve_reinterpret_f32_u16(__VA_ARGS__)
+#define svreinterpret_f32_s32(...) __builtin_sve_reinterpret_f32_s32(__VA_ARGS__)
#define svreinterpret_f32_u32(...) __builtin_sve_reinterpret_f32_u32(__VA_ARGS__)
+#define svreinterpret_f32_s64(...) __builtin_sve_reinterpret_f32_s64(__VA_ARGS__)
#define svreinterpret_f32_u64(...) __builtin_sve_reinterpret_f32_u64(__VA_ARGS__)
#define svreinterpret_f32_f16(...) __builtin_sve_reinterpret_f32_f16(__VA_ARGS__)
#define svreinterpret_f32_bf16(...) __builtin_sve_reinterpret_f32_bf16(__VA_ARGS__)
#define svreinterpret_f32_f32(...) __builtin_sve_reinterpret_f32_f32(__VA_ARGS__)
#define svreinterpret_f32_f64(...) __builtin_sve_reinterpret_f32_f64(__VA_ARGS__)
#define svreinterpret_f64_s8(...) __builtin_sve_reinterpret_f64_s8(__VA_ARGS__)
-#define svreinterpret_f64_s16(...) __builtin_sve_reinterpret_f64_s16(__VA_ARGS__)
-#define svreinterpret_f64_s32(...) __builtin_sve_reinterpret_f64_s32(__VA_ARGS__)
-#define svreinterpret_f64_s64(...) __builtin_sve_reinterpret_f64_s64(__VA_ARGS__)
#define svreinterpret_f64_u8(...) __builtin_sve_reinterpret_f64_u8(__VA_ARGS__)
+#define svreinterpret_f64_s16(...) __builtin_sve_reinterpret_f64_s16(__VA_ARGS__)
#define svreinterpret_f64_u16(...) __builtin_sve_reinterpret_f64_u16(__VA_ARGS__)
+#define svreinterpret_f64_s32(...) __builtin_sve_reinterpret_f64_s32(__VA_ARGS__)
#define svreinterpret_f64_u32(...) __builtin_sve_reinterpret_f64_u32(__VA_ARGS__)
+#define svreinterpret_f64_s64(...) __builtin_sve_reinterpret_f64_s64(__VA_ARGS__)
#define svreinterpret_f64_u64(...) __builtin_sve_reinterpret_f64_u64(__VA_ARGS__)
#define svreinterpret_f64_f16(...) __builtin_sve_reinterpret_f64_f16(__VA_ARGS__)
#define svreinterpret_f64_bf16(...) __builtin_sve_reinterpret_f64_bf16(__VA_ARGS__)
#define svreinterpret_f64_f32(...) __builtin_sve_reinterpret_f64_f32(__VA_ARGS__)
#define svreinterpret_f64_f64(...) __builtin_sve_reinterpret_f64_f64(__VA_ARGS__)
-__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint8_t op) {
+__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint8_t op) __arm_streaming_compatible {
return __builtin_sve_reinterpret_s8_s8(op);
}
-__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint16_t op) {
- return __builtin_sve_reinterpret_s8_s16(op);
+__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u8(op);
+}
+
+__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s16(op);
+}
+
+__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u16(op);
+}
+
+__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s32(op);
+}
+
+__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u32(op);
+}
+
+__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s64(op);
+}
+
+__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u64(op);
+}
+
+__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_f16(op);
+}
+
+__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svbfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_bf16(op);
+}
+
+__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_f32(op);
+}
+
+__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_f64(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s8(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u8(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s32(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u32(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s64(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u64(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_f16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svbfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_bf16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_f32(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_f64(op);
+}
+
+__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s8(op);
+}
+
+__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u8(op);
+}
+
+__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s16(op);
+}
+
+__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u16(op);
+}
+
+__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s32(op);
+}
+
+__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u32(op);
+}
+
+__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s64(op);
+}
+
+__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u64(op);
+}
+
+__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_f16(op);
+}
+
+__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svbfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_bf16(op);
+}
+
+__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_f32(op);
+}
+
+__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_f64(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s8(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u8(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s32(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u32(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s64(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u64(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_f16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svbfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_bf16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_f32(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_f64(op);
+}
+
+__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s8(op);
+}
+
+__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u8(op);
+}
+
+__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s16(op);
+}
+
+__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u16(op);
+}
+
+__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s32(op);
+}
+
+__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u32(op);
+}
+
+__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s64(op);
+}
+
+__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u64(op);
+}
+
+__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_f16(op);
+}
+
+__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svbfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_bf16(op);
+}
+
+__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_f32(op);
+}
+
+__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_f64(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s8(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u8(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s32(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u32(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s64(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u64(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_f16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svbfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_bf16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_f32(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_f64(op);
+}
+
+__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s8(op);
+}
+
+__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u8(op);
+}
+
+__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s16(op);
+}
+
+__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u16(op);
+}
+
+__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s32(op);
+}
+
+__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u32(op);
+}
+
+__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s64(op);
+}
+
+__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u64(op);
+}
+
+__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_f16(op);
+}
+
+__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svbfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_bf16(op);
+}
+
+__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_f32(op);
+}
+
+__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_f64(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s8(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u8(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s32(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u32(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s64(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u64(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_f16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svbfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_bf16(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_f32(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_f64(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s8(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u8(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s16(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u16(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s32(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u32(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s64(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u64(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_f16(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svbfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_bf16(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_f32(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_f64(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s8(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u8(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s16(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u16(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s32(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u32(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s64(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u64(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_f16(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svbfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_bf16(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_f32(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_f64(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s8(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u8(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s16(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u16(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s32(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u32(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s64(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u64(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_f16(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svbfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_bf16(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_f32(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_f64(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s8(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint8_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u8(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s16(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u16(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s32(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u32(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s64(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u64(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_f16(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svbfloat16_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_bf16(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat32_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_f32(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat64_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_f64(op);
+}
+
+#define svreinterpret_s8_s8_x2(...) __builtin_sve_reinterpret_s8_s8_x2(__VA_ARGS__)
+#define svreinterpret_s8_u8_x2(...) __builtin_sve_reinterpret_s8_u8_x2(__VA_ARGS__)
+#define svreinterpret_s8_s16_x2(...) __builtin_sve_reinterpret_s8_s16_x2(__VA_ARGS__)
+#define svreinterpret_s8_u16_x2(...) __builtin_sve_reinterpret_s8_u16_x2(__VA_ARGS__)
+#define svreinterpret_s8_s32_x2(...) __builtin_sve_reinterpret_s8_s32_x2(__VA_ARGS__)
+#define svreinterpret_s8_u32_x2(...) __builtin_sve_reinterpret_s8_u32_x2(__VA_ARGS__)
+#define svreinterpret_s8_s64_x2(...) __builtin_sve_reinterpret_s8_s64_x2(__VA_ARGS__)
+#define svreinterpret_s8_u64_x2(...) __builtin_sve_reinterpret_s8_u64_x2(__VA_ARGS__)
+#define svreinterpret_s8_f16_x2(...) __builtin_sve_reinterpret_s8_f16_x2(__VA_ARGS__)
+#define svreinterpret_s8_bf16_x2(...) __builtin_sve_reinterpret_s8_bf16_x2(__VA_ARGS__)
+#define svreinterpret_s8_f32_x2(...) __builtin_sve_reinterpret_s8_f32_x2(__VA_ARGS__)
+#define svreinterpret_s8_f64_x2(...) __builtin_sve_reinterpret_s8_f64_x2(__VA_ARGS__)
+#define svreinterpret_u8_s8_x2(...) __builtin_sve_reinterpret_u8_s8_x2(__VA_ARGS__)
+#define svreinterpret_u8_u8_x2(...) __builtin_sve_reinterpret_u8_u8_x2(__VA_ARGS__)
+#define svreinterpret_u8_s16_x2(...) __builtin_sve_reinterpret_u8_s16_x2(__VA_ARGS__)
+#define svreinterpret_u8_u16_x2(...) __builtin_sve_reinterpret_u8_u16_x2(__VA_ARGS__)
+#define svreinterpret_u8_s32_x2(...) __builtin_sve_reinterpret_u8_s32_x2(__VA_ARGS__)
+#define svreinterpret_u8_u32_x2(...) __builtin_sve_reinterpret_u8_u32_x2(__VA_ARGS__)
+#define svreinterpret_u8_s64_x2(...) __builtin_sve_reinterpret_u8_s64_x2(__VA_ARGS__)
+#define svreinterpret_u8_u64_x2(...) __builtin_sve_reinterpret_u8_u64_x2(__VA_ARGS__)
+#define svreinterpret_u8_f16_x2(...) __builtin_sve_reinterpret_u8_f16_x2(__VA_ARGS__)
+#define svreinterpret_u8_bf16_x2(...) __builtin_sve_reinterpret_u8_bf16_x2(__VA_ARGS__)
+#define svreinterpret_u8_f32_x2(...) __builtin_sve_reinterpret_u8_f32_x2(__VA_ARGS__)
+#define svreinterpret_u8_f64_x2(...) __builtin_sve_reinterpret_u8_f64_x2(__VA_ARGS__)
+#define svreinterpret_s16_s8_x2(...) __builtin_sve_reinterpret_s16_s8_x2(__VA_ARGS__)
+#define svreinterpret_s16_u8_x2(...) __builtin_sve_reinterpret_s16_u8_x2(__VA_ARGS__)
+#define svreinterpret_s16_s16_x2(...) __builtin_sve_reinterpret_s16_s16_x2(__VA_ARGS__)
+#define svreinterpret_s16_u16_x2(...) __builtin_sve_reinterpret_s16_u16_x2(__VA_ARGS__)
+#define svreinterpret_s16_s32_x2(...) __builtin_sve_reinterpret_s16_s32_x2(__VA_ARGS__)
+#define svreinterpret_s16_u32_x2(...) __builtin_sve_reinterpret_s16_u32_x2(__VA_ARGS__)
+#define svreinterpret_s16_s64_x2(...) __builtin_sve_reinterpret_s16_s64_x2(__VA_ARGS__)
+#define svreinterpret_s16_u64_x2(...) __builtin_sve_reinterpret_s16_u64_x2(__VA_ARGS__)
+#define svreinterpret_s16_f16_x2(...) __builtin_sve_reinterpret_s16_f16_x2(__VA_ARGS__)
+#define svreinterpret_s16_bf16_x2(...) __builtin_sve_reinterpret_s16_bf16_x2(__VA_ARGS__)
+#define svreinterpret_s16_f32_x2(...) __builtin_sve_reinterpret_s16_f32_x2(__VA_ARGS__)
+#define svreinterpret_s16_f64_x2(...) __builtin_sve_reinterpret_s16_f64_x2(__VA_ARGS__)
+#define svreinterpret_u16_s8_x2(...) __builtin_sve_reinterpret_u16_s8_x2(__VA_ARGS__)
+#define svreinterpret_u16_u8_x2(...) __builtin_sve_reinterpret_u16_u8_x2(__VA_ARGS__)
+#define svreinterpret_u16_s16_x2(...) __builtin_sve_reinterpret_u16_s16_x2(__VA_ARGS__)
+#define svreinterpret_u16_u16_x2(...) __builtin_sve_reinterpret_u16_u16_x2(__VA_ARGS__)
+#define svreinterpret_u16_s32_x2(...) __builtin_sve_reinterpret_u16_s32_x2(__VA_ARGS__)
+#define svreinterpret_u16_u32_x2(...) __builtin_sve_reinterpret_u16_u32_x2(__VA_ARGS__)
+#define svreinterpret_u16_s64_x2(...) __builtin_sve_reinterpret_u16_s64_x2(__VA_ARGS__)
+#define svreinterpret_u16_u64_x2(...) __builtin_sve_reinterpret_u16_u64_x2(__VA_ARGS__)
+#define svreinterpret_u16_f16_x2(...) __builtin_sve_reinterpret_u16_f16_x2(__VA_ARGS__)
+#define svreinterpret_u16_bf16_x2(...) __builtin_sve_reinterpret_u16_bf16_x2(__VA_ARGS__)
+#define svreinterpret_u16_f32_x2(...) __builtin_sve_reinterpret_u16_f32_x2(__VA_ARGS__)
+#define svreinterpret_u16_f64_x2(...) __builtin_sve_reinterpret_u16_f64_x2(__VA_ARGS__)
+#define svreinterpret_s32_s8_x2(...) __builtin_sve_reinterpret_s32_s8_x2(__VA_ARGS__)
+#define svreinterpret_s32_u8_x2(...) __builtin_sve_reinterpret_s32_u8_x2(__VA_ARGS__)
+#define svreinterpret_s32_s16_x2(...) __builtin_sve_reinterpret_s32_s16_x2(__VA_ARGS__)
+#define svreinterpret_s32_u16_x2(...) __builtin_sve_reinterpret_s32_u16_x2(__VA_ARGS__)
+#define svreinterpret_s32_s32_x2(...) __builtin_sve_reinterpret_s32_s32_x2(__VA_ARGS__)
+#define svreinterpret_s32_u32_x2(...) __builtin_sve_reinterpret_s32_u32_x2(__VA_ARGS__)
+#define svreinterpret_s32_s64_x2(...) __builtin_sve_reinterpret_s32_s64_x2(__VA_ARGS__)
+#define svreinterpret_s32_u64_x2(...) __builtin_sve_reinterpret_s32_u64_x2(__VA_ARGS__)
+#define svreinterpret_s32_f16_x2(...) __builtin_sve_reinterpret_s32_f16_x2(__VA_ARGS__)
+#define svreinterpret_s32_bf16_x2(...) __builtin_sve_reinterpret_s32_bf16_x2(__VA_ARGS__)
+#define svreinterpret_s32_f32_x2(...) __builtin_sve_reinterpret_s32_f32_x2(__VA_ARGS__)
+#define svreinterpret_s32_f64_x2(...) __builtin_sve_reinterpret_s32_f64_x2(__VA_ARGS__)
+#define svreinterpret_u32_s8_x2(...) __builtin_sve_reinterpret_u32_s8_x2(__VA_ARGS__)
+#define svreinterpret_u32_u8_x2(...) __builtin_sve_reinterpret_u32_u8_x2(__VA_ARGS__)
+#define svreinterpret_u32_s16_x2(...) __builtin_sve_reinterpret_u32_s16_x2(__VA_ARGS__)
+#define svreinterpret_u32_u16_x2(...) __builtin_sve_reinterpret_u32_u16_x2(__VA_ARGS__)
+#define svreinterpret_u32_s32_x2(...) __builtin_sve_reinterpret_u32_s32_x2(__VA_ARGS__)
+#define svreinterpret_u32_u32_x2(...) __builtin_sve_reinterpret_u32_u32_x2(__VA_ARGS__)
+#define svreinterpret_u32_s64_x2(...) __builtin_sve_reinterpret_u32_s64_x2(__VA_ARGS__)
+#define svreinterpret_u32_u64_x2(...) __builtin_sve_reinterpret_u32_u64_x2(__VA_ARGS__)
+#define svreinterpret_u32_f16_x2(...) __builtin_sve_reinterpret_u32_f16_x2(__VA_ARGS__)
+#define svreinterpret_u32_bf16_x2(...) __builtin_sve_reinterpret_u32_bf16_x2(__VA_ARGS__)
+// Reinterpret-cast macro table for two-vector SVE tuples (the `_x2` forms):
+// each macro maps the public ACLE name svreinterpret_<dst>_<src>_x2 onto the
+// matching Clang builtin. The underlying register contents are unchanged;
+// only the element type of the tuple is reinterpreted.
+// NOTE(review): this table appears to be auto-generated - do not edit by hand.
+#define svreinterpret_u32_f32_x2(...) __builtin_sve_reinterpret_u32_f32_x2(__VA_ARGS__)
+#define svreinterpret_u32_f64_x2(...) __builtin_sve_reinterpret_u32_f64_x2(__VA_ARGS__)
+#define svreinterpret_s64_s8_x2(...) __builtin_sve_reinterpret_s64_s8_x2(__VA_ARGS__)
+#define svreinterpret_s64_u8_x2(...) __builtin_sve_reinterpret_s64_u8_x2(__VA_ARGS__)
+#define svreinterpret_s64_s16_x2(...) __builtin_sve_reinterpret_s64_s16_x2(__VA_ARGS__)
+#define svreinterpret_s64_u16_x2(...) __builtin_sve_reinterpret_s64_u16_x2(__VA_ARGS__)
+#define svreinterpret_s64_s32_x2(...) __builtin_sve_reinterpret_s64_s32_x2(__VA_ARGS__)
+#define svreinterpret_s64_u32_x2(...) __builtin_sve_reinterpret_s64_u32_x2(__VA_ARGS__)
+#define svreinterpret_s64_s64_x2(...) __builtin_sve_reinterpret_s64_s64_x2(__VA_ARGS__)
+#define svreinterpret_s64_u64_x2(...) __builtin_sve_reinterpret_s64_u64_x2(__VA_ARGS__)
+#define svreinterpret_s64_f16_x2(...) __builtin_sve_reinterpret_s64_f16_x2(__VA_ARGS__)
+#define svreinterpret_s64_bf16_x2(...) __builtin_sve_reinterpret_s64_bf16_x2(__VA_ARGS__)
+#define svreinterpret_s64_f32_x2(...) __builtin_sve_reinterpret_s64_f32_x2(__VA_ARGS__)
+#define svreinterpret_s64_f64_x2(...) __builtin_sve_reinterpret_s64_f64_x2(__VA_ARGS__)
+#define svreinterpret_u64_s8_x2(...) __builtin_sve_reinterpret_u64_s8_x2(__VA_ARGS__)
+#define svreinterpret_u64_u8_x2(...) __builtin_sve_reinterpret_u64_u8_x2(__VA_ARGS__)
+#define svreinterpret_u64_s16_x2(...) __builtin_sve_reinterpret_u64_s16_x2(__VA_ARGS__)
+#define svreinterpret_u64_u16_x2(...) __builtin_sve_reinterpret_u64_u16_x2(__VA_ARGS__)
+#define svreinterpret_u64_s32_x2(...) __builtin_sve_reinterpret_u64_s32_x2(__VA_ARGS__)
+#define svreinterpret_u64_u32_x2(...) __builtin_sve_reinterpret_u64_u32_x2(__VA_ARGS__)
+#define svreinterpret_u64_s64_x2(...) __builtin_sve_reinterpret_u64_s64_x2(__VA_ARGS__)
+#define svreinterpret_u64_u64_x2(...) __builtin_sve_reinterpret_u64_u64_x2(__VA_ARGS__)
+#define svreinterpret_u64_f16_x2(...) __builtin_sve_reinterpret_u64_f16_x2(__VA_ARGS__)
+#define svreinterpret_u64_bf16_x2(...) __builtin_sve_reinterpret_u64_bf16_x2(__VA_ARGS__)
+#define svreinterpret_u64_f32_x2(...) __builtin_sve_reinterpret_u64_f32_x2(__VA_ARGS__)
+#define svreinterpret_u64_f64_x2(...) __builtin_sve_reinterpret_u64_f64_x2(__VA_ARGS__)
+#define svreinterpret_f16_s8_x2(...) __builtin_sve_reinterpret_f16_s8_x2(__VA_ARGS__)
+#define svreinterpret_f16_u8_x2(...) __builtin_sve_reinterpret_f16_u8_x2(__VA_ARGS__)
+#define svreinterpret_f16_s16_x2(...) __builtin_sve_reinterpret_f16_s16_x2(__VA_ARGS__)
+#define svreinterpret_f16_u16_x2(...) __builtin_sve_reinterpret_f16_u16_x2(__VA_ARGS__)
+#define svreinterpret_f16_s32_x2(...) __builtin_sve_reinterpret_f16_s32_x2(__VA_ARGS__)
+#define svreinterpret_f16_u32_x2(...) __builtin_sve_reinterpret_f16_u32_x2(__VA_ARGS__)
+#define svreinterpret_f16_s64_x2(...) __builtin_sve_reinterpret_f16_s64_x2(__VA_ARGS__)
+#define svreinterpret_f16_u64_x2(...) __builtin_sve_reinterpret_f16_u64_x2(__VA_ARGS__)
+#define svreinterpret_f16_f16_x2(...) __builtin_sve_reinterpret_f16_f16_x2(__VA_ARGS__)
+#define svreinterpret_f16_bf16_x2(...) __builtin_sve_reinterpret_f16_bf16_x2(__VA_ARGS__)
+#define svreinterpret_f16_f32_x2(...) __builtin_sve_reinterpret_f16_f32_x2(__VA_ARGS__)
+#define svreinterpret_f16_f64_x2(...) __builtin_sve_reinterpret_f16_f64_x2(__VA_ARGS__)
+#define svreinterpret_bf16_s8_x2(...) __builtin_sve_reinterpret_bf16_s8_x2(__VA_ARGS__)
+#define svreinterpret_bf16_u8_x2(...) __builtin_sve_reinterpret_bf16_u8_x2(__VA_ARGS__)
+#define svreinterpret_bf16_s16_x2(...) __builtin_sve_reinterpret_bf16_s16_x2(__VA_ARGS__)
+#define svreinterpret_bf16_u16_x2(...) __builtin_sve_reinterpret_bf16_u16_x2(__VA_ARGS__)
+#define svreinterpret_bf16_s32_x2(...) __builtin_sve_reinterpret_bf16_s32_x2(__VA_ARGS__)
+#define svreinterpret_bf16_u32_x2(...) __builtin_sve_reinterpret_bf16_u32_x2(__VA_ARGS__)
+#define svreinterpret_bf16_s64_x2(...) __builtin_sve_reinterpret_bf16_s64_x2(__VA_ARGS__)
+#define svreinterpret_bf16_u64_x2(...) __builtin_sve_reinterpret_bf16_u64_x2(__VA_ARGS__)
+#define svreinterpret_bf16_f16_x2(...) __builtin_sve_reinterpret_bf16_f16_x2(__VA_ARGS__)
+#define svreinterpret_bf16_bf16_x2(...) __builtin_sve_reinterpret_bf16_bf16_x2(__VA_ARGS__)
+#define svreinterpret_bf16_f32_x2(...) __builtin_sve_reinterpret_bf16_f32_x2(__VA_ARGS__)
+#define svreinterpret_bf16_f64_x2(...) __builtin_sve_reinterpret_bf16_f64_x2(__VA_ARGS__)
+#define svreinterpret_f32_s8_x2(...) __builtin_sve_reinterpret_f32_s8_x2(__VA_ARGS__)
+#define svreinterpret_f32_u8_x2(...) __builtin_sve_reinterpret_f32_u8_x2(__VA_ARGS__)
+#define svreinterpret_f32_s16_x2(...) __builtin_sve_reinterpret_f32_s16_x2(__VA_ARGS__)
+#define svreinterpret_f32_u16_x2(...) __builtin_sve_reinterpret_f32_u16_x2(__VA_ARGS__)
+#define svreinterpret_f32_s32_x2(...) __builtin_sve_reinterpret_f32_s32_x2(__VA_ARGS__)
+#define svreinterpret_f32_u32_x2(...) __builtin_sve_reinterpret_f32_u32_x2(__VA_ARGS__)
+#define svreinterpret_f32_s64_x2(...) __builtin_sve_reinterpret_f32_s64_x2(__VA_ARGS__)
+#define svreinterpret_f32_u64_x2(...) __builtin_sve_reinterpret_f32_u64_x2(__VA_ARGS__)
+#define svreinterpret_f32_f16_x2(...) __builtin_sve_reinterpret_f32_f16_x2(__VA_ARGS__)
+#define svreinterpret_f32_bf16_x2(...) __builtin_sve_reinterpret_f32_bf16_x2(__VA_ARGS__)
+#define svreinterpret_f32_f32_x2(...) __builtin_sve_reinterpret_f32_f32_x2(__VA_ARGS__)
+#define svreinterpret_f32_f64_x2(...) __builtin_sve_reinterpret_f32_f64_x2(__VA_ARGS__)
+#define svreinterpret_f64_s8_x2(...) __builtin_sve_reinterpret_f64_s8_x2(__VA_ARGS__)
+#define svreinterpret_f64_u8_x2(...) __builtin_sve_reinterpret_f64_u8_x2(__VA_ARGS__)
+#define svreinterpret_f64_s16_x2(...) __builtin_sve_reinterpret_f64_s16_x2(__VA_ARGS__)
+#define svreinterpret_f64_u16_x2(...) __builtin_sve_reinterpret_f64_u16_x2(__VA_ARGS__)
+#define svreinterpret_f64_s32_x2(...) __builtin_sve_reinterpret_f64_s32_x2(__VA_ARGS__)
+#define svreinterpret_f64_u32_x2(...) __builtin_sve_reinterpret_f64_u32_x2(__VA_ARGS__)
+#define svreinterpret_f64_s64_x2(...) __builtin_sve_reinterpret_f64_s64_x2(__VA_ARGS__)
+#define svreinterpret_f64_u64_x2(...) __builtin_sve_reinterpret_f64_u64_x2(__VA_ARGS__)
+#define svreinterpret_f64_f16_x2(...) __builtin_sve_reinterpret_f64_f16_x2(__VA_ARGS__)
+#define svreinterpret_f64_bf16_x2(...) __builtin_sve_reinterpret_f64_bf16_x2(__VA_ARGS__)
+#define svreinterpret_f64_f32_x2(...) __builtin_sve_reinterpret_f64_f32_x2(__VA_ARGS__)
+#define svreinterpret_f64_f64_x2(...) __builtin_sve_reinterpret_f64_f64_x2(__VA_ARGS__)
+// Overloaded C++ convenience forms of svreinterpret for two-vector tuples.
+// Each overload simply forwards its argument to the type-specific builtin;
+// the destination type is selected by the function name (svreinterpret_<dst>)
+// and the source type by overload resolution on the parameter. All overloads
+// carry target("sve") and are declared __arm_streaming_compatible, so they
+// can be called regardless of streaming mode.
+// NOTE(review): auto-generated table (12 destination types x 12 source
+// types) - do not edit individual entries by hand.
+__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svuint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svuint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svuint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svuint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_f16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svbfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_bf16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svfloat32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_f32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svfloat64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_f64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svuint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svuint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svuint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svuint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_f16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svbfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_bf16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svfloat32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_f32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svfloat64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_f64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svuint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svuint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svuint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svuint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_f16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svbfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_bf16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svfloat32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_f32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svfloat64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_f64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svuint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svuint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svuint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svuint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_f16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svbfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_bf16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svfloat32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_f32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svfloat64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_f64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svuint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svuint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svuint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svuint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_f16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svbfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_bf16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svfloat32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_f32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svfloat64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_f64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svuint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svuint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svuint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svuint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_f16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svbfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_bf16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svfloat32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_f32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svfloat64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_f64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svuint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svuint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svuint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svuint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_f16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svbfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_bf16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svfloat32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_f32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svfloat64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_f64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svuint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svuint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svuint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svuint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_f16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svbfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_bf16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svfloat32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_f32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svfloat64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_f64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svuint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svuint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svuint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svuint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_f16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svbfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_bf16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svfloat32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_f32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svfloat64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_f64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svuint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svuint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svuint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svuint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_f16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svbfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_bf16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svfloat32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_f32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svfloat64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_f64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svuint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svuint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svuint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svuint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_f16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svbfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_bf16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svfloat32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_f32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svfloat64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_f64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svuint8x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u8_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svuint16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svuint32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svuint64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u64_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_f16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svbfloat16x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_bf16_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svfloat32x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_f32_x2(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svfloat64x2_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_f64_x2(op);
+}
+
+// Same reinterpret-cast macro table as above, but for three-vector (x3)
+// tuples: svreinterpret_<dst>_<src>_x3 maps onto the matching Clang builtin
+// without changing the stored bit patterns.
+// NOTE(review): auto-generated; the x3 table continues beyond this hunk.
+#define svreinterpret_s8_s8_x3(...) __builtin_sve_reinterpret_s8_s8_x3(__VA_ARGS__)
+#define svreinterpret_s8_u8_x3(...) __builtin_sve_reinterpret_s8_u8_x3(__VA_ARGS__)
+#define svreinterpret_s8_s16_x3(...) __builtin_sve_reinterpret_s8_s16_x3(__VA_ARGS__)
+#define svreinterpret_s8_u16_x3(...) __builtin_sve_reinterpret_s8_u16_x3(__VA_ARGS__)
+#define svreinterpret_s8_s32_x3(...) __builtin_sve_reinterpret_s8_s32_x3(__VA_ARGS__)
+#define svreinterpret_s8_u32_x3(...) __builtin_sve_reinterpret_s8_u32_x3(__VA_ARGS__)
+#define svreinterpret_s8_s64_x3(...) __builtin_sve_reinterpret_s8_s64_x3(__VA_ARGS__)
+#define svreinterpret_s8_u64_x3(...) __builtin_sve_reinterpret_s8_u64_x3(__VA_ARGS__)
+#define svreinterpret_s8_f16_x3(...) __builtin_sve_reinterpret_s8_f16_x3(__VA_ARGS__)
+#define svreinterpret_s8_bf16_x3(...) __builtin_sve_reinterpret_s8_bf16_x3(__VA_ARGS__)
+#define svreinterpret_s8_f32_x3(...) __builtin_sve_reinterpret_s8_f32_x3(__VA_ARGS__)
+#define svreinterpret_s8_f64_x3(...) __builtin_sve_reinterpret_s8_f64_x3(__VA_ARGS__)
+#define svreinterpret_u8_s8_x3(...) __builtin_sve_reinterpret_u8_s8_x3(__VA_ARGS__)
+#define svreinterpret_u8_u8_x3(...) __builtin_sve_reinterpret_u8_u8_x3(__VA_ARGS__)
+#define svreinterpret_u8_s16_x3(...) __builtin_sve_reinterpret_u8_s16_x3(__VA_ARGS__)
+#define svreinterpret_u8_u16_x3(...) __builtin_sve_reinterpret_u8_u16_x3(__VA_ARGS__)
+#define svreinterpret_u8_s32_x3(...) __builtin_sve_reinterpret_u8_s32_x3(__VA_ARGS__)
+#define svreinterpret_u8_u32_x3(...) __builtin_sve_reinterpret_u8_u32_x3(__VA_ARGS__)
+#define svreinterpret_u8_s64_x3(...) __builtin_sve_reinterpret_u8_s64_x3(__VA_ARGS__)
+#define svreinterpret_u8_u64_x3(...) __builtin_sve_reinterpret_u8_u64_x3(__VA_ARGS__)
+#define svreinterpret_u8_f16_x3(...) __builtin_sve_reinterpret_u8_f16_x3(__VA_ARGS__)
+#define svreinterpret_u8_bf16_x3(...) __builtin_sve_reinterpret_u8_bf16_x3(__VA_ARGS__)
+#define svreinterpret_u8_f32_x3(...) __builtin_sve_reinterpret_u8_f32_x3(__VA_ARGS__)
+#define svreinterpret_u8_f64_x3(...) __builtin_sve_reinterpret_u8_f64_x3(__VA_ARGS__)
+#define svreinterpret_s16_s8_x3(...) __builtin_sve_reinterpret_s16_s8_x3(__VA_ARGS__)
+#define svreinterpret_s16_u8_x3(...) __builtin_sve_reinterpret_s16_u8_x3(__VA_ARGS__)
+#define svreinterpret_s16_s16_x3(...) __builtin_sve_reinterpret_s16_s16_x3(__VA_ARGS__)
+#define svreinterpret_s16_u16_x3(...) __builtin_sve_reinterpret_s16_u16_x3(__VA_ARGS__)
+#define svreinterpret_s16_s32_x3(...) __builtin_sve_reinterpret_s16_s32_x3(__VA_ARGS__)
+#define svreinterpret_s16_u32_x3(...) __builtin_sve_reinterpret_s16_u32_x3(__VA_ARGS__)
+#define svreinterpret_s16_s64_x3(...) __builtin_sve_reinterpret_s16_s64_x3(__VA_ARGS__)
+#define svreinterpret_s16_u64_x3(...) __builtin_sve_reinterpret_s16_u64_x3(__VA_ARGS__)
+#define svreinterpret_s16_f16_x3(...) __builtin_sve_reinterpret_s16_f16_x3(__VA_ARGS__)
+#define svreinterpret_s16_bf16_x3(...) __builtin_sve_reinterpret_s16_bf16_x3(__VA_ARGS__)
+#define svreinterpret_s16_f32_x3(...) __builtin_sve_reinterpret_s16_f32_x3(__VA_ARGS__)
+#define svreinterpret_s16_f64_x3(...) __builtin_sve_reinterpret_s16_f64_x3(__VA_ARGS__)
+#define svreinterpret_u16_s8_x3(...) __builtin_sve_reinterpret_u16_s8_x3(__VA_ARGS__)
+#define svreinterpret_u16_u8_x3(...) __builtin_sve_reinterpret_u16_u8_x3(__VA_ARGS__)
+#define svreinterpret_u16_s16_x3(...) __builtin_sve_reinterpret_u16_s16_x3(__VA_ARGS__)
+#define svreinterpret_u16_u16_x3(...) __builtin_sve_reinterpret_u16_u16_x3(__VA_ARGS__)
+#define svreinterpret_u16_s32_x3(...) __builtin_sve_reinterpret_u16_s32_x3(__VA_ARGS__)
+#define svreinterpret_u16_u32_x3(...) __builtin_sve_reinterpret_u16_u32_x3(__VA_ARGS__)
+#define svreinterpret_u16_s64_x3(...) __builtin_sve_reinterpret_u16_s64_x3(__VA_ARGS__)
+#define svreinterpret_u16_u64_x3(...) __builtin_sve_reinterpret_u16_u64_x3(__VA_ARGS__)
+#define svreinterpret_u16_f16_x3(...) __builtin_sve_reinterpret_u16_f16_x3(__VA_ARGS__)
+#define svreinterpret_u16_bf16_x3(...) __builtin_sve_reinterpret_u16_bf16_x3(__VA_ARGS__)
+#define svreinterpret_u16_f32_x3(...) __builtin_sve_reinterpret_u16_f32_x3(__VA_ARGS__)
+#define svreinterpret_u16_f64_x3(...) __builtin_sve_reinterpret_u16_f64_x3(__VA_ARGS__)
+#define svreinterpret_s32_s8_x3(...) __builtin_sve_reinterpret_s32_s8_x3(__VA_ARGS__)
+#define svreinterpret_s32_u8_x3(...) __builtin_sve_reinterpret_s32_u8_x3(__VA_ARGS__)
+#define svreinterpret_s32_s16_x3(...) __builtin_sve_reinterpret_s32_s16_x3(__VA_ARGS__)
+#define svreinterpret_s32_u16_x3(...) __builtin_sve_reinterpret_s32_u16_x3(__VA_ARGS__)
+#define svreinterpret_s32_s32_x3(...) __builtin_sve_reinterpret_s32_s32_x3(__VA_ARGS__)
+#define svreinterpret_s32_u32_x3(...) __builtin_sve_reinterpret_s32_u32_x3(__VA_ARGS__)
+#define svreinterpret_s32_s64_x3(...) __builtin_sve_reinterpret_s32_s64_x3(__VA_ARGS__)
+#define svreinterpret_s32_u64_x3(...) __builtin_sve_reinterpret_s32_u64_x3(__VA_ARGS__)
+#define svreinterpret_s32_f16_x3(...) __builtin_sve_reinterpret_s32_f16_x3(__VA_ARGS__)
+#define svreinterpret_s32_bf16_x3(...) __builtin_sve_reinterpret_s32_bf16_x3(__VA_ARGS__)
+#define svreinterpret_s32_f32_x3(...) __builtin_sve_reinterpret_s32_f32_x3(__VA_ARGS__)
+#define svreinterpret_s32_f64_x3(...) __builtin_sve_reinterpret_s32_f64_x3(__VA_ARGS__)
+#define svreinterpret_u32_s8_x3(...) __builtin_sve_reinterpret_u32_s8_x3(__VA_ARGS__)
+#define svreinterpret_u32_u8_x3(...) __builtin_sve_reinterpret_u32_u8_x3(__VA_ARGS__)
+#define svreinterpret_u32_s16_x3(...) __builtin_sve_reinterpret_u32_s16_x3(__VA_ARGS__)
+#define svreinterpret_u32_u16_x3(...) __builtin_sve_reinterpret_u32_u16_x3(__VA_ARGS__)
+#define svreinterpret_u32_s32_x3(...) __builtin_sve_reinterpret_u32_s32_x3(__VA_ARGS__)
+#define svreinterpret_u32_u32_x3(...) __builtin_sve_reinterpret_u32_u32_x3(__VA_ARGS__)
+#define svreinterpret_u32_s64_x3(...) __builtin_sve_reinterpret_u32_s64_x3(__VA_ARGS__)
+#define svreinterpret_u32_u64_x3(...) __builtin_sve_reinterpret_u32_u64_x3(__VA_ARGS__)
+#define svreinterpret_u32_f16_x3(...) __builtin_sve_reinterpret_u32_f16_x3(__VA_ARGS__)
+#define svreinterpret_u32_bf16_x3(...) __builtin_sve_reinterpret_u32_bf16_x3(__VA_ARGS__)
+#define svreinterpret_u32_f32_x3(...) __builtin_sve_reinterpret_u32_f32_x3(__VA_ARGS__)
+#define svreinterpret_u32_f64_x3(...) __builtin_sve_reinterpret_u32_f64_x3(__VA_ARGS__)
+#define svreinterpret_s64_s8_x3(...) __builtin_sve_reinterpret_s64_s8_x3(__VA_ARGS__)
+#define svreinterpret_s64_u8_x3(...) __builtin_sve_reinterpret_s64_u8_x3(__VA_ARGS__)
+#define svreinterpret_s64_s16_x3(...) __builtin_sve_reinterpret_s64_s16_x3(__VA_ARGS__)
+#define svreinterpret_s64_u16_x3(...) __builtin_sve_reinterpret_s64_u16_x3(__VA_ARGS__)
+#define svreinterpret_s64_s32_x3(...) __builtin_sve_reinterpret_s64_s32_x3(__VA_ARGS__)
+#define svreinterpret_s64_u32_x3(...) __builtin_sve_reinterpret_s64_u32_x3(__VA_ARGS__)
+#define svreinterpret_s64_s64_x3(...) __builtin_sve_reinterpret_s64_s64_x3(__VA_ARGS__)
+#define svreinterpret_s64_u64_x3(...) __builtin_sve_reinterpret_s64_u64_x3(__VA_ARGS__)
+#define svreinterpret_s64_f16_x3(...) __builtin_sve_reinterpret_s64_f16_x3(__VA_ARGS__)
+#define svreinterpret_s64_bf16_x3(...) __builtin_sve_reinterpret_s64_bf16_x3(__VA_ARGS__)
+#define svreinterpret_s64_f32_x3(...) __builtin_sve_reinterpret_s64_f32_x3(__VA_ARGS__)
+#define svreinterpret_s64_f64_x3(...) __builtin_sve_reinterpret_s64_f64_x3(__VA_ARGS__)
+#define svreinterpret_u64_s8_x3(...) __builtin_sve_reinterpret_u64_s8_x3(__VA_ARGS__)
+#define svreinterpret_u64_u8_x3(...) __builtin_sve_reinterpret_u64_u8_x3(__VA_ARGS__)
+#define svreinterpret_u64_s16_x3(...) __builtin_sve_reinterpret_u64_s16_x3(__VA_ARGS__)
+#define svreinterpret_u64_u16_x3(...) __builtin_sve_reinterpret_u64_u16_x3(__VA_ARGS__)
+#define svreinterpret_u64_s32_x3(...) __builtin_sve_reinterpret_u64_s32_x3(__VA_ARGS__)
+#define svreinterpret_u64_u32_x3(...) __builtin_sve_reinterpret_u64_u32_x3(__VA_ARGS__)
+#define svreinterpret_u64_s64_x3(...) __builtin_sve_reinterpret_u64_s64_x3(__VA_ARGS__)
+#define svreinterpret_u64_u64_x3(...) __builtin_sve_reinterpret_u64_u64_x3(__VA_ARGS__)
+#define svreinterpret_u64_f16_x3(...) __builtin_sve_reinterpret_u64_f16_x3(__VA_ARGS__)
+#define svreinterpret_u64_bf16_x3(...) __builtin_sve_reinterpret_u64_bf16_x3(__VA_ARGS__)
+#define svreinterpret_u64_f32_x3(...) __builtin_sve_reinterpret_u64_f32_x3(__VA_ARGS__)
+#define svreinterpret_u64_f64_x3(...) __builtin_sve_reinterpret_u64_f64_x3(__VA_ARGS__)
+#define svreinterpret_f16_s8_x3(...) __builtin_sve_reinterpret_f16_s8_x3(__VA_ARGS__)
+#define svreinterpret_f16_u8_x3(...) __builtin_sve_reinterpret_f16_u8_x3(__VA_ARGS__)
+#define svreinterpret_f16_s16_x3(...) __builtin_sve_reinterpret_f16_s16_x3(__VA_ARGS__)
+#define svreinterpret_f16_u16_x3(...) __builtin_sve_reinterpret_f16_u16_x3(__VA_ARGS__)
+#define svreinterpret_f16_s32_x3(...) __builtin_sve_reinterpret_f16_s32_x3(__VA_ARGS__)
+#define svreinterpret_f16_u32_x3(...) __builtin_sve_reinterpret_f16_u32_x3(__VA_ARGS__)
+#define svreinterpret_f16_s64_x3(...) __builtin_sve_reinterpret_f16_s64_x3(__VA_ARGS__)
+#define svreinterpret_f16_u64_x3(...) __builtin_sve_reinterpret_f16_u64_x3(__VA_ARGS__)
+#define svreinterpret_f16_f16_x3(...) __builtin_sve_reinterpret_f16_f16_x3(__VA_ARGS__)
+#define svreinterpret_f16_bf16_x3(...) __builtin_sve_reinterpret_f16_bf16_x3(__VA_ARGS__)
+#define svreinterpret_f16_f32_x3(...) __builtin_sve_reinterpret_f16_f32_x3(__VA_ARGS__)
+#define svreinterpret_f16_f64_x3(...) __builtin_sve_reinterpret_f16_f64_x3(__VA_ARGS__)
+#define svreinterpret_bf16_s8_x3(...) __builtin_sve_reinterpret_bf16_s8_x3(__VA_ARGS__)
+#define svreinterpret_bf16_u8_x3(...) __builtin_sve_reinterpret_bf16_u8_x3(__VA_ARGS__)
+#define svreinterpret_bf16_s16_x3(...) __builtin_sve_reinterpret_bf16_s16_x3(__VA_ARGS__)
+#define svreinterpret_bf16_u16_x3(...) __builtin_sve_reinterpret_bf16_u16_x3(__VA_ARGS__)
+#define svreinterpret_bf16_s32_x3(...) __builtin_sve_reinterpret_bf16_s32_x3(__VA_ARGS__)
+#define svreinterpret_bf16_u32_x3(...) __builtin_sve_reinterpret_bf16_u32_x3(__VA_ARGS__)
+#define svreinterpret_bf16_s64_x3(...) __builtin_sve_reinterpret_bf16_s64_x3(__VA_ARGS__)
+#define svreinterpret_bf16_u64_x3(...) __builtin_sve_reinterpret_bf16_u64_x3(__VA_ARGS__)
+#define svreinterpret_bf16_f16_x3(...) __builtin_sve_reinterpret_bf16_f16_x3(__VA_ARGS__)
+#define svreinterpret_bf16_bf16_x3(...) __builtin_sve_reinterpret_bf16_bf16_x3(__VA_ARGS__)
+#define svreinterpret_bf16_f32_x3(...) __builtin_sve_reinterpret_bf16_f32_x3(__VA_ARGS__)
+#define svreinterpret_bf16_f64_x3(...) __builtin_sve_reinterpret_bf16_f64_x3(__VA_ARGS__)
+#define svreinterpret_f32_s8_x3(...) __builtin_sve_reinterpret_f32_s8_x3(__VA_ARGS__)
+#define svreinterpret_f32_u8_x3(...) __builtin_sve_reinterpret_f32_u8_x3(__VA_ARGS__)
+#define svreinterpret_f32_s16_x3(...) __builtin_sve_reinterpret_f32_s16_x3(__VA_ARGS__)
+#define svreinterpret_f32_u16_x3(...) __builtin_sve_reinterpret_f32_u16_x3(__VA_ARGS__)
+#define svreinterpret_f32_s32_x3(...) __builtin_sve_reinterpret_f32_s32_x3(__VA_ARGS__)
+#define svreinterpret_f32_u32_x3(...) __builtin_sve_reinterpret_f32_u32_x3(__VA_ARGS__)
+#define svreinterpret_f32_s64_x3(...) __builtin_sve_reinterpret_f32_s64_x3(__VA_ARGS__)
+#define svreinterpret_f32_u64_x3(...) __builtin_sve_reinterpret_f32_u64_x3(__VA_ARGS__)
+#define svreinterpret_f32_f16_x3(...) __builtin_sve_reinterpret_f32_f16_x3(__VA_ARGS__)
+#define svreinterpret_f32_bf16_x3(...) __builtin_sve_reinterpret_f32_bf16_x3(__VA_ARGS__)
+#define svreinterpret_f32_f32_x3(...) __builtin_sve_reinterpret_f32_f32_x3(__VA_ARGS__)
+#define svreinterpret_f32_f64_x3(...) __builtin_sve_reinterpret_f32_f64_x3(__VA_ARGS__)
+#define svreinterpret_f64_s8_x3(...) __builtin_sve_reinterpret_f64_s8_x3(__VA_ARGS__)
+#define svreinterpret_f64_u8_x3(...) __builtin_sve_reinterpret_f64_u8_x3(__VA_ARGS__)
+#define svreinterpret_f64_s16_x3(...) __builtin_sve_reinterpret_f64_s16_x3(__VA_ARGS__)
+#define svreinterpret_f64_u16_x3(...) __builtin_sve_reinterpret_f64_u16_x3(__VA_ARGS__)
+#define svreinterpret_f64_s32_x3(...) __builtin_sve_reinterpret_f64_s32_x3(__VA_ARGS__)
+#define svreinterpret_f64_u32_x3(...) __builtin_sve_reinterpret_f64_u32_x3(__VA_ARGS__)
+#define svreinterpret_f64_s64_x3(...) __builtin_sve_reinterpret_f64_s64_x3(__VA_ARGS__)
+#define svreinterpret_f64_u64_x3(...) __builtin_sve_reinterpret_f64_u64_x3(__VA_ARGS__)
+#define svreinterpret_f64_f16_x3(...) __builtin_sve_reinterpret_f64_f16_x3(__VA_ARGS__)
+#define svreinterpret_f64_bf16_x3(...) __builtin_sve_reinterpret_f64_bf16_x3(__VA_ARGS__)
+#define svreinterpret_f64_f32_x3(...) __builtin_sve_reinterpret_f64_f32_x3(__VA_ARGS__)
+#define svreinterpret_f64_f64_x3(...) __builtin_sve_reinterpret_f64_f64_x3(__VA_ARGS__)
+__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svuint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svuint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svuint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svuint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_f16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svbfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_bf16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svfloat32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_f32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svfloat64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_f64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svuint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svuint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svuint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svuint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_f16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svbfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_bf16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svfloat32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_f32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svfloat64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_f64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svuint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svuint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svuint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svuint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_f16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svbfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_bf16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svfloat32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_f32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svfloat64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_f64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svuint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svuint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svuint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svuint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_f16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svbfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_bf16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svfloat32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_f32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svfloat64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_f64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svuint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svuint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svuint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svuint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_f16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svbfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_bf16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svfloat32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_f32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svfloat64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_f64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svuint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svuint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svuint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svuint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_f16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svbfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_bf16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svfloat32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_f32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svfloat64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_f64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svuint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svuint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svuint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svuint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_f16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svbfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_bf16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svfloat32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_f32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svfloat64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_f64_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svuint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u8_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svuint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u16_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s32_x3(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svuint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u32_x3(op);
}
-__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint32_t op) {
- return __builtin_sve_reinterpret_s8_s32(op);
+__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s64_x3(op);
}
-__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint64_t op) {
- return __builtin_sve_reinterpret_s8_s64(op);
+__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svuint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u64_x3(op);
}
-__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint8_t op) {
- return __builtin_sve_reinterpret_s8_u8(op);
+__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_f16_x3(op);
}
-__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint16_t op) {
- return __builtin_sve_reinterpret_s8_u16(op);
+__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svbfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_bf16_x3(op);
}
-__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint32_t op) {
- return __builtin_sve_reinterpret_s8_u32(op);
+__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svfloat32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_f32_x3(op);
}
-__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint64_t op) {
- return __builtin_sve_reinterpret_s8_u64(op);
+__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svfloat64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_f64_x3(op);
}
-__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat16_t op) {
- return __builtin_sve_reinterpret_s8_f16(op);
+__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s8_x3(op);
}
-__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svbfloat16_t op) {
- return __builtin_sve_reinterpret_s8_bf16(op);
+__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svuint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u8_x3(op);
}
-__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat32_t op) {
- return __builtin_sve_reinterpret_s8_f32(op);
+__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s16_x3(op);
}
-__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat64_t op) {
- return __builtin_sve_reinterpret_s8_f64(op);
+__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svuint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u16_x3(op);
}
-__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint8_t op) {
- return __builtin_sve_reinterpret_s16_s8(op);
+__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s32_x3(op);
}
-__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint16_t op) {
- return __builtin_sve_reinterpret_s16_s16(op);
+__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svuint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u32_x3(op);
}
-__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint32_t op) {
- return __builtin_sve_reinterpret_s16_s32(op);
+__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s64_x3(op);
}
-__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint64_t op) {
- return __builtin_sve_reinterpret_s16_s64(op);
+__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svuint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u64_x3(op);
}
-__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint8_t op) {
- return __builtin_sve_reinterpret_s16_u8(op);
+__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_f16_x3(op);
}
-__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint16_t op) {
- return __builtin_sve_reinterpret_s16_u16(op);
+__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svbfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_bf16_x3(op);
}
-__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint32_t op) {
- return __builtin_sve_reinterpret_s16_u32(op);
+__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svfloat32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_f32_x3(op);
}
-__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint64_t op) {
- return __builtin_sve_reinterpret_s16_u64(op);
+__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svfloat64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_f64_x3(op);
}
-__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat16_t op) {
- return __builtin_sve_reinterpret_s16_f16(op);
+__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s8_x3(op);
}
-__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svbfloat16_t op) {
- return __builtin_sve_reinterpret_s16_bf16(op);
+__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svuint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u8_x3(op);
}
-__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat32_t op) {
- return __builtin_sve_reinterpret_s16_f32(op);
+__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s16_x3(op);
}
-__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat64_t op) {
- return __builtin_sve_reinterpret_s16_f64(op);
+__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svuint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u16_x3(op);
}
-__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint8_t op) {
- return __builtin_sve_reinterpret_s32_s8(op);
+__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s32_x3(op);
}
-__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint16_t op) {
- return __builtin_sve_reinterpret_s32_s16(op);
+__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svuint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u32_x3(op);
}
-__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint32_t op) {
- return __builtin_sve_reinterpret_s32_s32(op);
+__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s64_x3(op);
}
-__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint64_t op) {
- return __builtin_sve_reinterpret_s32_s64(op);
+__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svuint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u64_x3(op);
}
-__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint8_t op) {
- return __builtin_sve_reinterpret_s32_u8(op);
+__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_f16_x3(op);
}
-__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint16_t op) {
- return __builtin_sve_reinterpret_s32_u16(op);
+__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svbfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_bf16_x3(op);
}
-__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint32_t op) {
- return __builtin_sve_reinterpret_s32_u32(op);
+__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svfloat32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_f32_x3(op);
}
-__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint64_t op) {
- return __builtin_sve_reinterpret_s32_u64(op);
+__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svfloat64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_f64_x3(op);
}
-__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat16_t op) {
- return __builtin_sve_reinterpret_s32_f16(op);
+__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s8_x3(op);
}
-__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svbfloat16_t op) {
- return __builtin_sve_reinterpret_s32_bf16(op);
+__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svuint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u8_x3(op);
}
-__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat32_t op) {
- return __builtin_sve_reinterpret_s32_f32(op);
+__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s16_x3(op);
}
-__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat64_t op) {
- return __builtin_sve_reinterpret_s32_f64(op);
+__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svuint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u16_x3(op);
}
-__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint8_t op) {
- return __builtin_sve_reinterpret_s64_s8(op);
+__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s32_x3(op);
}
-__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint16_t op) {
- return __builtin_sve_reinterpret_s64_s16(op);
+__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svuint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u32_x3(op);
}
-__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint32_t op) {
- return __builtin_sve_reinterpret_s64_s32(op);
+__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s64_x3(op);
}
-__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint64_t op) {
- return __builtin_sve_reinterpret_s64_s64(op);
+__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svuint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u64_x3(op);
}
-__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint8_t op) {
- return __builtin_sve_reinterpret_s64_u8(op);
+__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_f16_x3(op);
}
-__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint16_t op) {
- return __builtin_sve_reinterpret_s64_u16(op);
+__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svbfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_bf16_x3(op);
}
-__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint32_t op) {
- return __builtin_sve_reinterpret_s64_u32(op);
+__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svfloat32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_f32_x3(op);
}
-__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint64_t op) {
- return __builtin_sve_reinterpret_s64_u64(op);
+__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svfloat64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_f64_x3(op);
}
-__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat16_t op) {
- return __builtin_sve_reinterpret_s64_f16(op);
+__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s8_x3(op);
}
-__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svbfloat16_t op) {
- return __builtin_sve_reinterpret_s64_bf16(op);
+__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svuint8x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u8_x3(op);
}
-__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat32_t op) {
- return __builtin_sve_reinterpret_s64_f32(op);
+__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s16_x3(op);
}
-__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat64_t op) {
- return __builtin_sve_reinterpret_s64_f64(op);
+__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svuint16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u16_x3(op);
}
-__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint8_t op) {
- return __builtin_sve_reinterpret_u8_s8(op);
+__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s32_x3(op);
}
-__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint16_t op) {
- return __builtin_sve_reinterpret_u8_s16(op);
+__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svuint32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u32_x3(op);
}
-__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint32_t op) {
- return __builtin_sve_reinterpret_u8_s32(op);
+__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s64_x3(op);
}
-__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint64_t op) {
- return __builtin_sve_reinterpret_u8_s64(op);
+__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svuint64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u64_x3(op);
}
-__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint8_t op) {
- return __builtin_sve_reinterpret_u8_u8(op);
+__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_f16_x3(op);
}
-__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint16_t op) {
- return __builtin_sve_reinterpret_u8_u16(op);
+__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svbfloat16x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_bf16_x3(op);
}
-__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint32_t op) {
- return __builtin_sve_reinterpret_u8_u32(op);
+__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svfloat32x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_f32_x3(op);
}
-__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint64_t op) {
- return __builtin_sve_reinterpret_u8_u64(op);
+__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svfloat64x3_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_f64_x3(op);
}
-__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat16_t op) {
- return __builtin_sve_reinterpret_u8_f16(op);
+#define svreinterpret_s8_s8_x4(...) __builtin_sve_reinterpret_s8_s8_x4(__VA_ARGS__)
+#define svreinterpret_s8_u8_x4(...) __builtin_sve_reinterpret_s8_u8_x4(__VA_ARGS__)
+#define svreinterpret_s8_s16_x4(...) __builtin_sve_reinterpret_s8_s16_x4(__VA_ARGS__)
+#define svreinterpret_s8_u16_x4(...) __builtin_sve_reinterpret_s8_u16_x4(__VA_ARGS__)
+#define svreinterpret_s8_s32_x4(...) __builtin_sve_reinterpret_s8_s32_x4(__VA_ARGS__)
+#define svreinterpret_s8_u32_x4(...) __builtin_sve_reinterpret_s8_u32_x4(__VA_ARGS__)
+#define svreinterpret_s8_s64_x4(...) __builtin_sve_reinterpret_s8_s64_x4(__VA_ARGS__)
+#define svreinterpret_s8_u64_x4(...) __builtin_sve_reinterpret_s8_u64_x4(__VA_ARGS__)
+#define svreinterpret_s8_f16_x4(...) __builtin_sve_reinterpret_s8_f16_x4(__VA_ARGS__)
+#define svreinterpret_s8_bf16_x4(...) __builtin_sve_reinterpret_s8_bf16_x4(__VA_ARGS__)
+#define svreinterpret_s8_f32_x4(...) __builtin_sve_reinterpret_s8_f32_x4(__VA_ARGS__)
+#define svreinterpret_s8_f64_x4(...) __builtin_sve_reinterpret_s8_f64_x4(__VA_ARGS__)
+#define svreinterpret_u8_s8_x4(...) __builtin_sve_reinterpret_u8_s8_x4(__VA_ARGS__)
+#define svreinterpret_u8_u8_x4(...) __builtin_sve_reinterpret_u8_u8_x4(__VA_ARGS__)
+#define svreinterpret_u8_s16_x4(...) __builtin_sve_reinterpret_u8_s16_x4(__VA_ARGS__)
+#define svreinterpret_u8_u16_x4(...) __builtin_sve_reinterpret_u8_u16_x4(__VA_ARGS__)
+#define svreinterpret_u8_s32_x4(...) __builtin_sve_reinterpret_u8_s32_x4(__VA_ARGS__)
+#define svreinterpret_u8_u32_x4(...) __builtin_sve_reinterpret_u8_u32_x4(__VA_ARGS__)
+#define svreinterpret_u8_s64_x4(...) __builtin_sve_reinterpret_u8_s64_x4(__VA_ARGS__)
+#define svreinterpret_u8_u64_x4(...) __builtin_sve_reinterpret_u8_u64_x4(__VA_ARGS__)
+#define svreinterpret_u8_f16_x4(...) __builtin_sve_reinterpret_u8_f16_x4(__VA_ARGS__)
+#define svreinterpret_u8_bf16_x4(...) __builtin_sve_reinterpret_u8_bf16_x4(__VA_ARGS__)
+#define svreinterpret_u8_f32_x4(...) __builtin_sve_reinterpret_u8_f32_x4(__VA_ARGS__)
+#define svreinterpret_u8_f64_x4(...) __builtin_sve_reinterpret_u8_f64_x4(__VA_ARGS__)
+#define svreinterpret_s16_s8_x4(...) __builtin_sve_reinterpret_s16_s8_x4(__VA_ARGS__)
+#define svreinterpret_s16_u8_x4(...) __builtin_sve_reinterpret_s16_u8_x4(__VA_ARGS__)
+#define svreinterpret_s16_s16_x4(...) __builtin_sve_reinterpret_s16_s16_x4(__VA_ARGS__)
+#define svreinterpret_s16_u16_x4(...) __builtin_sve_reinterpret_s16_u16_x4(__VA_ARGS__)
+#define svreinterpret_s16_s32_x4(...) __builtin_sve_reinterpret_s16_s32_x4(__VA_ARGS__)
+#define svreinterpret_s16_u32_x4(...) __builtin_sve_reinterpret_s16_u32_x4(__VA_ARGS__)
+#define svreinterpret_s16_s64_x4(...) __builtin_sve_reinterpret_s16_s64_x4(__VA_ARGS__)
+#define svreinterpret_s16_u64_x4(...) __builtin_sve_reinterpret_s16_u64_x4(__VA_ARGS__)
+#define svreinterpret_s16_f16_x4(...) __builtin_sve_reinterpret_s16_f16_x4(__VA_ARGS__)
+#define svreinterpret_s16_bf16_x4(...) __builtin_sve_reinterpret_s16_bf16_x4(__VA_ARGS__)
+#define svreinterpret_s16_f32_x4(...) __builtin_sve_reinterpret_s16_f32_x4(__VA_ARGS__)
+#define svreinterpret_s16_f64_x4(...) __builtin_sve_reinterpret_s16_f64_x4(__VA_ARGS__)
+#define svreinterpret_u16_s8_x4(...) __builtin_sve_reinterpret_u16_s8_x4(__VA_ARGS__)
+#define svreinterpret_u16_u8_x4(...) __builtin_sve_reinterpret_u16_u8_x4(__VA_ARGS__)
+#define svreinterpret_u16_s16_x4(...) __builtin_sve_reinterpret_u16_s16_x4(__VA_ARGS__)
+#define svreinterpret_u16_u16_x4(...) __builtin_sve_reinterpret_u16_u16_x4(__VA_ARGS__)
+#define svreinterpret_u16_s32_x4(...) __builtin_sve_reinterpret_u16_s32_x4(__VA_ARGS__)
+#define svreinterpret_u16_u32_x4(...) __builtin_sve_reinterpret_u16_u32_x4(__VA_ARGS__)
+#define svreinterpret_u16_s64_x4(...) __builtin_sve_reinterpret_u16_s64_x4(__VA_ARGS__)
+#define svreinterpret_u16_u64_x4(...) __builtin_sve_reinterpret_u16_u64_x4(__VA_ARGS__)
+#define svreinterpret_u16_f16_x4(...) __builtin_sve_reinterpret_u16_f16_x4(__VA_ARGS__)
+#define svreinterpret_u16_bf16_x4(...) __builtin_sve_reinterpret_u16_bf16_x4(__VA_ARGS__)
+#define svreinterpret_u16_f32_x4(...) __builtin_sve_reinterpret_u16_f32_x4(__VA_ARGS__)
+#define svreinterpret_u16_f64_x4(...) __builtin_sve_reinterpret_u16_f64_x4(__VA_ARGS__)
+#define svreinterpret_s32_s8_x4(...) __builtin_sve_reinterpret_s32_s8_x4(__VA_ARGS__)
+#define svreinterpret_s32_u8_x4(...) __builtin_sve_reinterpret_s32_u8_x4(__VA_ARGS__)
+#define svreinterpret_s32_s16_x4(...) __builtin_sve_reinterpret_s32_s16_x4(__VA_ARGS__)
+#define svreinterpret_s32_u16_x4(...) __builtin_sve_reinterpret_s32_u16_x4(__VA_ARGS__)
+#define svreinterpret_s32_s32_x4(...) __builtin_sve_reinterpret_s32_s32_x4(__VA_ARGS__)
+#define svreinterpret_s32_u32_x4(...) __builtin_sve_reinterpret_s32_u32_x4(__VA_ARGS__)
+#define svreinterpret_s32_s64_x4(...) __builtin_sve_reinterpret_s32_s64_x4(__VA_ARGS__)
+#define svreinterpret_s32_u64_x4(...) __builtin_sve_reinterpret_s32_u64_x4(__VA_ARGS__)
+#define svreinterpret_s32_f16_x4(...) __builtin_sve_reinterpret_s32_f16_x4(__VA_ARGS__)
+#define svreinterpret_s32_bf16_x4(...) __builtin_sve_reinterpret_s32_bf16_x4(__VA_ARGS__)
+#define svreinterpret_s32_f32_x4(...) __builtin_sve_reinterpret_s32_f32_x4(__VA_ARGS__)
+#define svreinterpret_s32_f64_x4(...) __builtin_sve_reinterpret_s32_f64_x4(__VA_ARGS__)
+#define svreinterpret_u32_s8_x4(...) __builtin_sve_reinterpret_u32_s8_x4(__VA_ARGS__)
+#define svreinterpret_u32_u8_x4(...) __builtin_sve_reinterpret_u32_u8_x4(__VA_ARGS__)
+#define svreinterpret_u32_s16_x4(...) __builtin_sve_reinterpret_u32_s16_x4(__VA_ARGS__)
+#define svreinterpret_u32_u16_x4(...) __builtin_sve_reinterpret_u32_u16_x4(__VA_ARGS__)
+#define svreinterpret_u32_s32_x4(...) __builtin_sve_reinterpret_u32_s32_x4(__VA_ARGS__)
+#define svreinterpret_u32_u32_x4(...) __builtin_sve_reinterpret_u32_u32_x4(__VA_ARGS__)
+#define svreinterpret_u32_s64_x4(...) __builtin_sve_reinterpret_u32_s64_x4(__VA_ARGS__)
+#define svreinterpret_u32_u64_x4(...) __builtin_sve_reinterpret_u32_u64_x4(__VA_ARGS__)
+#define svreinterpret_u32_f16_x4(...) __builtin_sve_reinterpret_u32_f16_x4(__VA_ARGS__)
+#define svreinterpret_u32_bf16_x4(...) __builtin_sve_reinterpret_u32_bf16_x4(__VA_ARGS__)
+#define svreinterpret_u32_f32_x4(...) __builtin_sve_reinterpret_u32_f32_x4(__VA_ARGS__)
+#define svreinterpret_u32_f64_x4(...) __builtin_sve_reinterpret_u32_f64_x4(__VA_ARGS__)
+#define svreinterpret_s64_s8_x4(...) __builtin_sve_reinterpret_s64_s8_x4(__VA_ARGS__)
+#define svreinterpret_s64_u8_x4(...) __builtin_sve_reinterpret_s64_u8_x4(__VA_ARGS__)
+#define svreinterpret_s64_s16_x4(...) __builtin_sve_reinterpret_s64_s16_x4(__VA_ARGS__)
+#define svreinterpret_s64_u16_x4(...) __builtin_sve_reinterpret_s64_u16_x4(__VA_ARGS__)
+#define svreinterpret_s64_s32_x4(...) __builtin_sve_reinterpret_s64_s32_x4(__VA_ARGS__)
+#define svreinterpret_s64_u32_x4(...) __builtin_sve_reinterpret_s64_u32_x4(__VA_ARGS__)
+#define svreinterpret_s64_s64_x4(...) __builtin_sve_reinterpret_s64_s64_x4(__VA_ARGS__)
+#define svreinterpret_s64_u64_x4(...) __builtin_sve_reinterpret_s64_u64_x4(__VA_ARGS__)
+#define svreinterpret_s64_f16_x4(...) __builtin_sve_reinterpret_s64_f16_x4(__VA_ARGS__)
+#define svreinterpret_s64_bf16_x4(...) __builtin_sve_reinterpret_s64_bf16_x4(__VA_ARGS__)
+#define svreinterpret_s64_f32_x4(...) __builtin_sve_reinterpret_s64_f32_x4(__VA_ARGS__)
+#define svreinterpret_s64_f64_x4(...) __builtin_sve_reinterpret_s64_f64_x4(__VA_ARGS__)
+#define svreinterpret_u64_s8_x4(...) __builtin_sve_reinterpret_u64_s8_x4(__VA_ARGS__)
+#define svreinterpret_u64_u8_x4(...) __builtin_sve_reinterpret_u64_u8_x4(__VA_ARGS__)
+#define svreinterpret_u64_s16_x4(...) __builtin_sve_reinterpret_u64_s16_x4(__VA_ARGS__)
+#define svreinterpret_u64_u16_x4(...) __builtin_sve_reinterpret_u64_u16_x4(__VA_ARGS__)
+#define svreinterpret_u64_s32_x4(...) __builtin_sve_reinterpret_u64_s32_x4(__VA_ARGS__)
+#define svreinterpret_u64_u32_x4(...) __builtin_sve_reinterpret_u64_u32_x4(__VA_ARGS__)
+#define svreinterpret_u64_s64_x4(...) __builtin_sve_reinterpret_u64_s64_x4(__VA_ARGS__)
+#define svreinterpret_u64_u64_x4(...) __builtin_sve_reinterpret_u64_u64_x4(__VA_ARGS__)
+#define svreinterpret_u64_f16_x4(...) __builtin_sve_reinterpret_u64_f16_x4(__VA_ARGS__)
+#define svreinterpret_u64_bf16_x4(...) __builtin_sve_reinterpret_u64_bf16_x4(__VA_ARGS__)
+#define svreinterpret_u64_f32_x4(...) __builtin_sve_reinterpret_u64_f32_x4(__VA_ARGS__)
+#define svreinterpret_u64_f64_x4(...) __builtin_sve_reinterpret_u64_f64_x4(__VA_ARGS__)
+#define svreinterpret_f16_s8_x4(...) __builtin_sve_reinterpret_f16_s8_x4(__VA_ARGS__)
+#define svreinterpret_f16_u8_x4(...) __builtin_sve_reinterpret_f16_u8_x4(__VA_ARGS__)
+#define svreinterpret_f16_s16_x4(...) __builtin_sve_reinterpret_f16_s16_x4(__VA_ARGS__)
+#define svreinterpret_f16_u16_x4(...) __builtin_sve_reinterpret_f16_u16_x4(__VA_ARGS__)
+#define svreinterpret_f16_s32_x4(...) __builtin_sve_reinterpret_f16_s32_x4(__VA_ARGS__)
+#define svreinterpret_f16_u32_x4(...) __builtin_sve_reinterpret_f16_u32_x4(__VA_ARGS__)
+#define svreinterpret_f16_s64_x4(...) __builtin_sve_reinterpret_f16_s64_x4(__VA_ARGS__)
+#define svreinterpret_f16_u64_x4(...) __builtin_sve_reinterpret_f16_u64_x4(__VA_ARGS__)
+#define svreinterpret_f16_f16_x4(...) __builtin_sve_reinterpret_f16_f16_x4(__VA_ARGS__)
+#define svreinterpret_f16_bf16_x4(...) __builtin_sve_reinterpret_f16_bf16_x4(__VA_ARGS__)
+#define svreinterpret_f16_f32_x4(...) __builtin_sve_reinterpret_f16_f32_x4(__VA_ARGS__)
+#define svreinterpret_f16_f64_x4(...) __builtin_sve_reinterpret_f16_f64_x4(__VA_ARGS__)
+#define svreinterpret_bf16_s8_x4(...) __builtin_sve_reinterpret_bf16_s8_x4(__VA_ARGS__)
+#define svreinterpret_bf16_u8_x4(...) __builtin_sve_reinterpret_bf16_u8_x4(__VA_ARGS__)
+#define svreinterpret_bf16_s16_x4(...) __builtin_sve_reinterpret_bf16_s16_x4(__VA_ARGS__)
+#define svreinterpret_bf16_u16_x4(...) __builtin_sve_reinterpret_bf16_u16_x4(__VA_ARGS__)
+#define svreinterpret_bf16_s32_x4(...) __builtin_sve_reinterpret_bf16_s32_x4(__VA_ARGS__)
+#define svreinterpret_bf16_u32_x4(...) __builtin_sve_reinterpret_bf16_u32_x4(__VA_ARGS__)
+#define svreinterpret_bf16_s64_x4(...) __builtin_sve_reinterpret_bf16_s64_x4(__VA_ARGS__)
+#define svreinterpret_bf16_u64_x4(...) __builtin_sve_reinterpret_bf16_u64_x4(__VA_ARGS__)
+#define svreinterpret_bf16_f16_x4(...) __builtin_sve_reinterpret_bf16_f16_x4(__VA_ARGS__)
+#define svreinterpret_bf16_bf16_x4(...) __builtin_sve_reinterpret_bf16_bf16_x4(__VA_ARGS__)
+#define svreinterpret_bf16_f32_x4(...) __builtin_sve_reinterpret_bf16_f32_x4(__VA_ARGS__)
+#define svreinterpret_bf16_f64_x4(...) __builtin_sve_reinterpret_bf16_f64_x4(__VA_ARGS__)
+#define svreinterpret_f32_s8_x4(...) __builtin_sve_reinterpret_f32_s8_x4(__VA_ARGS__)
+#define svreinterpret_f32_u8_x4(...) __builtin_sve_reinterpret_f32_u8_x4(__VA_ARGS__)
+#define svreinterpret_f32_s16_x4(...) __builtin_sve_reinterpret_f32_s16_x4(__VA_ARGS__)
+#define svreinterpret_f32_u16_x4(...) __builtin_sve_reinterpret_f32_u16_x4(__VA_ARGS__)
+#define svreinterpret_f32_s32_x4(...) __builtin_sve_reinterpret_f32_s32_x4(__VA_ARGS__)
+#define svreinterpret_f32_u32_x4(...) __builtin_sve_reinterpret_f32_u32_x4(__VA_ARGS__)
+#define svreinterpret_f32_s64_x4(...) __builtin_sve_reinterpret_f32_s64_x4(__VA_ARGS__)
+#define svreinterpret_f32_u64_x4(...) __builtin_sve_reinterpret_f32_u64_x4(__VA_ARGS__)
+#define svreinterpret_f32_f16_x4(...) __builtin_sve_reinterpret_f32_f16_x4(__VA_ARGS__)
+#define svreinterpret_f32_bf16_x4(...) __builtin_sve_reinterpret_f32_bf16_x4(__VA_ARGS__)
+#define svreinterpret_f32_f32_x4(...) __builtin_sve_reinterpret_f32_f32_x4(__VA_ARGS__)
+#define svreinterpret_f32_f64_x4(...) __builtin_sve_reinterpret_f32_f64_x4(__VA_ARGS__)
+#define svreinterpret_f64_s8_x4(...) __builtin_sve_reinterpret_f64_s8_x4(__VA_ARGS__)
+#define svreinterpret_f64_u8_x4(...) __builtin_sve_reinterpret_f64_u8_x4(__VA_ARGS__)
+#define svreinterpret_f64_s16_x4(...) __builtin_sve_reinterpret_f64_s16_x4(__VA_ARGS__)
+#define svreinterpret_f64_u16_x4(...) __builtin_sve_reinterpret_f64_u16_x4(__VA_ARGS__)
+#define svreinterpret_f64_s32_x4(...) __builtin_sve_reinterpret_f64_s32_x4(__VA_ARGS__)
+#define svreinterpret_f64_u32_x4(...) __builtin_sve_reinterpret_f64_u32_x4(__VA_ARGS__)
+#define svreinterpret_f64_s64_x4(...) __builtin_sve_reinterpret_f64_s64_x4(__VA_ARGS__)
+#define svreinterpret_f64_u64_x4(...) __builtin_sve_reinterpret_f64_u64_x4(__VA_ARGS__)
+#define svreinterpret_f64_f16_x4(...) __builtin_sve_reinterpret_f64_f16_x4(__VA_ARGS__)
+#define svreinterpret_f64_bf16_x4(...) __builtin_sve_reinterpret_f64_bf16_x4(__VA_ARGS__)
+#define svreinterpret_f64_f32_x4(...) __builtin_sve_reinterpret_f64_f32_x4(__VA_ARGS__)
+#define svreinterpret_f64_f64_x4(...) __builtin_sve_reinterpret_f64_f64_x4(__VA_ARGS__)
+__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s8_x4(op);
}
-__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svbfloat16_t op) {
- return __builtin_sve_reinterpret_u8_bf16(op);
+__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svuint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u8_x4(op);
}
-__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat32_t op) {
- return __builtin_sve_reinterpret_u8_f32(op);
+__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s16_x4(op);
}
-__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat64_t op) {
- return __builtin_sve_reinterpret_u8_f64(op);
+__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svuint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u16_x4(op);
}
-__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint8_t op) {
- return __builtin_sve_reinterpret_u16_s8(op);
+__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s32_x4(op);
}
-__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint16_t op) {
- return __builtin_sve_reinterpret_u16_s16(op);
+__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svuint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u32_x4(op);
}
-__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint32_t op) {
- return __builtin_sve_reinterpret_u16_s32(op);
+__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_s64_x4(op);
}
-__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint64_t op) {
- return __builtin_sve_reinterpret_u16_s64(op);
+__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svuint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_u64_x4(op);
}
-__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint8_t op) {
- return __builtin_sve_reinterpret_u16_u8(op);
+__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_f16_x4(op);
}
-__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint16_t op) {
- return __builtin_sve_reinterpret_u16_u16(op);
+__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svbfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_bf16_x4(op);
}
-__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint32_t op) {
- return __builtin_sve_reinterpret_u16_u32(op);
+__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svfloat32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_f32_x4(op);
}
-__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint64_t op) {
- return __builtin_sve_reinterpret_u16_u64(op);
+__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svfloat64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s8_f64_x4(op);
}
-__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat16_t op) {
- return __builtin_sve_reinterpret_u16_f16(op);
+__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s8_x4(op);
}
-__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svbfloat16_t op) {
- return __builtin_sve_reinterpret_u16_bf16(op);
+__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svuint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u8_x4(op);
}
-__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat32_t op) {
- return __builtin_sve_reinterpret_u16_f32(op);
+__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s16_x4(op);
}
-__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat64_t op) {
- return __builtin_sve_reinterpret_u16_f64(op);
+__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svuint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u16_x4(op);
}
-__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint8_t op) {
- return __builtin_sve_reinterpret_u32_s8(op);
+__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s32_x4(op);
}
-__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint16_t op) {
- return __builtin_sve_reinterpret_u32_s16(op);
+__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svuint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u32_x4(op);
}
-__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint32_t op) {
- return __builtin_sve_reinterpret_u32_s32(op);
+__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_s64_x4(op);
}
-__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint64_t op) {
- return __builtin_sve_reinterpret_u32_s64(op);
+__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svuint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_u64_x4(op);
}
-__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint8_t op) {
- return __builtin_sve_reinterpret_u32_u8(op);
+__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_f16_x4(op);
}
-__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint16_t op) {
- return __builtin_sve_reinterpret_u32_u16(op);
+__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svbfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_bf16_x4(op);
}
-__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint32_t op) {
- return __builtin_sve_reinterpret_u32_u32(op);
+__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svfloat32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_f32_x4(op);
}
-__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint64_t op) {
- return __builtin_sve_reinterpret_u32_u64(op);
+__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svfloat64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u8_f64_x4(op);
}
-__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat16_t op) {
- return __builtin_sve_reinterpret_u32_f16(op);
+__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s8_x4(op);
}
-__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svbfloat16_t op) {
- return __builtin_sve_reinterpret_u32_bf16(op);
+__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svuint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u8_x4(op);
}
-__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat32_t op) {
- return __builtin_sve_reinterpret_u32_f32(op);
+__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s16_x4(op);
}
-__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat64_t op) {
- return __builtin_sve_reinterpret_u32_f64(op);
+__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svuint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u16_x4(op);
}
-__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint8_t op) {
- return __builtin_sve_reinterpret_u64_s8(op);
+__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s32_x4(op);
}
-__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint16_t op) {
- return __builtin_sve_reinterpret_u64_s16(op);
+__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svuint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u32_x4(op);
}
-__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint32_t op) {
- return __builtin_sve_reinterpret_u64_s32(op);
+__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_s64_x4(op);
}
-__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint64_t op) {
- return __builtin_sve_reinterpret_u64_s64(op);
+__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svuint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_u64_x4(op);
}
-__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint8_t op) {
- return __builtin_sve_reinterpret_u64_u8(op);
+__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_f16_x4(op);
}
-__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint16_t op) {
- return __builtin_sve_reinterpret_u64_u16(op);
+__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svbfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_bf16_x4(op);
}
-__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint32_t op) {
- return __builtin_sve_reinterpret_u64_u32(op);
+__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svfloat32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_f32_x4(op);
}
-__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint64_t op) {
- return __builtin_sve_reinterpret_u64_u64(op);
+__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svfloat64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s16_f64_x4(op);
}
-__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat16_t op) {
- return __builtin_sve_reinterpret_u64_f16(op);
+__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s8_x4(op);
}
-__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svbfloat16_t op) {
- return __builtin_sve_reinterpret_u64_bf16(op);
+__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svuint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u8_x4(op);
}
-__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat32_t op) {
- return __builtin_sve_reinterpret_u64_f32(op);
+__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s16_x4(op);
}
-__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat64_t op) {
- return __builtin_sve_reinterpret_u64_f64(op);
+__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svuint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u16_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint8_t op) {
- return __builtin_sve_reinterpret_f16_s8(op);
+__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s32_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint16_t op) {
- return __builtin_sve_reinterpret_f16_s16(op);
+__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svuint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u32_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint32_t op) {
- return __builtin_sve_reinterpret_f16_s32(op);
+__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_s64_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint64_t op) {
- return __builtin_sve_reinterpret_f16_s64(op);
+__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svuint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_u64_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint8_t op) {
- return __builtin_sve_reinterpret_f16_u8(op);
+__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_f16_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint16_t op) {
- return __builtin_sve_reinterpret_f16_u16(op);
+__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svbfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_bf16_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint32_t op) {
- return __builtin_sve_reinterpret_f16_u32(op);
+__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svfloat32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_f32_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint64_t op) {
- return __builtin_sve_reinterpret_f16_u64(op);
+__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svfloat64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u16_f64_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat16_t op) {
- return __builtin_sve_reinterpret_f16_f16(op);
+__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s8_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svbfloat16_t op) {
- return __builtin_sve_reinterpret_f16_bf16(op);
+__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svuint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u8_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat32_t op) {
- return __builtin_sve_reinterpret_f16_f32(op);
+__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s16_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat64_t op) {
- return __builtin_sve_reinterpret_f16_f64(op);
+__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svuint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u16_x4(op);
}
-__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint8_t op) {
- return __builtin_sve_reinterpret_bf16_s8(op);
+__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s32_x4(op);
}
-__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint16_t op) {
- return __builtin_sve_reinterpret_bf16_s16(op);
+__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svuint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u32_x4(op);
}
-__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint32_t op) {
- return __builtin_sve_reinterpret_bf16_s32(op);
+__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_s64_x4(op);
}
-__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint64_t op) {
- return __builtin_sve_reinterpret_bf16_s64(op);
+__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svuint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_u64_x4(op);
}
-__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint8_t op) {
- return __builtin_sve_reinterpret_bf16_u8(op);
+__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_f16_x4(op);
}
-__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint16_t op) {
- return __builtin_sve_reinterpret_bf16_u16(op);
+__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svbfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_bf16_x4(op);
}
-__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint32_t op) {
- return __builtin_sve_reinterpret_bf16_u32(op);
+__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svfloat32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_f32_x4(op);
}
-__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint64_t op) {
- return __builtin_sve_reinterpret_bf16_u64(op);
+__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svfloat64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s32_f64_x4(op);
}
-__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat16_t op) {
- return __builtin_sve_reinterpret_bf16_f16(op);
+__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s8_x4(op);
}
-__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svbfloat16_t op) {
- return __builtin_sve_reinterpret_bf16_bf16(op);
+__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svuint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u8_x4(op);
}
-__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat32_t op) {
- return __builtin_sve_reinterpret_bf16_f32(op);
+__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s16_x4(op);
}
-__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat64_t op) {
- return __builtin_sve_reinterpret_bf16_f64(op);
+__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svuint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u16_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint8_t op) {
- return __builtin_sve_reinterpret_f32_s8(op);
+__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s32_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint16_t op) {
- return __builtin_sve_reinterpret_f32_s16(op);
+__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svuint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u32_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint32_t op) {
- return __builtin_sve_reinterpret_f32_s32(op);
+__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_s64_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint64_t op) {
- return __builtin_sve_reinterpret_f32_s64(op);
+__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svuint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_u64_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint8_t op) {
- return __builtin_sve_reinterpret_f32_u8(op);
+__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_f16_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint16_t op) {
- return __builtin_sve_reinterpret_f32_u16(op);
+__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svbfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_bf16_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint32_t op) {
- return __builtin_sve_reinterpret_f32_u32(op);
+__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svfloat32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_f32_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint64_t op) {
- return __builtin_sve_reinterpret_f32_u64(op);
+__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svfloat64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u32_f64_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat16_t op) {
- return __builtin_sve_reinterpret_f32_f16(op);
+__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s8_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svbfloat16_t op) {
- return __builtin_sve_reinterpret_f32_bf16(op);
+__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svuint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u8_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat32_t op) {
- return __builtin_sve_reinterpret_f32_f32(op);
+__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s16_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat64_t op) {
- return __builtin_sve_reinterpret_f32_f64(op);
+__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svuint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u16_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint8_t op) {
- return __builtin_sve_reinterpret_f64_s8(op);
+__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s32_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint16_t op) {
- return __builtin_sve_reinterpret_f64_s16(op);
+__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svuint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u32_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint32_t op) {
- return __builtin_sve_reinterpret_f64_s32(op);
+__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_s64_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint64_t op) {
- return __builtin_sve_reinterpret_f64_s64(op);
+__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svuint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_u64_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint8_t op) {
- return __builtin_sve_reinterpret_f64_u8(op);
+__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_f16_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint16_t op) {
- return __builtin_sve_reinterpret_f64_u16(op);
+__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svbfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_bf16_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint32_t op) {
- return __builtin_sve_reinterpret_f64_u32(op);
+__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svfloat32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_f32_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint64_t op) {
- return __builtin_sve_reinterpret_f64_u64(op);
+__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svfloat64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_s64_f64_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat16_t op) {
- return __builtin_sve_reinterpret_f64_f16(op);
+__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s8_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svbfloat16_t op) {
- return __builtin_sve_reinterpret_f64_bf16(op);
+__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svuint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u8_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat32_t op) {
- return __builtin_sve_reinterpret_f64_f32(op);
+__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s16_x4(op);
}
-__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat64_t op) {
- return __builtin_sve_reinterpret_f64_f64(op);
+__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svuint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svuint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_s64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svuint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_u64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_f16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svbfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_bf16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svfloat32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_f32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svfloat64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_u64_f64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s8_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svuint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u8_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svuint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svuint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_s64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svuint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_u64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_f16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svbfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_bf16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svfloat32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_f32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svfloat64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f16_f64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s8_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svuint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u8_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svuint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svuint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_s64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svuint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_u64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_f16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svbfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_bf16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svfloat32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_f32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svfloat64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_bf16_f64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s8_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svuint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u8_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svuint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svuint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_s64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svuint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_u64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_f16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svbfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_bf16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svfloat32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_f32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svfloat64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f32_f64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s8_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svuint8x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u8_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svuint16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svuint32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_s64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svuint64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_u64_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_f16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svbfloat16x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_bf16_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svfloat32x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_f32_x4(op);
+}
+
+__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svfloat64x4_t op) __arm_streaming_compatible {
+ return __builtin_sve_reinterpret_f64_f64_x4(op);
}
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m)))
+svbfloat16_t svadd_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x)))
+svbfloat16_t svadd_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z)))
+svbfloat16_t svadd_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m)))
+svbfloat16_t svadd_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x)))
+svbfloat16_t svadd_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z)))
+svbfloat16_t svadd_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16)))
+svbfloat16_t svclamp_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m)))
+svbfloat16_t svmax_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x)))
+svbfloat16_t svmax_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z)))
+svbfloat16_t svmax_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m)))
+svbfloat16_t svmax_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x)))
+svbfloat16_t svmax_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z)))
+svbfloat16_t svmax_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m)))
+svbfloat16_t svmaxnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x)))
+svbfloat16_t svmaxnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z)))
+svbfloat16_t svmaxnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m)))
+svbfloat16_t svmaxnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x)))
+svbfloat16_t svmaxnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z)))
+svbfloat16_t svmaxnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m)))
+svbfloat16_t svmin_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x)))
+svbfloat16_t svmin_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z)))
+svbfloat16_t svmin_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m)))
+svbfloat16_t svmin_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x)))
+svbfloat16_t svmin_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z)))
+svbfloat16_t svmin_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m)))
+svbfloat16_t svminnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x)))
+svbfloat16_t svminnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z)))
+svbfloat16_t svminnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m)))
+svbfloat16_t svminnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x)))
+svbfloat16_t svminnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z)))
+svbfloat16_t svminnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m)))
+svbfloat16_t svmla_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x)))
+svbfloat16_t svmla_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z)))
+svbfloat16_t svmla_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m)))
+svbfloat16_t svmla_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x)))
+svbfloat16_t svmla_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z)))
+svbfloat16_t svmla_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16)))
+svbfloat16_t svmla_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m)))
+svbfloat16_t svmls_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x)))
+svbfloat16_t svmls_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z)))
+svbfloat16_t svmls_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m)))
+svbfloat16_t svmls_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x)))
+svbfloat16_t svmls_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z)))
+svbfloat16_t svmls_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16)))
+svbfloat16_t svmls_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m)))
+svbfloat16_t svmul_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x)))
+svbfloat16_t svmul_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z)))
+svbfloat16_t svmul_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m)))
+svbfloat16_t svmul_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x)))
+svbfloat16_t svmul_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z)))
+svbfloat16_t svmul_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16)))
+svbfloat16_t svmul_lane_bf16(svbfloat16_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m)))
+svbfloat16_t svsub_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x)))
+svbfloat16_t svsub_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z)))
+svbfloat16_t svsub_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m)))
+svbfloat16_t svsub_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x)))
+svbfloat16_t svsub_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z)))
+svbfloat16_t svsub_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m)))
+svbfloat16_t svadd_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x)))
+svbfloat16_t svadd_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z)))
+svbfloat16_t svadd_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m)))
+svbfloat16_t svadd_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x)))
+svbfloat16_t svadd_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z)))
+svbfloat16_t svadd_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16)))
+svbfloat16_t svclamp(svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m)))
+svbfloat16_t svmax_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x)))
+svbfloat16_t svmax_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z)))
+svbfloat16_t svmax_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m)))
+svbfloat16_t svmax_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x)))
+svbfloat16_t svmax_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z)))
+svbfloat16_t svmax_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m)))
+svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x)))
+svbfloat16_t svmaxnm_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z)))
+svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m)))
+svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x)))
+svbfloat16_t svmaxnm_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z)))
+svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m)))
+svbfloat16_t svmin_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x)))
+svbfloat16_t svmin_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z)))
+svbfloat16_t svmin_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m)))
+svbfloat16_t svmin_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x)))
+svbfloat16_t svmin_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z)))
+svbfloat16_t svmin_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m)))
+svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x)))
+svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z)))
+svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m)))
+svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x)))
+svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z)))
+svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m)))
+svbfloat16_t svmla_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x)))
+svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z)))
+svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m)))
+svbfloat16_t svmla_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x)))
+svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z)))
+svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16)))
+svbfloat16_t svmla_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m)))
+svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x)))
+svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z)))
+svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m)))
+svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x)))
+svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z)))
+svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16)))
+svbfloat16_t svmls_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m)))
+svbfloat16_t svmul_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x)))
+svbfloat16_t svmul_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z)))
+svbfloat16_t svmul_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m)))
+svbfloat16_t svmul_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x)))
+svbfloat16_t svmul_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z)))
+svbfloat16_t svmul_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16)))
+svbfloat16_t svmul_lane(svbfloat16_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m)))
+svbfloat16_t svsub_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x)))
+svbfloat16_t svsub_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z)))
+svbfloat16_t svsub_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m)))
+svbfloat16_t svsub_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x)))
+svbfloat16_t svsub_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z)))
+svbfloat16_t svsub_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u8_x2)))
+svuint8x2_t svadd_single_u8_x2(svuint8x2_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u32_x2)))
+svuint32x2_t svadd_single_u32_x2(svuint32x2_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u64_x2)))
+svuint64x2_t svadd_single_u64_x2(svuint64x2_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u16_x2)))
+svuint16x2_t svadd_single_u16_x2(svuint16x2_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s8_x2)))
+svint8x2_t svadd_single_s8_x2(svint8x2_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s32_x2)))
+svint32x2_t svadd_single_s32_x2(svint32x2_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s64_x2)))
+svint64x2_t svadd_single_s64_x2(svint64x2_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s16_x2)))
+svint16x2_t svadd_single_s16_x2(svint16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u8_x4)))
+svuint8x4_t svadd_single_u8_x4(svuint8x4_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u32_x4)))
+svuint32x4_t svadd_single_u32_x4(svuint32x4_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u64_x4)))
+svuint64x4_t svadd_single_u64_x4(svuint64x4_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u16_x4)))
+svuint16x4_t svadd_single_u16_x4(svuint16x4_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s8_x4)))
+svint8x4_t svadd_single_s8_x4(svint8x4_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s32_x4)))
+svint32x4_t svadd_single_s32_x4(svint32x4_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s64_x4)))
+svint64x4_t svadd_single_s64_x4(svint64x4_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s16_x4)))
+svint16x4_t svadd_single_s16_x4(svint16x4_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f64_x2)))
+svfloat64x2_t svclamp_single_f64_x2(svfloat64x2_t, svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f32_x2)))
+svfloat32x2_t svclamp_single_f32_x2(svfloat32x2_t, svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f16_x2)))
+svfloat16x2_t svclamp_single_f16_x2(svfloat16x2_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s8_x2)))
+svint8x2_t svclamp_single_s8_x2(svint8x2_t, svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s32_x2)))
+svint32x2_t svclamp_single_s32_x2(svint32x2_t, svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s64_x2)))
+svint64x2_t svclamp_single_s64_x2(svint64x2_t, svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s16_x2)))
+svint16x2_t svclamp_single_s16_x2(svint16x2_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u8_x2)))
+svuint8x2_t svclamp_single_u8_x2(svuint8x2_t, svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u32_x2)))
+svuint32x2_t svclamp_single_u32_x2(svuint32x2_t, svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u64_x2)))
+svuint64x2_t svclamp_single_u64_x2(svuint64x2_t, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u16_x2)))
+svuint16x2_t svclamp_single_u16_x2(svuint16x2_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f64_x4)))
+svfloat64x4_t svclamp_single_f64_x4(svfloat64x4_t, svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f32_x4)))
+svfloat32x4_t svclamp_single_f32_x4(svfloat32x4_t, svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f16_x4)))
+svfloat16x4_t svclamp_single_f16_x4(svfloat16x4_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s8_x4)))
+svint8x4_t svclamp_single_s8_x4(svint8x4_t, svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s32_x4)))
+svint32x4_t svclamp_single_s32_x4(svint32x4_t, svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s64_x4)))
+svint64x4_t svclamp_single_s64_x4(svint64x4_t, svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s16_x4)))
+svint16x4_t svclamp_single_s16_x4(svint16x4_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u8_x4)))
+svuint8x4_t svclamp_single_u8_x4(svuint8x4_t, svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u32_x4)))
+svuint32x4_t svclamp_single_u32_x4(svuint32x4_t, svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u64_x4)))
+svuint64x4_t svclamp_single_u64_x4(svuint64x4_t, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u16_x4)))
+svuint16x4_t svclamp_single_u16_x4(svuint16x4_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x2)))
+svbfloat16_t svcvt_bf16_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_x2)))
+svfloat16_t svcvt_f16_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x2)))
+svint32x2_t svcvt_s32_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x2)))
+svuint32x2_t svcvt_u32_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x4)))
+svint32x4_t svcvt_s32_f32_x4(svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x4)))
+svuint32x4_t svcvt_u32_f32_x4(svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x2)))
+svfloat32x2_t svcvt_f32_s32_x2(svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x4)))
+svfloat32x4_t svcvt_f32_s32_x4(svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x2)))
+svfloat32x2_t svcvt_f32_u32_x2(svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x4)))
+svfloat32x4_t svcvt_f32_u32_x4(svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_bf16_f32_x2)))
+svbfloat16_t svcvtn_bf16_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_f16_f32_x2)))
+svfloat16_t svcvtn_f16_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f64_x2)))
+svfloat64x2_t svmax_single_f64_x2(svfloat64x2_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f32_x2)))
+svfloat32x2_t svmax_single_f32_x2(svfloat32x2_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f16_x2)))
+svfloat16x2_t svmax_single_f16_x2(svfloat16x2_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s8_x2)))
+svint8x2_t svmax_single_s8_x2(svint8x2_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s32_x2)))
+svint32x2_t svmax_single_s32_x2(svint32x2_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s64_x2)))
+svint64x2_t svmax_single_s64_x2(svint64x2_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s16_x2)))
+svint16x2_t svmax_single_s16_x2(svint16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u8_x2)))
+svuint8x2_t svmax_single_u8_x2(svuint8x2_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u32_x2)))
+svuint32x2_t svmax_single_u32_x2(svuint32x2_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u64_x2)))
+svuint64x2_t svmax_single_u64_x2(svuint64x2_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u16_x2)))
+svuint16x2_t svmax_single_u16_x2(svuint16x2_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f64_x4)))
+svfloat64x4_t svmax_single_f64_x4(svfloat64x4_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f32_x4)))
+svfloat32x4_t svmax_single_f32_x4(svfloat32x4_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f16_x4)))
+svfloat16x4_t svmax_single_f16_x4(svfloat16x4_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s8_x4)))
+svint8x4_t svmax_single_s8_x4(svint8x4_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s32_x4)))
+svint32x4_t svmax_single_s32_x4(svint32x4_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s64_x4)))
+svint64x4_t svmax_single_s64_x4(svint64x4_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s16_x4)))
+svint16x4_t svmax_single_s16_x4(svint16x4_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u8_x4)))
+svuint8x4_t svmax_single_u8_x4(svuint8x4_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u32_x4)))
+svuint32x4_t svmax_single_u32_x4(svuint32x4_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u64_x4)))
+svuint64x4_t svmax_single_u64_x4(svuint64x4_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u16_x4)))
+svuint16x4_t svmax_single_u16_x4(svuint16x4_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x2)))
+svfloat64x2_t svmax_f64_x2(svfloat64x2_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x2)))
+svfloat32x2_t svmax_f32_x2(svfloat32x2_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x2)))
+svfloat16x2_t svmax_f16_x2(svfloat16x2_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x2)))
+svint8x2_t svmax_s8_x2(svint8x2_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x2)))
+svint32x2_t svmax_s32_x2(svint32x2_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x2)))
+svint64x2_t svmax_s64_x2(svint64x2_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x2)))
+svint16x2_t svmax_s16_x2(svint16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x2)))
+svuint8x2_t svmax_u8_x2(svuint8x2_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x2)))
+svuint32x2_t svmax_u32_x2(svuint32x2_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x2)))
+svuint64x2_t svmax_u64_x2(svuint64x2_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x2)))
+svuint16x2_t svmax_u16_x2(svuint16x2_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x4)))
+svfloat64x4_t svmax_f64_x4(svfloat64x4_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x4)))
+svfloat32x4_t svmax_f32_x4(svfloat32x4_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x4)))
+svfloat16x4_t svmax_f16_x4(svfloat16x4_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x4)))
+svint8x4_t svmax_s8_x4(svint8x4_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x4)))
+svint32x4_t svmax_s32_x4(svint32x4_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x4)))
+svint64x4_t svmax_s64_x4(svint64x4_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x4)))
+svint16x4_t svmax_s16_x4(svint16x4_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x4)))
+svuint8x4_t svmax_u8_x4(svuint8x4_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x4)))
+svuint32x4_t svmax_u32_x4(svuint32x4_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x4)))
+svuint64x4_t svmax_u64_x4(svuint64x4_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x4)))
+svuint16x4_t svmax_u16_x4(svuint16x4_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f64_x2)))
+svfloat64x2_t svmaxnm_single_f64_x2(svfloat64x2_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f32_x2)))
+svfloat32x2_t svmaxnm_single_f32_x2(svfloat32x2_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f16_x2)))
+svfloat16x2_t svmaxnm_single_f16_x2(svfloat16x2_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f64_x4)))
+svfloat64x4_t svmaxnm_single_f64_x4(svfloat64x4_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f32_x4)))
+svfloat32x4_t svmaxnm_single_f32_x4(svfloat32x4_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f16_x4)))
+svfloat16x4_t svmaxnm_single_f16_x4(svfloat16x4_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x2)))
+svfloat64x2_t svmaxnm_f64_x2(svfloat64x2_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x2)))
+svfloat32x2_t svmaxnm_f32_x2(svfloat32x2_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x2)))
+svfloat16x2_t svmaxnm_f16_x2(svfloat16x2_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x4)))
+svfloat64x4_t svmaxnm_f64_x4(svfloat64x4_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x4)))
+svfloat32x4_t svmaxnm_f32_x4(svfloat32x4_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x4)))
+svfloat16x4_t svmaxnm_f16_x4(svfloat16x4_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f64_x2)))
+svfloat64x2_t svmin_single_f64_x2(svfloat64x2_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f32_x2)))
+svfloat32x2_t svmin_single_f32_x2(svfloat32x2_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f16_x2)))
+svfloat16x2_t svmin_single_f16_x2(svfloat16x2_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s8_x2)))
+svint8x2_t svmin_single_s8_x2(svint8x2_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s32_x2)))
+svint32x2_t svmin_single_s32_x2(svint32x2_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s64_x2)))
+svint64x2_t svmin_single_s64_x2(svint64x2_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s16_x2)))
+svint16x2_t svmin_single_s16_x2(svint16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u8_x2)))
+svuint8x2_t svmin_single_u8_x2(svuint8x2_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u32_x2)))
+svuint32x2_t svmin_single_u32_x2(svuint32x2_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u64_x2)))
+svuint64x2_t svmin_single_u64_x2(svuint64x2_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u16_x2)))
+svuint16x2_t svmin_single_u16_x2(svuint16x2_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f64_x4)))
+svfloat64x4_t svmin_single_f64_x4(svfloat64x4_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f32_x4)))
+svfloat32x4_t svmin_single_f32_x4(svfloat32x4_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f16_x4)))
+svfloat16x4_t svmin_single_f16_x4(svfloat16x4_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s8_x4)))
+svint8x4_t svmin_single_s8_x4(svint8x4_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s32_x4)))
+svint32x4_t svmin_single_s32_x4(svint32x4_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s64_x4)))
+svint64x4_t svmin_single_s64_x4(svint64x4_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s16_x4)))
+svint16x4_t svmin_single_s16_x4(svint16x4_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u8_x4)))
+svuint8x4_t svmin_single_u8_x4(svuint8x4_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u32_x4)))
+svuint32x4_t svmin_single_u32_x4(svuint32x4_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u64_x4)))
+svuint64x4_t svmin_single_u64_x4(svuint64x4_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u16_x4)))
+svuint16x4_t svmin_single_u16_x4(svuint16x4_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x2)))
+svfloat64x2_t svmin_f64_x2(svfloat64x2_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x2)))
+svfloat32x2_t svmin_f32_x2(svfloat32x2_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x2)))
+svfloat16x2_t svmin_f16_x2(svfloat16x2_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x2)))
+svint8x2_t svmin_s8_x2(svint8x2_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x2)))
+svint32x2_t svmin_s32_x2(svint32x2_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x2)))
+svint64x2_t svmin_s64_x2(svint64x2_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x2)))
+svint16x2_t svmin_s16_x2(svint16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x2)))
+svuint8x2_t svmin_u8_x2(svuint8x2_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x2)))
+svuint32x2_t svmin_u32_x2(svuint32x2_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x2)))
+svuint64x2_t svmin_u64_x2(svuint64x2_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x2)))
+svuint16x2_t svmin_u16_x2(svuint16x2_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x4)))
+svfloat64x4_t svmin_f64_x4(svfloat64x4_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x4)))
+svfloat32x4_t svmin_f32_x4(svfloat32x4_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x4)))
+svfloat16x4_t svmin_f16_x4(svfloat16x4_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x4)))
+svint8x4_t svmin_s8_x4(svint8x4_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x4)))
+svint32x4_t svmin_s32_x4(svint32x4_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x4)))
+svint64x4_t svmin_s64_x4(svint64x4_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x4)))
+svint16x4_t svmin_s16_x4(svint16x4_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x4)))
+svuint8x4_t svmin_u8_x4(svuint8x4_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x4)))
+svuint32x4_t svmin_u32_x4(svuint32x4_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x4)))
+svuint64x4_t svmin_u64_x4(svuint64x4_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x4)))
+svuint16x4_t svmin_u16_x4(svuint16x4_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f64_x2)))
+svfloat64x2_t svminnm_single_f64_x2(svfloat64x2_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f32_x2)))
+svfloat32x2_t svminnm_single_f32_x2(svfloat32x2_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f16_x2)))
+svfloat16x2_t svminnm_single_f16_x2(svfloat16x2_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f64_x4)))
+svfloat64x4_t svminnm_single_f64_x4(svfloat64x4_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f32_x4)))
+svfloat32x4_t svminnm_single_f32_x4(svfloat32x4_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f16_x4)))
+svfloat16x4_t svminnm_single_f16_x4(svfloat16x4_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x2)))
+svfloat64x2_t svminnm_f64_x2(svfloat64x2_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x2)))
+svfloat32x2_t svminnm_f32_x2(svfloat32x2_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x2)))
+svfloat16x2_t svminnm_f16_x2(svfloat16x2_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x4)))
+svfloat64x4_t svminnm_f64_x4(svfloat64x4_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x4)))
+svfloat32x4_t svminnm_f32_x4(svfloat32x4_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x4)))
+svfloat16x4_t svminnm_f16_x4(svfloat16x4_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s16_s32_x2)))
+svint16_t svqcvt_s16_s32_x2(svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s16_s64_x4)))
+svint16_t svqcvt_s16_s64_x4(svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s8_s32_x4)))
+svint8_t svqcvt_s8_s32_x4(svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_s32_x2)))
+svuint16_t svqcvt_u16_s32_x2(svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_u32_x2)))
+svuint16_t svqcvt_u16_u32_x2(svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_s64_x4)))
+svuint16_t svqcvt_u16_s64_x4(svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_u64_x4)))
+svuint16_t svqcvt_u16_u64_x4(svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u8_s32_x4)))
+svuint8_t svqcvt_u8_s32_x4(svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u8_u32_x4)))
+svuint8_t svqcvt_u8_u32_x4(svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s64_x4)))
+svint16_t svqcvtn_s16_s64_x4(svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s8_s32_x4)))
+svint8_t svqcvtn_s8_s32_x4(svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s64_x4)))
+svuint16_t svqcvtn_u16_s64_x4(svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u64_x4)))
+svuint16_t svqcvtn_u16_u64_x4(svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u8_s32_x4)))
+svuint8_t svqcvtn_u8_s32_x4(svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u8_u32_x4)))
+svuint8_t svqcvtn_u8_u32_x4(svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s8_x2)))
+svint8x2_t svqdmulh_single_s8_x2(svint8x2_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s32_x2)))
+svint32x2_t svqdmulh_single_s32_x2(svint32x2_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s64_x2)))
+svint64x2_t svqdmulh_single_s64_x2(svint64x2_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s16_x2)))
+svint16x2_t svqdmulh_single_s16_x2(svint16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s8_x4)))
+svint8x4_t svqdmulh_single_s8_x4(svint8x4_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s32_x4)))
+svint32x4_t svqdmulh_single_s32_x4(svint32x4_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s64_x4)))
+svint64x4_t svqdmulh_single_s64_x4(svint64x4_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s16_x4)))
+svint16x4_t svqdmulh_single_s16_x4(svint16x4_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8_x2)))
+svint8x2_t svqdmulh_s8_x2(svint8x2_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32_x2)))
+svint32x2_t svqdmulh_s32_x2(svint32x2_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64_x2)))
+svint64x2_t svqdmulh_s64_x2(svint64x2_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16_x2)))
+svint16x2_t svqdmulh_s16_x2(svint16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8_x4)))
+svint8x4_t svqdmulh_s8_x4(svint8x4_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32_x4)))
+svint32x4_t svqdmulh_s32_x4(svint32x4_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64_x4)))
+svint64x4_t svqdmulh_s64_x4(svint64x4_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16_x4)))
+svint16x4_t svqdmulh_s16_x4(svint16x4_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s16_s32_x2)))
+svint16_t svqrshr_n_s16_s32_x2(svint32x2_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u16_u32_x2)))
+svuint16_t svqrshr_n_u16_u32_x2(svuint32x2_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s8_s32_x4)))
+svint8_t svqrshr_n_s8_s32_x4(svint32x4_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s16_s64_x4)))
+svint16_t svqrshr_n_s16_s64_x4(svint64x4_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u8_u32_x4)))
+svuint8_t svqrshr_n_u8_u32_x4(svuint32x4_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u16_u64_x4)))
+svuint16_t svqrshr_n_u16_u64_x4(svuint64x4_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s8_s32_x4)))
+svint8_t svqrshrn_n_s8_s32_x4(svint32x4_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s64_x4)))
+svint16_t svqrshrn_n_s16_s64_x4(svint64x4_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u8_u32_x4)))
+svuint8_t svqrshrn_n_u8_u32_x4(svuint32x4_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u64_x4)))
+svuint16_t svqrshrn_n_u16_u64_x4(svuint64x4_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u16_s32_x2)))
+svuint16_t svqrshru_n_u16_s32_x2(svint32x2_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u8_s32_x4)))
+svuint8_t svqrshru_n_u8_s32_x4(svint32x4_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u16_s64_x4)))
+svuint16_t svqrshru_n_u16_s64_x4(svint64x4_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u8_s32_x4)))
+svuint8_t svqrshrun_n_u8_s32_x4(svint32x4_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s64_x4)))
+svuint16_t svqrshrun_n_u16_s64_x4(svint64x4_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b)))
+svbool_t svreinterpret_b(svcount_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c)))
+svcount_t svreinterpret_c(svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x2)))
+svfloat32x2_t svrinta_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x4)))
+svfloat32x4_t svrinta_f32_x4(svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x2)))
+svfloat32x2_t svrintm_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x4)))
+svfloat32x4_t svrintm_f32_x4(svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x2)))
+svfloat32x2_t svrintn_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x4)))
+svfloat32x4_t svrintn_f32_x4(svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x2)))
+svfloat32x2_t svrintp_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x4)))
+svfloat32x4_t svrintp_f32_x4(svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s8_x2)))
+svint8x2_t svrshl_single_s8_x2(svint8x2_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s32_x2)))
+svint32x2_t svrshl_single_s32_x2(svint32x2_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s64_x2)))
+svint64x2_t svrshl_single_s64_x2(svint64x2_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s16_x2)))
+svint16x2_t svrshl_single_s16_x2(svint16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u8_x2)))
+svuint8x2_t svrshl_single_u8_x2(svuint8x2_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u32_x2)))
+svuint32x2_t svrshl_single_u32_x2(svuint32x2_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u64_x2)))
+svuint64x2_t svrshl_single_u64_x2(svuint64x2_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u16_x2)))
+svuint16x2_t svrshl_single_u16_x2(svuint16x2_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s8_x4)))
+svint8x4_t svrshl_single_s8_x4(svint8x4_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s32_x4)))
+svint32x4_t svrshl_single_s32_x4(svint32x4_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s64_x4)))
+svint64x4_t svrshl_single_s64_x4(svint64x4_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s16_x4)))
+svint16x4_t svrshl_single_s16_x4(svint16x4_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u8_x4)))
+svuint8x4_t svrshl_single_u8_x4(svuint8x4_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u32_x4)))
+svuint32x4_t svrshl_single_u32_x4(svuint32x4_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u64_x4)))
+svuint64x4_t svrshl_single_u64_x4(svuint64x4_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u16_x4)))
+svuint16x4_t svrshl_single_u16_x4(svuint16x4_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x2)))
+svint8x2_t svrshl_s8_x2(svint8x2_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x2)))
+svint32x2_t svrshl_s32_x2(svint32x2_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x2)))
+svint64x2_t svrshl_s64_x2(svint64x2_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x2)))
+svint16x2_t svrshl_s16_x2(svint16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x2)))
+svuint8x2_t svrshl_u8_x2(svuint8x2_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x2)))
+svuint32x2_t svrshl_u32_x2(svuint32x2_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x2)))
+svuint64x2_t svrshl_u64_x2(svuint64x2_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x2)))
+svuint16x2_t svrshl_u16_x2(svuint16x2_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x4)))
+svint8x4_t svrshl_s8_x4(svint8x4_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x4)))
+svint32x4_t svrshl_s32_x4(svint32x4_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x4)))
+svint64x4_t svrshl_s64_x4(svint64x4_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x4)))
+svint16x4_t svrshl_s16_x4(svint16x4_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x4)))
+svuint8x4_t svrshl_u8_x4(svuint8x4_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x4)))
+svuint32x4_t svrshl_u32_x4(svuint32x4_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x4)))
+svuint64x4_t svrshl_u64_x4(svuint64x4_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x4)))
+svuint16x4_t svrshl_u16_x4(svuint16x4_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x2)))
+svuint8x2_t svsel_u8_x2(svcount_t, svuint8x2_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32_x2)))
+svuint32x2_t svsel_u32_x2(svcount_t, svuint32x2_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64_x2)))
+svuint64x2_t svsel_u64_x2(svcount_t, svuint64x2_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16_x2)))
+svuint16x2_t svsel_u16_x2(svcount_t, svuint16x2_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16_x2)))
+svbfloat16x2_t svsel_bf16_x2(svcount_t, svbfloat16x2_t, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8_x2)))
+svint8x2_t svsel_s8_x2(svcount_t, svint8x2_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64_x2)))
+svfloat64x2_t svsel_f64_x2(svcount_t, svfloat64x2_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32_x2)))
+svfloat32x2_t svsel_f32_x2(svcount_t, svfloat32x2_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16_x2)))
+svfloat16x2_t svsel_f16_x2(svcount_t, svfloat16x2_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x2)))
+svint32x2_t svsel_s32_x2(svcount_t, svint32x2_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x2)))
+svint64x2_t svsel_s64_x2(svcount_t, svint64x2_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x2)))
+svint16x2_t svsel_s16_x2(svcount_t, svint16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x4)))
+svuint8x4_t svsel_u8_x4(svcount_t, svuint8x4_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32_x4)))
+svuint32x4_t svsel_u32_x4(svcount_t, svuint32x4_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64_x4)))
+svuint64x4_t svsel_u64_x4(svcount_t, svuint64x4_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16_x4)))
+svuint16x4_t svsel_u16_x4(svcount_t, svuint16x4_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16_x4)))
+svbfloat16x4_t svsel_bf16_x4(svcount_t, svbfloat16x4_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8_x4)))
+svint8x4_t svsel_s8_x4(svcount_t, svint8x4_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64_x4)))
+svfloat64x4_t svsel_f64_x4(svcount_t, svfloat64x4_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32_x4)))
+svfloat32x4_t svsel_f32_x4(svcount_t, svfloat32x4_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16_x4)))
+svfloat16x4_t svsel_f16_x4(svcount_t, svfloat16x4_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x4)))
+svint32x4_t svsel_s32_x4(svcount_t, svint32x4_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x4)))
+svint64x4_t svsel_s64_x4(svcount_t, svint64x4_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x4)))
+svint16x4_t svsel_s16_x4(svcount_t, svint16x4_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x2)))
+svint32x2_t svunpk_s32_s16_x2(svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s64_s32_x2)))
+svint64x2_t svunpk_s64_s32_x2(svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s16_s8_x2)))
+svint16x2_t svunpk_s16_s8_x2(svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u32_u16_x2)))
+svuint32x2_t svunpk_u32_u16_x2(svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u64_u32_x2)))
+svuint64x2_t svunpk_u64_u32_x2(svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u16_u8_x2)))
+svuint16x2_t svunpk_u16_u8_x2(svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x4)))
+svint32x4_t svunpk_s32_s16_x4(svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s64_s32_x4)))
+svint64x4_t svunpk_s64_s32_x4(svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s16_s8_x4)))
+svint16x4_t svunpk_s16_s8_x4(svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u32_u16_x4)))
+svuint32x4_t svunpk_u32_u16_x4(svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u64_u32_x4)))
+svuint64x4_t svunpk_u64_u32_x4(svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u16_u8_x4)))
+svuint16x4_t svunpk_u16_u8_x4(svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x2)))
+svuint8x2_t svuzp_u8_x2(svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u32_x2)))
+svuint32x2_t svuzp_u32_x2(svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u64_x2)))
+svuint64x2_t svuzp_u64_x2(svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u16_x2)))
+svuint16x2_t svuzp_u16_x2(svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_bf16_x2)))
+svbfloat16x2_t svuzp_bf16_x2(svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s8_x2)))
+svint8x2_t svuzp_s8_x2(svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f64_x2)))
+svfloat64x2_t svuzp_f64_x2(svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f32_x2)))
+svfloat32x2_t svuzp_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f16_x2)))
+svfloat16x2_t svuzp_f16_x2(svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x2)))
+svint32x2_t svuzp_s32_x2(svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x2)))
+svint64x2_t svuzp_s64_x2(svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x2)))
+svint16x2_t svuzp_s16_x2(svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x4)))
+svuint8x4_t svuzp_u8_x4(svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u32_x4)))
+svuint32x4_t svuzp_u32_x4(svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u64_x4)))
+svuint64x4_t svuzp_u64_x4(svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u16_x4)))
+svuint16x4_t svuzp_u16_x4(svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_bf16_x4)))
+svbfloat16x4_t svuzp_bf16_x4(svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s8_x4)))
+svint8x4_t svuzp_s8_x4(svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f64_x4)))
+svfloat64x4_t svuzp_f64_x4(svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f32_x4)))
+svfloat32x4_t svuzp_f32_x4(svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f16_x4)))
+svfloat16x4_t svuzp_f16_x4(svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x4)))
+svint32x4_t svuzp_s32_x4(svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x4)))
+svint64x4_t svuzp_s64_x4(svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x4)))
+svint16x4_t svuzp_s16_x4(svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x2)))
+svuint8x2_t svuzpq_u8_x2(svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u32_x2)))
+svuint32x2_t svuzpq_u32_x2(svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u64_x2)))
+svuint64x2_t svuzpq_u64_x2(svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u16_x2)))
+svuint16x2_t svuzpq_u16_x2(svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_bf16_x2)))
+svbfloat16x2_t svuzpq_bf16_x2(svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s8_x2)))
+svint8x2_t svuzpq_s8_x2(svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f64_x2)))
+svfloat64x2_t svuzpq_f64_x2(svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f32_x2)))
+svfloat32x2_t svuzpq_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f16_x2)))
+svfloat16x2_t svuzpq_f16_x2(svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x2)))
+svint32x2_t svuzpq_s32_x2(svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x2)))
+svint64x2_t svuzpq_s64_x2(svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x2)))
+svint16x2_t svuzpq_s16_x2(svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x4)))
+svuint8x4_t svuzpq_u8_x4(svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u32_x4)))
+svuint32x4_t svuzpq_u32_x4(svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u64_x4)))
+svuint64x4_t svuzpq_u64_x4(svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u16_x4)))
+svuint16x4_t svuzpq_u16_x4(svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_bf16_x4)))
+svbfloat16x4_t svuzpq_bf16_x4(svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s8_x4)))
+svint8x4_t svuzpq_s8_x4(svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f64_x4)))
+svfloat64x4_t svuzpq_f64_x4(svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f32_x4)))
+svfloat32x4_t svuzpq_f32_x4(svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f16_x4)))
+svfloat16x4_t svuzpq_f16_x4(svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x4)))
+svint32x4_t svuzpq_s32_x4(svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x4)))
+svint64x4_t svuzpq_s64_x4(svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x4)))
+svint16x4_t svuzpq_s16_x4(svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x2)))
+svuint8x2_t svzip_u8_x2(svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u32_x2)))
+svuint32x2_t svzip_u32_x2(svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u64_x2)))
+svuint64x2_t svzip_u64_x2(svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u16_x2)))
+svuint16x2_t svzip_u16_x2(svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_bf16_x2)))
+svbfloat16x2_t svzip_bf16_x2(svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s8_x2)))
+svint8x2_t svzip_s8_x2(svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f64_x2)))
+svfloat64x2_t svzip_f64_x2(svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f32_x2)))
+svfloat32x2_t svzip_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f16_x2)))
+svfloat16x2_t svzip_f16_x2(svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x2)))
+svint32x2_t svzip_s32_x2(svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x2)))
+svint64x2_t svzip_s64_x2(svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x2)))
+svint16x2_t svzip_s16_x2(svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x4)))
+svuint8x4_t svzip_u8_x4(svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u32_x4)))
+svuint32x4_t svzip_u32_x4(svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u64_x4)))
+svuint64x4_t svzip_u64_x4(svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u16_x4)))
+svuint16x4_t svzip_u16_x4(svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_bf16_x4)))
+svbfloat16x4_t svzip_bf16_x4(svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s8_x4)))
+svint8x4_t svzip_s8_x4(svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f64_x4)))
+svfloat64x4_t svzip_f64_x4(svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f32_x4)))
+svfloat32x4_t svzip_f32_x4(svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f16_x4)))
+svfloat16x4_t svzip_f16_x4(svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x4)))
+svint32x4_t svzip_s32_x4(svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x4)))
+svint64x4_t svzip_s64_x4(svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x4)))
+svint16x4_t svzip_s16_x4(svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x2)))
+svuint8x2_t svzipq_u8_x2(svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u32_x2)))
+svuint32x2_t svzipq_u32_x2(svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u64_x2)))
+svuint64x2_t svzipq_u64_x2(svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u16_x2)))
+svuint16x2_t svzipq_u16_x2(svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_bf16_x2)))
+svbfloat16x2_t svzipq_bf16_x2(svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s8_x2)))
+svint8x2_t svzipq_s8_x2(svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f64_x2)))
+svfloat64x2_t svzipq_f64_x2(svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f32_x2)))
+svfloat32x2_t svzipq_f32_x2(svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f16_x2)))
+svfloat16x2_t svzipq_f16_x2(svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x2)))
+svint32x2_t svzipq_s32_x2(svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x2)))
+svint64x2_t svzipq_s64_x2(svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x2)))
+svint16x2_t svzipq_s16_x2(svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x4)))
+svuint8x4_t svzipq_u8_x4(svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u32_x4)))
+svuint32x4_t svzipq_u32_x4(svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u64_x4)))
+svuint64x4_t svzipq_u64_x4(svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u16_x4)))
+svuint16x4_t svzipq_u16_x4(svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_bf16_x4)))
+svbfloat16x4_t svzipq_bf16_x4(svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s8_x4)))
+svint8x4_t svzipq_s8_x4(svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f64_x4)))
+svfloat64x4_t svzipq_f64_x4(svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f32_x4)))
+svfloat32x4_t svzipq_f32_x4(svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f16_x4)))
+svfloat16x4_t svzipq_f16_x4(svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x4)))
+svint32x4_t svzipq_s32_x4(svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x4)))
+svint64x4_t svzipq_s64_x4(svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x4)))
+svint16x4_t svzipq_s16_x4(svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u8_x2)))
+svuint8x2_t svadd(svuint8x2_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u32_x2)))
+svuint32x2_t svadd(svuint32x2_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u64_x2)))
+svuint64x2_t svadd(svuint64x2_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u16_x2)))
+svuint16x2_t svadd(svuint16x2_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s8_x2)))
+svint8x2_t svadd(svint8x2_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s32_x2)))
+svint32x2_t svadd(svint32x2_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s64_x2)))
+svint64x2_t svadd(svint64x2_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s16_x2)))
+svint16x2_t svadd(svint16x2_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u8_x4)))
+svuint8x4_t svadd(svuint8x4_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u32_x4)))
+svuint32x4_t svadd(svuint32x4_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u64_x4)))
+svuint64x4_t svadd(svuint64x4_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u16_x4)))
+svuint16x4_t svadd(svuint16x4_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s8_x4)))
+svint8x4_t svadd(svint8x4_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s32_x4)))
+svint32x4_t svadd(svint32x4_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s64_x4)))
+svint64x4_t svadd(svint64x4_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s16_x4)))
+svint16x4_t svadd(svint16x4_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f64_x2)))
+svfloat64x2_t svclamp(svfloat64x2_t, svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f32_x2)))
+svfloat32x2_t svclamp(svfloat32x2_t, svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f16_x2)))
+svfloat16x2_t svclamp(svfloat16x2_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s8_x2)))
+svint8x2_t svclamp(svint8x2_t, svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s32_x2)))
+svint32x2_t svclamp(svint32x2_t, svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s64_x2)))
+svint64x2_t svclamp(svint64x2_t, svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s16_x2)))
+svint16x2_t svclamp(svint16x2_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u8_x2)))
+svuint8x2_t svclamp(svuint8x2_t, svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u32_x2)))
+svuint32x2_t svclamp(svuint32x2_t, svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u64_x2)))
+svuint64x2_t svclamp(svuint64x2_t, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u16_x2)))
+svuint16x2_t svclamp(svuint16x2_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f64_x4)))
+svfloat64x4_t svclamp(svfloat64x4_t, svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f32_x4)))
+svfloat32x4_t svclamp(svfloat32x4_t, svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f16_x4)))
+svfloat16x4_t svclamp(svfloat16x4_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s8_x4)))
+svint8x4_t svclamp(svint8x4_t, svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s32_x4)))
+svint32x4_t svclamp(svint32x4_t, svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s64_x4)))
+svint64x4_t svclamp(svint64x4_t, svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s16_x4)))
+svint16x4_t svclamp(svint16x4_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u8_x4)))
+svuint8x4_t svclamp(svuint8x4_t, svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u32_x4)))
+svuint32x4_t svclamp(svuint32x4_t, svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u64_x4)))
+svuint64x4_t svclamp(svuint64x4_t, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u16_x4)))
+svuint16x4_t svclamp(svuint16x4_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x2)))
+svbfloat16_t svcvt_bf16(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_x2)))
+svfloat16_t svcvt_f16(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x2)))
+svint32x2_t svcvt_s32(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x2)))
+svuint32x2_t svcvt_u32(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x4)))
+svint32x4_t svcvt_s32(svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x4)))
+svuint32x4_t svcvt_u32(svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x2)))
+svfloat32x2_t svcvt_f32(svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x4)))
+svfloat32x4_t svcvt_f32(svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x2)))
+svfloat32x2_t svcvt_f32(svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x4)))
+svfloat32x4_t svcvt_f32(svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_bf16_f32_x2)))
+svbfloat16_t svcvtn_bf16(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_f16_f32_x2)))
+svfloat16_t svcvtn_f16(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f64_x2)))
+svfloat64x2_t svmax(svfloat64x2_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f32_x2)))
+svfloat32x2_t svmax(svfloat32x2_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f16_x2)))
+svfloat16x2_t svmax(svfloat16x2_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s8_x2)))
+svint8x2_t svmax(svint8x2_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s32_x2)))
+svint32x2_t svmax(svint32x2_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s64_x2)))
+svint64x2_t svmax(svint64x2_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s16_x2)))
+svint16x2_t svmax(svint16x2_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u8_x2)))
+svuint8x2_t svmax(svuint8x2_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u32_x2)))
+svuint32x2_t svmax(svuint32x2_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u64_x2)))
+svuint64x2_t svmax(svuint64x2_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u16_x2)))
+svuint16x2_t svmax(svuint16x2_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f64_x4)))
+svfloat64x4_t svmax(svfloat64x4_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f32_x4)))
+svfloat32x4_t svmax(svfloat32x4_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f16_x4)))
+svfloat16x4_t svmax(svfloat16x4_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s8_x4)))
+svint8x4_t svmax(svint8x4_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s32_x4)))
+svint32x4_t svmax(svint32x4_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s64_x4)))
+svint64x4_t svmax(svint64x4_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s16_x4)))
+svint16x4_t svmax(svint16x4_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u8_x4)))
+svuint8x4_t svmax(svuint8x4_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u32_x4)))
+svuint32x4_t svmax(svuint32x4_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u64_x4)))
+svuint64x4_t svmax(svuint64x4_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u16_x4)))
+svuint16x4_t svmax(svuint16x4_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x2)))
+svfloat64x2_t svmax(svfloat64x2_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x2)))
+svfloat32x2_t svmax(svfloat32x2_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x2)))
+svfloat16x2_t svmax(svfloat16x2_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x2)))
+svint8x2_t svmax(svint8x2_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x2)))
+svint32x2_t svmax(svint32x2_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x2)))
+svint64x2_t svmax(svint64x2_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x2)))
+svint16x2_t svmax(svint16x2_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x2)))
+svuint8x2_t svmax(svuint8x2_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x2)))
+svuint32x2_t svmax(svuint32x2_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x2)))
+svuint64x2_t svmax(svuint64x2_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x2)))
+svuint16x2_t svmax(svuint16x2_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x4)))
+svfloat64x4_t svmax(svfloat64x4_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x4)))
+svfloat32x4_t svmax(svfloat32x4_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x4)))
+svfloat16x4_t svmax(svfloat16x4_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x4)))
+svint8x4_t svmax(svint8x4_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x4)))
+svint32x4_t svmax(svint32x4_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x4)))
+svint64x4_t svmax(svint64x4_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x4)))
+svint16x4_t svmax(svint16x4_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x4)))
+svuint8x4_t svmax(svuint8x4_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x4)))
+svuint32x4_t svmax(svuint32x4_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x4)))
+svuint64x4_t svmax(svuint64x4_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x4)))
+svuint16x4_t svmax(svuint16x4_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f64_x2)))
+svfloat64x2_t svmaxnm(svfloat64x2_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f32_x2)))
+svfloat32x2_t svmaxnm(svfloat32x2_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f16_x2)))
+svfloat16x2_t svmaxnm(svfloat16x2_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f64_x4)))
+svfloat64x4_t svmaxnm(svfloat64x4_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f32_x4)))
+svfloat32x4_t svmaxnm(svfloat32x4_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f16_x4)))
+svfloat16x4_t svmaxnm(svfloat16x4_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x2)))
+svfloat64x2_t svmaxnm(svfloat64x2_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x2)))
+svfloat32x2_t svmaxnm(svfloat32x2_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x2)))
+svfloat16x2_t svmaxnm(svfloat16x2_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x4)))
+svfloat64x4_t svmaxnm(svfloat64x4_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x4)))
+svfloat32x4_t svmaxnm(svfloat32x4_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x4)))
+svfloat16x4_t svmaxnm(svfloat16x4_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f64_x2)))
+svfloat64x2_t svmin(svfloat64x2_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f32_x2)))
+svfloat32x2_t svmin(svfloat32x2_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f16_x2)))
+svfloat16x2_t svmin(svfloat16x2_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s8_x2)))
+svint8x2_t svmin(svint8x2_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s32_x2)))
+svint32x2_t svmin(svint32x2_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s64_x2)))
+svint64x2_t svmin(svint64x2_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s16_x2)))
+svint16x2_t svmin(svint16x2_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u8_x2)))
+svuint8x2_t svmin(svuint8x2_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u32_x2)))
+svuint32x2_t svmin(svuint32x2_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u64_x2)))
+svuint64x2_t svmin(svuint64x2_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u16_x2)))
+svuint16x2_t svmin(svuint16x2_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f64_x4)))
+svfloat64x4_t svmin(svfloat64x4_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f32_x4)))
+svfloat32x4_t svmin(svfloat32x4_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f16_x4)))
+svfloat16x4_t svmin(svfloat16x4_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s8_x4)))
+svint8x4_t svmin(svint8x4_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s32_x4)))
+svint32x4_t svmin(svint32x4_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s64_x4)))
+svint64x4_t svmin(svint64x4_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s16_x4)))
+svint16x4_t svmin(svint16x4_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u8_x4)))
+svuint8x4_t svmin(svuint8x4_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u32_x4)))
+svuint32x4_t svmin(svuint32x4_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u64_x4)))
+svuint64x4_t svmin(svuint64x4_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u16_x4)))
+svuint16x4_t svmin(svuint16x4_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x2)))
+svfloat64x2_t svmin(svfloat64x2_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x2)))
+svfloat32x2_t svmin(svfloat32x2_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x2)))
+svfloat16x2_t svmin(svfloat16x2_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x2)))
+svint8x2_t svmin(svint8x2_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x2)))
+svint32x2_t svmin(svint32x2_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x2)))
+svint64x2_t svmin(svint64x2_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x2)))
+svint16x2_t svmin(svint16x2_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x2)))
+svuint8x2_t svmin(svuint8x2_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x2)))
+svuint32x2_t svmin(svuint32x2_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x2)))
+svuint64x2_t svmin(svuint64x2_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x2)))
+svuint16x2_t svmin(svuint16x2_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x4)))
+svfloat64x4_t svmin(svfloat64x4_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x4)))
+svfloat32x4_t svmin(svfloat32x4_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x4)))
+svfloat16x4_t svmin(svfloat16x4_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x4)))
+svint8x4_t svmin(svint8x4_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x4)))
+svint32x4_t svmin(svint32x4_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x4)))
+svint64x4_t svmin(svint64x4_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x4)))
+svint16x4_t svmin(svint16x4_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x4)))
+svuint8x4_t svmin(svuint8x4_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x4)))
+svuint32x4_t svmin(svuint32x4_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x4)))
+svuint64x4_t svmin(svuint64x4_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x4)))
+svuint16x4_t svmin(svuint16x4_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f64_x2)))
+svfloat64x2_t svminnm(svfloat64x2_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f32_x2)))
+svfloat32x2_t svminnm(svfloat32x2_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f16_x2)))
+svfloat16x2_t svminnm(svfloat16x2_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f64_x4)))
+svfloat64x4_t svminnm(svfloat64x4_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f32_x4)))
+svfloat32x4_t svminnm(svfloat32x4_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f16_x4)))
+svfloat16x4_t svminnm(svfloat16x4_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x2)))
+svfloat64x2_t svminnm(svfloat64x2_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x2)))
+svfloat32x2_t svminnm(svfloat32x2_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x2)))
+svfloat16x2_t svminnm(svfloat16x2_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x4)))
+svfloat64x4_t svminnm(svfloat64x4_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x4)))
+svfloat32x4_t svminnm(svfloat32x4_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x4)))
+svfloat16x4_t svminnm(svfloat16x4_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s16_s32_x2)))
+svint16_t svqcvt_s16(svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s16_s64_x4)))
+svint16_t svqcvt_s16(svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s8_s32_x4)))
+svint8_t svqcvt_s8(svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_s32_x2)))
+svuint16_t svqcvt_u16(svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_u32_x2)))
+svuint16_t svqcvt_u16(svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_s64_x4)))
+svuint16_t svqcvt_u16(svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_u64_x4)))
+svuint16_t svqcvt_u16(svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u8_s32_x4)))
+svuint8_t svqcvt_u8(svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u8_u32_x4)))
+svuint8_t svqcvt_u8(svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s64_x4)))
+svint16_t svqcvtn_s16(svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s8_s32_x4)))
+svint8_t svqcvtn_s8(svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s64_x4)))
+svuint16_t svqcvtn_u16(svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u64_x4)))
+svuint16_t svqcvtn_u16(svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u8_s32_x4)))
+svuint8_t svqcvtn_u8(svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u8_u32_x4)))
+svuint8_t svqcvtn_u8(svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s8_x2)))
+svint8x2_t svqdmulh(svint8x2_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s32_x2)))
+svint32x2_t svqdmulh(svint32x2_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s64_x2)))
+svint64x2_t svqdmulh(svint64x2_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s16_x2)))
+svint16x2_t svqdmulh(svint16x2_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s8_x4)))
+svint8x4_t svqdmulh(svint8x4_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s32_x4)))
+svint32x4_t svqdmulh(svint32x4_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s64_x4)))
+svint64x4_t svqdmulh(svint64x4_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s16_x4)))
+svint16x4_t svqdmulh(svint16x4_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8_x2)))
+svint8x2_t svqdmulh(svint8x2_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32_x2)))
+svint32x2_t svqdmulh(svint32x2_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64_x2)))
+svint64x2_t svqdmulh(svint64x2_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16_x2)))
+svint16x2_t svqdmulh(svint16x2_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8_x4)))
+svint8x4_t svqdmulh(svint8x4_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32_x4)))
+svint32x4_t svqdmulh(svint32x4_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64_x4)))
+svint64x4_t svqdmulh(svint64x4_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16_x4)))
+svint16x4_t svqdmulh(svint16x4_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s16_s32_x2)))
+svint16_t svqrshr_s16(svint32x2_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u16_u32_x2)))
+svuint16_t svqrshr_u16(svuint32x2_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s8_s32_x4)))
+svint8_t svqrshr_s8(svint32x4_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s16_s64_x4)))
+svint16_t svqrshr_s16(svint64x4_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u8_u32_x4)))
+svuint8_t svqrshr_u8(svuint32x4_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u16_u64_x4)))
+svuint16_t svqrshr_u16(svuint64x4_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s8_s32_x4)))
+svint8_t svqrshrn_s8(svint32x4_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s64_x4)))
+svint16_t svqrshrn_s16(svint64x4_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u8_u32_x4)))
+svuint8_t svqrshrn_u8(svuint32x4_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u64_x4)))
+svuint16_t svqrshrn_u16(svuint64x4_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u16_s32_x2)))
+svuint16_t svqrshru_u16(svint32x2_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u8_s32_x4)))
+svuint8_t svqrshru_u8(svint32x4_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u16_s64_x4)))
+svuint16_t svqrshru_u16(svint64x4_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u8_s32_x4)))
+svuint8_t svqrshrun_u8(svint32x4_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s64_x4)))
+svuint16_t svqrshrun_u16(svint64x4_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b)))
+svbool_t svreinterpret(svcount_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c)))
+svcount_t svreinterpret(svbool_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x2)))
+svfloat32x2_t svrinta(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x4)))
+svfloat32x4_t svrinta(svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x2)))
+svfloat32x2_t svrintm(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x4)))
+svfloat32x4_t svrintm(svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x2)))
+svfloat32x2_t svrintn(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x4)))
+svfloat32x4_t svrintn(svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x2)))
+svfloat32x2_t svrintp(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x4)))
+svfloat32x4_t svrintp(svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s8_x2)))
+svint8x2_t svrshl(svint8x2_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s32_x2)))
+svint32x2_t svrshl(svint32x2_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s64_x2)))
+svint64x2_t svrshl(svint64x2_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s16_x2)))
+svint16x2_t svrshl(svint16x2_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u8_x2)))
+svuint8x2_t svrshl(svuint8x2_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u32_x2)))
+svuint32x2_t svrshl(svuint32x2_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u64_x2)))
+svuint64x2_t svrshl(svuint64x2_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u16_x2)))
+svuint16x2_t svrshl(svuint16x2_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s8_x4)))
+svint8x4_t svrshl(svint8x4_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s32_x4)))
+svint32x4_t svrshl(svint32x4_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s64_x4)))
+svint64x4_t svrshl(svint64x4_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s16_x4)))
+svint16x4_t svrshl(svint16x4_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u8_x4)))
+svuint8x4_t svrshl(svuint8x4_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u32_x4)))
+svuint32x4_t svrshl(svuint32x4_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u64_x4)))
+svuint64x4_t svrshl(svuint64x4_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u16_x4)))
+svuint16x4_t svrshl(svuint16x4_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x2)))
+svint8x2_t svrshl(svint8x2_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x2)))
+svint32x2_t svrshl(svint32x2_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x2)))
+svint64x2_t svrshl(svint64x2_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x2)))
+svint16x2_t svrshl(svint16x2_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x2)))
+svuint8x2_t svrshl(svuint8x2_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x2)))
+svuint32x2_t svrshl(svuint32x2_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x2)))
+svuint64x2_t svrshl(svuint64x2_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x2)))
+svuint16x2_t svrshl(svuint16x2_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x4)))
+svint8x4_t svrshl(svint8x4_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x4)))
+svint32x4_t svrshl(svint32x4_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x4)))
+svint64x4_t svrshl(svint64x4_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x4)))
+svint16x4_t svrshl(svint16x4_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x4)))
+svuint8x4_t svrshl(svuint8x4_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x4)))
+svuint32x4_t svrshl(svuint32x4_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x4)))
+svuint64x4_t svrshl(svuint64x4_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x4)))
+svuint16x4_t svrshl(svuint16x4_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x2)))
+svuint8x2_t svsel(svcount_t, svuint8x2_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32_x2)))
+svuint32x2_t svsel(svcount_t, svuint32x2_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64_x2)))
+svuint64x2_t svsel(svcount_t, svuint64x2_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16_x2)))
+svuint16x2_t svsel(svcount_t, svuint16x2_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16_x2)))
+svbfloat16x2_t svsel(svcount_t, svbfloat16x2_t, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8_x2)))
+svint8x2_t svsel(svcount_t, svint8x2_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64_x2)))
+svfloat64x2_t svsel(svcount_t, svfloat64x2_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32_x2)))
+svfloat32x2_t svsel(svcount_t, svfloat32x2_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16_x2)))
+svfloat16x2_t svsel(svcount_t, svfloat16x2_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x2)))
+svint32x2_t svsel(svcount_t, svint32x2_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x2)))
+svint64x2_t svsel(svcount_t, svint64x2_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x2)))
+svint16x2_t svsel(svcount_t, svint16x2_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x4)))
+svuint8x4_t svsel(svcount_t, svuint8x4_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32_x4)))
+svuint32x4_t svsel(svcount_t, svuint32x4_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64_x4)))
+svuint64x4_t svsel(svcount_t, svuint64x4_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16_x4)))
+svuint16x4_t svsel(svcount_t, svuint16x4_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16_x4)))
+svbfloat16x4_t svsel(svcount_t, svbfloat16x4_t, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8_x4)))
+svint8x4_t svsel(svcount_t, svint8x4_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64_x4)))
+svfloat64x4_t svsel(svcount_t, svfloat64x4_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32_x4)))
+svfloat32x4_t svsel(svcount_t, svfloat32x4_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16_x4)))
+svfloat16x4_t svsel(svcount_t, svfloat16x4_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x4)))
+svint32x4_t svsel(svcount_t, svint32x4_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x4)))
+svint64x4_t svsel(svcount_t, svint64x4_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x4)))
+svint16x4_t svsel(svcount_t, svint16x4_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x2)))
+svint32x2_t svunpk_s32(svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s64_s32_x2)))
+svint64x2_t svunpk_s64(svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s16_s8_x2)))
+svint16x2_t svunpk_s16(svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u32_u16_x2)))
+svuint32x2_t svunpk_u32(svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u64_u32_x2)))
+svuint64x2_t svunpk_u64(svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u16_u8_x2)))
+svuint16x2_t svunpk_u16(svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x4)))
+svint32x4_t svunpk_s32(svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s64_s32_x4)))
+svint64x4_t svunpk_s64(svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s16_s8_x4)))
+svint16x4_t svunpk_s16(svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u32_u16_x4)))
+svuint32x4_t svunpk_u32(svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u64_u32_x4)))
+svuint64x4_t svunpk_u64(svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u16_u8_x4)))
+svuint16x4_t svunpk_u16(svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x2)))
+svuint8x2_t svuzp(svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u32_x2)))
+svuint32x2_t svuzp(svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u64_x2)))
+svuint64x2_t svuzp(svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u16_x2)))
+svuint16x2_t svuzp(svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_bf16_x2)))
+svbfloat16x2_t svuzp(svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s8_x2)))
+svint8x2_t svuzp(svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f64_x2)))
+svfloat64x2_t svuzp(svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f32_x2)))
+svfloat32x2_t svuzp(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f16_x2)))
+svfloat16x2_t svuzp(svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x2)))
+svint32x2_t svuzp(svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x2)))
+svint64x2_t svuzp(svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x2)))
+svint16x2_t svuzp(svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x4)))
+svuint8x4_t svuzp(svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u32_x4)))
+svuint32x4_t svuzp(svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u64_x4)))
+svuint64x4_t svuzp(svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u16_x4)))
+svuint16x4_t svuzp(svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_bf16_x4)))
+svbfloat16x4_t svuzp(svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s8_x4)))
+svint8x4_t svuzp(svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f64_x4)))
+svfloat64x4_t svuzp(svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f32_x4)))
+svfloat32x4_t svuzp(svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f16_x4)))
+svfloat16x4_t svuzp(svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x4)))
+svint32x4_t svuzp(svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x4)))
+svint64x4_t svuzp(svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x4)))
+svint16x4_t svuzp(svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x2)))
+svuint8x2_t svuzpq(svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u32_x2)))
+svuint32x2_t svuzpq(svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u64_x2)))
+svuint64x2_t svuzpq(svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u16_x2)))
+svuint16x2_t svuzpq(svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_bf16_x2)))
+svbfloat16x2_t svuzpq(svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s8_x2)))
+svint8x2_t svuzpq(svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f64_x2)))
+svfloat64x2_t svuzpq(svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f32_x2)))
+svfloat32x2_t svuzpq(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f16_x2)))
+svfloat16x2_t svuzpq(svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x2)))
+svint32x2_t svuzpq(svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x2)))
+svint64x2_t svuzpq(svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x2)))
+svint16x2_t svuzpq(svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x4)))
+svuint8x4_t svuzpq(svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u32_x4)))
+svuint32x4_t svuzpq(svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u64_x4)))
+svuint64x4_t svuzpq(svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u16_x4)))
+svuint16x4_t svuzpq(svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_bf16_x4)))
+svbfloat16x4_t svuzpq(svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s8_x4)))
+svint8x4_t svuzpq(svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f64_x4)))
+svfloat64x4_t svuzpq(svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f32_x4)))
+svfloat32x4_t svuzpq(svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f16_x4)))
+svfloat16x4_t svuzpq(svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x4)))
+svint32x4_t svuzpq(svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x4)))
+svint64x4_t svuzpq(svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x4)))
+svint16x4_t svuzpq(svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x2)))
+svuint8x2_t svzip(svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u32_x2)))
+svuint32x2_t svzip(svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u64_x2)))
+svuint64x2_t svzip(svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u16_x2)))
+svuint16x2_t svzip(svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_bf16_x2)))
+svbfloat16x2_t svzip(svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s8_x2)))
+svint8x2_t svzip(svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f64_x2)))
+svfloat64x2_t svzip(svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f32_x2)))
+svfloat32x2_t svzip(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f16_x2)))
+svfloat16x2_t svzip(svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x2)))
+svint32x2_t svzip(svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x2)))
+svint64x2_t svzip(svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x2)))
+svint16x2_t svzip(svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x4)))
+svuint8x4_t svzip(svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u32_x4)))
+svuint32x4_t svzip(svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u64_x4)))
+svuint64x4_t svzip(svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u16_x4)))
+svuint16x4_t svzip(svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_bf16_x4)))
+svbfloat16x4_t svzip(svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s8_x4)))
+svint8x4_t svzip(svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f64_x4)))
+svfloat64x4_t svzip(svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f32_x4)))
+svfloat32x4_t svzip(svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f16_x4)))
+svfloat16x4_t svzip(svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x4)))
+svint32x4_t svzip(svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x4)))
+svint64x4_t svzip(svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x4)))
+svint16x4_t svzip(svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x2)))
+svuint8x2_t svzipq(svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u32_x2)))
+svuint32x2_t svzipq(svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u64_x2)))
+svuint64x2_t svzipq(svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u16_x2)))
+svuint16x2_t svzipq(svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_bf16_x2)))
+svbfloat16x2_t svzipq(svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s8_x2)))
+svint8x2_t svzipq(svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f64_x2)))
+svfloat64x2_t svzipq(svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f32_x2)))
+svfloat32x2_t svzipq(svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f16_x2)))
+svfloat16x2_t svzipq(svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x2)))
+svint32x2_t svzipq(svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x2)))
+svint64x2_t svzipq(svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x2)))
+svint16x2_t svzipq(svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x4)))
+svuint8x4_t svzipq(svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u32_x4)))
+svuint32x4_t svzipq(svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u64_x4)))
+svuint64x4_t svzipq(svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u16_x4)))
+svuint16x4_t svzipq(svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_bf16_x4)))
+svbfloat16x4_t svzipq(svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s8_x4)))
+svint8x4_t svzipq(svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f64_x4)))
+svfloat64x4_t svzipq(svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f32_x4)))
+svfloat32x4_t svzipq(svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f16_x4)))
+svfloat16x4_t svzipq(svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x4)))
+svint32x4_t svzipq(svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x4)))
+svint64x4_t svzipq(svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x4)))
+svint16x4_t svzipq(svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s32_x2)))
+svint16_t svqcvtn_s16_s32_x2(svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s32_x2)))
+svuint16_t svqcvtn_u16_s32_x2(svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u32_x2)))
+svuint16_t svqcvtn_u16_u32_x2(svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s32_x2)))
+svint16_t svqcvtn_s16(svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s32_x2)))
+svuint16_t svqcvtn_u16(svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u32_x2)))
+svuint16_t svqcvtn_u16(svuint32x2_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m)))
svfloat64_t svabd_n_f64_m(svbool_t, svfloat64_t, float64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_m)))
@@ -23877,6 +27810,1834 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32)))
svuint32_t svsm4e(svuint32_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32)))
svuint32_t svsm4ekey(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u8)))
+uint8x16_t svaddqv_u8(svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u32)))
+uint32x4_t svaddqv_u32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u64)))
+uint64x2_t svaddqv_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u16)))
+uint16x8_t svaddqv_u16(svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s8)))
+int8x16_t svaddqv_s8(svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f64)))
+float64x2_t svaddqv_f64(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f32)))
+float32x4_t svaddqv_f32(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f16)))
+float16x8_t svaddqv_f16(svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s32)))
+int32x4_t svaddqv_s32(svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s64)))
+int64x2_t svaddqv_s64(svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s16)))
+int16x8_t svaddqv_s16(svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u8)))
+uint8x16_t svandqv_u8(svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u32)))
+uint32x4_t svandqv_u32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u64)))
+uint64x2_t svandqv_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u16)))
+uint16x8_t svandqv_u16(svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s8)))
+int8x16_t svandqv_s8(svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s32)))
+int32x4_t svandqv_s32(svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s64)))
+int64x2_t svandqv_s64(svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s16)))
+int16x8_t svandqv_s16(svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u8)))
+uint8x16_t sveorqv_u8(svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u32)))
+uint32x4_t sveorqv_u32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u64)))
+uint64x2_t sveorqv_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u16)))
+uint16x8_t sveorqv_u16(svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s8)))
+int8x16_t sveorqv_s8(svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s32)))
+int32x4_t sveorqv_s32(svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s64)))
+int64x2_t sveorqv_s64(svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s16)))
+int16x8_t sveorqv_s16(svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u8)))
+svuint8_t svextq_u8(svuint8_t, svuint8_t, int32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u32)))
+svuint32_t svextq_u32(svuint32_t, svuint32_t, int32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u64)))
+svuint64_t svextq_u64(svuint64_t, svuint64_t, int32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u16)))
+svuint16_t svextq_u16(svuint16_t, svuint16_t, int32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_bf16)))
+svbfloat16_t svextq_bf16(svbfloat16_t, svbfloat16_t, int32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s8)))
+svint8_t svextq_s8(svint8_t, svint8_t, int32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f64)))
+svfloat64_t svextq_f64(svfloat64_t, svfloat64_t, int32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f32)))
+svfloat32_t svextq_f32(svfloat32_t, svfloat32_t, int32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f16)))
+svfloat16_t svextq_f16(svfloat16_t, svfloat16_t, int32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s32)))
+svint32_t svextq_s32(svint32_t, svint32_t, int32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s64)))
+svint64_t svextq_s64(svint64_t, svint64_t, int32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s16)))
+svint16_t svextq_s16(svint16_t, svint16_t, int32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32)))
+svuint32_t svld1q_gather_u64base_index_u32(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64)))
+svuint64_t svld1q_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16)))
+svuint16_t svld1q_gather_u64base_index_u16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16)))
+svbfloat16_t svld1q_gather_u64base_index_bf16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64)))
+svfloat64_t svld1q_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32)))
+svfloat32_t svld1q_gather_u64base_index_f32(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16)))
+svfloat16_t svld1q_gather_u64base_index_f16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32)))
+svint32_t svld1q_gather_u64base_index_s32(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64)))
+svint64_t svld1q_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16)))
+svint16_t svld1q_gather_u64base_index_s16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8)))
+svuint8_t svld1q_gather_u64base_offset_u8(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32)))
+svuint32_t svld1q_gather_u64base_offset_u32(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64)))
+svuint64_t svld1q_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16)))
+svuint16_t svld1q_gather_u64base_offset_u16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16)))
+svbfloat16_t svld1q_gather_u64base_offset_bf16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8)))
+svint8_t svld1q_gather_u64base_offset_s8(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64)))
+svfloat64_t svld1q_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32)))
+svfloat32_t svld1q_gather_u64base_offset_f32(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16)))
+svfloat16_t svld1q_gather_u64base_offset_f16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32)))
+svint32_t svld1q_gather_u64base_offset_s32(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64)))
+svint64_t svld1q_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16)))
+svint16_t svld1q_gather_u64base_offset_s16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8)))
+svuint8_t svld1q_gather_u64base_u8(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32)))
+svuint32_t svld1q_gather_u64base_u32(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64)))
+svuint64_t svld1q_gather_u64base_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16)))
+svuint16_t svld1q_gather_u64base_u16(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16)))
+svbfloat16_t svld1q_gather_u64base_bf16(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8)))
+svint8_t svld1q_gather_u64base_s8(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64)))
+svfloat64_t svld1q_gather_u64base_f64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32)))
+svfloat32_t svld1q_gather_u64base_f32(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16)))
+svfloat16_t svld1q_gather_u64base_f16(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32)))
+svint32_t svld1q_gather_u64base_s32(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64)))
+svint64_t svld1q_gather_u64base_s64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16)))
+svint16_t svld1q_gather_u64base_s16(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32)))
+svuint32_t svld1q_gather_u64index_u32(svbool_t, uint32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64)))
+svuint64_t svld1q_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16)))
+svuint16_t svld1q_gather_u64index_u16(svbool_t, uint16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16)))
+svbfloat16_t svld1q_gather_u64index_bf16(svbool_t, bfloat16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64)))
+svfloat64_t svld1q_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32)))
+svfloat32_t svld1q_gather_u64index_f32(svbool_t, float32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16)))
+svfloat16_t svld1q_gather_u64index_f16(svbool_t, float16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32)))
+svint32_t svld1q_gather_u64index_s32(svbool_t, int32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64)))
+svint64_t svld1q_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16)))
+svint16_t svld1q_gather_u64index_s16(svbool_t, int16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8)))
+svuint8_t svld1q_gather_u64offset_u8(svbool_t, uint8_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32)))
+svuint32_t svld1q_gather_u64offset_u32(svbool_t, uint32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64)))
+svuint64_t svld1q_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16)))
+svuint16_t svld1q_gather_u64offset_u16(svbool_t, uint16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16)))
+svbfloat16_t svld1q_gather_u64offset_bf16(svbool_t, bfloat16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8)))
+svint8_t svld1q_gather_u64offset_s8(svbool_t, int8_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64)))
+svfloat64_t svld1q_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32)))
+svfloat32_t svld1q_gather_u64offset_f32(svbool_t, float32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16)))
+svfloat16_t svld1q_gather_u64offset_f16(svbool_t, float16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32)))
+svint32_t svld1q_gather_u64offset_s32(svbool_t, int32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64)))
+svint64_t svld1q_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16)))
+svint16_t svld1q_gather_u64offset_s16(svbool_t, int16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64)))
+svuint64_t svld1udq_u64(svbool_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64)))
+svfloat64_t svld1udq_f64(svbool_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64)))
+svint64_t svld1udq_s64(svbool_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64)))
+svuint64_t svld1udq_vnum_u64(svbool_t, uint64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64)))
+svfloat64_t svld1udq_vnum_f64(svbool_t, float64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64)))
+svint64_t svld1udq_vnum_s64(svbool_t, int64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32)))
+svuint32_t svld1uwq_u32(svbool_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32)))
+svfloat32_t svld1uwq_f32(svbool_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32)))
+svint32_t svld1uwq_s32(svbool_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32)))
+svuint32_t svld1uwq_vnum_u32(svbool_t, uint32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32)))
+svfloat32_t svld1uwq_vnum_f32(svbool_t, float32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32)))
+svint32_t svld1uwq_vnum_s32(svbool_t, int32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u8)))
+svuint8x2_t svld2q_u8(svbool_t, uint8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u32)))
+svuint32x2_t svld2q_u32(svbool_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u64)))
+svuint64x2_t svld2q_u64(svbool_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u16)))
+svuint16x2_t svld2q_u16(svbool_t, uint16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s8)))
+svint8x2_t svld2q_s8(svbool_t, int8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f64)))
+svfloat64x2_t svld2q_f64(svbool_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f32)))
+svfloat32x2_t svld2q_f32(svbool_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f16)))
+svfloat16x2_t svld2q_f16(svbool_t, float16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s32)))
+svint32x2_t svld2q_s32(svbool_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s64)))
+svint64x2_t svld2q_s64(svbool_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s16)))
+svint16x2_t svld2q_s16(svbool_t, int16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_bf16)))
+svbfloat16x2_t svld2q_bf16(svbool_t, bfloat16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u8)))
+svuint8x2_t svld2q_vnum_u8(svbool_t, uint8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u32)))
+svuint32x2_t svld2q_vnum_u32(svbool_t, uint32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u64)))
+svuint64x2_t svld2q_vnum_u64(svbool_t, uint64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u16)))
+svuint16x2_t svld2q_vnum_u16(svbool_t, uint16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s8)))
+svint8x2_t svld2q_vnum_s8(svbool_t, int8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f64)))
+svfloat64x2_t svld2q_vnum_f64(svbool_t, float64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f32)))
+svfloat32x2_t svld2q_vnum_f32(svbool_t, float32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f16)))
+svfloat16x2_t svld2q_vnum_f16(svbool_t, float16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s32)))
+svint32x2_t svld2q_vnum_s32(svbool_t, int32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s64)))
+svint64x2_t svld2q_vnum_s64(svbool_t, int64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s16)))
+svint16x2_t svld2q_vnum_s16(svbool_t, int16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_bf16)))
+svbfloat16x2_t svld2q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u8)))
+svuint8x3_t svld3q_u8(svbool_t, uint8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u32)))
+svuint32x3_t svld3q_u32(svbool_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u64)))
+svuint64x3_t svld3q_u64(svbool_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u16)))
+svuint16x3_t svld3q_u16(svbool_t, uint16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s8)))
+svint8x3_t svld3q_s8(svbool_t, int8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f64)))
+svfloat64x3_t svld3q_f64(svbool_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f32)))
+svfloat32x3_t svld3q_f32(svbool_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f16)))
+svfloat16x3_t svld3q_f16(svbool_t, float16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s32)))
+svint32x3_t svld3q_s32(svbool_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s64)))
+svint64x3_t svld3q_s64(svbool_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s16)))
+svint16x3_t svld3q_s16(svbool_t, int16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_bf16)))
+svbfloat16x3_t svld3q_bf16(svbool_t, bfloat16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u8)))
+svuint8x3_t svld3q_vnum_u8(svbool_t, uint8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u32)))
+svuint32x3_t svld3q_vnum_u32(svbool_t, uint32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u64)))
+svuint64x3_t svld3q_vnum_u64(svbool_t, uint64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u16)))
+svuint16x3_t svld3q_vnum_u16(svbool_t, uint16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s8)))
+svint8x3_t svld3q_vnum_s8(svbool_t, int8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f64)))
+svfloat64x3_t svld3q_vnum_f64(svbool_t, float64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f32)))
+svfloat32x3_t svld3q_vnum_f32(svbool_t, float32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f16)))
+svfloat16x3_t svld3q_vnum_f16(svbool_t, float16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s32)))
+svint32x3_t svld3q_vnum_s32(svbool_t, int32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s64)))
+svint64x3_t svld3q_vnum_s64(svbool_t, int64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s16)))
+svint16x3_t svld3q_vnum_s16(svbool_t, int16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_bf16)))
+svbfloat16x3_t svld3q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u8)))
+svuint8x4_t svld4q_u8(svbool_t, uint8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u32)))
+svuint32x4_t svld4q_u32(svbool_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u64)))
+svuint64x4_t svld4q_u64(svbool_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u16)))
+svuint16x4_t svld4q_u16(svbool_t, uint16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s8)))
+svint8x4_t svld4q_s8(svbool_t, int8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f64)))
+svfloat64x4_t svld4q_f64(svbool_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f32)))
+svfloat32x4_t svld4q_f32(svbool_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f16)))
+svfloat16x4_t svld4q_f16(svbool_t, float16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s32)))
+svint32x4_t svld4q_s32(svbool_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s64)))
+svint64x4_t svld4q_s64(svbool_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s16)))
+svint16x4_t svld4q_s16(svbool_t, int16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_bf16)))
+svbfloat16x4_t svld4q_bf16(svbool_t, bfloat16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u8)))
+svuint8x4_t svld4q_vnum_u8(svbool_t, uint8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u32)))
+svuint32x4_t svld4q_vnum_u32(svbool_t, uint32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u64)))
+svuint64x4_t svld4q_vnum_u64(svbool_t, uint64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u16)))
+svuint16x4_t svld4q_vnum_u16(svbool_t, uint16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s8)))
+svint8x4_t svld4q_vnum_s8(svbool_t, int8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f64)))
+svfloat64x4_t svld4q_vnum_f64(svbool_t, float64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f32)))
+svfloat32x4_t svld4q_vnum_f32(svbool_t, float32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f16)))
+svfloat16x4_t svld4q_vnum_f16(svbool_t, float16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s32)))
+svint32x4_t svld4q_vnum_s32(svbool_t, int32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s64)))
+svint64x4_t svld4q_vnum_s64(svbool_t, int64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s16)))
+svint16x4_t svld4q_vnum_s16(svbool_t, int16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_bf16)))
+svbfloat16x4_t svld4q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f64)))
+float64x2_t svmaxnmqv_f64(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f32)))
+float32x4_t svmaxnmqv_f32(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f16)))
+float16x8_t svmaxnmqv_f16(svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f64)))
+float64x2_t svmaxqv_f64(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f32)))
+float32x4_t svmaxqv_f32(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f16)))
+float16x8_t svmaxqv_f16(svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s8)))
+int8x16_t svmaxqv_s8(svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s32)))
+int32x4_t svmaxqv_s32(svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s64)))
+int64x2_t svmaxqv_s64(svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s16)))
+int16x8_t svmaxqv_s16(svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u8)))
+uint8x16_t svmaxqv_u8(svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u32)))
+uint32x4_t svmaxqv_u32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u64)))
+uint64x2_t svmaxqv_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u16)))
+uint16x8_t svmaxqv_u16(svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f64)))
+float64x2_t svminnmqv_f64(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f32)))
+float32x4_t svminnmqv_f32(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f16)))
+float16x8_t svminnmqv_f16(svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f64)))
+float64x2_t svminqv_f64(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f32)))
+float32x4_t svminqv_f32(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f16)))
+float16x8_t svminqv_f16(svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s8)))
+int8x16_t svminqv_s8(svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s32)))
+int32x4_t svminqv_s32(svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s64)))
+int64x2_t svminqv_s64(svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s16)))
+int16x8_t svminqv_s16(svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u8)))
+uint8x16_t svminqv_u8(svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u32)))
+uint32x4_t svminqv_u32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u64)))
+uint64x2_t svminqv_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u16)))
+uint16x8_t svminqv_u16(svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u8)))
+uint8x16_t svorqv_u8(svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u32)))
+uint32x4_t svorqv_u32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u64)))
+uint64x2_t svorqv_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u16)))
+uint16x8_t svorqv_u16(svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s8)))
+int8x16_t svorqv_s8(svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s32)))
+int32x4_t svorqv_s32(svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s64)))
+int64x2_t svorqv_s64(svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s16)))
+int16x8_t svorqv_s16(svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8)))
+svbool_t svpmov_u8(svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8)))
+svbool_t svpmov_s8(svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64)))
+svbool_t svpmov_u64(svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64)))
+svbool_t svpmov_s64(svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16)))
+svbool_t svpmov_u16(svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16)))
+svbool_t svpmov_s16(svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32)))
+svbool_t svpmov_u32(svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32)))
+svbool_t svpmov_s32(svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u8)))
+svbool_t svpmov_lane_u8(svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s8)))
+svbool_t svpmov_lane_s8(svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64)))
+svbool_t svpmov_lane_u64(svuint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64)))
+svbool_t svpmov_lane_s64(svint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16)))
+svbool_t svpmov_lane_u16(svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16)))
+svbool_t svpmov_lane_s16(svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32)))
+svbool_t svpmov_lane_u32(svuint32_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32)))
+svbool_t svpmov_lane_s32(svint32_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64_m)))
+svuint64_t svpmov_lane_u64_m(svuint64_t, svbool_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64_m)))
+svint64_t svpmov_lane_s64_m(svint64_t, svbool_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16_m)))
+svuint16_t svpmov_lane_u16_m(svuint16_t, svbool_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16_m)))
+svint16_t svpmov_lane_s16_m(svint16_t, svbool_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32_m)))
+svuint32_t svpmov_lane_u32_m(svuint32_t, svbool_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32_m)))
+svint32_t svpmov_lane_s32_m(svint32_t, svbool_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8_z)))
+svuint8_t svpmov_u8_z(svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8_z)))
+svint8_t svpmov_s8_z(svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64_z)))
+svuint64_t svpmov_u64_z(svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64_z)))
+svint64_t svpmov_s64_z(svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16_z)))
+svuint16_t svpmov_u16_z(svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16_z)))
+svint16_t svpmov_s16_z(svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32_z)))
+svuint32_t svpmov_u32_z(svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32_z)))
+svint32_t svpmov_s32_z(svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64)))
+void svst1dq_u64(svbool_t, uint64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64)))
+void svst1dq_f64(svbool_t, float64_t const *, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64)))
+void svst1dq_s64(svbool_t, int64_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64)))
+void svst1dq_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64)))
+void svst1dq_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64)))
+void svst1dq_vnum_s64(svbool_t, int64_t const *, int64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8)))
+void svst1q_scatter_u64base_u8(svbool_t, svuint64_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32)))
+void svst1q_scatter_u64base_u32(svbool_t, svuint64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64)))
+void svst1q_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16)))
+void svst1q_scatter_u64base_u16(svbool_t, svuint64_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16)))
+void svst1q_scatter_u64base_bf16(svbool_t, svuint64_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8)))
+void svst1q_scatter_u64base_s8(svbool_t, svuint64_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64)))
+void svst1q_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32)))
+void svst1q_scatter_u64base_f32(svbool_t, svuint64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16)))
+void svst1q_scatter_u64base_f16(svbool_t, svuint64_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32)))
+void svst1q_scatter_u64base_s32(svbool_t, svuint64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64)))
+void svst1q_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16)))
+void svst1q_scatter_u64base_s16(svbool_t, svuint64_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32)))
+void svst1q_scatter_u64base_index_u32(svbool_t, svuint64_t, int64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64)))
+void svst1q_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16)))
+void svst1q_scatter_u64base_index_u16(svbool_t, svuint64_t, int64_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16)))
+void svst1q_scatter_u64base_index_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64)))
+void svst1q_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32)))
+void svst1q_scatter_u64base_index_f32(svbool_t, svuint64_t, int64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16)))
+void svst1q_scatter_u64base_index_f16(svbool_t, svuint64_t, int64_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32)))
+void svst1q_scatter_u64base_index_s32(svbool_t, svuint64_t, int64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64)))
+void svst1q_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16)))
+void svst1q_scatter_u64base_index_s16(svbool_t, svuint64_t, int64_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8)))
+void svst1q_scatter_u64base_offset_u8(svbool_t, svuint64_t, int64_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32)))
+void svst1q_scatter_u64base_offset_u32(svbool_t, svuint64_t, int64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64)))
+void svst1q_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16)))
+void svst1q_scatter_u64base_offset_u16(svbool_t, svuint64_t, int64_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16)))
+void svst1q_scatter_u64base_offset_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8)))
+void svst1q_scatter_u64base_offset_s8(svbool_t, svuint64_t, int64_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64)))
+void svst1q_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32)))
+void svst1q_scatter_u64base_offset_f32(svbool_t, svuint64_t, int64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16)))
+void svst1q_scatter_u64base_offset_f16(svbool_t, svuint64_t, int64_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32)))
+void svst1q_scatter_u64base_offset_s32(svbool_t, svuint64_t, int64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64)))
+void svst1q_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16)))
+void svst1q_scatter_u64base_offset_s16(svbool_t, svuint64_t, int64_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32)))
+void svst1q_scatter_u64index_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64)))
+void svst1q_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16)))
+void svst1q_scatter_u64index_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16)))
+void svst1q_scatter_u64index_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64)))
+void svst1q_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32)))
+void svst1q_scatter_u64index_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16)))
+void svst1q_scatter_u64index_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32)))
+void svst1q_scatter_u64index_s32(svbool_t, int32_t *, svuint64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64)))
+void svst1q_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16)))
+void svst1q_scatter_u64index_s16(svbool_t, int16_t *, svuint64_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8)))
+void svst1q_scatter_u64offset_u8(svbool_t, uint8_t *, svuint64_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32)))
+void svst1q_scatter_u64offset_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64)))
+void svst1q_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16)))
+void svst1q_scatter_u64offset_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16)))
+void svst1q_scatter_u64offset_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8)))
+void svst1q_scatter_u64offset_s8(svbool_t, int8_t *, svuint64_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64)))
+void svst1q_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32)))
+void svst1q_scatter_u64offset_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16)))
+void svst1q_scatter_u64offset_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32)))
+void svst1q_scatter_u64offset_s32(svbool_t, int32_t *, svuint64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64)))
+void svst1q_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16)))
+void svst1q_scatter_u64offset_s16(svbool_t, int16_t *, svuint64_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32)))
+void svst1wq_u32(svbool_t, uint32_t const *, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32)))
+void svst1wq_f32(svbool_t, float32_t const *, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32)))
+void svst1wq_s32(svbool_t, int32_t const *, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32)))
+void svst1wq_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32)))
+void svst1wq_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32)))
+void svst1wq_vnum_s32(svbool_t, int32_t const *, int64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u8)))
+void svst2q_u8(svbool_t, uint8_t const *, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u32)))
+void svst2q_u32(svbool_t, uint32_t const *, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u64)))
+void svst2q_u64(svbool_t, uint64_t const *, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u16)))
+void svst2q_u16(svbool_t, uint16_t const *, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s8)))
+void svst2q_s8(svbool_t, int8_t const *, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f64)))
+void svst2q_f64(svbool_t, float64_t const *, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f32)))
+void svst2q_f32(svbool_t, float32_t const *, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f16)))
+void svst2q_f16(svbool_t, float16_t const *, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s32)))
+void svst2q_s32(svbool_t, int32_t const *, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s64)))
+void svst2q_s64(svbool_t, int64_t const *, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s16)))
+void svst2q_s16(svbool_t, int16_t const *, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_bf16)))
+void svst2q_bf16(svbool_t, bfloat16_t const *, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u8)))
+void svst2q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u32)))
+void svst2q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u64)))
+void svst2q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u16)))
+void svst2q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s8)))
+void svst2q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f64)))
+void svst2q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f32)))
+void svst2q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f16)))
+void svst2q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s32)))
+void svst2q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s64)))
+void svst2q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s16)))
+void svst2q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_bf16)))
+void svst2q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u8)))
+void svst3q_u8(svbool_t, uint8_t const *, svuint8x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u32)))
+void svst3q_u32(svbool_t, uint32_t const *, svuint32x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u64)))
+void svst3q_u64(svbool_t, uint64_t const *, svuint64x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u16)))
+void svst3q_u16(svbool_t, uint16_t const *, svuint16x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s8)))
+void svst3q_s8(svbool_t, int8_t const *, svint8x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f64)))
+void svst3q_f64(svbool_t, float64_t const *, svfloat64x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f32)))
+void svst3q_f32(svbool_t, float32_t const *, svfloat32x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f16)))
+void svst3q_f16(svbool_t, float16_t const *, svfloat16x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s32)))
+void svst3q_s32(svbool_t, int32_t const *, svint32x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s64)))
+void svst3q_s64(svbool_t, int64_t const *, svint64x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s16)))
+void svst3q_s16(svbool_t, int16_t const *, svint16x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_bf16)))
+void svst3q_bf16(svbool_t, bfloat16_t const *, svbfloat16x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u8)))
+void svst3q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u32)))
+void svst3q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u64)))
+void svst3q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u16)))
+void svst3q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s8)))
+void svst3q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f64)))
+void svst3q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f32)))
+void svst3q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f16)))
+void svst3q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s32)))
+void svst3q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s64)))
+void svst3q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s16)))
+void svst3q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_bf16)))
+void svst3q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x3_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u8)))
+void svst4q_u8(svbool_t, uint8_t const *, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u32)))
+void svst4q_u32(svbool_t, uint32_t const *, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u64)))
+void svst4q_u64(svbool_t, uint64_t const *, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u16)))
+void svst4q_u16(svbool_t, uint16_t const *, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s8)))
+void svst4q_s8(svbool_t, int8_t const *, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f64)))
+void svst4q_f64(svbool_t, float64_t const *, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f32)))
+void svst4q_f32(svbool_t, float32_t const *, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f16)))
+void svst4q_f16(svbool_t, float16_t const *, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s32)))
+void svst4q_s32(svbool_t, int32_t const *, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s64)))
+void svst4q_s64(svbool_t, int64_t const *, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s16)))
+void svst4q_s16(svbool_t, int16_t const *, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_bf16)))
+void svst4q_bf16(svbool_t, bfloat16_t const *, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u8)))
+void svst4q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u32)))
+void svst4q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u64)))
+void svst4q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u16)))
+void svst4q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s8)))
+void svst4q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f64)))
+void svst4q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f32)))
+void svst4q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f16)))
+void svst4q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s32)))
+void svst4q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s64)))
+void svst4q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s16)))
+void svst4q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_bf16)))
+void svst4q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u8)))
+svuint8_t svtblq_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u32)))
+svuint32_t svtblq_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u64)))
+svuint64_t svtblq_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u16)))
+svuint16_t svtblq_u16(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_bf16)))
+svbfloat16_t svtblq_bf16(svbfloat16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s8)))
+svint8_t svtblq_s8(svint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f64)))
+svfloat64_t svtblq_f64(svfloat64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f32)))
+svfloat32_t svtblq_f32(svfloat32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f16)))
+svfloat16_t svtblq_f16(svfloat16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s32)))
+svint32_t svtblq_s32(svint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s64)))
+svint64_t svtblq_s64(svint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s16)))
+svint16_t svtblq_s16(svint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u8)))
+svuint8_t svtbxq_u8(svuint8_t, svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u32)))
+svuint32_t svtbxq_u32(svuint32_t, svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u64)))
+svuint64_t svtbxq_u64(svuint64_t, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u16)))
+svuint16_t svtbxq_u16(svuint16_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_bf16)))
+svbfloat16_t svtbxq_bf16(svbfloat16_t, svbfloat16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s8)))
+svint8_t svtbxq_s8(svint8_t, svint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f64)))
+svfloat64_t svtbxq_f64(svfloat64_t, svfloat64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f32)))
+svfloat32_t svtbxq_f32(svfloat32_t, svfloat32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f16)))
+svfloat16_t svtbxq_f16(svfloat16_t, svfloat16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s32)))
+svint32_t svtbxq_s32(svint32_t, svint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s64)))
+svint64_t svtbxq_s64(svint64_t, svint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s16)))
+svint16_t svtbxq_s16(svint16_t, svint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u8)))
+svuint8_t svuzpq1_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u32)))
+svuint32_t svuzpq1_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u64)))
+svuint64_t svuzpq1_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u16)))
+svuint16_t svuzpq1_u16(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_bf16)))
+svbfloat16_t svuzpq1_bf16(svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s8)))
+svint8_t svuzpq1_s8(svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f64)))
+svfloat64_t svuzpq1_f64(svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f32)))
+svfloat32_t svuzpq1_f32(svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f16)))
+svfloat16_t svuzpq1_f16(svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s32)))
+svint32_t svuzpq1_s32(svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s64)))
+svint64_t svuzpq1_s64(svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s16)))
+svint16_t svuzpq1_s16(svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u8)))
+svuint8_t svuzpq2_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u32)))
+svuint32_t svuzpq2_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u64)))
+svuint64_t svuzpq2_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u16)))
+svuint16_t svuzpq2_u16(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_bf16)))
+svbfloat16_t svuzpq2_bf16(svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s8)))
+svint8_t svuzpq2_s8(svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f64)))
+svfloat64_t svuzpq2_f64(svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f32)))
+svfloat32_t svuzpq2_f32(svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f16)))
+svfloat16_t svuzpq2_f16(svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s32)))
+svint32_t svuzpq2_s32(svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s64)))
+svint64_t svuzpq2_s64(svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s16)))
+svint16_t svuzpq2_s16(svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u8)))
+svuint8_t svzipq1_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u32)))
+svuint32_t svzipq1_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u64)))
+svuint64_t svzipq1_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u16)))
+svuint16_t svzipq1_u16(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_bf16)))
+svbfloat16_t svzipq1_bf16(svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s8)))
+svint8_t svzipq1_s8(svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f64)))
+svfloat64_t svzipq1_f64(svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f32)))
+svfloat32_t svzipq1_f32(svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f16)))
+svfloat16_t svzipq1_f16(svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s32)))
+svint32_t svzipq1_s32(svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s64)))
+svint64_t svzipq1_s64(svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s16)))
+svint16_t svzipq1_s16(svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u8)))
+svuint8_t svzipq2_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u32)))
+svuint32_t svzipq2_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u64)))
+svuint64_t svzipq2_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u16)))
+svuint16_t svzipq2_u16(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_bf16)))
+svbfloat16_t svzipq2_bf16(svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s8)))
+svint8_t svzipq2_s8(svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f64)))
+svfloat64_t svzipq2_f64(svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f32)))
+svfloat32_t svzipq2_f32(svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f16)))
+svfloat16_t svzipq2_f16(svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s32)))
+svint32_t svzipq2_s32(svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s64)))
+svint64_t svzipq2_s64(svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16)))
+svint16_t svzipq2_s16(svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u8)))
+uint8x16_t svaddqv(svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u32)))
+uint32x4_t svaddqv(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u64)))
+uint64x2_t svaddqv(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u16)))
+uint16x8_t svaddqv(svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s8)))
+int8x16_t svaddqv(svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f64)))
+float64x2_t svaddqv(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f32)))
+float32x4_t svaddqv(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f16)))
+float16x8_t svaddqv(svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s32)))
+int32x4_t svaddqv(svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s64)))
+int64x2_t svaddqv(svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s16)))
+int16x8_t svaddqv(svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u8)))
+uint8x16_t svandqv(svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u32)))
+uint32x4_t svandqv(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u64)))
+uint64x2_t svandqv(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u16)))
+uint16x8_t svandqv(svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s8)))
+int8x16_t svandqv(svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s32)))
+int32x4_t svandqv(svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s64)))
+int64x2_t svandqv(svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s16)))
+int16x8_t svandqv(svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u8)))
+uint8x16_t sveorqv(svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u32)))
+uint32x4_t sveorqv(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u64)))
+uint64x2_t sveorqv(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u16)))
+uint16x8_t sveorqv(svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s8)))
+int8x16_t sveorqv(svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s32)))
+int32x4_t sveorqv(svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s64)))
+int64x2_t sveorqv(svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s16)))
+int16x8_t sveorqv(svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u8)))
+svuint8_t svextq(svuint8_t, svuint8_t, int32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u32)))
+svuint32_t svextq(svuint32_t, svuint32_t, int32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u64)))
+svuint64_t svextq(svuint64_t, svuint64_t, int32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u16)))
+svuint16_t svextq(svuint16_t, svuint16_t, int32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_bf16)))
+svbfloat16_t svextq(svbfloat16_t, svbfloat16_t, int32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s8)))
+svint8_t svextq(svint8_t, svint8_t, int32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f64)))
+svfloat64_t svextq(svfloat64_t, svfloat64_t, int32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f32)))
+svfloat32_t svextq(svfloat32_t, svfloat32_t, int32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f16)))
+svfloat16_t svextq(svfloat16_t, svfloat16_t, int32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s32)))
+svint32_t svextq(svint32_t, svint32_t, int32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s64)))
+svint64_t svextq(svint64_t, svint64_t, int32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s16)))
+svint16_t svextq(svint16_t, svint16_t, int32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32)))
+svuint32_t svld1q_gather_index_u32(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64)))
+svuint64_t svld1q_gather_index_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16)))
+svuint16_t svld1q_gather_index_u16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16)))
+svbfloat16_t svld1q_gather_index_bf16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64)))
+svfloat64_t svld1q_gather_index_f64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32)))
+svfloat32_t svld1q_gather_index_f32(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16)))
+svfloat16_t svld1q_gather_index_f16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32)))
+svint32_t svld1q_gather_index_s32(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64)))
+svint64_t svld1q_gather_index_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16)))
+svint16_t svld1q_gather_index_s16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8)))
+svuint8_t svld1q_gather_offset_u8(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32)))
+svuint32_t svld1q_gather_offset_u32(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64)))
+svuint64_t svld1q_gather_offset_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16)))
+svuint16_t svld1q_gather_offset_u16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16)))
+svbfloat16_t svld1q_gather_offset_bf16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8)))
+svint8_t svld1q_gather_offset_s8(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64)))
+svfloat64_t svld1q_gather_offset_f64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32)))
+svfloat32_t svld1q_gather_offset_f32(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16)))
+svfloat16_t svld1q_gather_offset_f16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32)))
+svint32_t svld1q_gather_offset_s32(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64)))
+svint64_t svld1q_gather_offset_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16)))
+svint16_t svld1q_gather_offset_s16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8)))
+svuint8_t svld1q_gather_u8(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32)))
+svuint32_t svld1q_gather_u32(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64)))
+svuint64_t svld1q_gather_u64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16)))
+svuint16_t svld1q_gather_u16(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16)))
+svbfloat16_t svld1q_gather_bf16(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8)))
+svint8_t svld1q_gather_s8(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64)))
+svfloat64_t svld1q_gather_f64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32)))
+svfloat32_t svld1q_gather_f32(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16)))
+svfloat16_t svld1q_gather_f16(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32)))
+svint32_t svld1q_gather_s32(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64)))
+svint64_t svld1q_gather_s64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16)))
+svint16_t svld1q_gather_s16(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32)))
+svuint32_t svld1q_gather_index(svbool_t, uint32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64)))
+svuint64_t svld1q_gather_index(svbool_t, uint64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16)))
+svuint16_t svld1q_gather_index(svbool_t, uint16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16)))
+svbfloat16_t svld1q_gather_index(svbool_t, bfloat16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64)))
+svfloat64_t svld1q_gather_index(svbool_t, float64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32)))
+svfloat32_t svld1q_gather_index(svbool_t, float32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16)))
+svfloat16_t svld1q_gather_index(svbool_t, float16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32)))
+svint32_t svld1q_gather_index(svbool_t, int32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64)))
+svint64_t svld1q_gather_index(svbool_t, int64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16)))
+svint16_t svld1q_gather_index(svbool_t, int16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8)))
+svuint8_t svld1q_gather_offset(svbool_t, uint8_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32)))
+svuint32_t svld1q_gather_offset(svbool_t, uint32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64)))
+svuint64_t svld1q_gather_offset(svbool_t, uint64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16)))
+svuint16_t svld1q_gather_offset(svbool_t, uint16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16)))
+svbfloat16_t svld1q_gather_offset(svbool_t, bfloat16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8)))
+svint8_t svld1q_gather_offset(svbool_t, int8_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64)))
+svfloat64_t svld1q_gather_offset(svbool_t, float64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32)))
+svfloat32_t svld1q_gather_offset(svbool_t, float32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16)))
+svfloat16_t svld1q_gather_offset(svbool_t, float16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32)))
+svint32_t svld1q_gather_offset(svbool_t, int32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64)))
+svint64_t svld1q_gather_offset(svbool_t, int64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16)))
+svint16_t svld1q_gather_offset(svbool_t, int16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64)))
+svuint64_t svld1udq(svbool_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64)))
+svfloat64_t svld1udq(svbool_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64)))
+svint64_t svld1udq(svbool_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64)))
+svuint64_t svld1udq_vnum(svbool_t, uint64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64)))
+svfloat64_t svld1udq_vnum(svbool_t, float64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64)))
+svint64_t svld1udq_vnum(svbool_t, int64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32)))
+svuint32_t svld1uwq(svbool_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32)))
+svfloat32_t svld1uwq(svbool_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32)))
+svint32_t svld1uwq(svbool_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32)))
+svuint32_t svld1uwq_vnum(svbool_t, uint32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32)))
+svfloat32_t svld1uwq_vnum(svbool_t, float32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32)))
+svint32_t svld1uwq_vnum(svbool_t, int32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u8)))
+svuint8x2_t svld2q(svbool_t, uint8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u32)))
+svuint32x2_t svld2q(svbool_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u64)))
+svuint64x2_t svld2q(svbool_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u16)))
+svuint16x2_t svld2q(svbool_t, uint16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s8)))
+svint8x2_t svld2q(svbool_t, int8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f64)))
+svfloat64x2_t svld2q(svbool_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f32)))
+svfloat32x2_t svld2q(svbool_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f16)))
+svfloat16x2_t svld2q(svbool_t, float16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s32)))
+svint32x2_t svld2q(svbool_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s64)))
+svint64x2_t svld2q(svbool_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s16)))
+svint16x2_t svld2q(svbool_t, int16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_bf16)))
+svbfloat16x2_t svld2q(svbool_t, bfloat16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u8)))
+svuint8x2_t svld2q_vnum(svbool_t, uint8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u32)))
+svuint32x2_t svld2q_vnum(svbool_t, uint32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u64)))
+svuint64x2_t svld2q_vnum(svbool_t, uint64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u16)))
+svuint16x2_t svld2q_vnum(svbool_t, uint16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s8)))
+svint8x2_t svld2q_vnum(svbool_t, int8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f64)))
+svfloat64x2_t svld2q_vnum(svbool_t, float64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f32)))
+svfloat32x2_t svld2q_vnum(svbool_t, float32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f16)))
+svfloat16x2_t svld2q_vnum(svbool_t, float16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s32)))
+svint32x2_t svld2q_vnum(svbool_t, int32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s64)))
+svint64x2_t svld2q_vnum(svbool_t, int64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s16)))
+svint16x2_t svld2q_vnum(svbool_t, int16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_bf16)))
+svbfloat16x2_t svld2q_vnum(svbool_t, bfloat16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u8)))
+svuint8x3_t svld3q(svbool_t, uint8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u32)))
+svuint32x3_t svld3q(svbool_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u64)))
+svuint64x3_t svld3q(svbool_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u16)))
+svuint16x3_t svld3q(svbool_t, uint16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s8)))
+svint8x3_t svld3q(svbool_t, int8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f64)))
+svfloat64x3_t svld3q(svbool_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f32)))
+svfloat32x3_t svld3q(svbool_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f16)))
+svfloat16x3_t svld3q(svbool_t, float16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s32)))
+svint32x3_t svld3q(svbool_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s64)))
+svint64x3_t svld3q(svbool_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s16)))
+svint16x3_t svld3q(svbool_t, int16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_bf16)))
+svbfloat16x3_t svld3q(svbool_t, bfloat16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u8)))
+svuint8x3_t svld3q_vnum(svbool_t, uint8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u32)))
+svuint32x3_t svld3q_vnum(svbool_t, uint32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u64)))
+svuint64x3_t svld3q_vnum(svbool_t, uint64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u16)))
+svuint16x3_t svld3q_vnum(svbool_t, uint16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s8)))
+svint8x3_t svld3q_vnum(svbool_t, int8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f64)))
+svfloat64x3_t svld3q_vnum(svbool_t, float64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f32)))
+svfloat32x3_t svld3q_vnum(svbool_t, float32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f16)))
+svfloat16x3_t svld3q_vnum(svbool_t, float16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s32)))
+svint32x3_t svld3q_vnum(svbool_t, int32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s64)))
+svint64x3_t svld3q_vnum(svbool_t, int64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s16)))
+svint16x3_t svld3q_vnum(svbool_t, int16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_bf16)))
+svbfloat16x3_t svld3q_vnum(svbool_t, bfloat16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u8)))
+svuint8x4_t svld4q(svbool_t, uint8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u32)))
+svuint32x4_t svld4q(svbool_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u64)))
+svuint64x4_t svld4q(svbool_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u16)))
+svuint16x4_t svld4q(svbool_t, uint16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s8)))
+svint8x4_t svld4q(svbool_t, int8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f64)))
+svfloat64x4_t svld4q(svbool_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f32)))
+svfloat32x4_t svld4q(svbool_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f16)))
+svfloat16x4_t svld4q(svbool_t, float16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s32)))
+svint32x4_t svld4q(svbool_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s64)))
+svint64x4_t svld4q(svbool_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s16)))
+svint16x4_t svld4q(svbool_t, int16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_bf16)))
+svbfloat16x4_t svld4q(svbool_t, bfloat16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u8)))
+svuint8x4_t svld4q_vnum(svbool_t, uint8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u32)))
+svuint32x4_t svld4q_vnum(svbool_t, uint32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u64)))
+svuint64x4_t svld4q_vnum(svbool_t, uint64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u16)))
+svuint16x4_t svld4q_vnum(svbool_t, uint16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s8)))
+svint8x4_t svld4q_vnum(svbool_t, int8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f64)))
+svfloat64x4_t svld4q_vnum(svbool_t, float64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f32)))
+svfloat32x4_t svld4q_vnum(svbool_t, float32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f16)))
+svfloat16x4_t svld4q_vnum(svbool_t, float16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s32)))
+svint32x4_t svld4q_vnum(svbool_t, int32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s64)))
+svint64x4_t svld4q_vnum(svbool_t, int64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s16)))
+svint16x4_t svld4q_vnum(svbool_t, int16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_bf16)))
+svbfloat16x4_t svld4q_vnum(svbool_t, bfloat16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f64)))
+float64x2_t svmaxnmqv(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f32)))
+float32x4_t svmaxnmqv(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f16)))
+float16x8_t svmaxnmqv(svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f64)))
+float64x2_t svmaxqv(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f32)))
+float32x4_t svmaxqv(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f16)))
+float16x8_t svmaxqv(svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s8)))
+int8x16_t svmaxqv(svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s32)))
+int32x4_t svmaxqv(svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s64)))
+int64x2_t svmaxqv(svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s16)))
+int16x8_t svmaxqv(svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u8)))
+uint8x16_t svmaxqv(svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u32)))
+uint32x4_t svmaxqv(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u64)))
+uint64x2_t svmaxqv(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u16)))
+uint16x8_t svmaxqv(svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f64)))
+float64x2_t svminnmqv(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f32)))
+float32x4_t svminnmqv(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f16)))
+float16x8_t svminnmqv(svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f64)))
+float64x2_t svminqv(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f32)))
+float32x4_t svminqv(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f16)))
+float16x8_t svminqv(svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s8)))
+int8x16_t svminqv(svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s32)))
+int32x4_t svminqv(svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s64)))
+int64x2_t svminqv(svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s16)))
+int16x8_t svminqv(svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u8)))
+uint8x16_t svminqv(svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u32)))
+uint32x4_t svminqv(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u64)))
+uint64x2_t svminqv(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u16)))
+uint16x8_t svminqv(svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u8)))
+uint8x16_t svorqv(svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u32)))
+uint32x4_t svorqv(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u64)))
+uint64x2_t svorqv(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u16)))
+uint16x8_t svorqv(svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s8)))
+int8x16_t svorqv(svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s32)))
+int32x4_t svorqv(svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s64)))
+int64x2_t svorqv(svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s16)))
+int16x8_t svorqv(svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8)))
+svbool_t svpmov(svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8)))
+svbool_t svpmov(svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64)))
+svbool_t svpmov(svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64)))
+svbool_t svpmov(svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16)))
+svbool_t svpmov(svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16)))
+svbool_t svpmov(svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32)))
+svbool_t svpmov(svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32)))
+svbool_t svpmov(svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u8)))
+svbool_t svpmov_lane(svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s8)))
+svbool_t svpmov_lane(svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64)))
+svbool_t svpmov_lane(svuint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64)))
+svbool_t svpmov_lane(svint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16)))
+svbool_t svpmov_lane(svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16)))
+svbool_t svpmov_lane(svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32)))
+svbool_t svpmov_lane(svuint32_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32)))
+svbool_t svpmov_lane(svint32_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64_m)))
+svuint64_t svpmov_lane_m(svuint64_t, svbool_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64_m)))
+svint64_t svpmov_lane_m(svint64_t, svbool_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16_m)))
+svuint16_t svpmov_lane_m(svuint16_t, svbool_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16_m)))
+svint16_t svpmov_lane_m(svint16_t, svbool_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32_m)))
+svuint32_t svpmov_lane_m(svuint32_t, svbool_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32_m)))
+svint32_t svpmov_lane_m(svint32_t, svbool_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64)))
+void svst1dq(svbool_t, uint64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64)))
+void svst1dq(svbool_t, float64_t const *, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64)))
+void svst1dq(svbool_t, int64_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64)))
+void svst1dq_vnum(svbool_t, uint64_t const *, int64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64)))
+void svst1dq_vnum(svbool_t, float64_t const *, int64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64)))
+void svst1dq_vnum(svbool_t, int64_t const *, int64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8)))
+void svst1q_scatter(svbool_t, svuint64_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32)))
+void svst1q_scatter(svbool_t, svuint64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64)))
+void svst1q_scatter(svbool_t, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16)))
+void svst1q_scatter(svbool_t, svuint64_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16)))
+void svst1q_scatter(svbool_t, svuint64_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8)))
+void svst1q_scatter(svbool_t, svuint64_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64)))
+void svst1q_scatter(svbool_t, svuint64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32)))
+void svst1q_scatter(svbool_t, svuint64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16)))
+void svst1q_scatter(svbool_t, svuint64_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32)))
+void svst1q_scatter(svbool_t, svuint64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64)))
+void svst1q_scatter(svbool_t, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16)))
+void svst1q_scatter(svbool_t, svuint64_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32)))
+void svst1q_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64)))
+void svst1q_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16)))
+void svst1q_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16)))
+void svst1q_scatter_index(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64)))
+void svst1q_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32)))
+void svst1q_scatter_index(svbool_t, float32_t *, svuint64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16)))
+void svst1q_scatter_index(svbool_t, float16_t *, svuint64_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32)))
+void svst1q_scatter_index(svbool_t, int32_t *, svuint64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64)))
+void svst1q_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16)))
+void svst1q_scatter_index(svbool_t, int16_t *, svuint64_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8)))
+void svst1q_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32)))
+void svst1q_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64)))
+void svst1q_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16)))
+void svst1q_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16)))
+void svst1q_scatter_offset(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8)))
+void svst1q_scatter_offset(svbool_t, int8_t *, svuint64_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64)))
+void svst1q_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32)))
+void svst1q_scatter_offset(svbool_t, float32_t *, svuint64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16)))
+void svst1q_scatter_offset(svbool_t, float16_t *, svuint64_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32)))
+void svst1q_scatter_offset(svbool_t, int32_t *, svuint64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64)))
+void svst1q_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16)))
+void svst1q_scatter_offset(svbool_t, int16_t *, svuint64_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32)))
+void svst1wq(svbool_t, uint32_t const *, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32)))
+void svst1wq(svbool_t, float32_t const *, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32)))
+void svst1wq(svbool_t, int32_t const *, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32)))
+void svst1wq_vnum(svbool_t, uint32_t const *, int64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32)))
+void svst1wq_vnum(svbool_t, float32_t const *, int64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32)))
+void svst1wq_vnum(svbool_t, int32_t const *, int64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u8)))
+void svst2q(svbool_t, uint8_t const *, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u32)))
+void svst2q(svbool_t, uint32_t const *, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u64)))
+void svst2q(svbool_t, uint64_t const *, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u16)))
+void svst2q(svbool_t, uint16_t const *, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s8)))
+void svst2q(svbool_t, int8_t const *, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f64)))
+void svst2q(svbool_t, float64_t const *, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f32)))
+void svst2q(svbool_t, float32_t const *, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f16)))
+void svst2q(svbool_t, float16_t const *, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s32)))
+void svst2q(svbool_t, int32_t const *, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s64)))
+void svst2q(svbool_t, int64_t const *, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s16)))
+void svst2q(svbool_t, int16_t const *, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_bf16)))
+void svst2q(svbool_t, bfloat16_t const *, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u8)))
+void svst2q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u32)))
+void svst2q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u64)))
+void svst2q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u16)))
+void svst2q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s8)))
+void svst2q_vnum(svbool_t, int8_t const *, int64_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f64)))
+void svst2q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f32)))
+void svst2q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f16)))
+void svst2q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s32)))
+void svst2q_vnum(svbool_t, int32_t const *, int64_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s64)))
+void svst2q_vnum(svbool_t, int64_t const *, int64_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s16)))
+void svst2q_vnum(svbool_t, int16_t const *, int64_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_bf16)))
+void svst2q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u8)))
+void svst3q(svbool_t, uint8_t const *, svuint8x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u32)))
+void svst3q(svbool_t, uint32_t const *, svuint32x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u64)))
+void svst3q(svbool_t, uint64_t const *, svuint64x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u16)))
+void svst3q(svbool_t, uint16_t const *, svuint16x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s8)))
+void svst3q(svbool_t, int8_t const *, svint8x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f64)))
+void svst3q(svbool_t, float64_t const *, svfloat64x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f32)))
+void svst3q(svbool_t, float32_t const *, svfloat32x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f16)))
+void svst3q(svbool_t, float16_t const *, svfloat16x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s32)))
+void svst3q(svbool_t, int32_t const *, svint32x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s64)))
+void svst3q(svbool_t, int64_t const *, svint64x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s16)))
+void svst3q(svbool_t, int16_t const *, svint16x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_bf16)))
+void svst3q(svbool_t, bfloat16_t const *, svbfloat16x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u8)))
+void svst3q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u32)))
+void svst3q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u64)))
+void svst3q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u16)))
+void svst3q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s8)))
+void svst3q_vnum(svbool_t, int8_t const *, int64_t, svint8x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f64)))
+void svst3q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f32)))
+void svst3q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f16)))
+void svst3q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s32)))
+void svst3q_vnum(svbool_t, int32_t const *, int64_t, svint32x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s64)))
+void svst3q_vnum(svbool_t, int64_t const *, int64_t, svint64x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s16)))
+void svst3q_vnum(svbool_t, int16_t const *, int64_t, svint16x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_bf16)))
+void svst3q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x3_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u8)))
+void svst4q(svbool_t, uint8_t const *, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u32)))
+void svst4q(svbool_t, uint32_t const *, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u64)))
+void svst4q(svbool_t, uint64_t const *, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u16)))
+void svst4q(svbool_t, uint16_t const *, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s8)))
+void svst4q(svbool_t, int8_t const *, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f64)))
+void svst4q(svbool_t, float64_t const *, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f32)))
+void svst4q(svbool_t, float32_t const *, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f16)))
+void svst4q(svbool_t, float16_t const *, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s32)))
+void svst4q(svbool_t, int32_t const *, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s64)))
+void svst4q(svbool_t, int64_t const *, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s16)))
+void svst4q(svbool_t, int16_t const *, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_bf16)))
+void svst4q(svbool_t, bfloat16_t const *, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u8)))
+void svst4q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u32)))
+void svst4q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u64)))
+void svst4q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u16)))
+void svst4q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s8)))
+void svst4q_vnum(svbool_t, int8_t const *, int64_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f64)))
+void svst4q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f32)))
+void svst4q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f16)))
+void svst4q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s32)))
+void svst4q_vnum(svbool_t, int32_t const *, int64_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s64)))
+void svst4q_vnum(svbool_t, int64_t const *, int64_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s16)))
+void svst4q_vnum(svbool_t, int16_t const *, int64_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_bf16)))
+void svst4q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u8)))
+svuint8_t svtblq(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u32)))
+svuint32_t svtblq(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u64)))
+svuint64_t svtblq(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u16)))
+svuint16_t svtblq(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_bf16)))
+svbfloat16_t svtblq(svbfloat16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s8)))
+svint8_t svtblq(svint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f64)))
+svfloat64_t svtblq(svfloat64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f32)))
+svfloat32_t svtblq(svfloat32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f16)))
+svfloat16_t svtblq(svfloat16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s32)))
+svint32_t svtblq(svint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s64)))
+svint64_t svtblq(svint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s16)))
+svint16_t svtblq(svint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u8)))
+svuint8_t svtbxq(svuint8_t, svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u32)))
+svuint32_t svtbxq(svuint32_t, svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u64)))
+svuint64_t svtbxq(svuint64_t, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u16)))
+svuint16_t svtbxq(svuint16_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_bf16)))
+svbfloat16_t svtbxq(svbfloat16_t, svbfloat16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s8)))
+svint8_t svtbxq(svint8_t, svint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f64)))
+svfloat64_t svtbxq(svfloat64_t, svfloat64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f32)))
+svfloat32_t svtbxq(svfloat32_t, svfloat32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f16)))
+svfloat16_t svtbxq(svfloat16_t, svfloat16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s32)))
+svint32_t svtbxq(svint32_t, svint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s64)))
+svint64_t svtbxq(svint64_t, svint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s16)))
+svint16_t svtbxq(svint16_t, svint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u8)))
+svuint8_t svuzpq1(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u32)))
+svuint32_t svuzpq1(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u64)))
+svuint64_t svuzpq1(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u16)))
+svuint16_t svuzpq1(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_bf16)))
+svbfloat16_t svuzpq1(svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s8)))
+svint8_t svuzpq1(svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f64)))
+svfloat64_t svuzpq1(svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f32)))
+svfloat32_t svuzpq1(svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f16)))
+svfloat16_t svuzpq1(svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s32)))
+svint32_t svuzpq1(svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s64)))
+svint64_t svuzpq1(svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s16)))
+svint16_t svuzpq1(svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u8)))
+svuint8_t svuzpq2(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u32)))
+svuint32_t svuzpq2(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u64)))
+svuint64_t svuzpq2(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u16)))
+svuint16_t svuzpq2(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_bf16)))
+svbfloat16_t svuzpq2(svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s8)))
+svint8_t svuzpq2(svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f64)))
+svfloat64_t svuzpq2(svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f32)))
+svfloat32_t svuzpq2(svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f16)))
+svfloat16_t svuzpq2(svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s32)))
+svint32_t svuzpq2(svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s64)))
+svint64_t svuzpq2(svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s16)))
+svint16_t svuzpq2(svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u8)))
+svuint8_t svzipq1(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u32)))
+svuint32_t svzipq1(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u64)))
+svuint64_t svzipq1(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u16)))
+svuint16_t svzipq1(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_bf16)))
+svbfloat16_t svzipq1(svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s8)))
+svint8_t svzipq1(svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f64)))
+svfloat64_t svzipq1(svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f32)))
+svfloat32_t svzipq1(svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f16)))
+svfloat16_t svzipq1(svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s32)))
+svint32_t svzipq1(svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s64)))
+svint64_t svzipq1(svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s16)))
+svint16_t svzipq1(svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u8)))
+svuint8_t svzipq2(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u32)))
+svuint32_t svzipq2(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u64)))
+svuint64_t svzipq2(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u16)))
+svuint16_t svzipq2(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_bf16)))
+svbfloat16_t svzipq2(svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s8)))
+svint8_t svzipq2(svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f64)))
+svfloat64_t svzipq2(svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f32)))
+svfloat32_t svzipq2(svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f16)))
+svfloat16_t svzipq2(svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s32)))
+svint32_t svzipq2(svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s64)))
+svint64_t svzipq2(svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16)))
+svint16_t svzipq2(svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b16)))
+svbool_t svpsel_lane_b16(svbool_t, svbool_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b32)))
+svbool_t svpsel_lane_b32(svbool_t, svbool_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b64)))
+svbool_t svpsel_lane_b64(svbool_t, svbool_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b8)))
+svbool_t svpsel_lane_b8(svbool_t, svbool_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_f32)))
+svfloat32_t svbfmlslb_f32(svfloat32_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_lane_f32)))
+svfloat32_t svbfmlslb_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_f32)))
+svfloat32_t svbfmlslt_f32(svfloat32_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_lane_f32)))
+svfloat32_t svbfmlslt_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f64)))
svfloat64_t svclamp_f64(svfloat64_t, svfloat64_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f32)))
@@ -23899,6 +29660,252 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u64)))
svuint64_t svclamp_u64(svuint64_t, svuint64_t, svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u16)))
svuint16_t svclamp_u16(svuint16_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c8)))
+uint64_t svcntp_c8(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c32)))
+uint64_t svcntp_c32(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c64)))
+uint64_t svcntp_c64(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c16)))
+uint64_t svcntp_c16(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_b)))
+svboolx2_t svcreate2_b(svbool_t, svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_b)))
+svboolx4_t svcreate4_b(svbool_t, svbool_t, svbool_t, svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_f16)))
+svfloat32_t svdot_f32_f16(svfloat32_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32_s16)))
+svint32_t svdot_s32_s16(svint32_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32_u16)))
+svuint32_t svdot_u32_u16(svuint32_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_f16)))
+svfloat32_t svdot_lane_f32_f16(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32_s16)))
+svint32_t svdot_lane_s32_s16(svint32_t, svint16_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32_u16)))
+svuint32_t svdot_lane_u32_u16(svuint32_t, svuint16_t, svuint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_b)))
+svbool_t svget2_b(svboolx2_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_b)))
+svbool_t svget4_b(svboolx4_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2)))
+svuint8x2_t svld1_u8_x2(svcount_t, uint8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2)))
+svint8x2_t svld1_s8_x2(svcount_t, int8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2)))
+svuint64x2_t svld1_u64_x2(svcount_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2)))
+svfloat64x2_t svld1_f64_x2(svcount_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2)))
+svint64x2_t svld1_s64_x2(svcount_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2)))
+svuint16x2_t svld1_u16_x2(svcount_t, uint16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2)))
+svbfloat16x2_t svld1_bf16_x2(svcount_t, bfloat16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2)))
+svfloat16x2_t svld1_f16_x2(svcount_t, float16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2)))
+svint16x2_t svld1_s16_x2(svcount_t, int16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2)))
+svuint32x2_t svld1_u32_x2(svcount_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2)))
+svfloat32x2_t svld1_f32_x2(svcount_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2)))
+svint32x2_t svld1_s32_x2(svcount_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4)))
+svuint8x4_t svld1_u8_x4(svcount_t, uint8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4)))
+svint8x4_t svld1_s8_x4(svcount_t, int8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4)))
+svuint64x4_t svld1_u64_x4(svcount_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4)))
+svfloat64x4_t svld1_f64_x4(svcount_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4)))
+svint64x4_t svld1_s64_x4(svcount_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4)))
+svuint16x4_t svld1_u16_x4(svcount_t, uint16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4)))
+svbfloat16x4_t svld1_bf16_x4(svcount_t, bfloat16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4)))
+svfloat16x4_t svld1_f16_x4(svcount_t, float16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4)))
+svint16x4_t svld1_s16_x4(svcount_t, int16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4)))
+svuint32x4_t svld1_u32_x4(svcount_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4)))
+svfloat32x4_t svld1_f32_x4(svcount_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4)))
+svint32x4_t svld1_s32_x4(svcount_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2)))
+svuint8x2_t svld1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2)))
+svint8x2_t svld1_vnum_s8_x2(svcount_t, int8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2)))
+svuint64x2_t svld1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2)))
+svfloat64x2_t svld1_vnum_f64_x2(svcount_t, float64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2)))
+svint64x2_t svld1_vnum_s64_x2(svcount_t, int64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2)))
+svuint16x2_t svld1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2)))
+svbfloat16x2_t svld1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2)))
+svfloat16x2_t svld1_vnum_f16_x2(svcount_t, float16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2)))
+svint16x2_t svld1_vnum_s16_x2(svcount_t, int16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2)))
+svuint32x2_t svld1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2)))
+svfloat32x2_t svld1_vnum_f32_x2(svcount_t, float32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2)))
+svint32x2_t svld1_vnum_s32_x2(svcount_t, int32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4)))
+svuint8x4_t svld1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4)))
+svint8x4_t svld1_vnum_s8_x4(svcount_t, int8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4)))
+svuint64x4_t svld1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4)))
+svfloat64x4_t svld1_vnum_f64_x4(svcount_t, float64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4)))
+svint64x4_t svld1_vnum_s64_x4(svcount_t, int64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4)))
+svuint16x4_t svld1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4)))
+svbfloat16x4_t svld1_vnum_bf16_x4(svcount_t, bfloat16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4)))
+svfloat16x4_t svld1_vnum_f16_x4(svcount_t, float16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4)))
+svint16x4_t svld1_vnum_s16_x4(svcount_t, int16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4)))
+svuint32x4_t svld1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4)))
+svfloat32x4_t svld1_vnum_f32_x4(svcount_t, float32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4)))
+svint32x4_t svld1_vnum_s32_x4(svcount_t, int32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2)))
+svuint8x2_t svldnt1_u8_x2(svcount_t, uint8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2)))
+svint8x2_t svldnt1_s8_x2(svcount_t, int8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2)))
+svuint64x2_t svldnt1_u64_x2(svcount_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2)))
+svfloat64x2_t svldnt1_f64_x2(svcount_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2)))
+svint64x2_t svldnt1_s64_x2(svcount_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2)))
+svuint16x2_t svldnt1_u16_x2(svcount_t, uint16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2)))
+svbfloat16x2_t svldnt1_bf16_x2(svcount_t, bfloat16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2)))
+svfloat16x2_t svldnt1_f16_x2(svcount_t, float16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2)))
+svint16x2_t svldnt1_s16_x2(svcount_t, int16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2)))
+svuint32x2_t svldnt1_u32_x2(svcount_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2)))
+svfloat32x2_t svldnt1_f32_x2(svcount_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2)))
+svint32x2_t svldnt1_s32_x2(svcount_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4)))
+svuint8x4_t svldnt1_u8_x4(svcount_t, uint8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4)))
+svint8x4_t svldnt1_s8_x4(svcount_t, int8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4)))
+svuint64x4_t svldnt1_u64_x4(svcount_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4)))
+svfloat64x4_t svldnt1_f64_x4(svcount_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4)))
+svint64x4_t svldnt1_s64_x4(svcount_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4)))
+svuint16x4_t svldnt1_u16_x4(svcount_t, uint16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4)))
+svbfloat16x4_t svldnt1_bf16_x4(svcount_t, bfloat16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4)))
+svfloat16x4_t svldnt1_f16_x4(svcount_t, float16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4)))
+svint16x4_t svldnt1_s16_x4(svcount_t, int16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4)))
+svuint32x4_t svldnt1_u32_x4(svcount_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4)))
+svfloat32x4_t svldnt1_f32_x4(svcount_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4)))
+svint32x4_t svldnt1_s32_x4(svcount_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2)))
+svuint8x2_t svldnt1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2)))
+svint8x2_t svldnt1_vnum_s8_x2(svcount_t, int8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2)))
+svuint64x2_t svldnt1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2)))
+svfloat64x2_t svldnt1_vnum_f64_x2(svcount_t, float64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2)))
+svint64x2_t svldnt1_vnum_s64_x2(svcount_t, int64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2)))
+svuint16x2_t svldnt1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2)))
+svbfloat16x2_t svldnt1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2)))
+svfloat16x2_t svldnt1_vnum_f16_x2(svcount_t, float16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2)))
+svint16x2_t svldnt1_vnum_s16_x2(svcount_t, int16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2)))
+svuint32x2_t svldnt1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2)))
+svfloat32x2_t svldnt1_vnum_f32_x2(svcount_t, float32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2)))
+svint32x2_t svldnt1_vnum_s32_x2(svcount_t, int32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4)))
+svuint8x4_t svldnt1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4)))
+svint8x4_t svldnt1_vnum_s8_x4(svcount_t, int8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4)))
+svuint64x4_t svldnt1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4)))
+svfloat64x4_t svldnt1_vnum_f64_x4(svcount_t, float64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4)))
+svint64x4_t svldnt1_vnum_s64_x4(svcount_t, int64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4)))
+svuint16x4_t svldnt1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4)))
+svbfloat16x4_t svldnt1_vnum_bf16_x4(svcount_t, bfloat16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4)))
+svfloat16x4_t svldnt1_vnum_f16_x4(svcount_t, float16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4)))
+svint16x4_t svldnt1_vnum_s16_x4(svcount_t, int16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4)))
+svuint32x4_t svldnt1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4)))
+svfloat32x4_t svldnt1_vnum_f32_x4(svcount_t, float32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4)))
+svint32x4_t svldnt1_vnum_s32_x4(svcount_t, int32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8)))
+svbool_t svpext_lane_c8(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32)))
+svbool_t svpext_lane_c32(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64)))
+svbool_t svpext_lane_c64(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16)))
+svbool_t svpext_lane_c16(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8_x2)))
+svboolx2_t svpext_lane_c8_x2(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32_x2)))
+svboolx2_t svpext_lane_c32_x2(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64_x2)))
+svboolx2_t svpext_lane_c64_x2(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16_x2)))
+svboolx2_t svpext_lane_c16_x2(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfalse_c)))
+svcount_t svpfalse_c(void);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c16)))
+svcount_t svpsel_lane_c16(svcount_t, svbool_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c32)))
+svcount_t svpsel_lane_c32(svcount_t, svbool_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c64)))
+svcount_t svpsel_lane_c64(svcount_t, svbool_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c8)))
+svcount_t svpsel_lane_c8(svcount_t, svbool_t, uint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c8)))
svcount_t svptrue_c8(void);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c32)))
@@ -23907,6 +29914,420 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c64)))
svcount_t svptrue_c64(void);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c16)))
svcount_t svptrue_c16(void);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s32_x2)))
+svint16_t svqrshrn_n_s16_s32_x2(svint32x2_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u32_x2)))
+svuint16_t svqrshrn_n_u16_u32_x2(svuint32x2_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s32_x2)))
+svuint16_t svqrshrun_n_u16_s32_x2(svint32x2_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_m)))
+svuint8_t svrevd_u8_m(svuint8_t, svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_m)))
+svuint32_t svrevd_u32_m(svuint32_t, svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_m)))
+svuint64_t svrevd_u64_m(svuint64_t, svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_m)))
+svuint16_t svrevd_u16_m(svuint16_t, svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_m)))
+svbfloat16_t svrevd_bf16_m(svbfloat16_t, svbool_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_m)))
+svint8_t svrevd_s8_m(svint8_t, svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_m)))
+svfloat64_t svrevd_f64_m(svfloat64_t, svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_m)))
+svfloat32_t svrevd_f32_m(svfloat32_t, svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_m)))
+svfloat16_t svrevd_f16_m(svfloat16_t, svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_m)))
+svint32_t svrevd_s32_m(svint32_t, svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_m)))
+svint64_t svrevd_s64_m(svint64_t, svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_m)))
+svint16_t svrevd_s16_m(svint16_t, svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_x)))
+svuint8_t svrevd_u8_x(svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_x)))
+svuint32_t svrevd_u32_x(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_x)))
+svuint64_t svrevd_u64_x(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_x)))
+svuint16_t svrevd_u16_x(svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_x)))
+svbfloat16_t svrevd_bf16_x(svbool_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_x)))
+svint8_t svrevd_s8_x(svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_x)))
+svfloat64_t svrevd_f64_x(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_x)))
+svfloat32_t svrevd_f32_x(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_x)))
+svfloat16_t svrevd_f16_x(svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_x)))
+svint32_t svrevd_s32_x(svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_x)))
+svint64_t svrevd_s64_x(svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_x)))
+svint16_t svrevd_s16_x(svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_z)))
+svuint8_t svrevd_u8_z(svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_z)))
+svuint32_t svrevd_u32_z(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_z)))
+svuint64_t svrevd_u64_z(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_z)))
+svuint16_t svrevd_u16_z(svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_z)))
+svbfloat16_t svrevd_bf16_z(svbool_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_z)))
+svint8_t svrevd_s8_z(svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_z)))
+svfloat64_t svrevd_f64_z(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_z)))
+svfloat32_t svrevd_f32_z(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_z)))
+svfloat16_t svrevd_f16_z(svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_z)))
+svint32_t svrevd_s32_z(svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_z)))
+svint64_t svrevd_s64_z(svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_z)))
+svint16_t svrevd_s16_z(svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_b)))
+svboolx2_t svset2_b(svboolx2_t, uint64_t, svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_b)))
+svboolx4_t svset4_b(svboolx4_t, uint64_t, svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2)))
+void svst1_u8_x2(svcount_t, uint8_t *, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2)))
+void svst1_s8_x2(svcount_t, int8_t *, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2)))
+void svst1_u64_x2(svcount_t, uint64_t *, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2)))
+void svst1_f64_x2(svcount_t, float64_t *, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2)))
+void svst1_s64_x2(svcount_t, int64_t *, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2)))
+void svst1_u16_x2(svcount_t, uint16_t *, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2)))
+void svst1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2)))
+void svst1_f16_x2(svcount_t, float16_t *, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2)))
+void svst1_s16_x2(svcount_t, int16_t *, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2)))
+void svst1_u32_x2(svcount_t, uint32_t *, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2)))
+void svst1_f32_x2(svcount_t, float32_t *, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2)))
+void svst1_s32_x2(svcount_t, int32_t *, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4)))
+void svst1_u8_x4(svcount_t, uint8_t *, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4)))
+void svst1_s8_x4(svcount_t, int8_t *, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4)))
+void svst1_u64_x4(svcount_t, uint64_t *, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4)))
+void svst1_f64_x4(svcount_t, float64_t *, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4)))
+void svst1_s64_x4(svcount_t, int64_t *, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4)))
+void svst1_u16_x4(svcount_t, uint16_t *, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4)))
+void svst1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4)))
+void svst1_f16_x4(svcount_t, float16_t *, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4)))
+void svst1_s16_x4(svcount_t, int16_t *, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4)))
+void svst1_u32_x4(svcount_t, uint32_t *, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4)))
+void svst1_f32_x4(svcount_t, float32_t *, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4)))
+void svst1_s32_x4(svcount_t, int32_t *, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2)))
+void svst1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2)))
+void svst1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2)))
+void svst1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2)))
+void svst1_vnum_f64_x2(svcount_t, float64_t *, int64_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2)))
+void svst1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2)))
+void svst1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2)))
+void svst1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2)))
+void svst1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2)))
+void svst1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2)))
+void svst1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2)))
+void svst1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2)))
+void svst1_vnum_s32_x2(svcount_t, int32_t *, int64_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4)))
+void svst1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4)))
+void svst1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4)))
+void svst1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4)))
+void svst1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4)))
+void svst1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4)))
+void svst1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4)))
+void svst1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4)))
+void svst1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4)))
+void svst1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4)))
+void svst1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4)))
+void svst1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4)))
+void svst1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2)))
+void svstnt1_u8_x2(svcount_t, uint8_t *, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2)))
+void svstnt1_s8_x2(svcount_t, int8_t *, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2)))
+void svstnt1_u64_x2(svcount_t, uint64_t *, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2)))
+void svstnt1_f64_x2(svcount_t, float64_t *, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2)))
+void svstnt1_s64_x2(svcount_t, int64_t *, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2)))
+void svstnt1_u16_x2(svcount_t, uint16_t *, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2)))
+void svstnt1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2)))
+void svstnt1_f16_x2(svcount_t, float16_t *, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2)))
+void svstnt1_s16_x2(svcount_t, int16_t *, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2)))
+void svstnt1_u32_x2(svcount_t, uint32_t *, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2)))
+void svstnt1_f32_x2(svcount_t, float32_t *, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2)))
+void svstnt1_s32_x2(svcount_t, int32_t *, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4)))
+void svstnt1_u8_x4(svcount_t, uint8_t *, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4)))
+void svstnt1_s8_x4(svcount_t, int8_t *, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4)))
+void svstnt1_u64_x4(svcount_t, uint64_t *, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4)))
+void svstnt1_f64_x4(svcount_t, float64_t *, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4)))
+void svstnt1_s64_x4(svcount_t, int64_t *, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4)))
+void svstnt1_u16_x4(svcount_t, uint16_t *, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4)))
+void svstnt1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4)))
+void svstnt1_f16_x4(svcount_t, float16_t *, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4)))
+void svstnt1_s16_x4(svcount_t, int16_t *, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4)))
+void svstnt1_u32_x4(svcount_t, uint32_t *, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4)))
+void svstnt1_f32_x4(svcount_t, float32_t *, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4)))
+void svstnt1_s32_x4(svcount_t, int32_t *, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2)))
+void svstnt1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2)))
+void svstnt1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2)))
+void svstnt1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2)))
+void svstnt1_vnum_f64_x2(svcount_t, float64_t *, int64_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2)))
+void svstnt1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2)))
+void svstnt1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2)))
+void svstnt1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2)))
+void svstnt1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2)))
+void svstnt1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2)))
+void svstnt1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2)))
+void svstnt1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2)))
+void svstnt1_vnum_s32_x2(svcount_t, int32_t *, int64_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4)))
+void svstnt1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4)))
+void svstnt1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4)))
+void svstnt1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4)))
+void svstnt1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4)))
+void svstnt1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4)))
+void svstnt1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4)))
+void svstnt1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4)))
+void svstnt1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4)))
+void svstnt1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4)))
+void svstnt1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4)))
+void svstnt1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4)))
+void svstnt1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_b)))
+svboolx2_t svundef2_b();
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_b)))
+svboolx4_t svundef4_b();
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64)))
+svcount_t svwhilege_c8_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64)))
+svcount_t svwhilege_c32_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64)))
+svcount_t svwhilege_c64_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64)))
+svcount_t svwhilege_c16_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64)))
+svcount_t svwhilege_c8_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64)))
+svcount_t svwhilege_c32_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64)))
+svcount_t svwhilege_c64_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64)))
+svcount_t svwhilege_c16_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64_x2)))
+svboolx2_t svwhilege_b8_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64_x2)))
+svboolx2_t svwhilege_b32_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64_x2)))
+svboolx2_t svwhilege_b64_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64_x2)))
+svboolx2_t svwhilege_b16_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64_x2)))
+svboolx2_t svwhilege_b8_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64_x2)))
+svboolx2_t svwhilege_b32_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64_x2)))
+svboolx2_t svwhilege_b64_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64_x2)))
+svboolx2_t svwhilege_b16_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64)))
+svcount_t svwhilegt_c8_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64)))
+svcount_t svwhilegt_c32_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64)))
+svcount_t svwhilegt_c64_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64)))
+svcount_t svwhilegt_c16_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64)))
+svcount_t svwhilegt_c8_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64)))
+svcount_t svwhilegt_c32_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64)))
+svcount_t svwhilegt_c64_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64)))
+svcount_t svwhilegt_c16_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64_x2)))
+svboolx2_t svwhilegt_b8_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64_x2)))
+svboolx2_t svwhilegt_b32_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64_x2)))
+svboolx2_t svwhilegt_b64_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64_x2)))
+svboolx2_t svwhilegt_b16_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64_x2)))
+svboolx2_t svwhilegt_b8_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64_x2)))
+svboolx2_t svwhilegt_b32_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64_x2)))
+svboolx2_t svwhilegt_b64_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64_x2)))
+svboolx2_t svwhilegt_b16_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64)))
+svcount_t svwhilele_c8_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64)))
+svcount_t svwhilele_c32_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64)))
+svcount_t svwhilele_c64_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64)))
+svcount_t svwhilele_c16_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64)))
+svcount_t svwhilele_c8_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64)))
+svcount_t svwhilele_c32_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64)))
+svcount_t svwhilele_c64_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64)))
+svcount_t svwhilele_c16_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64_x2)))
+svboolx2_t svwhilele_b8_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64_x2)))
+svboolx2_t svwhilele_b32_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64_x2)))
+svboolx2_t svwhilele_b64_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64_x2)))
+svboolx2_t svwhilele_b16_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64_x2)))
+svboolx2_t svwhilele_b8_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64_x2)))
+svboolx2_t svwhilele_b32_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64_x2)))
+svboolx2_t svwhilele_b64_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64_x2)))
+svboolx2_t svwhilele_b16_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64)))
+svcount_t svwhilelt_c8_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64)))
+svcount_t svwhilelt_c32_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64)))
+svcount_t svwhilelt_c64_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64)))
+svcount_t svwhilelt_c16_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64)))
+svcount_t svwhilelt_c8_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64)))
+svcount_t svwhilelt_c32_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64)))
+svcount_t svwhilelt_c64_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64)))
+svcount_t svwhilelt_c16_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64_x2)))
+svboolx2_t svwhilelt_b8_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64_x2)))
+svboolx2_t svwhilelt_b32_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64_x2)))
+svboolx2_t svwhilelt_b64_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64_x2)))
+svboolx2_t svwhilelt_b16_u64_x2(uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64_x2)))
+svboolx2_t svwhilelt_b8_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64_x2)))
+svboolx2_t svwhilelt_b32_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64_x2)))
+svboolx2_t svwhilelt_b64_s64_x2(int64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64_x2)))
+svboolx2_t svwhilelt_b16_s64_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_f32)))
+svfloat32_t svbfmlslb(svfloat32_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_lane_f32)))
+svfloat32_t svbfmlslb_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_f32)))
+svfloat32_t svbfmlslt(svfloat32_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_lane_f32)))
+svfloat32_t svbfmlslt_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f64)))
svfloat64_t svclamp(svfloat64_t, svfloat64_t, svfloat64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f32)))
@@ -23929,6 +30350,620 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u64)))
svuint64_t svclamp(svuint64_t, svuint64_t, svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u16)))
svuint16_t svclamp(svuint16_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_b)))
+svboolx2_t svcreate2(svbool_t, svbool_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_b)))
+svboolx4_t svcreate4(svbool_t, svbool_t, svbool_t, svbool_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_f16)))
+svfloat32_t svdot(svfloat32_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32_s16)))
+svint32_t svdot(svint32_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32_u16)))
+svuint32_t svdot(svuint32_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_f16)))
+svfloat32_t svdot_lane(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32_s16)))
+svint32_t svdot_lane(svint32_t, svint16_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32_u16)))
+svuint32_t svdot_lane(svuint32_t, svuint16_t, svuint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_b)))
+svbool_t svget2(svboolx2_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_b)))
+svbool_t svget4(svboolx4_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2)))
+svuint8x2_t svld1_x2(svcount_t, uint8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2)))
+svint8x2_t svld1_x2(svcount_t, int8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2)))
+svuint64x2_t svld1_x2(svcount_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2)))
+svfloat64x2_t svld1_x2(svcount_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2)))
+svint64x2_t svld1_x2(svcount_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2)))
+svuint16x2_t svld1_x2(svcount_t, uint16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2)))
+svbfloat16x2_t svld1_x2(svcount_t, bfloat16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2)))
+svfloat16x2_t svld1_x2(svcount_t, float16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2)))
+svint16x2_t svld1_x2(svcount_t, int16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2)))
+svuint32x2_t svld1_x2(svcount_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2)))
+svfloat32x2_t svld1_x2(svcount_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2)))
+svint32x2_t svld1_x2(svcount_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4)))
+svuint8x4_t svld1_x4(svcount_t, uint8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4)))
+svint8x4_t svld1_x4(svcount_t, int8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4)))
+svuint64x4_t svld1_x4(svcount_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4)))
+svfloat64x4_t svld1_x4(svcount_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4)))
+svint64x4_t svld1_x4(svcount_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4)))
+svuint16x4_t svld1_x4(svcount_t, uint16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4)))
+svbfloat16x4_t svld1_x4(svcount_t, bfloat16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4)))
+svfloat16x4_t svld1_x4(svcount_t, float16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4)))
+svint16x4_t svld1_x4(svcount_t, int16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4)))
+svuint32x4_t svld1_x4(svcount_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4)))
+svfloat32x4_t svld1_x4(svcount_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4)))
+svint32x4_t svld1_x4(svcount_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2)))
+svuint8x2_t svld1_vnum_x2(svcount_t, uint8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2)))
+svint8x2_t svld1_vnum_x2(svcount_t, int8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2)))
+svuint64x2_t svld1_vnum_x2(svcount_t, uint64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2)))
+svfloat64x2_t svld1_vnum_x2(svcount_t, float64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2)))
+svint64x2_t svld1_vnum_x2(svcount_t, int64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2)))
+svuint16x2_t svld1_vnum_x2(svcount_t, uint16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2)))
+svbfloat16x2_t svld1_vnum_x2(svcount_t, bfloat16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2)))
+svfloat16x2_t svld1_vnum_x2(svcount_t, float16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2)))
+svint16x2_t svld1_vnum_x2(svcount_t, int16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2)))
+svuint32x2_t svld1_vnum_x2(svcount_t, uint32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2)))
+svfloat32x2_t svld1_vnum_x2(svcount_t, float32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2)))
+svint32x2_t svld1_vnum_x2(svcount_t, int32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4)))
+svuint8x4_t svld1_vnum_x4(svcount_t, uint8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4)))
+svint8x4_t svld1_vnum_x4(svcount_t, int8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4)))
+svuint64x4_t svld1_vnum_x4(svcount_t, uint64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4)))
+svfloat64x4_t svld1_vnum_x4(svcount_t, float64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4)))
+svint64x4_t svld1_vnum_x4(svcount_t, int64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4)))
+svuint16x4_t svld1_vnum_x4(svcount_t, uint16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4)))
+svbfloat16x4_t svld1_vnum_x4(svcount_t, bfloat16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4)))
+svfloat16x4_t svld1_vnum_x4(svcount_t, float16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4)))
+svint16x4_t svld1_vnum_x4(svcount_t, int16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4)))
+svuint32x4_t svld1_vnum_x4(svcount_t, uint32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4)))
+svfloat32x4_t svld1_vnum_x4(svcount_t, float32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4)))
+svint32x4_t svld1_vnum_x4(svcount_t, int32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2)))
+svuint8x2_t svldnt1_x2(svcount_t, uint8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2)))
+svint8x2_t svldnt1_x2(svcount_t, int8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2)))
+svuint64x2_t svldnt1_x2(svcount_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2)))
+svfloat64x2_t svldnt1_x2(svcount_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2)))
+svint64x2_t svldnt1_x2(svcount_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2)))
+svuint16x2_t svldnt1_x2(svcount_t, uint16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2)))
+svbfloat16x2_t svldnt1_x2(svcount_t, bfloat16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2)))
+svfloat16x2_t svldnt1_x2(svcount_t, float16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2)))
+svint16x2_t svldnt1_x2(svcount_t, int16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2)))
+svuint32x2_t svldnt1_x2(svcount_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2)))
+svfloat32x2_t svldnt1_x2(svcount_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2)))
+svint32x2_t svldnt1_x2(svcount_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4)))
+svuint8x4_t svldnt1_x4(svcount_t, uint8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4)))
+svint8x4_t svldnt1_x4(svcount_t, int8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4)))
+svuint64x4_t svldnt1_x4(svcount_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4)))
+svfloat64x4_t svldnt1_x4(svcount_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4)))
+svint64x4_t svldnt1_x4(svcount_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4)))
+svuint16x4_t svldnt1_x4(svcount_t, uint16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4)))
+svbfloat16x4_t svldnt1_x4(svcount_t, bfloat16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4)))
+svfloat16x4_t svldnt1_x4(svcount_t, float16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4)))
+svint16x4_t svldnt1_x4(svcount_t, int16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4)))
+svuint32x4_t svldnt1_x4(svcount_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4)))
+svfloat32x4_t svldnt1_x4(svcount_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4)))
+svint32x4_t svldnt1_x4(svcount_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2)))
+svuint8x2_t svldnt1_vnum_x2(svcount_t, uint8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2)))
+svint8x2_t svldnt1_vnum_x2(svcount_t, int8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2)))
+svuint64x2_t svldnt1_vnum_x2(svcount_t, uint64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2)))
+svfloat64x2_t svldnt1_vnum_x2(svcount_t, float64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2)))
+svint64x2_t svldnt1_vnum_x2(svcount_t, int64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2)))
+svuint16x2_t svldnt1_vnum_x2(svcount_t, uint16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2)))
+svbfloat16x2_t svldnt1_vnum_x2(svcount_t, bfloat16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2)))
+svfloat16x2_t svldnt1_vnum_x2(svcount_t, float16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2)))
+svint16x2_t svldnt1_vnum_x2(svcount_t, int16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2)))
+svuint32x2_t svldnt1_vnum_x2(svcount_t, uint32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2)))
+svfloat32x2_t svldnt1_vnum_x2(svcount_t, float32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2)))
+svint32x2_t svldnt1_vnum_x2(svcount_t, int32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4)))
+svuint8x4_t svldnt1_vnum_x4(svcount_t, uint8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4)))
+svint8x4_t svldnt1_vnum_x4(svcount_t, int8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4)))
+svuint64x4_t svldnt1_vnum_x4(svcount_t, uint64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4)))
+svfloat64x4_t svldnt1_vnum_x4(svcount_t, float64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4)))
+svint64x4_t svldnt1_vnum_x4(svcount_t, int64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4)))
+svuint16x4_t svldnt1_vnum_x4(svcount_t, uint16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4)))
+svbfloat16x4_t svldnt1_vnum_x4(svcount_t, bfloat16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4)))
+svfloat16x4_t svldnt1_vnum_x4(svcount_t, float16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4)))
+svint16x4_t svldnt1_vnum_x4(svcount_t, int16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4)))
+svuint32x4_t svldnt1_vnum_x4(svcount_t, uint32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4)))
+svfloat32x4_t svldnt1_vnum_x4(svcount_t, float32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4)))
+svint32x4_t svldnt1_vnum_x4(svcount_t, int32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s32_x2)))
+svint16_t svqrshrn_s16(svint32x2_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u32_x2)))
+svuint16_t svqrshrn_u16(svuint32x2_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s32_x2)))
+svuint16_t svqrshrun_u16(svint32x2_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_m)))
+svuint8_t svrevd_m(svuint8_t, svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_m)))
+svuint32_t svrevd_m(svuint32_t, svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_m)))
+svuint64_t svrevd_m(svuint64_t, svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_m)))
+svuint16_t svrevd_m(svuint16_t, svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_m)))
+svbfloat16_t svrevd_m(svbfloat16_t, svbool_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_m)))
+svint8_t svrevd_m(svint8_t, svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_m)))
+svfloat64_t svrevd_m(svfloat64_t, svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_m)))
+svfloat32_t svrevd_m(svfloat32_t, svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_m)))
+svfloat16_t svrevd_m(svfloat16_t, svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_m)))
+svint32_t svrevd_m(svint32_t, svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_m)))
+svint64_t svrevd_m(svint64_t, svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_m)))
+svint16_t svrevd_m(svint16_t, svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_x)))
+svuint8_t svrevd_x(svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_x)))
+svuint32_t svrevd_x(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_x)))
+svuint64_t svrevd_x(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_x)))
+svuint16_t svrevd_x(svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_x)))
+svbfloat16_t svrevd_x(svbool_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_x)))
+svint8_t svrevd_x(svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_x)))
+svfloat64_t svrevd_x(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_x)))
+svfloat32_t svrevd_x(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_x)))
+svfloat16_t svrevd_x(svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_x)))
+svint32_t svrevd_x(svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_x)))
+svint64_t svrevd_x(svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_x)))
+svint16_t svrevd_x(svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_z)))
+svuint8_t svrevd_z(svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_z)))
+svuint32_t svrevd_z(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_z)))
+svuint64_t svrevd_z(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_z)))
+svuint16_t svrevd_z(svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_z)))
+svbfloat16_t svrevd_z(svbool_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_z)))
+svint8_t svrevd_z(svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_z)))
+svfloat64_t svrevd_z(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_z)))
+svfloat32_t svrevd_z(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_z)))
+svfloat16_t svrevd_z(svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_z)))
+svint32_t svrevd_z(svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_z)))
+svint64_t svrevd_z(svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_z)))
+svint16_t svrevd_z(svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_b)))
+svboolx2_t svset2(svboolx2_t, uint64_t, svbool_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_b)))
+svboolx4_t svset4(svboolx4_t, uint64_t, svbool_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2)))
+void svst1(svcount_t, uint8_t *, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2)))
+void svst1(svcount_t, int8_t *, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2)))
+void svst1(svcount_t, uint64_t *, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2)))
+void svst1(svcount_t, float64_t *, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2)))
+void svst1(svcount_t, int64_t *, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2)))
+void svst1(svcount_t, uint16_t *, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2)))
+void svst1(svcount_t, bfloat16_t *, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2)))
+void svst1(svcount_t, float16_t *, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2)))
+void svst1(svcount_t, int16_t *, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2)))
+void svst1(svcount_t, uint32_t *, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2)))
+void svst1(svcount_t, float32_t *, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2)))
+void svst1(svcount_t, int32_t *, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4)))
+void svst1(svcount_t, uint8_t *, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4)))
+void svst1(svcount_t, int8_t *, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4)))
+void svst1(svcount_t, uint64_t *, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4)))
+void svst1(svcount_t, float64_t *, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4)))
+void svst1(svcount_t, int64_t *, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4)))
+void svst1(svcount_t, uint16_t *, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4)))
+void svst1(svcount_t, bfloat16_t *, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4)))
+void svst1(svcount_t, float16_t *, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4)))
+void svst1(svcount_t, int16_t *, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4)))
+void svst1(svcount_t, uint32_t *, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4)))
+void svst1(svcount_t, float32_t *, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4)))
+void svst1(svcount_t, int32_t *, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2)))
+void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2)))
+void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2)))
+void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2)))
+void svst1_vnum(svcount_t, float64_t *, int64_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2)))
+void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2)))
+void svst1_vnum(svcount_t, uint16_t *, int64_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2)))
+void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2)))
+void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2)))
+void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2)))
+void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2)))
+void svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2)))
+void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4)))
+void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4)))
+void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4)))
+void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4)))
+void svst1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4)))
+void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4)))
+void svst1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4)))
+void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4)))
+void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4)))
+void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4)))
+void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4)))
+void svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4)))
+void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2)))
+void svstnt1(svcount_t, uint8_t *, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2)))
+void svstnt1(svcount_t, int8_t *, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2)))
+void svstnt1(svcount_t, uint64_t *, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2)))
+void svstnt1(svcount_t, float64_t *, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2)))
+void svstnt1(svcount_t, int64_t *, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2)))
+void svstnt1(svcount_t, uint16_t *, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2)))
+void svstnt1(svcount_t, bfloat16_t *, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2)))
+void svstnt1(svcount_t, float16_t *, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2)))
+void svstnt1(svcount_t, int16_t *, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2)))
+void svstnt1(svcount_t, uint32_t *, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2)))
+void svstnt1(svcount_t, float32_t *, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2)))
+void svstnt1(svcount_t, int32_t *, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4)))
+void svstnt1(svcount_t, uint8_t *, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4)))
+void svstnt1(svcount_t, int8_t *, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4)))
+void svstnt1(svcount_t, uint64_t *, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4)))
+void svstnt1(svcount_t, float64_t *, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4)))
+void svstnt1(svcount_t, int64_t *, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4)))
+void svstnt1(svcount_t, uint16_t *, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4)))
+void svstnt1(svcount_t, bfloat16_t *, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4)))
+void svstnt1(svcount_t, float16_t *, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4)))
+void svstnt1(svcount_t, int16_t *, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4)))
+void svstnt1(svcount_t, uint32_t *, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4)))
+void svstnt1(svcount_t, float32_t *, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4)))
+void svstnt1(svcount_t, int32_t *, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2)))
+void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2)))
+void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2)))
+void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2)))
+void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2)))
+void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2)))
+void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2)))
+void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2)))
+void svstnt1_vnum(svcount_t, float16_t *, int64_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2)))
+void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2)))
+void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2)))
+void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2)))
+void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4)))
+void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4)))
+void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4)))
+void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4)))
+void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4)))
+void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4)))
+void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4)))
+void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4)))
+void svstnt1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4)))
+void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4)))
+void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4)))
+void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4)))
+void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64)))
+svcount_t svwhilege_c8(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64)))
+svcount_t svwhilege_c32(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64)))
+svcount_t svwhilege_c64(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64)))
+svcount_t svwhilege_c16(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64)))
+svcount_t svwhilege_c8(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64)))
+svcount_t svwhilege_c32(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64)))
+svcount_t svwhilege_c64(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64)))
+svcount_t svwhilege_c16(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64_x2)))
+svboolx2_t svwhilege_b8_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64_x2)))
+svboolx2_t svwhilege_b32_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64_x2)))
+svboolx2_t svwhilege_b64_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64_x2)))
+svboolx2_t svwhilege_b16_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64_x2)))
+svboolx2_t svwhilege_b8_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64_x2)))
+svboolx2_t svwhilege_b32_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64_x2)))
+svboolx2_t svwhilege_b64_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64_x2)))
+svboolx2_t svwhilege_b16_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64)))
+svcount_t svwhilegt_c8(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64)))
+svcount_t svwhilegt_c32(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64)))
+svcount_t svwhilegt_c64(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64)))
+svcount_t svwhilegt_c16(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64)))
+svcount_t svwhilegt_c8(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64)))
+svcount_t svwhilegt_c32(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64)))
+svcount_t svwhilegt_c64(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64)))
+svcount_t svwhilegt_c16(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64_x2)))
+svboolx2_t svwhilegt_b8_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64_x2)))
+svboolx2_t svwhilegt_b32_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64_x2)))
+svboolx2_t svwhilegt_b64_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64_x2)))
+svboolx2_t svwhilegt_b16_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64_x2)))
+svboolx2_t svwhilegt_b8_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64_x2)))
+svboolx2_t svwhilegt_b32_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64_x2)))
+svboolx2_t svwhilegt_b64_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64_x2)))
+svboolx2_t svwhilegt_b16_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64)))
+svcount_t svwhilele_c8(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64)))
+svcount_t svwhilele_c32(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64)))
+svcount_t svwhilele_c64(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64)))
+svcount_t svwhilele_c16(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64)))
+svcount_t svwhilele_c8(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64)))
+svcount_t svwhilele_c32(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64)))
+svcount_t svwhilele_c64(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64)))
+svcount_t svwhilele_c16(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64_x2)))
+svboolx2_t svwhilele_b8_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64_x2)))
+svboolx2_t svwhilele_b32_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64_x2)))
+svboolx2_t svwhilele_b64_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64_x2)))
+svboolx2_t svwhilele_b16_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64_x2)))
+svboolx2_t svwhilele_b8_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64_x2)))
+svboolx2_t svwhilele_b32_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64_x2)))
+svboolx2_t svwhilele_b64_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64_x2)))
+svboolx2_t svwhilele_b16_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64)))
+svcount_t svwhilelt_c8(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64)))
+svcount_t svwhilelt_c32(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64)))
+svcount_t svwhilelt_c64(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64)))
+svcount_t svwhilelt_c16(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64)))
+svcount_t svwhilelt_c8(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64)))
+svcount_t svwhilelt_c32(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64)))
+svcount_t svwhilelt_c64(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64)))
+svcount_t svwhilelt_c16(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64_x2)))
+svboolx2_t svwhilelt_b8_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64_x2)))
+svboolx2_t svwhilelt_b32_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64_x2)))
+svboolx2_t svwhilelt_b64_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64_x2)))
+svboolx2_t svwhilelt_b16_x2(uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64_x2)))
+svboolx2_t svwhilelt_b8_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64_x2)))
+svboolx2_t svwhilelt_b32_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64_x2)))
+svboolx2_t svwhilelt_b64_x2(int64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64_x2)))
+svboolx2_t svwhilelt_b16_x2(int64_t, int64_t);
#define svcvtnt_bf16_x svcvtnt_bf16_m
#define svcvtnt_bf16_f32_x svcvtnt_bf16_f32_m
#define svcvtnt_f16_x svcvtnt_f16_m
lib/include/arm_vector_types.h
@@ -0,0 +1,345 @@
+/*===---- arm_vector_types - ARM vector type ------===
+ *
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)
+#error "This file should not be used standalone. Please include arm_neon.h or arm_sve.h instead"
+
+#endif
+#ifndef __ARM_NEON_TYPES_H
+#define __ARM_NEON_TYPES_H
+typedef float float32_t;
+typedef __fp16 float16_t;
+#ifdef __aarch64__
+typedef double float64_t;
+#endif
+
+typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
+typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
+typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
+typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
+typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
+typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
+typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
+typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
+typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
+typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
+typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
+typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
+typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
+typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
+typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
+typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
+typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
+typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
+typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
+typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
+#ifdef __aarch64__
+typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
+typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
+#endif
+
+typedef struct int8x8x2_t {
+ int8x8_t val[2];
+} int8x8x2_t;
+
+typedef struct int8x16x2_t {
+ int8x16_t val[2];
+} int8x16x2_t;
+
+typedef struct int16x4x2_t {
+ int16x4_t val[2];
+} int16x4x2_t;
+
+typedef struct int16x8x2_t {
+ int16x8_t val[2];
+} int16x8x2_t;
+
+typedef struct int32x2x2_t {
+ int32x2_t val[2];
+} int32x2x2_t;
+
+typedef struct int32x4x2_t {
+ int32x4_t val[2];
+} int32x4x2_t;
+
+typedef struct int64x1x2_t {
+ int64x1_t val[2];
+} int64x1x2_t;
+
+typedef struct int64x2x2_t {
+ int64x2_t val[2];
+} int64x2x2_t;
+
+typedef struct uint8x8x2_t {
+ uint8x8_t val[2];
+} uint8x8x2_t;
+
+typedef struct uint8x16x2_t {
+ uint8x16_t val[2];
+} uint8x16x2_t;
+
+typedef struct uint16x4x2_t {
+ uint16x4_t val[2];
+} uint16x4x2_t;
+
+typedef struct uint16x8x2_t {
+ uint16x8_t val[2];
+} uint16x8x2_t;
+
+typedef struct uint32x2x2_t {
+ uint32x2_t val[2];
+} uint32x2x2_t;
+
+typedef struct uint32x4x2_t {
+ uint32x4_t val[2];
+} uint32x4x2_t;
+
+typedef struct uint64x1x2_t {
+ uint64x1_t val[2];
+} uint64x1x2_t;
+
+typedef struct uint64x2x2_t {
+ uint64x2_t val[2];
+} uint64x2x2_t;
+
+typedef struct float16x4x2_t {
+ float16x4_t val[2];
+} float16x4x2_t;
+
+typedef struct float16x8x2_t {
+ float16x8_t val[2];
+} float16x8x2_t;
+
+typedef struct float32x2x2_t {
+ float32x2_t val[2];
+} float32x2x2_t;
+
+typedef struct float32x4x2_t {
+ float32x4_t val[2];
+} float32x4x2_t;
+
+#ifdef __aarch64__
+typedef struct float64x1x2_t {
+ float64x1_t val[2];
+} float64x1x2_t;
+
+typedef struct float64x2x2_t {
+ float64x2_t val[2];
+} float64x2x2_t;
+
+#endif
+typedef struct int8x8x3_t {
+ int8x8_t val[3];
+} int8x8x3_t;
+
+typedef struct int8x16x3_t {
+ int8x16_t val[3];
+} int8x16x3_t;
+
+typedef struct int16x4x3_t {
+ int16x4_t val[3];
+} int16x4x3_t;
+
+typedef struct int16x8x3_t {
+ int16x8_t val[3];
+} int16x8x3_t;
+
+typedef struct int32x2x3_t {
+ int32x2_t val[3];
+} int32x2x3_t;
+
+typedef struct int32x4x3_t {
+ int32x4_t val[3];
+} int32x4x3_t;
+
+typedef struct int64x1x3_t {
+ int64x1_t val[3];
+} int64x1x3_t;
+
+typedef struct int64x2x3_t {
+ int64x2_t val[3];
+} int64x2x3_t;
+
+typedef struct uint8x8x3_t {
+ uint8x8_t val[3];
+} uint8x8x3_t;
+
+typedef struct uint8x16x3_t {
+ uint8x16_t val[3];
+} uint8x16x3_t;
+
+typedef struct uint16x4x3_t {
+ uint16x4_t val[3];
+} uint16x4x3_t;
+
+typedef struct uint16x8x3_t {
+ uint16x8_t val[3];
+} uint16x8x3_t;
+
+typedef struct uint32x2x3_t {
+ uint32x2_t val[3];
+} uint32x2x3_t;
+
+typedef struct uint32x4x3_t {
+ uint32x4_t val[3];
+} uint32x4x3_t;
+
+typedef struct uint64x1x3_t {
+ uint64x1_t val[3];
+} uint64x1x3_t;
+
+typedef struct uint64x2x3_t {
+ uint64x2_t val[3];
+} uint64x2x3_t;
+
+typedef struct float16x4x3_t {
+ float16x4_t val[3];
+} float16x4x3_t;
+
+typedef struct float16x8x3_t {
+ float16x8_t val[3];
+} float16x8x3_t;
+
+typedef struct float32x2x3_t {
+ float32x2_t val[3];
+} float32x2x3_t;
+
+typedef struct float32x4x3_t {
+ float32x4_t val[3];
+} float32x4x3_t;
+
+#ifdef __aarch64__
+typedef struct float64x1x3_t {
+ float64x1_t val[3];
+} float64x1x3_t;
+
+typedef struct float64x2x3_t {
+ float64x2_t val[3];
+} float64x2x3_t;
+
+#endif
+typedef struct int8x8x4_t {
+ int8x8_t val[4];
+} int8x8x4_t;
+
+typedef struct int8x16x4_t {
+ int8x16_t val[4];
+} int8x16x4_t;
+
+typedef struct int16x4x4_t {
+ int16x4_t val[4];
+} int16x4x4_t;
+
+typedef struct int16x8x4_t {
+ int16x8_t val[4];
+} int16x8x4_t;
+
+typedef struct int32x2x4_t {
+ int32x2_t val[4];
+} int32x2x4_t;
+
+typedef struct int32x4x4_t {
+ int32x4_t val[4];
+} int32x4x4_t;
+
+typedef struct int64x1x4_t {
+ int64x1_t val[4];
+} int64x1x4_t;
+
+typedef struct int64x2x4_t {
+ int64x2_t val[4];
+} int64x2x4_t;
+
+typedef struct uint8x8x4_t {
+ uint8x8_t val[4];
+} uint8x8x4_t;
+
+typedef struct uint8x16x4_t {
+ uint8x16_t val[4];
+} uint8x16x4_t;
+
+typedef struct uint16x4x4_t {
+ uint16x4_t val[4];
+} uint16x4x4_t;
+
+typedef struct uint16x8x4_t {
+ uint16x8_t val[4];
+} uint16x8x4_t;
+
+typedef struct uint32x2x4_t {
+ uint32x2_t val[4];
+} uint32x2x4_t;
+
+typedef struct uint32x4x4_t {
+ uint32x4_t val[4];
+} uint32x4x4_t;
+
+typedef struct uint64x1x4_t {
+ uint64x1_t val[4];
+} uint64x1x4_t;
+
+typedef struct uint64x2x4_t {
+ uint64x2_t val[4];
+} uint64x2x4_t;
+
+typedef struct float16x4x4_t {
+ float16x4_t val[4];
+} float16x4x4_t;
+
+typedef struct float16x8x4_t {
+ float16x8_t val[4];
+} float16x8x4_t;
+
+typedef struct float32x2x4_t {
+ float32x2_t val[4];
+} float32x2x4_t;
+
+typedef struct float32x4x4_t {
+ float32x4_t val[4];
+} float32x4x4_t;
+
+#ifdef __aarch64__
+typedef struct float64x1x4_t {
+ float64x1_t val[4];
+} float64x1x4_t;
+
+typedef struct float64x2x4_t {
+ float64x2_t val[4];
+} float64x2x4_t;
+
+#endif
+typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t;
+typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t;
+
+typedef struct bfloat16x4x2_t {
+ bfloat16x4_t val[2];
+} bfloat16x4x2_t;
+
+typedef struct bfloat16x8x2_t {
+ bfloat16x8_t val[2];
+} bfloat16x8x2_t;
+
+typedef struct bfloat16x4x3_t {
+ bfloat16x4_t val[3];
+} bfloat16x4x3_t;
+
+typedef struct bfloat16x8x3_t {
+ bfloat16x8_t val[3];
+} bfloat16x8x3_t;
+
+typedef struct bfloat16x4x4_t {
+ bfloat16x4_t val[4];
+} bfloat16x4x4_t;
+
+typedef struct bfloat16x8x4_t {
+ bfloat16x8_t val[4];
+} bfloat16x8x4_t;
+
+#endif // __ARM_NEON_TYPES_H
lib/include/avx2intrin.h
@@ -15,8 +15,12 @@
#define __AVX2INTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(256)))
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx2,no-evex512"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx2,no-evex512"), __min_vector_width__(128)))
/* SSE4 Multiple Packed Sums of Absolute Difference. */
/// Computes sixteen sum of absolute difference (SAD) operations on sets of
@@ -1307,6 +1311,23 @@ _mm256_min_epu32(__m256i __a, __m256i __b)
return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b);
}
+/// Creates a 32-bit integer mask from the most significant bit of each byte
+/// in the 256-bit integer vector in \a __a and returns the result.
+///
+/// \code{.operation}
+/// FOR i := 0 TO 31
+/// j := i*8
+/// result[i] := __a[j+7]
+/// ENDFOR
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPMOVMSKB instruction.
+///
+/// \param __a
+/// A 256-bit integer vector containing the source bytes.
+/// \returns The 32-bit integer mask.
static __inline__ int __DEFAULT_FN_ATTRS256
_mm256_movemask_epi8(__m256i __a)
{
@@ -2962,7 +2983,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b)
/// A pointer to the 32-byte aligned memory containing the vector to load.
/// \returns A 256-bit integer vector loaded from memory.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_stream_load_si256(__m256i const *__V)
+_mm256_stream_load_si256(const void *__V)
{
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);
lib/include/avx512bf16intrin.h
@@ -20,10 +20,11 @@ typedef __bf16 __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
typedef __bf16 __bfloat16 __attribute__((deprecated("use __bf16 instead")));
#define __DEFAULT_FN_ATTRS512 \
- __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16,evex512"), \
__min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16")))
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512bf16,no-evex512")))
/// Convert One BF16 Data to One Single Float Data.
///
lib/include/avx512bitalgintrin.h
@@ -15,7 +15,10 @@
#define __AVX512BITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512bitalg,evex512"), \
+ __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi16(__m512i __A)
lib/include/avx512bwintrin.h
@@ -18,8 +18,12 @@ typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
+#define __DEFAULT_FN_ATTRS512 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512bw,evex512"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512bw,no-evex512")))
static __inline __mmask32 __DEFAULT_FN_ATTRS
_knot_mask32(__mmask32 __M)
@@ -27,9 +31,7 @@ _knot_mask32(__mmask32 __M)
return __builtin_ia32_knotsi(__M);
}
-static __inline __mmask64 __DEFAULT_FN_ATTRS
-_knot_mask64(__mmask64 __M)
-{
+static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) {
return __builtin_ia32_knotdi(__M);
}
@@ -39,9 +41,8 @@ _kand_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_kand_mask64(__mmask64 __A, __mmask64 __B)
-{
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A,
+ __mmask64 __B) {
return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
}
@@ -51,9 +52,8 @@ _kandn_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_kandn_mask64(__mmask64 __A, __mmask64 __B)
-{
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A,
+ __mmask64 __B) {
return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
}
@@ -63,9 +63,8 @@ _kor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_kor_mask64(__mmask64 __A, __mmask64 __B)
-{
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A,
+ __mmask64 __B) {
return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
}
@@ -75,9 +74,8 @@ _kxnor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_kxnor_mask64(__mmask64 __A, __mmask64 __B)
-{
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A,
+ __mmask64 __B) {
return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
}
@@ -87,9 +85,8 @@ _kxor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_kxor_mask64(__mmask64 __A, __mmask64 __B)
-{
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A,
+ __mmask64 __B) {
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
}
@@ -112,14 +109,12 @@ _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
-{
+_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
-{
+_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
}
@@ -148,14 +143,12 @@ _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
-{
+_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
-{
+_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
}
@@ -171,9 +164,8 @@ _kadd_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_kadd_mask64(__mmask64 __A, __mmask64 __B)
-{
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A,
+ __mmask64 __B) {
return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
}
@@ -214,8 +206,7 @@ _load_mask32(__mmask32 *__A) {
return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_load_mask64(__mmask64 *__A) {
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS _load_mask64(__mmask64 *__A) {
return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
}
@@ -224,8 +215,8 @@ _store_mask32(__mmask32 *__A, __mmask32 __B) {
*(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
}
-static __inline__ void __DEFAULT_FN_ATTRS
-_store_mask64(__mmask64 *__A, __mmask64 __B) {
+static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A,
+ __mmask64 __B) {
*(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
}
@@ -1714,9 +1705,8 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
(__v64qi) _mm512_setzero_si512());
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
-{
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
+ __mmask64 __B) {
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
(__mmask64) __B);
}
lib/include/avx512cdintrin.h
@@ -15,7 +15,9 @@
#define __AVX512CDINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512cd,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_conflict_epi64 (__m512i __A)
lib/include/avx512dqintrin.h
@@ -15,8 +15,10 @@
#define __AVX512DQINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
+#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq,evex512"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512dq,no-evex512")))
static __inline __mmask8 __DEFAULT_FN_ATTRS
_knot_mask8(__mmask8 __M)
lib/include/avx512fintrin.h
@@ -167,9 +167,13 @@ typedef enum
} _MM_MANTISSA_SIGN_ENUM;
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
+#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512f,no-evex512"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512f,no-evex512")))
/* Create vectors with repeated elements */
lib/include/avx512fp16intrin.h
@@ -22,13 +22,15 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 \
- __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
- __min_vector_width__(512)))
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512fp16,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS256 \
- __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512fp16,no-evex512"), \
__min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
- __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512fp16,no-evex512"), \
__min_vector_width__(128)))
static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) {
lib/include/avx512ifmaintrin.h
@@ -15,7 +15,9 @@
#define __IFMAINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512ifma,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
lib/include/avx512ifmavlintrin.h
@@ -15,8 +15,14 @@
#define __IFMAVLINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512ifma,avx512vl,no-evex512"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512ifma,avx512vl,no-evex512"), \
+ __min_vector_width__(256)))
#define _mm_madd52hi_epu64(X, Y, Z) \
((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \
lib/include/avx512pfintrin.h
@@ -14,9 +14,6 @@
#ifndef __AVX512PFINTRIN_H
#define __AVX512PFINTRIN_H
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512pf")))
-
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
(void const *)(addr), (int)(scale), \
@@ -92,6 +89,4 @@
__builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
(void *)(addr), (int)(scale), (int)(hint))
-#undef __DEFAULT_FN_ATTRS
-
#endif
lib/include/avx512vbmi2intrin.h
@@ -15,7 +15,7 @@
#define __AVX512VBMI2INTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
lib/include/avx512vbmiintrin.h
@@ -15,8 +15,9 @@
#define __VBMIINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), __min_vector_width__(512)))
-
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vbmi,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B)
lib/include/avx512vbmivlintrin.h
@@ -15,9 +15,14 @@
#define __VBMIVLINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256)))
-
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vbmi,avx512vl,no-evex512"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vbmi,avx512vl,no-evex512"), \
+ __min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)
lib/include/avx512vlbf16intrin.h
@@ -15,12 +15,14 @@
#ifndef __AVX512VLBF16INTRIN_H
#define __AVX512VLBF16INTRIN_H
-#define __DEFAULT_FN_ATTRS128 \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("avx512vl, avx512bf16"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("avx512vl, avx512bf16"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512bf16,no-evex512"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512bf16,no-evex512"), \
+ __min_vector_width__(256)))
/// Convert Two Packed Single Data to One Packed BF16 Data.
///
lib/include/avx512vlbitalgintrin.h
@@ -15,8 +15,14 @@
#define __AVX512VLBITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512bitalg,no-evex512"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512bitalg,no-evex512"), \
+ __min_vector_width__(256)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi16(__m256i __A)
lib/include/avx512vlbwintrin.h
@@ -15,8 +15,14 @@
#define __AVX512VLBWINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512bw,no-evex512"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512bw,no-evex512"), \
+ __min_vector_width__(256)))
/* Integer compare */
lib/include/avx512vlcdintrin.h
@@ -14,9 +14,14 @@
#define __AVX512VLCDINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256)))
-
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512cd,no-evex512"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512cd,no-evex512"), \
+ __min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_broadcastmb_epi64 (__mmask8 __A)
lib/include/avx512vldqintrin.h
@@ -15,8 +15,14 @@
#define __AVX512VLDQINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512dq,no-evex512"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512dq,no-evex512"), \
+ __min_vector_width__(256)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
lib/include/avx512vlfp16intrin.h
@@ -19,11 +19,11 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
- __target__("avx512fp16, avx512vl"), \
+ __target__("avx512fp16,avx512vl,no-evex512"), \
__min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
- __target__("avx512fp16, avx512vl"), \
+ __target__("avx512fp16,avx512vl,no-evex512"), \
__min_vector_width__(128)))
static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) {
lib/include/avx512vlintrin.h
@@ -14,8 +14,14 @@
#ifndef __AVX512VLINTRIN_H
#define __AVX512VLINTRIN_H
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,no-evex512"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,no-evex512"), \
+ __min_vector_width__(256)))
typedef short __v2hi __attribute__((__vector_size__(4)));
typedef char __v4qi __attribute__((__vector_size__(4)));
lib/include/avx512vlvbmi2intrin.h
@@ -15,8 +15,14 @@
#define __AVX512VLVBMI2INTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512vbmi2,no-evex512"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512vbmi2,no-evex512"), \
+ __min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
lib/include/avx512vlvnniintrin.h
@@ -15,8 +15,14 @@
#define __AVX512VLVNNIINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512vnni,no-evex512"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512vnni,no-evex512"), \
+ __min_vector_width__(256)))
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
lib/include/avx512vlvp2intersectintrin.h
@@ -28,12 +28,14 @@
#ifndef _AVX512VLVP2INTERSECT_H
#define _AVX512VLVP2INTERSECT_H
-#define __DEFAULT_FN_ATTRS128 \
- __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512vp2intersect,no-evex512"), \
__min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 \
- __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512vp2intersect,no-evex512"), \
__min_vector_width__(256)))
/// Store, in an even/odd pair of mask registers, the indicators of the
/// locations of value matches between dwords in operands __a and __b.
lib/include/avx512vnniintrin.h
@@ -15,8 +15,9 @@
#define __AVX512VNNIINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), __min_vector_width__(512)))
-
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vnni,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
lib/include/avx512vp2intersectintrin.h
@@ -28,8 +28,9 @@
#ifndef _AVX512VP2INTERSECT_H
#define _AVX512VP2INTERSECT_H
-#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, __target__("avx512vp2intersect"), \
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vp2intersect,evex512"), \
__min_vector_width__(512)))
/// Store, in an even/odd pair of mask registers, the indicators of the
lib/include/avx512vpopcntdqintrin.h
@@ -17,7 +17,9 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq"), __min_vector_width__(512)))
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vpopcntdq,evex512"), \
+ __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);
lib/include/avx512vpopcntdqvlintrin.h
@@ -17,9 +17,13 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 \
- __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(128)))
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vpopcntdq,avx512vl,no-evex512"), \
+ __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
- __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(256)))
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vpopcntdq,avx512vl,no-evex512"), \
+ __min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi64(__m128i __A) {
lib/include/avxintrin.h
@@ -50,8 +50,12 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
#endif
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
+ __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
+ __min_vector_width__(128)))
/* Arithmetic */
/// Adds two 256-bit vectors of [4 x double].
@@ -3563,7 +3567,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
/// \param __b
/// A 256-bit integer vector containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
-_mm256_stream_si256(__m256i *__a, __m256i __b)
+_mm256_stream_si256(void *__a, __m256i __b)
{
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
__builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
@@ -3583,7 +3587,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b)
/// \param __b
/// A 256-bit vector of [4 x double] containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
-_mm256_stream_pd(double *__a, __m256d __b)
+_mm256_stream_pd(void *__a, __m256d __b)
{
typedef __v4df __v4df_aligned __attribute__((aligned(32)));
__builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
@@ -3604,7 +3608,7 @@ _mm256_stream_pd(double *__a, __m256d __b)
/// \param __a
/// A 256-bit vector of [8 x float] containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
-_mm256_stream_ps(float *__p, __m256 __a)
+_mm256_stream_ps(void *__p, __m256 __a)
{
typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));
__builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);
lib/include/bmiintrin.h
@@ -19,18 +19,17 @@
to use it as a potentially faster version of BSF. */
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
-#define _tzcnt_u16(a) (__tzcnt_u16((a)))
-
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
+/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 16-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 16-bit integer containing the number of trailing zero
/// bits in the operand.
+/// \see _tzcnt_u16
static __inline__ unsigned short __RELAXED_FN_ATTRS
__tzcnt_u16(unsigned short __X)
{
@@ -41,13 +40,30 @@ __tzcnt_u16(unsigned short __X)
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
+/// \code
+/// unsigned short _tzcnt_u16(unsigned short __X);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 16-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 16-bit integer containing the number of trailing zero
+/// bits in the operand.
+/// \see __tzcnt_u16
+#define _tzcnt_u16 __tzcnt_u16
+
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 32-bit integer containing the number of trailing zero
/// bits in the operand.
-/// \see _mm_tzcnt_32
+/// \see { _mm_tzcnt_32 _tzcnt_u32 }
static __inline__ unsigned int __RELAXED_FN_ATTRS
__tzcnt_u32(unsigned int __X)
{
@@ -58,20 +74,35 @@ __tzcnt_u32(unsigned int __X)
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
+/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
-/// \returns An 32-bit integer containing the number of trailing zero bits in
+/// \returns A 32-bit integer containing the number of trailing zero bits in
/// the operand.
-/// \see __tzcnt_u32
+/// \see { __tzcnt_u32 _tzcnt_u32 }
static __inline__ int __RELAXED_FN_ATTRS
_mm_tzcnt_32(unsigned int __X)
{
return (int)__builtin_ia32_tzcnt_u32(__X);
}
-#define _tzcnt_u32(a) (__tzcnt_u32((a)))
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _tzcnt_u32(unsigned int __X);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 32-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 32-bit integer containing the number of trailing zero
+/// bits in the operand.
+/// \see { _mm_tzcnt_32 __tzcnt_u32 }
+#define _tzcnt_u32 __tzcnt_u32
#ifdef __x86_64__
@@ -79,13 +110,13 @@ _mm_tzcnt_32(unsigned int __X)
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
+/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of trailing zero
/// bits in the operand.
-/// \see _mm_tzcnt_64
+/// \see { _mm_tzcnt_64 _tzcnt_u64 }
static __inline__ unsigned long long __RELAXED_FN_ATTRS
__tzcnt_u64(unsigned long long __X)
{
@@ -96,20 +127,35 @@ __tzcnt_u64(unsigned long long __X)
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
+/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An 64-bit integer containing the number of trailing zero bits in
/// the operand.
-/// \see __tzcnt_u64
+/// \see { __tzcnt_u64 _tzcnt_u64 }
static __inline__ long long __RELAXED_FN_ATTRS
_mm_tzcnt_64(unsigned long long __X)
{
return (long long)__builtin_ia32_tzcnt_u64(__X);
}
-#define _tzcnt_u64(a) (__tzcnt_u64((a)))
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _tzcnt_u64(unsigned long long __X);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 64-bit integer containing the number of trailing zero
+/// bits in the operand.
+/// \see { _mm_tzcnt_64 __tzcnt_u64 }
+#define _tzcnt_u64 __tzcnt_u64
#endif /* __x86_64__ */
@@ -121,21 +167,12 @@ _mm_tzcnt_64(unsigned long long __X)
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
-#define _andn_u32(a, b) (__andn_u32((a), (b)))
-
-/* _bextr_u32 != __bextr_u32 */
-#define _blsi_u32(a) (__blsi_u32((a)))
-
-#define _blsmsk_u32(a) (__blsmsk_u32((a)))
-
-#define _blsr_u32(a) (__blsr_u32((a)))
-
/// Performs a bitwise AND of the second operand with the one's
/// complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> ANDN </c> instruction.
+/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param __X
/// An unsigned integer containing one of the operands.
@@ -143,19 +180,40 @@ _mm_tzcnt_64(unsigned long long __X)
/// An unsigned integer containing one of the operands.
/// \returns An unsigned integer containing the bitwise AND of the second
/// operand with the one's complement of the first operand.
+/// \see _andn_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__andn_u32(unsigned int __X, unsigned int __Y)
{
return ~__X & __Y;
}
+/// Performs a bitwise AND of the second operand with the one's
+/// complement of the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _andn_u32(unsigned int __X, unsigned int __Y);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c ANDN instruction.
+///
+/// \param __X
+/// An unsigned integer containing one of the operands.
+/// \param __Y
+/// An unsigned integer containing one of the operands.
+/// \returns An unsigned integer containing the bitwise AND of the second
+/// operand with the one's complement of the first operand.
+/// \see __andn_u32
+#define _andn_u32 __andn_u32
+
/* AMD-specified, double-leading-underscore version of BEXTR */
/// Extracts the specified bits from the first operand and returns them
/// in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
+/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be extracted.
@@ -178,7 +236,7 @@ __bextr_u32(unsigned int __X, unsigned int __Y)
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
+/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be extracted.
@@ -203,7 +261,7 @@ _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
+/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be extracted.
@@ -224,69 +282,117 @@ _bextr2_u32(unsigned int __X, unsigned int __Y) {
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> BLSI </c> instruction.
+/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be cleared.
/// \returns An unsigned integer containing the result of clearing the bits from
/// the source operand.
+/// \see _blsi_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsi_u32(unsigned int __X)
{
return __X & -__X;
}
+/// Clears all bits in the source except for the least significant bit
+/// containing a value of 1 and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _blsi_u32(unsigned int __X);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSI instruction.
+///
+/// \param __X
+/// An unsigned integer whose bits are to be cleared.
+/// \returns An unsigned integer containing the result of clearing the bits from
+/// the source operand.
+/// \see __blsi_u32
+#define _blsi_u32 __blsi_u32
+
/// Creates a mask whose bits are set to 1, using bit 0 up to and
/// including the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
+/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param __X
/// An unsigned integer used to create the mask.
/// \returns An unsigned integer containing the newly created mask.
+/// \see _blsmsk_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsmsk_u32(unsigned int __X)
{
return __X ^ (__X - 1);
}
+/// Creates a mask whose bits are set to 1, using bit 0 up to and
+/// including the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _blsmsk_u32(unsigned int __X);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSMSK instruction.
+///
+/// \param __X
+/// An unsigned integer used to create the mask.
+/// \returns An unsigned integer containing the newly created mask.
+/// \see __blsmsk_u32
+#define _blsmsk_u32 __blsmsk_u32
+
/// Clears the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> BLSR </c> instruction.
+/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param __X
/// An unsigned integer containing the operand to be cleared.
/// \returns An unsigned integer containing the result of clearing the source
/// operand.
+/// \see _blsr_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsr_u32(unsigned int __X)
{
return __X & (__X - 1);
}
-#ifdef __x86_64__
-
-#define _andn_u64(a, b) (__andn_u64((a), (b)))
-
-/* _bextr_u64 != __bextr_u64 */
-#define _blsi_u64(a) (__blsi_u64((a)))
-
-#define _blsmsk_u64(a) (__blsmsk_u64((a)))
+/// Clears the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _blsr_u32(unsigned int __X);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSR instruction.
+///
+/// \param __X
+/// An unsigned integer containing the operand to be cleared.
+/// \returns An unsigned integer containing the result of clearing the source
+/// operand.
+/// \see __blsr_u32
+#define _blsr_u32 __blsr_u32
-#define _blsr_u64(a) (__blsr_u64((a)))
+#ifdef __x86_64__
/// Performs a bitwise AND of the second operand with the one's
/// complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> ANDN </c> instruction.
+/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param __X
/// An unsigned 64-bit integer containing one of the operands.
@@ -294,19 +400,41 @@ __blsr_u32(unsigned int __X)
/// An unsigned 64-bit integer containing one of the operands.
/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
/// operand with the one's complement of the first operand.
+/// \see _andn_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__andn_u64 (unsigned long long __X, unsigned long long __Y)
{
return ~__X & __Y;
}
+/// Performs a bitwise AND of the second operand with the one's
+/// complement of the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _andn_u64(unsigned long long __X,
+/// unsigned long long __Y);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c ANDN instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer containing one of the operands.
+/// \param __Y
+/// An unsigned 64-bit integer containing one of the operands.
+/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
+/// operand with the one's complement of the first operand.
+/// \see __andn_u64
+#define _andn_u64 __andn_u64
+
/* AMD-specified, double-leading-underscore version of BEXTR */
/// Extracts the specified bits from the first operand and returns them
/// in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
+/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be extracted.
@@ -329,7 +457,7 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y)
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
+/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be extracted.
@@ -354,7 +482,7 @@ _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
+/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be extracted.
@@ -375,52 +503,109 @@ _bextr2_u64(unsigned long long __X, unsigned long long __Y) {
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> BLSI </c> instruction.
+/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
/// bits from the source operand.
+/// \see _blsi_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsi_u64(unsigned long long __X)
{
return __X & -__X;
}
+/// Clears all bits in the source except for the least significant bit
+/// containing a value of 1 and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _blsi_u64(unsigned long long __X);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSI instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer whose bits are to be cleared.
+/// \returns An unsigned 64-bit integer containing the result of clearing the
+/// bits from the source operand.
+/// \see __blsi_u64
+#define _blsi_u64 __blsi_u64
+
/// Creates a mask whose bits are set to 1, using bit 0 up to and
/// including the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
+/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param __X
/// An unsigned 64-bit integer used to create the mask.
/// \returns An unsigned 64-bit integer containing the newly created mask.
+/// \see _blsmsk_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsmsk_u64(unsigned long long __X)
{
return __X ^ (__X - 1);
}
+/// Creates a mask whose bits are set to 1, using bit 0 up to and
+/// including the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _blsmsk_u64(unsigned long long __X);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSMSK instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer used to create the mask.
+/// \returns An unsigned 64-bit integer containing the newly created mask.
+/// \see __blsmsk_u64
+#define _blsmsk_u64 __blsmsk_u64
+
/// Clears the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
-/// This intrinsic corresponds to the <c> BLSR </c> instruction.
+/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param __X
/// An unsigned 64-bit integer containing the operand to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
/// source operand.
+/// \see _blsr_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsr_u64(unsigned long long __X)
{
return __X & (__X - 1);
}
+/// Clears the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _blsr_u64(unsigned long long __X);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSR instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer containing the operand to be cleared.
+/// \returns An unsigned 64-bit integer containing the result of clearing the
+/// source operand.
+/// \see __blsr_u64
+#define _blsr_u64 __blsr_u64
+
#endif /* __x86_64__ */
#undef __DEFAULT_FN_ATTRS
lib/include/emmintrin.h
@@ -50,11 +50,11 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
- __min_vector_width__(128)))
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("sse2,no-evex512"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_MMX \
- __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), \
- __min_vector_width__(64)))
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("mmx,sse2,no-evex512"), __min_vector_width__(64)))
/// Adds lower double-precision values in both operands and returns the
/// sum in the lower 64 bits of the result. The upper 64 bits of the result
@@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p,
/// A pointer to the 128-bit aligned memory location used to store the value.
/// \param __a
/// A vector of [2 x double] containing the 64-bit values to be stored.
-static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
+static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p,
__m128d __a) {
__builtin_nontemporal_store((__v2df)__a, (__v2df *)__p);
}
@@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
/// A pointer to the 128-bit aligned memory location used to store the value.
/// \param __a
/// A 128-bit integer vector containing the values to be stored.
-static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
+static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p,
__m128i __a) {
__builtin_nontemporal_store((__v2di)__a, (__v2di *)__p);
}
@@ -3983,8 +3983,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
/// A 32-bit integer containing the value to be stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
- _mm_stream_si32(int *__p, int __a) {
- __builtin_ia32_movnti(__p, __a);
+ _mm_stream_si32(void *__p, int __a) {
+ __builtin_ia32_movnti((int *)__p, __a);
}
#ifdef __x86_64__
@@ -4003,8 +4003,8 @@ static __inline__ void
/// A 64-bit integer containing the value to be stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
- _mm_stream_si64(long long *__p, long long __a) {
- __builtin_ia32_movnti64(__p, __a);
+ _mm_stream_si64(void *__p, long long __a) {
+ __builtin_ia32_movnti64((long long *)__p, __a);
}
#endif
lib/include/gfniintrin.h
@@ -15,19 +15,36 @@
#define __GFNIINTRIN_H
/* Default attributes for simple form (no masking). */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("gfni,no-evex512"), __min_vector_width__(128)))
/* Default attributes for YMM unmasked form. */
-#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS_Y \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx,gfni,no-evex512"), \
+ __min_vector_width__(256)))
/* Default attributes for ZMM unmasked forms. */
-#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS_Z \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512f,evex512,gfni"), \
+ __min_vector_width__(512)))
/* Default attributes for ZMM masked forms. */
-#define __DEFAULT_FN_ATTRS_Z_MASK __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS_Z_MASK \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512bw,evex512,gfni"), \
+ __min_vector_width__(512)))
/* Default attributes for VLX masked forms. */
-#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS_VL128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512bw,avx512vl,gfni,no-evex512"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS_VL256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512bw,avx512vl,gfni,no-evex512"), \
+ __min_vector_width__(256)))
#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
lib/include/ia32intrin.h
@@ -26,167 +26,271 @@
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#endif
-/** Find the first set bit starting from the lsb. Result is undefined if
- * input is 0.
- *
- * \headerfile <x86intrin.h>
- *
- * This intrinsic corresponds to the <c> BSF </c> instruction or the
- * <c> TZCNT </c> instruction.
- *
- * \param __A
- * A 32-bit integer operand.
- * \returns A 32-bit integer containing the bit number.
- */
+/// Find the first set bit starting from the lsb. Result is undefined if
+/// input is 0.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BSF instruction or the
+/// \c TZCNT instruction.
+///
+/// \param __A
+/// A 32-bit integer operand.
+/// \returns A 32-bit integer containing the bit number.
+/// \see _bit_scan_forward
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
__bsfd(int __A) {
return __builtin_ctz((unsigned int)__A);
}
-/** Find the first set bit starting from the msb. Result is undefined if
- * input is 0.
- *
- * \headerfile <x86intrin.h>
- *
- * This intrinsic corresponds to the <c> BSR </c> instruction or the
- * <c> LZCNT </c> instruction and an <c> XOR </c>.
- *
- * \param __A
- * A 32-bit integer operand.
- * \returns A 32-bit integer containing the bit number.
- */
+/// Find the first set bit starting from the msb. Result is undefined if
+/// input is 0.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BSR instruction or the
+/// \c LZCNT instruction and an \c XOR.
+///
+/// \param __A
+/// A 32-bit integer operand.
+/// \returns A 32-bit integer containing the bit number.
+/// \see _bit_scan_reverse
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
__bsrd(int __A) {
return 31 - __builtin_clz((unsigned int)__A);
}
-/** Swaps the bytes in the input. Converting little endian to big endian or
- * vice versa.
- *
- * \headerfile <x86intrin.h>
- *
- * This intrinsic corresponds to the <c> BSWAP </c> instruction.
- *
- * \param __A
- * A 32-bit integer operand.
- * \returns A 32-bit integer containing the swapped bytes.
- */
+/// Swaps the bytes in the input, converting little endian to big endian or
+/// vice versa.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BSWAP instruction.
+///
+/// \param __A
+/// A 32-bit integer operand.
+/// \returns A 32-bit integer containing the swapped bytes.
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
__bswapd(int __A) {
return (int)__builtin_bswap32((unsigned int)__A);
}
+/// Swaps the bytes in the input, converting little endian to big endian or
+/// vice versa.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BSWAP instruction.
+///
+/// \param __A
+/// A 32-bit integer operand.
+/// \returns A 32-bit integer containing the swapped bytes.
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_bswap(int __A) {
return (int)__builtin_bswap32((unsigned int)__A);
}
+/// Find the first set bit starting from the lsb. Result is undefined if
+/// input is 0.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// int _bit_scan_forward(int A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BSF instruction or the
+/// \c TZCNT instruction.
+///
+/// \param A
+/// A 32-bit integer operand.
+/// \returns A 32-bit integer containing the bit number.
+/// \see __bsfd
#define _bit_scan_forward(A) __bsfd((A))
+
+/// Find the first set bit starting from the msb. Result is undefined if
+/// input is 0.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// int _bit_scan_reverse(int A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BSR instruction or the
+/// \c LZCNT instruction and an \c XOR.
+///
+/// \param A
+/// A 32-bit integer operand.
+/// \returns A 32-bit integer containing the bit number.
+/// \see __bsrd
#define _bit_scan_reverse(A) __bsrd((A))
#ifdef __x86_64__
-/** Find the first set bit starting from the lsb. Result is undefined if
- * input is 0.
- *
- * \headerfile <x86intrin.h>
- *
- * This intrinsic corresponds to the <c> BSF </c> instruction or the
- * <c> TZCNT </c> instruction.
- *
- * \param __A
- * A 64-bit integer operand.
- * \returns A 32-bit integer containing the bit number.
- */
+/// Find the first set bit starting from the lsb. Result is undefined if
+/// input is 0.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BSF instruction or the
+/// \c TZCNT instruction.
+///
+/// \param __A
+/// A 64-bit integer operand.
+/// \returns A 32-bit integer containing the bit number.
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
__bsfq(long long __A) {
return (long long)__builtin_ctzll((unsigned long long)__A);
}
-/** Find the first set bit starting from the msb. Result is undefined if
- * input is 0.
- *
- * \headerfile <x86intrin.h>
- *
- * This intrinsic corresponds to the <c> BSR </c> instruction or the
- * <c> LZCNT </c> instruction and an <c> XOR </c>.
- *
- * \param __A
- * A 64-bit integer operand.
- * \returns A 32-bit integer containing the bit number.
- */
+/// Find the first set bit starting from the msb. Result is undefined if
+/// input is 0.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BSR instruction or the
+/// \c LZCNT instruction and an \c XOR.
+///
+/// \param __A
+/// A 64-bit integer operand.
+/// \returns A 32-bit integer containing the bit number.
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
__bsrq(long long __A) {
return 63 - __builtin_clzll((unsigned long long)__A);
}
-/** Swaps the bytes in the input. Converting little endian to big endian or
- * vice versa.
- *
- * \headerfile <x86intrin.h>
- *
- * This intrinsic corresponds to the <c> BSWAP </c> instruction.
- *
- * \param __A
- * A 64-bit integer operand.
- * \returns A 64-bit integer containing the swapped bytes.
- */
+/// Swaps the bytes in the input, converting little endian to big endian or
+/// vice versa.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BSWAP instruction.
+///
+/// \param __A
+/// A 64-bit integer operand.
+/// \returns A 64-bit integer containing the swapped bytes.
+/// \see _bswap64
static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR
__bswapq(long long __A) {
return (long long)__builtin_bswap64((unsigned long long)__A);
}
+/// Swaps the bytes in the input, converting little endian to big endian or
+/// vice versa.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// long long _bswap64(long long A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BSWAP instruction.
+///
+/// \param A
+/// A 64-bit integer operand.
+/// \returns A 64-bit integer containing the swapped bytes.
+/// \see __bswapq
#define _bswap64(A) __bswapq((A))
-#endif
+#endif /* __x86_64__ */
-/** Counts the number of bits in the source operand having a value of 1.
- *
- * \headerfile <x86intrin.h>
- *
- * This intrinsic corresponds to the <c> POPCNT </c> instruction or a
- * a sequence of arithmetic and logic ops to calculate it.
- *
- * \param __A
- * An unsigned 32-bit integer operand.
- * \returns A 32-bit integer containing the number of bits with value 1 in the
- * source operand.
- */
+/// Counts the number of bits in the source operand having a value of 1.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c POPCNT instruction or a
+/// sequence of arithmetic and logic ops to calculate it.
+///
+/// \param __A
+/// An unsigned 32-bit integer operand.
+/// \returns A 32-bit integer containing the number of bits with value 1 in the
+/// source operand.
+/// \see _popcnt32
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
__popcntd(unsigned int __A)
{
return __builtin_popcount(__A);
}
+/// Counts the number of bits in the source operand having a value of 1.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// int _popcnt32(int A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c POPCNT instruction or a
+/// sequence of arithmetic and logic ops to calculate it.
+///
+/// \param A
+/// An unsigned 32-bit integer operand.
+/// \returns A 32-bit integer containing the number of bits with value 1 in the
+/// source operand.
+/// \see __popcntd
#define _popcnt32(A) __popcntd((A))
#ifdef __x86_64__
-/** Counts the number of bits in the source operand having a value of 1.
- *
- * \headerfile <x86intrin.h>
- *
- * This intrinsic corresponds to the <c> POPCNT </c> instruction or a
- * a sequence of arithmetic and logic ops to calculate it.
- *
- * \param __A
- * An unsigned 64-bit integer operand.
- * \returns A 64-bit integer containing the number of bits with value 1 in the
- * source operand.
- */
+/// Counts the number of bits in the source operand having a value of 1.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c POPCNT instruction or a
+/// sequence of arithmetic and logic ops to calculate it.
+///
+/// \param __A
+/// An unsigned 64-bit integer operand.
+/// \returns A 64-bit integer containing the number of bits with value 1 in the
+/// source operand.
+/// \see _popcnt64
static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR
__popcntq(unsigned long long __A)
{
return __builtin_popcountll(__A);
}
+/// Counts the number of bits in the source operand having a value of 1.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// long long _popcnt64(unsigned long long A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c POPCNT instruction or a
+/// sequence of arithmetic and logic ops to calculate it.
+///
+/// \param A
+/// An unsigned 64-bit integer operand.
+/// \returns A 64-bit integer containing the number of bits with value 1 in the
+/// source operand.
+/// \see __popcntq
#define _popcnt64(A) __popcntq((A))
#endif /* __x86_64__ */
#ifdef __x86_64__
+/// Returns the program status and control \c RFLAGS register with the \c VM
+/// and \c RF flags cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PUSHFQ + \c POP instruction sequence.
+///
+/// \returns The 64-bit value of the RFLAGS register.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__readeflags(void)
{
return __builtin_ia32_readeflags_u64();
}
+/// Writes the specified value to the program status and control \c RFLAGS
+/// register. Reserved bits are not affected.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PUSH + \c POPFQ instruction sequence.
+///
+/// \param __f
+/// The 64-bit value to write to \c RFLAGS.
static __inline__ void __DEFAULT_FN_ATTRS
__writeeflags(unsigned long long __f)
{
@@ -194,12 +298,29 @@ __writeeflags(unsigned long long __f)
}
#else /* !__x86_64__ */
+/// Returns the program status and control \c EFLAGS register with the \c VM
+/// and \c RF flags cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PUSHFD + \c POP instruction sequence.
+///
+/// \returns The 32-bit value of the EFLAGS register.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__readeflags(void)
{
return __builtin_ia32_readeflags_u32();
}
+/// Writes the specified value to the program status and control \c EFLAGS
+/// register. Reserved bits are not affected.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PUSH + \c POPFD instruction sequence.
+///
+/// \param __f
+/// The 32-bit value to write to \c EFLAGS.
static __inline__ void __DEFAULT_FN_ATTRS
__writeeflags(unsigned int __f)
{
@@ -207,123 +328,120 @@ __writeeflags(unsigned int __f)
}
#endif /* !__x86_64__ */
-/** Cast a 32-bit float value to a 32-bit unsigned integer value
- *
- * \headerfile <x86intrin.h>
- * This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64,
- * and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
- *
- * \param __A
- * A 32-bit float value.
- * \returns a 32-bit unsigned integer containing the converted value.
- */
+/// Cast a 32-bit float value to a 32-bit unsigned integer value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVD / \c MOVD instruction in x86_64,
+/// and corresponds to the \c VMOVL / \c MOVL instruction in ia32.
+///
+/// \param __A
+/// A 32-bit float value.
+/// \returns a 32-bit unsigned integer containing the converted value.
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CAST
_castf32_u32(float __A) {
return __builtin_bit_cast(unsigned int, __A);
}
-/** Cast a 64-bit float value to a 64-bit unsigned integer value
- *
- * \headerfile <x86intrin.h>
- * This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
- * and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
- *
- * \param __A
- * A 64-bit float value.
- * \returns a 64-bit unsigned integer containing the converted value.
- */
+/// Cast a 64-bit float value to a 64-bit unsigned integer value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVQ / \c MOVQ instruction in x86_64,
+/// and corresponds to the \c VMOVL / \c MOVL instruction in ia32.
+///
+/// \param __A
+/// A 64-bit float value.
+/// \returns a 64-bit unsigned integer containing the converted value.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CAST
_castf64_u64(double __A) {
return __builtin_bit_cast(unsigned long long, __A);
}
-/** Cast a 32-bit unsigned integer value to a 32-bit float value
- *
- * \headerfile <x86intrin.h>
- * This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
- * and corresponds to the <c> FLDS </c> instruction in ia32.
- *
- * \param __A
- * A 32-bit unsigned integer value.
- * \returns a 32-bit float value containing the converted value.
- */
+/// Cast a 32-bit unsigned integer value to a 32-bit float value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVQ / \c MOVQ instruction in x86_64,
+/// and corresponds to the \c FLDS instruction in ia32.
+///
+/// \param __A
+/// A 32-bit unsigned integer value.
+/// \returns a 32-bit float value containing the converted value.
static __inline__ float __DEFAULT_FN_ATTRS_CAST
_castu32_f32(unsigned int __A) {
return __builtin_bit_cast(float, __A);
}
-/** Cast a 64-bit unsigned integer value to a 64-bit float value
- *
- * \headerfile <x86intrin.h>
- * This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
- * and corresponds to the <c> FLDL </c> instruction in ia32.
- *
- * \param __A
- * A 64-bit unsigned integer value.
- * \returns a 64-bit float value containing the converted value.
- */
+/// Cast a 64-bit unsigned integer value to a 64-bit float value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVQ / \c MOVQ instruction in x86_64,
+/// and corresponds to the \c FLDL instruction in ia32.
+///
+/// \param __A
+/// A 64-bit unsigned integer value.
+/// \returns a 64-bit float value containing the converted value.
static __inline__ double __DEFAULT_FN_ATTRS_CAST
_castu64_f64(unsigned long long __A) {
return __builtin_bit_cast(double, __A);
}
-/** Adds the unsigned integer operand to the CRC-32C checksum of the
- * unsigned char operand.
- *
- * \headerfile <x86intrin.h>
- *
- * This intrinsic corresponds to the <c> CRC32B </c> instruction.
- *
- * \param __C
- * An unsigned integer operand to add to the CRC-32C checksum of operand
- * \a __D.
- * \param __D
- * An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
- * \returns The result of adding operand \a __C to the CRC-32C checksum of
- * operand \a __D.
- */
+/// Adds the unsigned integer operand to the CRC-32C checksum of the
+/// unsigned char operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CRC32B instruction.
+///
+/// \param __C
+/// An unsigned integer operand to add to the CRC-32C checksum of operand
+/// \a __D.
+/// \param __D
+/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
+/// \returns The result of adding operand \a __C to the CRC-32C checksum of
+/// operand \a __D.
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
__crc32b(unsigned int __C, unsigned char __D)
{
return __builtin_ia32_crc32qi(__C, __D);
}
-/** Adds the unsigned integer operand to the CRC-32C checksum of the
- * unsigned short operand.
- *
- * \headerfile <x86intrin.h>
- *
- * This intrinsic corresponds to the <c> CRC32W </c> instruction.
- *
- * \param __C
- * An unsigned integer operand to add to the CRC-32C checksum of operand
- * \a __D.
- * \param __D
- * An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
- * \returns The result of adding operand \a __C to the CRC-32C checksum of
- * operand \a __D.
- */
+/// Adds the unsigned integer operand to the CRC-32C checksum of the
+/// unsigned short operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CRC32W instruction.
+///
+/// \param __C
+/// An unsigned integer operand to add to the CRC-32C checksum of operand
+/// \a __D.
+/// \param __D
+/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
+/// \returns The result of adding operand \a __C to the CRC-32C checksum of
+/// operand \a __D.
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
__crc32w(unsigned int __C, unsigned short __D)
{
return __builtin_ia32_crc32hi(__C, __D);
}
-/** Adds the unsigned integer operand to the CRC-32C checksum of the
- * second unsigned integer operand.
- *
- * \headerfile <x86intrin.h>
- *
- * This intrinsic corresponds to the <c> CRC32D </c> instruction.
- *
- * \param __C
- * An unsigned integer operand to add to the CRC-32C checksum of operand
- * \a __D.
- * \param __D
- * An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
- * \returns The result of adding operand \a __C to the CRC-32C checksum of
- * operand \a __D.
- */
+/// Adds the unsigned integer operand to the CRC-32C checksum of the
+/// second unsigned integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CRC32D instruction.
+///
+/// \param __C
+/// An unsigned integer operand to add to the CRC-32C checksum of operand
+/// \a __D.
+/// \param __D
+/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
+/// \returns The result of adding operand \a __C to the CRC-32C checksum of
+/// operand \a __D.
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
__crc32d(unsigned int __C, unsigned int __D)
{
@@ -331,21 +449,20 @@ __crc32d(unsigned int __C, unsigned int __D)
}
#ifdef __x86_64__
-/** Adds the unsigned integer operand to the CRC-32C checksum of the
- * unsigned 64-bit integer operand.
- *
- * \headerfile <x86intrin.h>
- *
- * This intrinsic corresponds to the <c> CRC32Q </c> instruction.
- *
- * \param __C
- * An unsigned integer operand to add to the CRC-32C checksum of operand
- * \a __D.
- * \param __D
- * An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
- * \returns The result of adding operand \a __C to the CRC-32C checksum of
- * operand \a __D.
- */
+/// Adds the unsigned integer operand to the CRC-32C checksum of the
+/// unsigned 64-bit integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CRC32Q instruction.
+///
+/// \param __C
+/// An unsigned integer operand to add to the CRC-32C checksum of operand
+/// \a __D.
+/// \param __D
+/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
+/// \returns The result of adding operand \a __C to the CRC-32C checksum of
+/// operand \a __D.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CRC32
__crc32q(unsigned long long __C, unsigned long long __D)
{
@@ -353,19 +470,67 @@ __crc32q(unsigned long long __C, unsigned long long __D)
}
#endif /* __x86_64__ */
+/// Reads the specified performance monitoring counter. Refer to your
+/// processor's documentation to determine which performance counters are
+/// supported.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c RDPMC instruction.
+///
+/// \param __A
+/// The performance counter to read.
+/// \returns The 64-bit value read from the performance counter.
+/// \see _rdpmc
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__rdpmc(int __A) {
return __builtin_ia32_rdpmc(__A);
}
-/* __rdtscp */
+/// Reads the processor's time stamp counter and the \c IA32_TSC_AUX MSR
+/// \c (0xc0000103).
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c RDTSCP instruction.
+///
+/// \param __A
+/// Address of where to store the 32-bit \c IA32_TSC_AUX value.
+/// \returns The 64-bit value of the time stamp counter.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__rdtscp(unsigned int *__A) {
return __builtin_ia32_rdtscp(__A);
}
+/// Reads the processor's time stamp counter.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _rdtsc();
+/// \endcode
+///
+/// This intrinsic corresponds to the \c RDTSC instruction.
+///
+/// \returns The 64-bit value of the time stamp counter.
#define _rdtsc() __rdtsc()
+/// Reads the specified performance monitoring counter. Refer to your
+/// processor's documentation to determine which performance counters are
+/// supported.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _rdpmc(int A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c RDPMC instruction.
+///
+/// \param A
+/// The performance counter to read.
+/// \returns The 64-bit value read from the performance counter.
+/// \see __rdpmc
#define _rdpmc(A) __rdpmc(A)
static __inline__ void __DEFAULT_FN_ATTRS
@@ -373,42 +538,150 @@ _wbinvd(void) {
__builtin_ia32_wbinvd();
}
+/// Rotates an 8-bit value to the left by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c ROL instruction.
+///
+/// \param __X
+/// The unsigned 8-bit value to be rotated.
+/// \param __C
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
__rolb(unsigned char __X, int __C) {
return __builtin_rotateleft8(__X, __C);
}
+/// Rotates an 8-bit value to the right by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c ROR instruction.
+///
+/// \param __X
+/// The unsigned 8-bit value to be rotated.
+/// \param __C
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
__rorb(unsigned char __X, int __C) {
return __builtin_rotateright8(__X, __C);
}
+/// Rotates a 16-bit value to the left by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c ROL instruction.
+///
+/// \param __X
+/// The unsigned 16-bit value to be rotated.
+/// \param __C
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
+/// \see _rotwl
static __inline__ unsigned short __DEFAULT_FN_ATTRS_CONSTEXPR
__rolw(unsigned short __X, int __C) {
return __builtin_rotateleft16(__X, __C);
}
+/// Rotates a 16-bit value to the right by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c ROR instruction.
+///
+/// \param __X
+/// The unsigned 16-bit value to be rotated.
+/// \param __C
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
+/// \see _rotwr
static __inline__ unsigned short __DEFAULT_FN_ATTRS_CONSTEXPR
__rorw(unsigned short __X, int __C) {
return __builtin_rotateright16(__X, __C);
}
+/// Rotates a 32-bit value to the left by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c ROL instruction.
+///
+/// \param __X
+/// The unsigned 32-bit value to be rotated.
+/// \param __C
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
+/// \see _rotl
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
__rold(unsigned int __X, int __C) {
return __builtin_rotateleft32(__X, (unsigned int)__C);
}
+/// Rotates a 32-bit value to the right by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c ROR instruction.
+///
+/// \param __X
+/// The unsigned 32-bit value to be rotated.
+/// \param __C
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
+/// \see _rotr
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
__rord(unsigned int __X, int __C) {
return __builtin_rotateright32(__X, (unsigned int)__C);
}
#ifdef __x86_64__
+/// Rotates a 64-bit value to the left by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c ROL instruction.
+///
+/// \param __X
+/// The unsigned 64-bit value to be rotated.
+/// \param __C
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
__rolq(unsigned long long __X, int __C) {
return __builtin_rotateleft64(__X, (unsigned long long)__C);
}
+/// Rotates a 64-bit value to the right by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c ROR instruction.
+///
+/// \param __X
+/// The unsigned 64-bit value to be rotated.
+/// \param __C
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
__rorq(unsigned long long __X, int __C) {
return __builtin_rotateright64(__X, (unsigned long long)__C);
@@ -419,18 +692,167 @@ __rorq(unsigned long long __X, int __C) {
/* These are already provided as builtins for MSVC. */
/* Select the correct function based on the size of long. */
#ifdef __LP64__
+/// Rotates a 64-bit value to the left by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _lrotl(unsigned long long a, int b);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c ROL instruction.
+///
+/// \param a
+/// The unsigned 64-bit value to be rotated.
+/// \param b
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
+/// \see __rolq
#define _lrotl(a,b) __rolq((a), (b))
+
+/// Rotates a 64-bit value to the right by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _lrotr(unsigned long long a, int b);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c ROR instruction.
+///
+/// \param a
+/// The unsigned 64-bit value to be rotated.
+/// \param b
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
+/// \see __rorq
#define _lrotr(a,b) __rorq((a), (b))
-#else
+#else // __LP64__
+/// Rotates a 32-bit value to the left by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _lrotl(unsigned int a, int b);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c ROL instruction.
+///
+/// \param a
+/// The unsigned 32-bit value to be rotated.
+/// \param b
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
+/// \see __rold
#define _lrotl(a,b) __rold((a), (b))
+
+/// Rotates a 32-bit value to the right by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _lrotr(unsigned int a, int b);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c ROR instruction.
+///
+/// \param a
+/// The unsigned 32-bit value to be rotated.
+/// \param b
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
+/// \see __rord
#define _lrotr(a,b) __rord((a), (b))
-#endif
+#endif // __LP64__
+
+/// Rotates a 32-bit value to the left by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _rotl(unsigned int a, int b);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c ROL instruction.
+///
+/// \param a
+/// The unsigned 32-bit value to be rotated.
+/// \param b
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
+/// \see __rold
#define _rotl(a,b) __rold((a), (b))
+
+/// Rotates a 32-bit value to the right by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _rotr(unsigned int a, int b);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c ROR instruction.
+///
+/// \param a
+/// The unsigned 32-bit value to be rotated.
+/// \param b
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
+/// \see __rord
#define _rotr(a,b) __rord((a), (b))
#endif // _MSC_VER
/* These are not builtins so need to be provided in all modes. */
+/// Rotates a 16-bit value to the left by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned short _rotwl(unsigned short a, int b);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c ROL instruction.
+///
+/// \param a
+/// The unsigned 16-bit value to be rotated.
+/// \param b
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
+/// \see __rolw
#define _rotwl(a,b) __rolw((a), (b))
+
+/// Rotates a 16-bit value to the right by the specified number of bits.
+/// This operation is undefined if the number of bits exceeds the size of
+/// the value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned short _rotwr(unsigned short a, int b);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c ROR instruction.
+///
+/// \param a
+/// The unsigned 16-bit value to be rotated.
+/// \param b
+/// The number of bits to rotate the value.
+/// \returns The rotated value.
+/// \see __rorw
#define _rotwr(a,b) __rorw((a), (b))
#undef __DEFAULT_FN_ATTRS
lib/include/immintrin.h
@@ -291,11 +291,13 @@
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RDPID__)
-/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
+/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
+///
+/// \returns The 32-bit contents of the MSR.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
return __builtin_ia32_rdpid();
@@ -488,6 +490,15 @@ _writegsbase_u64(unsigned long long __V)
* field inside of it.
*/
+/// Load a 16-bit value from memory and swap its bytes.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the MOVBE instruction.
+///
+/// \param __P
+/// A pointer to the 16-bit value to load.
+/// \returns The byte-swapped value.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
struct __loadu_i16 {
@@ -496,6 +507,16 @@ _loadbe_i16(void const * __P) {
return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}
+/// Swap the bytes of a 16-bit value and store it to memory.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the MOVBE instruction.
+///
+/// \param __P
+/// A pointer to the memory for storing the swapped value.
+/// \param __D
+/// The 16-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
struct __storeu_i16 {
@@ -504,6 +525,15 @@ _storebe_i16(void * __P, short __D) {
((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
}
+/// Load a 32-bit value from memory and swap its bytes.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the MOVBE instruction.
+///
+/// \param __P
+/// A pointer to the 32-bit value to load.
+/// \returns The byte-swapped value.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
struct __loadu_i32 {
@@ -512,6 +542,16 @@ _loadbe_i32(void const * __P) {
return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}
+/// Swap the bytes of a 32-bit value and store it to memory.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the MOVBE instruction.
+///
+/// \param __P
+/// A pointer to the memory for storing the swapped value.
+/// \param __D
+/// The 32-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
struct __storeu_i32 {
@@ -521,6 +561,15 @@ _storebe_i32(void * __P, int __D) {
}
#ifdef __x86_64__
+/// Load a 64-bit value from memory and swap its bytes.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the MOVBE instruction.
+///
+/// \param __P
+/// A pointer to the 64-bit value to load.
+/// \returns The byte-swapped value.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
struct __loadu_i64 {
@@ -529,6 +578,16 @@ _loadbe_i64(void const * __P) {
return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}
+/// Swap the bytes of a 64-bit value and store it to memory.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the MOVBE instruction.
+///
+/// \param __P
+/// A pointer to the memory for storing the swapped value.
+/// \param __D
+/// The 64-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
struct __storeu_i64 {
@@ -578,9 +637,13 @@ _storebe_i64(void * __P, long long __D) {
#include <cetintrin.h>
#endif
-/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
- * whereas others are also available at all times. */
+/* Intrinsics inside adcintrin.h are available at all times. */
+#include <adcintrin.h>
+
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
+ defined(__ADX__)
#include <adxintrin.h>
+#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RDSEED__)
lib/include/intrin.h
@@ -572,6 +572,22 @@ unsigned char __readx18byte(unsigned long offset);
unsigned short __readx18word(unsigned long offset);
unsigned long __readx18dword(unsigned long offset);
unsigned __int64 __readx18qword(unsigned long offset);
+
+double _CopyDoubleFromInt64(__int64);
+float _CopyFloatFromInt32(__int32);
+__int32 _CopyInt32FromFloat(float);
+__int64 _CopyInt64FromDouble(double);
+
+unsigned int _CountLeadingOnes(unsigned long);
+unsigned int _CountLeadingOnes64(unsigned __int64);
+unsigned int _CountLeadingSigns(long);
+unsigned int _CountLeadingSigns64(__int64);
+unsigned int _CountLeadingZeros(unsigned long);
+unsigned int _CountLeadingZeros64(unsigned __int64);
+unsigned int _CountOneBits(unsigned long);
+unsigned int _CountOneBits64(unsigned __int64);
+
+void __cdecl __prefetch(void *);
#endif
/*----------------------------------------------------------------------------*\
lib/include/larchintrin.h
@@ -156,7 +156,7 @@ extern __inline unsigned char
return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1);
}
-extern __inline unsigned char
+extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__iocsrrd_h(unsigned int _1) {
return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1);
@@ -228,6 +228,18 @@ extern __inline void
((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2)))
#endif
+#define __frecipe_s(/*float*/ _1) \
+ (float)__builtin_loongarch_frecipe_s((float)_1)
+
+#define __frecipe_d(/*double*/ _1) \
+ (double)__builtin_loongarch_frecipe_d((double)_1)
+
+#define __frsqrte_s(/*float*/ _1) \
+ (float)__builtin_loongarch_frsqrte_s((float)_1)
+
+#define __frsqrte_d(/*double*/ _1) \
+ (double)__builtin_loongarch_frsqrte_d((double)_1)
+
#ifdef __cplusplus
}
#endif
lib/include/lasxintrin.h
@@ -0,0 +1,3884 @@
+/*===------------ lasxintrin.h - LoongArch LASX intrinsics -----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _LOONGSON_ASXINTRIN_H
+#define _LOONGSON_ASXINTRIN_H 1
+
+#if defined(__loongarch_asx)
+
+typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
+typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1)));
+typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1)));
+typedef short v16i16 __attribute__((vector_size(32), aligned(32)));
+typedef short v16i16_h __attribute__((vector_size(32), aligned(2)));
+typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2)));
+typedef int v8i32 __attribute__((vector_size(32), aligned(32)));
+typedef int v8i32_w __attribute__((vector_size(32), aligned(4)));
+typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4)));
+typedef long long v4i64 __attribute__((vector_size(32), aligned(32)));
+typedef long long v4i64_d __attribute__((vector_size(32), aligned(8)));
+typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8)));
+typedef float v8f32 __attribute__((vector_size(32), aligned(32)));
+typedef float v8f32_w __attribute__((vector_size(32), aligned(4)));
+typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
+typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
+
+typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
+typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
+
+typedef float __m256 __attribute__((__vector_size__(32), __may_alias__));
+typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__));
+typedef double __m256d __attribute__((__vector_size__(32), __may_alias__));
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsll_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsll_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsll_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsll_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \
+ ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \
+ ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \
+ ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsra_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsra_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsra_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsra_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsrar_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsrar_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsrar_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsrar_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsrl_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsrl_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsrl_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsrl_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsrlr_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsrlr_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsrlr_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsrlr_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \
+ ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvbitclr_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvbitclr_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvbitclr_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvbitclr_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2);
+}
+
+#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \
+ ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2)))
+
+#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \
+ ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2)))
+
+#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2)))
+
+#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \
+ ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvbitset_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvbitset_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvbitset_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvbitset_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2);
+}
+
+#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \
+ ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2)))
+
+#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \
+ ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2)))
+
+#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2)))
+
+#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \
+ ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvbitrev_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvbitrev_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvbitrev_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvbitrev_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2);
+}
+
+#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \
+ ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2)))
+
+#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \
+ ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2)))
+
+#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2)))
+
+#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \
+ ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvadd_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvadd_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvadd_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvadd_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2)))
+
+#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2)))
+
+#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2)))
+
+#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsub_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsub_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsub_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsub_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2)))
+
+#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2)))
+
+#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2)))
+
+#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmax_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmax_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmax_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmax_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmax_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmax_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmax_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmax_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2);
+}
+
+#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2)))
+
+#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2)))
+
+#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2)))
+
+#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmin_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmin_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmin_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmin_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmin_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmin_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmin_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmin_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2);
+}
+
+#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2)))
+
+#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2)))
+
+#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2)))
+
+#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvseq_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvseq_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvseq_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvseq_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvslt_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvslt_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvslt_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvslt_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvslt_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvslt_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvslt_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvslt_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2);
+}
+
+#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2)))
+
+#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2)))
+
+#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2)))
+
+#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsle_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsle_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsle_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsle_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsle_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsle_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsle_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsle_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2);
+}
+
+#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2)))
+
+#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2)))
+
+#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2)))
+
+#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2)))
+
+// xvsat_* / xvsat_*u: saturate builtins with an immediate operand. Note
+// that the immediate width tracks the element width (ui3 for bytes, ui4
+// for halfwords, ui5 for words, ui6 for doublewords, per the parameter
+// comments) — so the valid range of _2 differs between variants.
+#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \
+  ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \
+  ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \
+  ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \
+  ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2)))
+
+#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \
+  ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2)))
+
+#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \
+  ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2)))
+
+#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \
+  ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2)))
+
+#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \
+  ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvadda_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvadda_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvadda_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvadda_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsadd_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsadd_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsadd_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsadd_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsadd_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsadd_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsadd_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsadd_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavg_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavg_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavg_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavg_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavg_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavg_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavg_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavg_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavgr_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavgr_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavgr_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavgr_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavgr_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavgr_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavgr_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvavgr_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssub_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssub_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssub_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssub_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssub_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssub_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssub_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssub_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvabsd_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvabsd_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvabsd_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvabsd_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvabsd_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvabsd_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvabsd_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvabsd_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2);
+}
+
+// xvmul_{b,h,w,d}: multiply builtins for signed 8/16/32/64-bit lanes.
+// Generated wrapper pattern — cast-and-forward only, no extra logic.
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmul_b(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmul_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmul_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmul_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2);
+}
+
+// xvmadd_* / xvmsub_*: three-operand multiply-add / multiply-subtract
+// builtins. _1 is passed as the builtin's first operand (presumably the
+// accumulator, with _2 and _3 the multiplicands, per the usual madd
+// convention — confirm against the LASX ISA manual; this header only
+// forwards the operands in order).
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvdiv_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvdiv_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvdiv_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvdiv_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvdiv_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvdiv_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvdiv_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvdiv_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmod_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmod_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmod_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmod_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmod_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmod_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmod_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmod_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2);
+}
+
+// xvrepl128vei_*: replicate-element builtins taking an immediate lane
+// index. The immediate width shrinks as the element widens (ui4 for
+// bytes down to ui1 for doublewords, per the parameter comments),
+// matching the number of selectable lanes per 128-bit half.
+#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \
+  ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \
+  ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \
+  ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \
+  ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpickev_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpickev_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpickev_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpickev_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpickod_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpickod_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpickod_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpickod_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvilvh_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvilvh_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvilvh_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvilvh_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvilvl_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvilvl_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvilvl_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvilvl_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpackev_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpackev_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpackev_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpackev_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpackod_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpackod_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpackod_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvpackod_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3);
+}
+
+// Logical-vector family (and/or/nor/xor, their byte-immediate *i_b
+// forms, and bit-select). All operands are cast to v32u8 regardless of
+// the caller's element width — consistent with these being bitwise,
+// element-width-agnostic operations (per the _v suffix and builtin
+// names; the wrappers themselves only cast and forward).
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvand_v(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2);
+}
+
+#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \
+  ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvor_v(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2);
+}
+
+#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \
+  ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvnor_v(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2);
+}
+
+#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \
+  ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvxor_v(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2);
+}
+
+#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \
+  ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2)))
+
+// Three-operand bit select: forwards _1, _2, _3 to the builtin in order.
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3);
+}
+
+#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
+  ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3)))
+
+#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \
+ ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \
+ ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \
+ ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2)))
+
+/* replgr2vr: build a vector from a general-purpose register value, one
+   wrapper per element width.  b/h/w take an int; the doubleword (d)
+   variant takes a long int, matching the builtin's 64-bit operand.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvreplgr2vr_b(int _1) {
+  return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvreplgr2vr_h(int _1) {
+  return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvreplgr2vr_w(int _1) {
+  return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvreplgr2vr_d(long int _1) {
+  return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1);
+}
+
+/* Per-element bit-counting wrappers for the xvpcnt / xvclo / xvclz
+   builtin families (per the builtin names: population count, count
+   leading ones, count leading zeros), each with byte/halfword/word/
+   doubleword variants on signed element vectors.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvpcnt_b(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvpcnt_h(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvpcnt_w(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvpcnt_d(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvclo_b(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvclo_h(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvclo_w(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvclo_d(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvclz_b(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvclz_h(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvclz_w(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvclz_d(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1);
+}
+
+/* Floating-point arithmetic wrappers.  Naming convention visible in the
+   builtin signatures: _s variants operate on __m256 (v8f32, 8 x float),
+   _d variants on __m256d (v4f64, 4 x double).  Conversions and
+   classification return __m256i.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfadd_s(__m256 _1, __m256 _2) {
+  return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfadd_d(__m256d _1, __m256d _2) {
+  return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfsub_s(__m256 _1, __m256 _2) {
+  return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfsub_d(__m256d _1, __m256d _2) {
+  return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfmul_s(__m256 _1, __m256 _2) {
+  return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfmul_d(__m256d _1, __m256d _2) {
+  return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfdiv_s(__m256 _1, __m256 _2) {
+  return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfdiv_d(__m256d _1, __m256d _2) {
+  return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2);
+}
+
+/* Narrowing conversions: float pairs to half (result carried in __m256i)
+   and double pairs to float.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) {
+  return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2);
+}
+
+/* min/max families; the *a variants are the builtins' "absolute" forms
+   (per the instruction naming) with the same wrapper shape.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfmin_s(__m256 _1, __m256 _2) {
+  return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfmin_d(__m256d _1, __m256d _2) {
+  return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfmina_s(__m256 _1, __m256 _2) {
+  return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfmina_d(__m256d _1, __m256d _2) {
+  return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfmax_s(__m256 _1, __m256 _2) {
+  return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfmax_d(__m256d _1, __m256d _2) {
+  return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfmaxa_s(__m256 _1, __m256 _2) {
+  return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfmaxa_d(__m256d _1, __m256d _2) {
+  return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2);
+}
+
+/* fclass: classify float elements, integer result vector.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfclass_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfclass_d(__m256d _1) {
+  return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfsqrt_s(__m256 _1) {
+  return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfsqrt_d(__m256d _1) {
+  return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfrecip_s(__m256 _1) {
+  return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfrecip_d(__m256d _1) {
+  return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1);
+}
+
+/* frecipe: estimate variant of frecip (per the builtin name) — note this
+   maps to a distinct builtin and may require a newer ISA revision;
+   confirm against the LASX builtin documentation.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfrecipe_s(__m256 _1) {
+  return (__m256)__builtin_lasx_xvfrecipe_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfrecipe_d(__m256d _1) {
+  return (__m256d)__builtin_lasx_xvfrecipe_d((v4f64)_1);
+}
+
+/* Rounding (frint), reciprocal square root (frsqrt and its estimate form
+   frsqrte), logb, and half/float widening conversions.  Same wrapper
+   pattern as the rest of the header: cast operands to the builtin's
+   element vector type, cast the result back.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfrint_s(__m256 _1) {
+  return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfrint_d(__m256d _1) {
+  return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfrsqrt_s(__m256 _1) {
+  return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfrsqrt_d(__m256d _1) {
+  return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfrsqrte_s(__m256 _1) {
+  return (__m256)__builtin_lasx_xvfrsqrte_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfrsqrte_d(__m256d _1) {
+  return (__m256d)__builtin_lasx_xvfrsqrte_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvflogb_s(__m256 _1) {
+  return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvflogb_d(__m256d _1) {
+  return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1);
+}
+
+/* Widening conversions: h = high half of the source vector, l = low half
+   (per the builtin naming).  Half-precision sources are carried in
+   __m256i and cast to v16i16.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfcvth_s_h(__m256i _1) {
+  return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfcvth_d_s(__m256 _1) {
+  return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfcvtl_s_h(__m256i _1) {
+  return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfcvtl_d_s(__m256 _1) {
+  return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1);
+}
+
+/* Float-to-integer (ftint*) and integer-to-float (ffint*) conversions.
+   Suffix convention visible in the builtin signatures: w = 32-bit,
+   l = 64-bit, u = unsigned, rz = the truncating (round-toward-zero)
+   variant per the instruction naming.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftint_w_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftint_l_d(__m256d _1) {
+  return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftint_wu_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftint_lu_d(__m256d _1) {
+  return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrz_w_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrz_l_d(__m256d _1) {
+  return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrz_wu_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrz_lu_d(__m256d _1) {
+  return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1);
+}
+
+/* ffint: integer source vectors; note unsigned variants cast the operand
+   to the unsigned vector type (v8u32/v4u64).  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvffint_s_w(__m256i _1) {
+  return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvffint_d_l(__m256i _1) {
+  return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvffint_s_wu(__m256i _1) {
+  return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvffint_d_lu(__m256i _1) {
+  return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1);
+}
+
+/* replve: replicate an element selected by a runtime int index (hence a
+   function, unlike the immediate-index macro forms elsewhere in this
+   header).  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvreplve_b(__m256i _1, int _2) {
+  return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvreplve_h(__m256i _1, int _2) {
+  return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvreplve_w(__m256i _1, int _2) {
+  return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvreplve_d(__m256i _1, int _2) {
+  return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2);
+}
+
+/* permi_w takes a compile-time ui8 control, so it is a macro.  */
+#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
+  ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3)))
+
+/* andn: bitwise and-not over the vector as 32 unsigned bytes.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvandn_v(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2);
+}
+
+/* neg: per-element negation on signed vectors.  muh: multiply-high
+   family (per the builtin name), with signed (b/h/w/d) and unsigned
+   (bu/hu/wu/du) element variants distinguished by the operand cast.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvneg_b(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvneg_h(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvneg_w(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvneg_d(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmuh_b(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmuh_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmuh_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmuh_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmuh_bu(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmuh_hu(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmuh_wu(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmuh_du(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2);
+}
+
+/* sllwil: widening shift-left macros.  The immediate width grows with
+   the source element size (ui3 for byte, ui4 for halfword, ui5 for
+   word); signed and unsigned (hu_bu etc.) variants differ only in the
+   operand cast.  Macros because the shift amount must be a compile-time
+   constant.  */
+#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \
+  ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \
+  ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \
+  ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \
+  ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2)))
+
+#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \
+  ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2)))
+
+#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \
+  ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2)))
+
+/* Narrowing shift families (per the builtin names): sran (arithmetic
+   shift + narrow), ssran (saturating), srarn (rounding), ssrarn
+   (saturating rounding), and the logical-shift counterparts srln,
+   ssrln, srlrn, ssrlrn.  Each takes two vectors of the wider element
+   type and produces the narrower result; *u variants cast operands to
+   the unsigned wide type.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsran_b_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsran_h_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsran_w_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssran_b_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssran_h_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssran_w_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssran_bu_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssran_hu_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssran_wu_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrln_b_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrln_h_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrln_w_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2);
+}
+
+/* frstp / frstpi: three-operand builtins on byte/halfword vectors; the
+   *i forms take a ui5 immediate and so are macros.  */
+#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3);
+}
+
+/* shuf4i_d is the two-vector, immediate-controlled doubleword shuffle;
+   bsrl/bsll are whole-vector byte shifts with a ui5 immediate.  */
+#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
+  ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \
+  ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2)))
+
+#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \
+  ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2)))
+
+/* extrins: element extract/insert with a ui8 control immediate, one
+   macro per element width.  */
+#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
+  ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
+  ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
+  ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
+  ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3)))
+
+/* mskltz: per-element less-than-zero mask builtins; signcov: sign-cover
+   builtins (per the instruction naming).  Byte/halfword/word/doubleword
+   variants on signed element vectors.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmskltz_b(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmskltz_h(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmskltz_w(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmskltz_d(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsigncov_b(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsigncov_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsigncov_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsigncov_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2);
+}
+
+/* Fused multiply-add family: fmadd, fmsub, fnmadd, fnmsub, each in
+   single (_s, v8f32) and double (_d, v4f64) precision.  All are
+   three-operand wrappers around the corresponding builtin.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) {
+  return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) {
+  return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) {
+  return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) {
+  return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) {
+  return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) {
+  return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) {
+  return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) {
+  return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3);
+}
+
+/* Directed-rounding float-to-integer conversions.  Rounding-mode suffix
+   convention in the builtin names: rne = to nearest even, rp = toward
+   +inf, rm = toward -inf, rz = toward zero.  The h/l suffixed variants
+   (ftinth, ftintrzh, ffinth, ...) convert the high or low half of the
+   source vector (per the builtin naming).  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrne_w_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrne_l_d(__m256d _1) {
+  return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrp_w_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrp_l_d(__m256d _1) {
+  return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrm_w_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrm_l_d(__m256d _1) {
+  return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1);
+}
+
+/* Two-operand forms: convert a pair of double vectors to one word
+   vector, and back (ffint_s_l).  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftint_w_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_xvffint_s_l(__m256i _1, __m256i _2) {
+  return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2);
+}
+
+/* Half-vector conversions between float words and long elements.  */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftinth_l_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintl_l_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvffinth_d_w(__m256i _1) {
+  return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_xvffintl_d_w(__m256i _1) {
+  return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrzh_l_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrzl_l_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrph_l_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrpl_l_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrmh_l_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrml_l_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrneh_l_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvftintrnel_l_s(__m256 _1) {
+  return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1);
+}
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+ __lasx_xvfrintrne_s(__m256 _1) {
+ return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+ __lasx_xvfrintrne_d(__m256d _1) {
+ return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+ __lasx_xvfrintrz_s(__m256 _1) {
+ return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+ __lasx_xvfrintrz_d(__m256d _1) {
+ return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+ __lasx_xvfrintrp_s(__m256 _1) {
+ return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+ __lasx_xvfrintrp_d(__m256d _1) {
+ return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+ __lasx_xvfrintrm_s(__m256 _1) {
+ return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+ __lasx_xvfrintrm_d(__m256d _1) {
+ return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1);
+}
+
+#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \
+ ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2)))
+
+#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \
+ ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3)))
+
+#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \
+ /*idx*/ _4) \
+ ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4)))
+
+#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \
+ /*idx*/ _4) \
+ ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4)))
+
+#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \
+ /*idx*/ _4) \
+ ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4)))
+
+#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \
+ /*idx*/ _4) \
+ ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4)))
+
+#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \
+ ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \
+ ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \
+ ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \
+ ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssrln_b_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssrln_h_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvssrln_w_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvorn_v(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2);
+}
+
+#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvldx(void const *_1, long int _2) {
+ return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void
+ __lasx_xvstx(__m256i _1, void *_2, long int _3) {
+ return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvextl_qu_du(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1);
+}
+
+#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \
+ ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3)))
+
+#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \
+ ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvreplve0_b(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvreplve0_h(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvreplve0_w(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvreplve0_d(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvreplve0_q(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_vext2xv_h_b(__m256i _1) {
+ return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_vext2xv_w_h(__m256i _1) {
+ return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_vext2xv_d_w(__m256i _1) {
+ return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_vext2xv_w_b(__m256i _1) {
+ return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_vext2xv_d_h(__m256i _1) {
+ return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_vext2xv_d_b(__m256i _1) {
+ return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_vext2xv_hu_bu(__m256i _1) {
+ return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_vext2xv_wu_hu(__m256i _1) {
+ return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_vext2xv_du_wu(__m256i _1) {
+ return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_vext2xv_wu_bu(__m256i _1) {
+ return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_vext2xv_du_hu(__m256i _1) {
+ return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_vext2xv_du_bu(__m256i _1) {
+ return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1);
+}
+
+#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \
+ ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \
+ ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvperm_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2);
+}
+
+#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \
+ ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2)))
+
+#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \
+ ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2)))
+
+#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \
+ ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2)))
+
+#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \
+ ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2)))
+
+#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \
+ ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \
+ ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2)))
+
+#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \
+ ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2)))
+
+#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \
+ ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2,
+ (v16i16)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2,
+ (v32i8)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2,
+ (v4u64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2,
+ (v8u32)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2,
+ (v16u16)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2,
+ (v32u8)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2,
+ (v16i16)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2,
+ (v32i8)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2,
+ (v4u64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2,
+ (v8u32)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2,
+ (v16u16)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2,
+ (v32u8)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2,
+ (v4i64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2,
+ (v8i32)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2,
+ (v16i16)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2,
+ (v32i8)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2,
+ (v4i64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2,
+ (v8i32)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2,
+ (v16i16)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) {
+ return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2,
+ (v32i8)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvrotr_b(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvrotr_h(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvrotr_w(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvrotr_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvadd_q(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvsub_q(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) {
+ return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmskgez_b(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvmsknz_b(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvexth_h_b(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvexth_w_h(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvexth_d_w(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvexth_q_d(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvexth_hu_bu(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvexth_wu_hu(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvexth_du_wu(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvexth_qu_du(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1);
+}
+
+#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \
+ ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \
+ ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \
+ ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \
+ ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvextl_q_d(__m256i _1) {
+ return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1);
+}
+
+#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+ ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+ ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1)))
+
+#define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1)))
+
+#define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1)))
+
+#define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1)))
+
+#define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1)))
+
+#define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1)))
+
+#define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1)))
+
+#define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1)))
+
+#define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1)))
+
+#define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+ __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) {
+ return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2);
+}
+
+#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \
+ ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2)))
+
+#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \
+ ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2)))
+
+#define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1)))
+
+#define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1)))
+
+#define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1)))
+
+#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1)))
+
+#endif /* defined(__loongarch_asx). */
+#endif /* _LOONGSON_ASXINTRIN_H. */
lib/include/limits.h
@@ -66,10 +66,8 @@
#define CHAR_BIT __CHAR_BIT__
-/* C2x 5.2.4.2.1 */
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+/* C23 5.2.4.2.1 */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define BOOL_WIDTH __BOOL_WIDTH__
#define CHAR_WIDTH CHAR_BIT
#define SCHAR_WIDTH CHAR_BIT
lib/include/lsxintrin.h
@@ -0,0 +1,3750 @@
+/*===------------- lsxintrin.h - LoongArch LSX intrinsics ------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _LOONGSON_SXINTRIN_H
+#define _LOONGSON_SXINTRIN_H 1
+
+#if defined(__loongarch_sx)
+typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));
+typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1)));
+typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16)));
+typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1)));
+typedef short v8i16 __attribute__((vector_size(16), aligned(16)));
+typedef short v8i16_h __attribute__((vector_size(16), aligned(2)));
+typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16)));
+typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2)));
+typedef int v4i32 __attribute__((vector_size(16), aligned(16)));
+typedef int v4i32_w __attribute__((vector_size(16), aligned(4)));
+typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16)));
+typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4)));
+typedef long long v2i64 __attribute__((vector_size(16), aligned(16)));
+typedef long long v2i64_d __attribute__((vector_size(16), aligned(8)));
+typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16)));
+typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8)));
+typedef float v4f32 __attribute__((vector_size(16), aligned(16)));
+typedef float v4f32_w __attribute__((vector_size(16), aligned(4)));
+typedef double v2f64 __attribute__((vector_size(16), aligned(16)));
+typedef double v2f64_d __attribute__((vector_size(16), aligned(8)));
+
+typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__));
+typedef float __m128 __attribute__((__vector_size__(16), __may_alias__));
+typedef double __m128d __attribute__((__vector_size__(16), __may_alias__));
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsll_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsll_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsll_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsll_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2)))
+
+#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2)))
+
+#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2)))
+
+#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \
+ ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsra_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsra_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsra_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsra_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \
+ ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsrar_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsrar_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsrar_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsrar_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \
+ ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsrl_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsrl_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsrl_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsrl_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \
+ ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsrlr_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsrlr_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsrlr_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsrlr_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \
+ ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vbitclr_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vbitclr_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vbitclr_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vbitclr_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2)))
+
+#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2)))
+
+#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2)))
+
+#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \
+ ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vbitset_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vbitset_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vbitset_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vbitset_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2)))
+
+#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2)))
+
+#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2)))
+
+#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \
+ ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vbitrev_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vbitrev_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vbitrev_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vbitrev_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2)))
+
+#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2)))
+
+#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2)))
+
+#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \
+ ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vadd_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vadd_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vadd_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vadd_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2)))
+
+#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2)))
+
+#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2)))
+
+#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsub_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsub_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsub_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsub_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2)))
+
+#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2)))
+
+#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2)))
+
+#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmax_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmax_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmax_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmax_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2)))
+
+#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2)))
+
+#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2)))
+
+#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmax_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmax_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmax_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmax_du(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmin_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmin_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmin_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmin_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2)))
+
+#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2)))
+
+#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2)))
+
+#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmin_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmin_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmin_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmin_du(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vseq_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vseq_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vseq_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vseq_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2)))
+
+#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2)))
+
+#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2)))
+
+#define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2)))
+
+#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vslt_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vslt_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vslt_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vslt_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2)))
+
+#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2)))
+
+#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vslt_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vslt_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vslt_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vslt_du(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsle_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsle_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsle_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsle_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2)))
+
+#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2)))
+
+#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2)))
+
+#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \
+ ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsle_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsle_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsle_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsle_du(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2)))
+
+#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \
+ ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2)))
+
+#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \
+ ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vadda_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vadda_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vadda_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vadda_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsadd_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsadd_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsadd_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsadd_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsadd_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsadd_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsadd_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsadd_du(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavg_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavg_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavg_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavg_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavg_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavg_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavg_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavg_du(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavgr_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavgr_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavgr_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavgr_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavgr_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavgr_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavgr_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vavgr_du(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssub_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssub_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssub_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssub_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssub_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssub_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssub_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssub_du(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vabsd_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vabsd_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vabsd_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vabsd_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vabsd_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vabsd_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vabsd_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vabsd_du(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmul_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmul_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmul_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmul_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vdiv_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vdiv_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vdiv_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vdiv_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vdiv_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vdiv_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vdiv_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vdiv_du(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vhaddw_h_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vhaddw_w_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vhaddw_d_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vhsubw_h_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vhsubw_w_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vhsubw_d_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmod_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmod_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmod_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmod_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmod_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmod_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmod_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmod_du(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vreplve_b(__m128i _1, int _2) {
+ return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vreplve_h(__m128i _1, int _2) {
+ return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vreplve_w(__m128i _1, int _2) {
+ return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vreplve_d(__m128i _1, int _2) {
+ return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2);
+}
+
+#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2)))
+
+#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2)))
+
+#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \
+ ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2)))
+
+#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \
+ ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpickev_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpickev_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpickev_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpickev_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpickod_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpickod_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpickod_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpickod_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vilvh_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vilvh_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vilvh_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vilvh_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vilvl_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vilvl_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vilvl_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vilvl_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpackev_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpackev_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpackev_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpackev_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpackod_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpackod_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpackod_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpackod_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vand_v(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \
+ ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vor_v(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \
+ ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vnor_v(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \
+ ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vxor_v(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \
+ ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3);
+}
+
+#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+ ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3)))
+
+#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \
+ ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2)))
+
+#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \
+ ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2)))
+
+#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \
+ ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vreplgr2vr_b(int _1) {
+ return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vreplgr2vr_h(int _1) {
+ return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vreplgr2vr_w(int _1) {
+ return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vreplgr2vr_d(long int _1) {
+ return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpcnt_b(__m128i _1) {
+ return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpcnt_h(__m128i _1) {
+ return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpcnt_w(__m128i _1) {
+ return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vpcnt_d(__m128i _1) {
+ return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vclo_b(__m128i _1) {
+ return (__m128i)__builtin_lsx_vclo_b((v16i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vclo_h(__m128i _1) {
+ return (__m128i)__builtin_lsx_vclo_h((v8i16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vclo_w(__m128i _1) {
+ return (__m128i)__builtin_lsx_vclo_w((v4i32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vclo_d(__m128i _1) {
+ return (__m128i)__builtin_lsx_vclo_d((v2i64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vclz_b(__m128i _1) {
+ return (__m128i)__builtin_lsx_vclz_b((v16i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vclz_h(__m128i _1) {
+ return (__m128i)__builtin_lsx_vclz_h((v8i16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vclz_w(__m128i _1) {
+ return (__m128i)__builtin_lsx_vclz_w((v4i32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vclz_d(__m128i _1) {
+ return (__m128i)__builtin_lsx_vclz_d((v2i64)_1);
+}
+
+#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2)))
+
+#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2)))
+
+#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \
+ ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2)))
+
+#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \
+ ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2)))
+
+#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2)))
+
+#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2)))
+
+#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \
+ ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2)))
+
+#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \
+ ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2)))
+
+#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \
+ ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3)))
+
+#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \
+ ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3)))
+
+#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \
+ ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3)))
+
+#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \
+ ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfadd_s(__m128 _1, __m128 _2) {
+ return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfadd_d(__m128d _1, __m128d _2) {
+ return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfsub_s(__m128 _1, __m128 _2) {
+ return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfsub_d(__m128d _1, __m128d _2) {
+ return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfmul_s(__m128 _1, __m128 _2) {
+ return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfmul_d(__m128d _1, __m128d _2) {
+ return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfdiv_s(__m128 _1, __m128 _2) {
+ return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfdiv_d(__m128d _1, __m128d _2) {
+ return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcvt_h_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfcvt_s_d(__m128d _1, __m128d _2) {
+ return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfmin_s(__m128 _1, __m128 _2) {
+ return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfmin_d(__m128d _1, __m128d _2) {
+ return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfmina_s(__m128 _1, __m128 _2) {
+ return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfmina_d(__m128d _1, __m128d _2) {
+ return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfmax_s(__m128 _1, __m128 _2) {
+ return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfmax_d(__m128d _1, __m128d _2) {
+ return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfmaxa_s(__m128 _1, __m128 _2) {
+ return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfmaxa_d(__m128d _1, __m128d _2) {
+ return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfclass_s(__m128 _1) {
+ return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfclass_d(__m128d _1) {
+ return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfsqrt_s(__m128 _1) {
+ return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfsqrt_d(__m128d _1) {
+ return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfrecip_s(__m128 _1) {
+ return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfrecip_d(__m128d _1) {
+ return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfrecipe_s(__m128 _1) {
+ return (__m128)__builtin_lsx_vfrecipe_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfrecipe_d(__m128d _1) {
+ return (__m128d)__builtin_lsx_vfrecipe_d((v2f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfrint_s(__m128 _1) {
+ return (__m128)__builtin_lsx_vfrint_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfrint_d(__m128d _1) {
+ return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfrsqrt_s(__m128 _1) {
+ return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfrsqrt_d(__m128d _1) {
+ return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfrsqrte_s(__m128 _1) {
+ return (__m128)__builtin_lsx_vfrsqrte_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfrsqrte_d(__m128d _1) {
+ return (__m128d)__builtin_lsx_vfrsqrte_d((v2f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vflogb_s(__m128 _1) {
+ return (__m128)__builtin_lsx_vflogb_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vflogb_d(__m128d _1) {
+ return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfcvth_s_h(__m128i _1) {
+ return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfcvth_d_s(__m128 _1) {
+ return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vfcvtl_s_h(__m128i _1) {
+ return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vfcvtl_d_s(__m128 _1) {
+ return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vftint_w_s(__m128 _1) {
+ return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vftint_l_d(__m128d _1) {
+ return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vftint_wu_s(__m128 _1) {
+ return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vftint_lu_d(__m128d _1) {
+ return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vftintrz_w_s(__m128 _1) {
+ return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vftintrz_l_d(__m128d _1) {
+ return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vftintrz_wu_s(__m128 _1) {
+ return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vftintrz_lu_d(__m128d _1) {
+ return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vffint_s_w(__m128i _1) {
+ return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vffint_d_l(__m128i _1) {
+ return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+ __lsx_vffint_s_wu(__m128i _1) {
+ return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+ __lsx_vffint_d_lu(__m128i _1) {
+ return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vandn_v(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vneg_b(__m128i _1) {
+ return (__m128i)__builtin_lsx_vneg_b((v16i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vneg_h(__m128i _1) {
+ return (__m128i)__builtin_lsx_vneg_h((v8i16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vneg_w(__m128i _1) {
+ return (__m128i)__builtin_lsx_vneg_w((v4i32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vneg_d(__m128i _1) {
+ return (__m128i)__builtin_lsx_vneg_d((v2i64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmuh_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmuh_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmuh_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmuh_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmuh_bu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmuh_hu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmuh_wu(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmuh_du(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsran_b_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsran_h_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2);
+}
+
+// Narrowing vector shift-right wrappers. Each function forwards to the
+// matching __builtin_lsx_* builtin, casting the opaque __m128i operands to the
+// element-typed vectors the builtin expects (visible in the casts below:
+// v8i16/v4i32/v2i64 signed, v8u16/v4u32/v2u64 unsigned). Per LSX naming, the
+// trailing pair of letters gives destination/source element widths
+// (b=8, h=16, w=32, d=64 bits); prefixes presumably mean: "ss" saturating,
+// "r" rounding, "l" logical (vs. arithmetic) shift -- see the LoongArch LSX
+// intrinsics documentation to confirm exact semantics.
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsran_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_bu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_hu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_wu_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrarn_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrarn_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrarn_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrln_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrln_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrln_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrln_bu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrln_hu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrln_wu_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2);
+}
+
+// Intrinsics whose last operand is an immediate must be macros so the
+// constant reaches the builtin unevaluated; the /*ui5*/ and /*ui8*/ comments
+// document the expected unsigned-immediate bit width. __lsx_vfrstp_b/h take
+// all-vector operands and can therefore stay as inline functions.
+#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3)            \
+  ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3)            \
+  ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3);
+}
+
+// 64-bit-element shuffle with an 8-bit immediate control.
+#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3)            \
+  ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3)))
+
+// Whole-register byte shifts by an immediate (srl = right, sll = left).
+#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2)                              \
+  ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2)))
+
+#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2)                              \
+  ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2)))
+
+// Extract-and-insert per element width, controlled by an 8-bit immediate.
+#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3)           \
+  ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3)           \
+  ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3)           \
+  ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3)           \
+  ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3)))
+
+// vmskltz_{b,h,w,d}: unary mask wrappers over the signed element vectors
+// (presumably "mask of elements less than zero" per the LSX mnemonic --
+// confirm against the LoongArch intrinsics docs). vsigncov_* take two
+// like-typed signed vectors. The vf{n,}m{add,sub} group operates on float
+// (__m128 / v4f32) and double (__m128d / v2f64) vectors and returns the
+// matching float vector type rather than __m128i.
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmskltz_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmskltz_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmskltz_w(__m128i _1) {
+  return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmskltz_d(__m128i _1) {
+  return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsigncov_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsigncov_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsigncov_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsigncov_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) {
+  return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) {
+  return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) {
+  return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) {
+  return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) {
+  return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) {
+  return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) {
+  return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) {
+  return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3);
+}
+
+// Float <-> integer conversion and float rounding wrappers. The casts show
+// the directions: vftint* take v4f32/v2f64 and return __m128i; vffint* take
+// integer vectors and return __m128/__m128d; vfrint* round within the float
+// domain. Per the LSX mnemonic scheme the rounding-mode infixes appear to be
+// rne = to-nearest-even, rz = toward zero, rp = toward +inf (up),
+// rm = toward -inf (down), and trailing l/h select the low/high half of the
+// source -- confirm against the LoongArch intrinsics documentation.
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrne_w_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrne_l_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrp_w_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrp_l_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrm_w_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrm_l_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1);
+}
+
+// Two-operand variants: convert a pair of double vectors into one vector of
+// 32-bit ints (and vffint_s_l the reverse direction, v2i64 pair -> v4f32).
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftint_w_d(__m128d _1, __m128d _2) {
+  return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vffint_s_l(__m128i _1, __m128i _2) {
+  return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrz_w_d(__m128d _1, __m128d _2) {
+  return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrp_w_d(__m128d _1, __m128d _2) {
+  return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrm_w_d(__m128d _1, __m128d _2) {
+  return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrne_w_d(__m128d _1, __m128d _2) {
+  return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2);
+}
+
+// Half-vector widening conversions (l = low half, h = high half).
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintl_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftinth_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vffinth_d_w(__m128i _1) {
+  return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vffintl_d_w(__m128i _1) {
+  return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrzl_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrzh_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrpl_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrph_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrml_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrmh_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrnel_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrneh_l_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1);
+}
+
+// Round float elements to integral values, staying in the float domain.
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrintrne_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrintrne_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrintrz_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrintrz_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrintrp_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrintrp_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrintrm_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrintrm_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1);
+}
+
+// Store-element macros: write one element of the vector to memory. They must
+// be macros because the displacement (_3) and element index (_4) are
+// immediates; the result is cast to void since the builtin returns nothing.
+#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4)  \
+  ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4)))
+
+#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4)  \
+  ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4)))
+
+#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4)  \
+  ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4)))
+
+#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4)  \
+  ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4)))
+
+// Widening add/subtract wrappers. The casts show each variant's operand
+// typing: both-signed (e.g. v4i32/v4i32), both-unsigned (v4u32/v4u32), or
+// mixed unsigned/signed for the *_wu_w / *_hu_h / *_bu_b / *_du_d forms
+// (first operand unsigned, second signed). Per LSX naming, "ev"/"od"
+// presumably select the even/odd source lanes, and the first width letter is
+// the (doubled) result element width; the q_* forms widen 64-bit lanes to a
+// 128-bit result -- confirm against the LoongArch intrinsics documentation.
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_q_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_q_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwev_q_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsubwod_q_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2);
+}
+
+// Widening multiply wrappers, same typing scheme as the widening add/sub
+// group: both-signed, both-unsigned, or mixed unsigned/signed (*_wu_w etc.,
+// first operand unsigned). The trailing vhaddw/vhsubw quartet are horizontal
+// widening add/subtract on 64-bit lanes (signed q_d and unsigned qu_du).
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_q_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_q_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhaddw_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhsubw_q_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2);
+}
+
+// Widening multiply-accumulate wrappers: three operands, where _1 is the
+// accumulator already at the widened element width (cast to v2i64/v4i32/v8i16
+// below) and _2/_3 are the narrower multiplicands. Signed, unsigned, and
+// mixed unsigned-by-signed (*_wu_w etc.) variants follow the same suffix
+// scheme as the widening add/multiply groups.
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2,
+                                                (v4i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2,
+                                                (v8i16)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2,
+ (v16i8)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2,
+ (v4i32)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2,
+ (v8i16)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2,
+ (v16i8)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2,
+ (v2i64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2,
+ (v2i64)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vrotr_b(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vrotr_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vrotr_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vrotr_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vadd_q(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vsub_q(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \
+ ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2)))
+
+#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \
+ ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2)))
+
+#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \
+ ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2)))
+
+#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \
+ ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmskgez_b(__m128i _1) {
+ return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vmsknz_b(__m128i _1) {
+ return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vexth_h_b(__m128i _1) {
+ return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vexth_w_h(__m128i _1) {
+ return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vexth_d_w(__m128i _1) {
+ return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vexth_q_d(__m128i _1) {
+ return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vexth_hu_bu(__m128i _1) {
+ return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vexth_wu_hu(__m128i _1) {
+ return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vexth_du_wu(__m128i _1) {
+ return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vexth_qu_du(__m128i _1) {
+ return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1);
+}
+
+#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \
+ ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2)))
+
+#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \
+ ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2)))
+
+#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \
+ ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2)))
+
+#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \
+ ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vextl_q_d(__m128i _1) {
+ return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1);
+}
+
+#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \
+ ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+ ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \
+ ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \
+ ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \
+ ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+ ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \
+ ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \
+ ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \
+ ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \
+ ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+ ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \
+ ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \
+ ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \
+ ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+ ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \
+ ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \
+ ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \
+ ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+ ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \
+ ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \
+ ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \
+ ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+ ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \
+ ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \
+ ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \
+ ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+ ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \
+ ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \
+ ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \
+ ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+ ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \
+ ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \
+ ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+ ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \
+ ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2)))
+
+#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \
+ ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssrln_b_h(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssrln_h_w(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vssrln_w_d(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vorn_v(__m128i _1, __m128i _2) {
+ return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2);
+}
+
+#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) {
+ return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vldx(void const *_1, long int _2) {
+ return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void
+ __lsx_vstx(__m128i _1, void *_2, long int _3) {
+ return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vextl_qu_du(__m128i _1) {
+ return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1);
+}
+
+#define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1)))
+
+#define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1)))
+
+#define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1)))
+
+#define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1)))
+
+#define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1)))
+
+#define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1)))
+
+#define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1)))
+
+#define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1)))
+
+#define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1)))
+
+#define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1)))
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+ __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) {
+ return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2);
+}
+
+#define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1)))
+
+#define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1)))
+
+#define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1)))
+
+#define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1)))
+
+#endif /* defined(__loongarch_sx) */
+#endif /* _LOONGSON_SXINTRIN_H */
lib/include/mmintrin.h
@@ -22,7 +22,9 @@ typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64)))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), \
+ __min_vector_width__(64)))
/// Clears the MMX state by setting the state of the x87 stack registers
/// to empty.
@@ -31,10 +33,10 @@ typedef char __v8qi __attribute__((__vector_size__(8)));
///
/// This intrinsic corresponds to the <c> EMMS </c> instruction.
///
-static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
-_mm_empty(void)
-{
- __builtin_ia32_emms();
+static __inline__ void __attribute__((__always_inline__, __nodebug__,
+ __target__("mmx,no-evex512")))
+_mm_empty(void) {
+ __builtin_ia32_emms();
}
/// Constructs a 64-bit integer vector, setting the lower 32 bits to the
lib/include/module.modulemap
@@ -153,9 +153,162 @@ module _Builtin_intrinsics [system] [extern_c] {
}
}
-module _Builtin_stddef_max_align_t [system] [extern_c] {
- header "__stddef_max_align_t.h"
+// Start -fbuiltin-headers-in-system-modules affected modules
+
+// The following modules all ignore their headers when
+// -fbuiltin-headers-in-system-modules is passed, and many of
+// those headers join system modules when present.
+
+// e.g. if -fbuiltin-headers-in-system-modules is passed, then
+// float.h will not be in the _Builtin_float module (that module
+// will be empty). If there is a system module that declares
+// `header "float.h"`, then the builtin float.h will join
+// that module. The system float.h (if present) will be treated
+// as a textual header in the system module.
+module _Builtin_float [system] {
+ header "float.h"
+ export *
+}
+
+module _Builtin_inttypes [system] {
+ header "inttypes.h"
+ export *
+}
+
+module _Builtin_iso646 [system] {
+ header "iso646.h"
+ export *
+}
+
+module _Builtin_limits [system] {
+ header "limits.h"
+ export *
+}
+
+module _Builtin_stdalign [system] {
+ header "stdalign.h"
+ export *
+}
+
+module _Builtin_stdarg [system] {
+ textual header "stdarg.h"
+
+ explicit module __gnuc_va_list {
+ header "__stdarg___gnuc_va_list.h"
+ export *
+ }
+
+ explicit module __va_copy {
+ header "__stdarg___va_copy.h"
+ export *
+ }
+
+ explicit module va_arg {
+ header "__stdarg_va_arg.h"
+ export *
+ }
+
+ explicit module va_copy {
+ header "__stdarg_va_copy.h"
+ export *
+ }
+
+ explicit module va_list {
+ header "__stdarg_va_list.h"
+ export *
+ }
+}
+
+module _Builtin_stdatomic [system] {
+ header "stdatomic.h"
+ export *
+}
+
+module _Builtin_stdbool [system] {
+ header "stdbool.h"
+ export *
+}
+
+module _Builtin_stddef [system] {
+ textual header "stddef.h"
+
+ // __stddef_max_align_t.h is always in this module, even if
+ // -fbuiltin-headers-in-system-modules is passed.
+ explicit module max_align_t {
+ header "__stddef_max_align_t.h"
+ export *
+ }
+
+ explicit module null {
+ header "__stddef_null.h"
+ export *
+ }
+
+ explicit module nullptr_t {
+ header "__stddef_nullptr_t.h"
+ export *
+ }
+
+ explicit module offsetof {
+ header "__stddef_offsetof.h"
+ export *
+ }
+
+ explicit module ptrdiff_t {
+ header "__stddef_ptrdiff_t.h"
+ export *
+ }
+
+ explicit module rsize_t {
+ header "__stddef_rsize_t.h"
+ export *
+ }
+
+ explicit module size_t {
+ header "__stddef_size_t.h"
+ export *
+ }
+
+ explicit module unreachable {
+ header "__stddef_unreachable.h"
+ export *
+ }
+
+ explicit module wchar_t {
+ header "__stddef_wchar_t.h"
+ export *
+ }
+}
+
+// wint_t is provided by <wchar.h> and not <stddef.h>. It's here
+// for compatibility, but must be explicitly requested. Therefore
+// __stddef_wint_t.h is not part of _Builtin_stddef. It is always in
+// this module even if -fbuiltin-headers-in-system-modules is passed.
+module _Builtin_stddef_wint_t [system] {
+ header "__stddef_wint_t.h"
+ export *
+}
+
+module _Builtin_stdint [system] {
+ header "stdint.h"
+ export *
+}
+
+module _Builtin_stdnoreturn [system] {
+ header "stdnoreturn.h"
+ export *
+}
+
+module _Builtin_tgmath [system] {
+ header "tgmath.h"
+ export *
+}
+
+module _Builtin_unwind [system] {
+ header "unwind.h"
+ export *
}
+// End -fbuiltin-headers-in-system-modules affected modules
module opencl_c {
requires opencl
lib/include/opencl-c-base.h
@@ -45,6 +45,7 @@
#define __opencl_c_ext_fp32_local_atomic_add 1
#define __opencl_c_ext_fp32_global_atomic_min_max 1
#define __opencl_c_ext_fp32_local_atomic_min_max 1
+#define __opencl_c_ext_image_raw10_raw12 1
#endif // defined(__SPIR__) || defined(__SPIRV__)
#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
@@ -477,6 +478,10 @@ typedef enum memory_order
#if __OPENCL_C_VERSION__ >= CL_VERSION_3_0
#define CLK_UNORM_INT_101010_2 0x10E0
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_3_0
+#ifdef __opencl_c_ext_image_raw10_raw12
+#define CLK_UNSIGNED_INT_RAW10_EXT 0x10E3
+#define CLK_UNSIGNED_INT_RAW12_EXT 0x10E4
+#endif // __opencl_c_ext_image_raw10_raw12
// Channel order, numbering must be aligned with cl_channel_order in cl.h
//
lib/include/pmmintrin.h
@@ -17,8 +17,9 @@
#include <emmintrin.h>
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("sse3,no-evex512"), __min_vector_width__(128)))
/// Loads data from an unaligned memory location to elements in a 128-bit
/// vector.
lib/include/riscv_bitmanip.h
@@ -0,0 +1,195 @@
+/*===---- riscv_bitmanip.h - RISC-V Zb* intrinsics --------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __RISCV_BITMANIP_H
+#define __RISCV_BITMANIP_H
+
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#if defined(__riscv_zbb)
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_orc_b_32(uint32_t __x) {
+ return __builtin_riscv_orc_b_32(__x);
+}
+
+static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
+__riscv_clz_32(uint32_t __x) {
+ return __builtin_riscv_clz_32(__x);
+}
+
+static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
+__riscv_ctz_32(uint32_t __x) {
+ return __builtin_riscv_ctz_32(__x);
+}
+
+static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
+__riscv_cpop_32(uint32_t __x) {
+ return __builtin_popcount(__x);
+}
+
+#if __riscv_xlen == 64
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_orc_b_64(uint64_t __x) {
+ return __builtin_riscv_orc_b_64(__x);
+}
+
+static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
+__riscv_clz_64(uint64_t __x) {
+ return __builtin_riscv_clz_64(__x);
+}
+
+static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
+__riscv_ctz_64(uint64_t __x) {
+ return __builtin_riscv_ctz_64(__x);
+}
+
+static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
+__riscv_cpop_64(uint64_t __x) {
+ return __builtin_popcountll(__x);
+}
+#endif
+#endif // defined(__riscv_zbb)
+
+#if defined(__riscv_zbb) || defined(__riscv_zbkb)
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_rev8_32(uint32_t __x) {
+ return __builtin_bswap32(__x);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_rol_32(uint32_t __x, uint32_t __y) {
+ return __builtin_rotateleft32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ror_32(uint32_t __x, uint32_t __y) {
+ return __builtin_rotateright32(__x, __y);
+}
+
+#if __riscv_xlen == 64
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_rev8_64(uint64_t __x) {
+ return __builtin_bswap64(__x);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_rol_64(uint64_t __x, uint32_t __y) {
+ return __builtin_rotateleft64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ror_64(uint64_t __x, uint32_t __y) {
+ return __builtin_rotateright64(__x, __y);
+}
+#endif
+#endif // defined(__riscv_zbb) || defined(__riscv_zbkb)
+
+#if defined(__riscv_zbkb)
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_brev8_32(uint32_t __x) {
+ return __builtin_riscv_brev8_32(__x);
+}
+
+#if __riscv_xlen == 64
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_brev8_64(uint64_t __x) {
+ return __builtin_riscv_brev8_64(__x);
+}
+#endif
+
+#if __riscv_xlen == 32
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_unzip_32(uint32_t __x) {
+ return __builtin_riscv_unzip_32(__x);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_zip_32(uint32_t __x) {
+ return __builtin_riscv_zip_32(__x);
+}
+#endif
+#endif // defined(__riscv_zbkb)
+
+#if defined(__riscv_zbc)
+#if __riscv_xlen == 32
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_clmulr_32(uint32_t __x, uint32_t __y) {
+ return __builtin_riscv_clmulr_32(__x, __y);
+}
+#endif
+
+#if __riscv_xlen == 64
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_clmulr_64(uint64_t __x, uint64_t __y) {
+ return __builtin_riscv_clmulr_64(__x, __y);
+}
+#endif
+#endif // defined(__riscv_zbc)
+
+#if defined(__riscv_zbkc) || defined(__riscv_zbc)
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_clmul_32(uint32_t __x, uint32_t __y) {
+ return __builtin_riscv_clmul_32(__x, __y);
+}
+
+#if __riscv_xlen == 32
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_clmulh_32(uint32_t __x, uint32_t __y) {
+ return __builtin_riscv_clmulh_32(__x, __y);
+}
+#endif
+
+#if __riscv_xlen == 64
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_clmul_64(uint64_t __x, uint64_t __y) {
+ return __builtin_riscv_clmul_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_clmulh_64(uint64_t __x, uint64_t __y) {
+ return __builtin_riscv_clmulh_64(__x, __y);
+}
+#endif
+#endif // defined(__riscv_zbkc) || defined(__riscv_zbc)
+
+#if defined(__riscv_zbkx)
+#if __riscv_xlen == 32
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_xperm4_32(uint32_t __x, uint32_t __y) {
+ return __builtin_riscv_xperm4_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_xperm8_32(uint32_t __x, uint32_t __y) {
+ return __builtin_riscv_xperm8_32(__x, __y);
+}
+#endif
+
+#if __riscv_xlen == 64
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_xperm4_64(uint64_t __x, uint64_t __y) {
+ return __builtin_riscv_xperm4_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_xperm8_64(uint64_t __x, uint64_t __y) {
+ return __builtin_riscv_xperm8_64(__x, __y);
+}
+#endif
+#endif // defined(__riscv_zbkx)
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
lib/include/riscv_crypto.h
@@ -0,0 +1,170 @@
+/*===---- riscv_crypto.h - RISC-V Zk* intrinsics ---------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __RISCV_CRYPTO_H
+#define __RISCV_CRYPTO_H
+
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#if defined(__riscv_zknd)
+#if __riscv_xlen == 32
+#define __riscv_aes32dsi(x, y, bs) __builtin_riscv_aes32dsi(x, y, bs)
+#define __riscv_aes32dsmi(x, y, bs) __builtin_riscv_aes32dsmi(x, y, bs)
+#endif
+
+#if __riscv_xlen == 64
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_aes64ds(uint64_t __x, uint64_t __y) {
+ return __builtin_riscv_aes64ds(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_aes64dsm(uint64_t __x, uint64_t __y) {
+ return __builtin_riscv_aes64dsm(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_aes64im(uint64_t __x) {
+ return __builtin_riscv_aes64im(__x);
+}
+#endif
+#endif // defined(__riscv_zknd)
+
+#if defined(__riscv_zkne)
+#if __riscv_xlen == 32
+#define __riscv_aes32esi(x, y, bs) __builtin_riscv_aes32esi(x, y, bs)
+#define __riscv_aes32esmi(x, y, bs) __builtin_riscv_aes32esmi(x, y, bs)
+#endif
+
+#if __riscv_xlen == 64
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_aes64es(uint64_t __x, uint64_t __y) {
+ return __builtin_riscv_aes64es(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_aes64esm(uint64_t __x, uint64_t __y) {
+ return __builtin_riscv_aes64esm(__x, __y);
+}
+#endif
+#endif // defined(__riscv_zkne)
+
+#if defined(__riscv_zknd) || defined(__riscv_zkne)
+#if __riscv_xlen == 64
+#define __riscv_aes64ks1i(x, rnum) __builtin_riscv_aes64ks1i(x, rnum)
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_aes64ks2(uint64_t __x, uint64_t __y) {
+ return __builtin_riscv_aes64ks2(__x, __y);
+}
+#endif
+#endif // defined(__riscv_zknd) || defined(__riscv_zkne)
+
+#if defined(__riscv_zknh)
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha256sig0(uint32_t __x) {
+ return __builtin_riscv_sha256sig0(__x);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha256sig1(uint32_t __x) {
+ return __builtin_riscv_sha256sig1(__x);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha256sum0(uint32_t __x) {
+ return __builtin_riscv_sha256sum0(__x);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha256sum1(uint32_t __x) {
+ return __builtin_riscv_sha256sum1(__x);
+}
+
+#if __riscv_xlen == 32
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha512sig0h(uint32_t __x, uint32_t __y) {
+ return __builtin_riscv_sha512sig0h(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha512sig0l(uint32_t __x, uint32_t __y) {
+ return __builtin_riscv_sha512sig0l(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha512sig1h(uint32_t __x, uint32_t __y) {
+ return __builtin_riscv_sha512sig1h(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha512sig1l(uint32_t __x, uint32_t __y) {
+ return __builtin_riscv_sha512sig1l(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha512sum0r(uint32_t __x, uint32_t __y) {
+ return __builtin_riscv_sha512sum0r(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha512sum1r(uint32_t __x, uint32_t __y) {
+ return __builtin_riscv_sha512sum1r(__x, __y);
+}
+#endif
+
+#if __riscv_xlen == 64
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha512sig0(uint64_t __x) {
+ return __builtin_riscv_sha512sig0(__x);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha512sig1(uint64_t __x) {
+ return __builtin_riscv_sha512sig1(__x);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha512sum0(uint64_t __x) {
+ return __builtin_riscv_sha512sum0(__x);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sha512sum1(uint64_t __x) {
+ return __builtin_riscv_sha512sum1(__x);
+}
+#endif
+#endif // defined(__riscv_zknh)
+
+#if defined(__riscv_zksh)
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sm3p0(uint32_t __x) {
+ return __builtin_riscv_sm3p0(__x);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sm3p1(uint32_t __x) {
+ return __builtin_riscv_sm3p1(__x);
+}
+#endif // defined(__riscv_zksh)
+
+#if defined(__riscv_zksed)
+#define __riscv_sm4ed(x, y, bs) __builtin_riscv_sm4ed(x, y, bs) /* no trailing ';': function-like macro must be usable in expressions */
+#define __riscv_sm4ks(x, y, bs) __builtin_riscv_sm4ks(x, y, bs) /* ditto; a stray ';' here breaks `a = __riscv_sm4ks(...) + b;` */
+#endif // defined(__riscv_zksed)
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
lib/include/riscv_ntlh.h
@@ -21,8 +21,6 @@ enum {
__RISCV_NTLH_ALL
};
-#define __riscv_ntl_load(PTR, DOMAIN) __builtin_riscv_ntl_load((PTR), (DOMAIN))
-#define __riscv_ntl_store(PTR, VAL, DOMAIN) \
- __builtin_riscv_ntl_store((PTR), (VAL), (DOMAIN))
-
-#endif
\ No newline at end of file
+#define __riscv_ntl_load __builtin_riscv_ntl_load
+#define __riscv_ntl_store __builtin_riscv_ntl_store
+#endif
lib/include/riscv_vector.h
@@ -392,6 +392,37 @@ typedef __rvv_float64m2x4_t vfloat64m2x4_t;
typedef __rvv_float64m4_t vfloat64m4_t;
typedef __rvv_float64m4x2_t vfloat64m4x2_t;
typedef __rvv_float64m8_t vfloat64m8_t;
+typedef __rvv_bfloat16mf4_t vbfloat16mf4_t;
+typedef __rvv_bfloat16mf4x2_t vbfloat16mf4x2_t;
+typedef __rvv_bfloat16mf4x3_t vbfloat16mf4x3_t;
+typedef __rvv_bfloat16mf4x4_t vbfloat16mf4x4_t;
+typedef __rvv_bfloat16mf4x5_t vbfloat16mf4x5_t;
+typedef __rvv_bfloat16mf4x6_t vbfloat16mf4x6_t;
+typedef __rvv_bfloat16mf4x7_t vbfloat16mf4x7_t;
+typedef __rvv_bfloat16mf4x8_t vbfloat16mf4x8_t;
+typedef __rvv_bfloat16mf2_t vbfloat16mf2_t;
+typedef __rvv_bfloat16mf2x2_t vbfloat16mf2x2_t;
+typedef __rvv_bfloat16mf2x3_t vbfloat16mf2x3_t;
+typedef __rvv_bfloat16mf2x4_t vbfloat16mf2x4_t;
+typedef __rvv_bfloat16mf2x5_t vbfloat16mf2x5_t;
+typedef __rvv_bfloat16mf2x6_t vbfloat16mf2x6_t;
+typedef __rvv_bfloat16mf2x7_t vbfloat16mf2x7_t;
+typedef __rvv_bfloat16mf2x8_t vbfloat16mf2x8_t;
+typedef __rvv_bfloat16m1_t vbfloat16m1_t;
+typedef __rvv_bfloat16m1x2_t vbfloat16m1x2_t;
+typedef __rvv_bfloat16m1x3_t vbfloat16m1x3_t;
+typedef __rvv_bfloat16m1x4_t vbfloat16m1x4_t;
+typedef __rvv_bfloat16m1x5_t vbfloat16m1x5_t;
+typedef __rvv_bfloat16m1x6_t vbfloat16m1x6_t;
+typedef __rvv_bfloat16m1x7_t vbfloat16m1x7_t;
+typedef __rvv_bfloat16m1x8_t vbfloat16m1x8_t;
+typedef __rvv_bfloat16m2_t vbfloat16m2_t;
+typedef __rvv_bfloat16m2x2_t vbfloat16m2x2_t;
+typedef __rvv_bfloat16m2x3_t vbfloat16m2x3_t;
+typedef __rvv_bfloat16m2x4_t vbfloat16m2x4_t;
+typedef __rvv_bfloat16m4_t vbfloat16m4_t;
+typedef __rvv_bfloat16m4x2_t vbfloat16m4x2_t;
+typedef __rvv_bfloat16m8_t vbfloat16m8_t;
#define __riscv_v_intrinsic_overloading 1
#ifdef __cplusplus
lib/include/smmintrin.h
@@ -18,8 +18,8 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
- __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), \
- __min_vector_width__(128)))
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("sse4.1,no-evex512"), __min_vector_width__(128)))
/* SSE4 Rounding macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
@@ -645,7 +645,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1,
/// \returns A 128-bit integer vector containing the data stored at the
/// specified memory location.
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_stream_load_si128(__m128i const *__V) {
+_mm_stream_load_si128(const void *__V) {
return (__m128i)__builtin_nontemporal_load((const __v2di *)__V);
}
lib/include/stdalign.h
@@ -10,10 +10,8 @@
#ifndef __STDALIGN_H
#define __STDALIGN_H
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
#if defined(__cplusplus) || \
- (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202000L)
+ (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L)
#ifndef __cplusplus
#define alignas _Alignas
#define alignof _Alignof
lib/include/stdarg.h
@@ -7,45 +7,73 @@
*===-----------------------------------------------------------------------===
*/
-#ifndef __STDARG_H
-
-#ifndef __GNUC_VA_LIST
-#define __GNUC_VA_LIST
-typedef __builtin_va_list __gnuc_va_list;
-#endif
+/*
+ * This header is designed to be included multiple times. If any of the __need_
+ * macros are defined, then only that subset of interfaces are provided. This
+ * can be useful for POSIX headers that need to not expose all of stdarg.h, but
+ * need to use some of its interfaces. Otherwise this header provides all of
+ * the expected interfaces.
+ *
+ * When clang modules are enabled, this header is a textual header. It ignores
+ * its header guard so that multiple submodules can export its interfaces.
+ * Take module SM with submodules A and B, whose headers both include stdarg.h
+ * When SM.A builds, __STDARG_H will be defined. When SM.B builds, the
+ * definition from SM.A will leak when building without local submodule
+ * visibility. stdarg.h wouldn't include any of its implementation headers, and
+ * SM.B wouldn't import any of the stdarg modules, and SM.B's `export *`
+ * wouldn't export any stdarg interfaces as expected. However, since stdarg.h
+ * ignores its header guard when building with modules, it all works as
+ * expected.
+ *
+ * When clang modules are not enabled, the header guards can function in the
+ * normal simple fashion.
+ */
+#if !defined(__STDARG_H) || __has_feature(modules) || \
+ defined(__need___va_list) || defined(__need_va_list) || \
+ defined(__need_va_arg) || defined(__need___va_copy) || \
+ defined(__need_va_copy)
-#ifdef __need___va_list
-#undef __need___va_list
-#else
+#if !defined(__need___va_list) && !defined(__need_va_list) && \
+ !defined(__need_va_arg) && !defined(__need___va_copy) && \
+ !defined(__need_va_copy)
#define __STDARG_H
-#ifndef _VA_LIST
-typedef __builtin_va_list va_list;
-#define _VA_LIST
-#endif
-
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
-/* C2x does not require the second parameter for va_start. */
-#define va_start(ap, ...) __builtin_va_start(ap, 0)
-#else
-/* Versions before C2x do require the second parameter. */
-#define va_start(ap, param) __builtin_va_start(ap, param)
-#endif
-#define va_end(ap) __builtin_va_end(ap)
-#define va_arg(ap, type) __builtin_va_arg(ap, type)
-
+#define __need___va_list
+#define __need_va_list
+#define __need_va_arg
+#define __need___va_copy
/* GCC always defines __va_copy, but does not define va_copy unless in c99 mode
* or -ansi is not specified, since it was not part of C90.
*/
-#define __va_copy(d,s) __builtin_va_copy(d,s)
-
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
(defined(__cplusplus) && __cplusplus >= 201103L) || \
!defined(__STRICT_ANSI__)
-#define va_copy(dest, src) __builtin_va_copy(dest, src)
+#define __need_va_copy
+#endif
#endif
-#endif /* __STDARG_H */
+#ifdef __need___va_list
+#include <__stdarg___gnuc_va_list.h>
+#undef __need___va_list
+#endif /* defined(__need___va_list) */
+
+#ifdef __need_va_list
+#include <__stdarg_va_list.h>
+#undef __need_va_list
+#endif /* defined(__need_va_list) */
+
+#ifdef __need_va_arg
+#include <__stdarg_va_arg.h>
+#undef __need_va_arg
+#endif /* defined(__need_va_arg) */
+
+#ifdef __need___va_copy
+#include <__stdarg___va_copy.h>
+#undef __need___va_copy
+#endif /* defined(__need___va_copy) */
-#endif /* not __STDARG_H */
+#ifdef __need_va_copy
+#include <__stdarg_va_copy.h>
+#undef __need_va_copy
+#endif /* defined(__need_va_copy) */
+
+#endif
lib/include/stdatomic.h
@@ -45,16 +45,14 @@ extern "C" {
#define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE
/* 7.17.2 Initialization */
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202000L) || \
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L) || \
defined(__cplusplus)
-/* ATOMIC_VAR_INIT was removed in C2x, but still remains in C++23. */
+/* ATOMIC_VAR_INIT was removed in C23, but still remains in C++23. */
#define ATOMIC_VAR_INIT(value) (value)
#endif
#if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201710L && \
- __STDC_VERSION__ < 202000L) || \
+ __STDC_VERSION__ < 202311L) || \
(defined(__cplusplus) && __cplusplus >= 202002L)) && \
!defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS)
/* ATOMIC_VAR_INIT was deprecated in C17 and C++20. */
lib/include/stdckdint.h
@@ -0,0 +1,42 @@
+/*===---- stdckdint.h - Standard header for checking integer----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __STDCKDINT_H
+#define __STDCKDINT_H
+
+/* If we're hosted, fall back to the system's stdckdint.h. FreeBSD, for
+ * example, already has a Clang-compatible stdckdint.h header.
+ *
+ * The `stdckdint.h` header requires C23 or newer.
+ */
+#if __STDC_HOSTED__ && __has_include_next(<stdckdint.h>)
+#include_next <stdckdint.h>
+#else
+
+/* C23 7.20.1 defines several macros for performing checked integer arithmetic. */
+
+#define __STDC_VERSION_STDCKDINT_H__ 202311L
+
+// Both A and B shall be any integer type other than "plain" char, bool, a bit-
+// precise integer type, or an enumerated type, and they need not be the same.
+
+// R shall be a modifiable lvalue of any integer type other than "plain" char,
+// bool, a bit-precise integer type, or an enumerated type. It shouldn't be a
+// short type, either; otherwise, it may be unable to hold the result of
+// operating on two 'int's.
+
+// A diagnostic message will be produced if A or B are not suitable integer
+// types, or if R is not a modifiable lvalue of a suitable integer type or R
+// is short type.
+#define ckd_add(R, A, B) __builtin_add_overflow((A), (B), (R))
+#define ckd_sub(R, A, B) __builtin_sub_overflow((A), (B), (R))
+#define ckd_mul(R, A, B) __builtin_mul_overflow((A), (B), (R))
+
+#endif /* __STDC_HOSTED__ */
+#endif /* __STDCKDINT_H */
lib/include/stddef.h
@@ -7,126 +7,116 @@
*===-----------------------------------------------------------------------===
*/
-#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \
- defined(__need_size_t) || defined(__need_wchar_t) || \
- defined(__need_NULL) || defined(__need_wint_t)
+/*
+ * This header is designed to be included multiple times. If any of the __need_
+ * macros are defined, then only that subset of interfaces are provided. This
+ * can be useful for POSIX headers that need to not expose all of stddef.h, but
+ * need to use some of its interfaces. Otherwise this header provides all of
+ * the expected interfaces.
+ *
+ * When clang modules are enabled, this header is a textual header. It ignores
+ * its header guard so that multiple submodules can export its interfaces.
+ * Take module SM with submodules A and B, whose headers both include stddef.h
+ * When SM.A builds, __STDDEF_H will be defined. When SM.B builds, the
+ * definition from SM.A will leak when building without local submodule
+ * visibility. stddef.h wouldn't include any of its implementation headers, and
+ * SM.B wouldn't import any of the stddef modules, and SM.B's `export *`
+ * wouldn't export any stddef interfaces as expected. However, since stddef.h
+ * ignores its header guard when building with modules, it all works as
+ * expected.
+ *
+ * When clang modules are not enabled, the header guards can function in the
+ * normal simple fashion.
+ */
+#if !defined(__STDDEF_H) || __has_feature(modules) || \
+ (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1) || \
+ defined(__need_ptrdiff_t) || defined(__need_size_t) || \
+ defined(__need_rsize_t) || defined(__need_wchar_t) || \
+ defined(__need_NULL) || defined(__need_nullptr_t) || \
+ defined(__need_unreachable) || defined(__need_max_align_t) || \
+ defined(__need_offsetof) || defined(__need_wint_t)
#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \
- !defined(__need_wchar_t) && !defined(__need_NULL) && \
- !defined(__need_wint_t)
-/* Always define miscellaneous pieces when modules are available. */
-#if !__has_feature(modules)
+ !defined(__need_rsize_t) && !defined(__need_wchar_t) && \
+ !defined(__need_NULL) && !defined(__need_nullptr_t) && \
+ !defined(__need_unreachable) && !defined(__need_max_align_t) && \
+ !defined(__need_offsetof) && !defined(__need_wint_t)
#define __STDDEF_H
-#endif
#define __need_ptrdiff_t
#define __need_size_t
+/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is
+ * enabled. */
+#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1
+#define __need_rsize_t
+#endif
#define __need_wchar_t
#define __need_NULL
-#define __need_STDDEF_H_misc
-/* __need_wint_t is intentionally not defined here. */
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \
+ defined(__cplusplus)
+#define __need_nullptr_t
#endif
-
-#if defined(__need_ptrdiff_t)
-#if !defined(_PTRDIFF_T) || __has_feature(modules)
-/* Always define ptrdiff_t when modules are available. */
-#if !__has_feature(modules)
-#define _PTRDIFF_T
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
+#define __need_unreachable
#endif
-typedef __PTRDIFF_TYPE__ ptrdiff_t;
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \
+ (defined(__cplusplus) && __cplusplus >= 201103L)
+#define __need_max_align_t
#endif
+#define __need_offsetof
+/* wint_t is provided by <wchar.h> and not <stddef.h>. It's here
+ * for compatibility, but must be explicitly requested. Therefore
+ * __need_wint_t is intentionally not defined here. */
+#endif
+
+#if defined(__need_ptrdiff_t)
+#include <__stddef_ptrdiff_t.h>
#undef __need_ptrdiff_t
#endif /* defined(__need_ptrdiff_t) */
#if defined(__need_size_t)
-#if !defined(_SIZE_T) || __has_feature(modules)
-/* Always define size_t when modules are available. */
-#if !__has_feature(modules)
-#define _SIZE_T
-#endif
-typedef __SIZE_TYPE__ size_t;
-#endif
+#include <__stddef_size_t.h>
#undef __need_size_t
#endif /*defined(__need_size_t) */
-#if defined(__need_STDDEF_H_misc)
-/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is
- * enabled. */
-#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \
- !defined(_RSIZE_T)) || __has_feature(modules)
-/* Always define rsize_t when modules are available. */
-#if !__has_feature(modules)
-#define _RSIZE_T
-#endif
-typedef __SIZE_TYPE__ rsize_t;
-#endif
-#endif /* defined(__need_STDDEF_H_misc) */
+#if defined(__need_rsize_t)
+#include <__stddef_rsize_t.h>
+#undef __need_rsize_t
+#endif /* defined(__need_rsize_t) */
#if defined(__need_wchar_t)
-#if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED)
-/* Always define wchar_t when modules are available. */
-#if !defined(_WCHAR_T) || __has_feature(modules)
-#if !__has_feature(modules)
-#define _WCHAR_T
-#if defined(_MSC_EXTENSIONS)
-#define _WCHAR_T_DEFINED
-#endif
-#endif
-typedef __WCHAR_TYPE__ wchar_t;
-#endif
-#endif
+#include <__stddef_wchar_t.h>
#undef __need_wchar_t
#endif /* defined(__need_wchar_t) */
#if defined(__need_NULL)
-#undef NULL
-#ifdef __cplusplus
-# if !defined(__MINGW32__) && !defined(_MSC_VER)
-# define NULL __null
-# else
-# define NULL 0
-# endif
-#else
-# define NULL ((void*)0)
-#endif
-#ifdef __cplusplus
-#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
-namespace std { typedef decltype(nullptr) nullptr_t; }
-using ::std::nullptr_t;
-#endif
-#endif
+#include <__stddef_null.h>
#undef __need_NULL
#endif /* defined(__need_NULL) */
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
-typedef typeof(nullptr) nullptr_t;
-#endif /* defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L */
+#if defined(__need_nullptr_t)
+#include <__stddef_nullptr_t.h>
+#undef __need_nullptr_t
+#endif /* defined(__need_nullptr_t) */
-#if defined(__need_STDDEF_H_misc) && defined(__STDC_VERSION__) && \
- __STDC_VERSION__ >= 202000L
-#define unreachable() __builtin_unreachable()
-#endif /* defined(__need_STDDEF_H_misc) && >= C23 */
+#if defined(__need_unreachable)
+#include <__stddef_unreachable.h>
+#undef __need_unreachable
+#endif /* defined(__need_unreachable) */
-#if defined(__need_STDDEF_H_misc)
-#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \
- (defined(__cplusplus) && __cplusplus >= 201103L)
-#include "__stddef_max_align_t.h"
-#endif
-#define offsetof(t, d) __builtin_offsetof(t, d)
-#undef __need_STDDEF_H_misc
-#endif /* defined(__need_STDDEF_H_misc) */
+#if defined(__need_max_align_t)
+#include <__stddef_max_align_t.h>
+#undef __need_max_align_t
+#endif /* defined(__need_max_align_t) */
+
+#if defined(__need_offsetof)
+#include <__stddef_offsetof.h>
+#undef __need_offsetof
+#endif /* defined(__need_offsetof) */
/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use
__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */
#if defined(__need_wint_t)
-/* Always define wint_t when modules are available. */
-#if !defined(_WINT_T) || __has_feature(modules)
-#if !__has_feature(modules)
-#define _WINT_T
-#endif
-typedef __WINT_TYPE__ wint_t;
-#endif
+#include <__stddef_wint_t.h>
#undef __need_wint_t
#endif /* __need_wint_t */
lib/include/stdint.h
@@ -499,9 +499,8 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# define INT64_MAX INT64_C( 9223372036854775807)
# define INT64_MIN (-INT64_C( 9223372036854775807)-1)
# define UINT64_MAX UINT64_C(18446744073709551615)
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT64_WIDTH 64
# define INT64_WIDTH UINT64_WIDTH
@@ -545,9 +544,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# define INT_FAST64_MAX __INT_LEAST64_MAX
# define UINT_FAST64_MAX __UINT_LEAST64_MAX
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT_LEAST64_WIDTH __UINT_LEAST64_WIDTH
# define INT_LEAST64_WIDTH UINT_LEAST64_WIDTH
# define UINT_FAST64_WIDTH __UINT_LEAST64_WIDTH
@@ -586,9 +583,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT56_MAX
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT56_WIDTH 56
# define INT56_WIDTH UINT56_WIDTH
# define UINT_LEAST56_WIDTH UINT56_WIDTH
@@ -635,9 +630,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT48_MAX
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define UINT48_WIDTH 48
#define INT48_WIDTH UINT48_WIDTH
#define UINT_LEAST48_WIDTH UINT48_WIDTH
@@ -684,9 +677,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT40_MAX
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT40_WIDTH 40
# define INT40_WIDTH UINT40_WIDTH
# define UINT_LEAST40_WIDTH UINT40_WIDTH
@@ -727,9 +718,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT32_MAX
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT32_WIDTH 32
# define INT32_WIDTH UINT32_WIDTH
# undef __UINT_LEAST32_WIDTH
@@ -749,9 +738,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# define INT_FAST32_MAX __INT_LEAST32_MAX
# define UINT_FAST32_MAX __UINT_LEAST32_MAX
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT_LEAST32_WIDTH __UINT_LEAST32_WIDTH
# define INT_LEAST32_WIDTH UINT_LEAST32_WIDTH
# define UINT_FAST32_WIDTH __UINT_LEAST32_WIDTH
@@ -784,9 +771,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT24_MAX
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT24_WIDTH 24
# define INT24_WIDTH UINT24_WIDTH
# define UINT_LEAST24_WIDTH UINT24_WIDTH
@@ -819,9 +804,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT16_MAX
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT16_WIDTH 16
# define INT16_WIDTH UINT16_WIDTH
# undef __UINT_LEAST16_WIDTH
@@ -839,9 +822,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# define INT_FAST16_MAX __INT_LEAST16_MAX
# define UINT_FAST16_MAX __UINT_LEAST16_MAX
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT_LEAST16_WIDTH __UINT_LEAST16_WIDTH
# define INT_LEAST16_WIDTH UINT_LEAST16_WIDTH
# define UINT_FAST16_WIDTH __UINT_LEAST16_WIDTH
@@ -862,9 +843,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT8_MAX
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT8_WIDTH 8
# define INT8_WIDTH UINT8_WIDTH
# undef __UINT_LEAST8_WIDTH
@@ -880,9 +859,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# define INT_FAST8_MAX __INT_LEAST8_MAX
# define UINT_FAST8_MAX __UINT_LEAST8_MAX
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT_LEAST8_WIDTH __UINT_LEAST8_WIDTH
# define INT_LEAST8_WIDTH UINT_LEAST8_WIDTH
# define UINT_FAST8_WIDTH __UINT_LEAST8_WIDTH
@@ -907,10 +884,8 @@ typedef __UINTMAX_TYPE__ uintmax_t;
#define PTRDIFF_MAX __PTRDIFF_MAX__
#define SIZE_MAX __SIZE_MAX__
-/* C2x 7.20.2.4 Width of integer types capable of holding object pointers. */
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+/* C23 7.22.2.4 Width of integer types capable of holding object pointers. */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
/* NB: The C standard requires that these be the same value, but the compiler
exposes separate internal width macros. */
#define INTPTR_WIDTH __INTPTR_WIDTH__
@@ -928,10 +903,8 @@ typedef __UINTMAX_TYPE__ uintmax_t;
#define INTMAX_MAX __INTMAX_MAX__
#define UINTMAX_MAX __UINTMAX_MAX__
-/* C2x 7.20.2.5 Width of greatest-width integer types. */
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+/* C23 7.22.2.5 Width of greatest-width integer types. */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
/* NB: The C standard requires that these be the same value, but the compiler
exposes separate internal width macros. */
#define INTMAX_WIDTH __INTMAX_WIDTH__
@@ -964,10 +937,8 @@ typedef __UINTMAX_TYPE__ uintmax_t;
#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__)
#define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__)
-/* C2x 7.20.3.x Width of other integer types. */
-/* FIXME: This is using the placeholder dates Clang produces for these macros
- in C2x mode; switch to the correct values once they've been published. */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+/* C23 7.22.3.x Width of other integer types. */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define PTRDIFF_WIDTH __PTRDIFF_WIDTH__
#define SIG_ATOMIC_WIDTH __SIG_ATOMIC_WIDTH__
#define SIZE_WIDTH __SIZE_WIDTH__
lib/include/stdnoreturn.h
@@ -15,8 +15,8 @@
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) && \
!defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS)
-/* The noreturn macro is deprecated in C2x. We do not mark it as such because
- including the header file in C2x is also deprecated and we do not want to
+/* The noreturn macro is deprecated in C23. We do not mark it as such because
+ including the header file in C23 is also deprecated and we do not want to
issue a confusing diagnostic for code which includes <stdnoreturn.h>
followed by code that writes [[noreturn]]. The issue with such code is not
with the attribute, or the use of 'noreturn', but the inclusion of the
lib/include/tmmintrin.h
@@ -17,8 +17,13 @@
#include <pmmintrin.h>
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
-#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("ssse3,no-evex512"), __min_vector_width__(64)))
+#define __DEFAULT_FN_ATTRS_MMX \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("mmx,ssse3,no-evex512"), \
+ __min_vector_width__(64)))
/// Computes the absolute value of each of the packed 8-bit signed
/// integers in the source operand and stores the 8-bit unsigned integer
lib/include/usermsrintrin.h
@@ -0,0 +1,51 @@
+/*===--------------- usermsrintrin.h - USERMSR intrinsics -----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __X86GPRINTRIN_H
+#error "Never use <usermsrintrin.h> directly; include <x86gprintrin.h> instead."
+#endif // __X86GPRINTRIN_H
+
+#ifndef __USERMSRINTRIN_H
+#define __USERMSRINTRIN_H
+#ifdef __x86_64__
+
+/// Reads the contents of a 64-bit MSR specified in \a __A into \a dst.
+///
+/// This intrinsic corresponds to the <c> URDMSR </c> instruction.
+/// \param __A
+/// An unsigned long long.
+///
+/// \code{.operation}
+/// DEST := MSR[__A]
+/// \endcode
+static __inline__ unsigned long long
+ __attribute__((__always_inline__, __nodebug__, __target__("usermsr")))
+ _urdmsr(unsigned long long __A) {
+ return __builtin_ia32_urdmsr(__A);
+}
+
+/// Writes the contents of \a __B into the 64-bit MSR specified in \a __A.
+///
+/// This intrinsic corresponds to the <c> UWRMSR </c> instruction.
+///
+/// \param __A
+/// An unsigned long long.
+/// \param __B
+/// An unsigned long long.
+///
+/// \code{.operation}
+/// MSR[__A] := __B
+/// \endcode
+static __inline__ void
+ __attribute__((__always_inline__, __nodebug__, __target__("usermsr")))
+ _uwrmsr(unsigned long long __A, unsigned long long __B) {
+ return __builtin_ia32_uwrmsr(__A, __B);
+}
+
+#endif // __x86_64__
+#endif // __USERMSRINTRIN_H
lib/include/vaesintrin.h
@@ -18,8 +18,10 @@
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"), __min_vector_width__(256)))
/* Default attributes for ZMM forms. */
-#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"), __min_vector_width__(512)))
-
+#define __DEFAULT_FN_ATTRS_F \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512f,evex512,vaes"), \
+ __min_vector_width__(512)))
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesenc_epi128(__m256i __A, __m256i __B)
lib/include/vecintrin.h
@@ -1543,7 +1543,7 @@ vec_load_len(const double *__ptr, unsigned int __len) {
#if __ARCH__ >= 12
static inline __ATTRS_ai __vector unsigned char
vec_load_len_r(const unsigned char *__ptr, unsigned int __len) {
- return (__vector unsigned char)__builtin_s390_vlrl(__len, __ptr);
+ return (__vector unsigned char)__builtin_s390_vlrlr(__len, __ptr);
}
#endif
@@ -1617,7 +1617,7 @@ vec_store_len(__vector double __vec, double *__ptr,
static inline __ATTRS_ai void
vec_store_len_r(__vector unsigned char __vec, unsigned char *__ptr,
unsigned int __len) {
- __builtin_s390_vstrl((__vector signed char)__vec, __len, __ptr);
+ __builtin_s390_vstrlr((__vector signed char)__vec, __len, __ptr);
}
#endif
@@ -2689,7 +2689,8 @@ vec_cmplt(__vector double __a, __vector double __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector signed char __a, __vector signed char __b) {
int __cc;
- __builtin_s390_vceqbs(__a, __b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc == 0;
}
@@ -2697,7 +2698,8 @@ vec_all_eq(__vector signed char __a, __vector signed char __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector signed char __a, __vector __bool char __b) {
int __cc;
- __builtin_s390_vceqbs(__a, (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc == 0;
}
@@ -2705,15 +2707,15 @@ vec_all_eq(__vector signed char __a, __vector __bool char __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector __bool char __a, __vector signed char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a, __b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc == 0;
}
static inline __ATTRS_o_ai int
vec_all_eq(__vector unsigned char __a, __vector unsigned char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs(__a, __b, &__cc);
return __cc == 0;
}
@@ -2721,8 +2723,7 @@ vec_all_eq(__vector unsigned char __a, __vector unsigned char __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector unsigned char __a, __vector __bool char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs(__a, (__vector unsigned char)__b, &__cc);
return __cc == 0;
}
@@ -2730,23 +2731,23 @@ vec_all_eq(__vector unsigned char __a, __vector __bool char __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector __bool char __a, __vector unsigned char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a, __b, &__cc);
return __cc == 0;
}
static inline __ATTRS_o_ai int
vec_all_eq(__vector __bool char __a, __vector __bool char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc == 0;
}
static inline __ATTRS_o_ai int
vec_all_eq(__vector signed short __a, __vector signed short __b) {
int __cc;
- __builtin_s390_vceqhs(__a, __b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc == 0;
}
@@ -2754,7 +2755,8 @@ vec_all_eq(__vector signed short __a, __vector signed short __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector signed short __a, __vector __bool short __b) {
int __cc;
- __builtin_s390_vceqhs(__a, (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc == 0;
}
@@ -2762,15 +2764,15 @@ vec_all_eq(__vector signed short __a, __vector __bool short __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector __bool short __a, __vector signed short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a, __b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc == 0;
}
static inline __ATTRS_o_ai int
vec_all_eq(__vector unsigned short __a, __vector unsigned short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs(__a, __b, &__cc);
return __cc == 0;
}
@@ -2778,8 +2780,7 @@ vec_all_eq(__vector unsigned short __a, __vector unsigned short __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector unsigned short __a, __vector __bool short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs(__a, (__vector unsigned short)__b, &__cc);
return __cc == 0;
}
@@ -2787,23 +2788,23 @@ vec_all_eq(__vector unsigned short __a, __vector __bool short __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector __bool short __a, __vector unsigned short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a, __b, &__cc);
return __cc == 0;
}
static inline __ATTRS_o_ai int
vec_all_eq(__vector __bool short __a, __vector __bool short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc == 0;
}
static inline __ATTRS_o_ai int
vec_all_eq(__vector signed int __a, __vector signed int __b) {
int __cc;
- __builtin_s390_vceqfs(__a, __b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc == 0;
}
@@ -2811,7 +2812,8 @@ vec_all_eq(__vector signed int __a, __vector signed int __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector signed int __a, __vector __bool int __b) {
int __cc;
- __builtin_s390_vceqfs(__a, (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc == 0;
}
@@ -2819,15 +2821,15 @@ vec_all_eq(__vector signed int __a, __vector __bool int __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector __bool int __a, __vector signed int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a, __b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc == 0;
}
static inline __ATTRS_o_ai int
vec_all_eq(__vector unsigned int __a, __vector unsigned int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs(__a, __b, &__cc);
return __cc == 0;
}
@@ -2835,8 +2837,7 @@ vec_all_eq(__vector unsigned int __a, __vector unsigned int __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector unsigned int __a, __vector __bool int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs(__a, (__vector unsigned int)__b, &__cc);
return __cc == 0;
}
@@ -2844,23 +2845,23 @@ vec_all_eq(__vector unsigned int __a, __vector __bool int __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector __bool int __a, __vector unsigned int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a, __b, &__cc);
return __cc == 0;
}
static inline __ATTRS_o_ai int
vec_all_eq(__vector __bool int __a, __vector __bool int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc == 0;
}
static inline __ATTRS_o_ai int
vec_all_eq(__vector signed long long __a, __vector signed long long __b) {
int __cc;
- __builtin_s390_vceqgs(__a, __b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc == 0;
}
@@ -2868,7 +2869,8 @@ vec_all_eq(__vector signed long long __a, __vector signed long long __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector signed long long __a, __vector __bool long long __b) {
int __cc;
- __builtin_s390_vceqgs(__a, (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc == 0;
}
@@ -2876,15 +2878,15 @@ vec_all_eq(__vector signed long long __a, __vector __bool long long __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector __bool long long __a, __vector signed long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a, __b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc == 0;
}
static inline __ATTRS_o_ai int
vec_all_eq(__vector unsigned long long __a, __vector unsigned long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs(__a, __b, &__cc);
return __cc == 0;
}
@@ -2892,8 +2894,7 @@ vec_all_eq(__vector unsigned long long __a, __vector unsigned long long __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector unsigned long long __a, __vector __bool long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs(__a, (__vector unsigned long long)__b, &__cc);
return __cc == 0;
}
@@ -2901,16 +2902,15 @@ vec_all_eq(__vector unsigned long long __a, __vector __bool long long __b) {
static inline __ATTRS_o_ai int
vec_all_eq(__vector __bool long long __a, __vector unsigned long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a, __b, &__cc);
return __cc == 0;
}
static inline __ATTRS_o_ai int
vec_all_eq(__vector __bool long long __a, __vector __bool long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc == 0;
}
@@ -2935,7 +2935,8 @@ vec_all_eq(__vector double __a, __vector double __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector signed char __a, __vector signed char __b) {
int __cc;
- __builtin_s390_vceqbs(__a, __b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc == 3;
}
@@ -2943,7 +2944,8 @@ vec_all_ne(__vector signed char __a, __vector signed char __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector signed char __a, __vector __bool char __b) {
int __cc;
- __builtin_s390_vceqbs(__a, (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc == 3;
}
@@ -2951,15 +2953,16 @@ vec_all_ne(__vector signed char __a, __vector __bool char __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector __bool char __a, __vector signed char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a, __b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc == 3;
}
static inline __ATTRS_o_ai int
vec_all_ne(__vector unsigned char __a, __vector unsigned char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc == 3;
}
@@ -2967,8 +2970,7 @@ vec_all_ne(__vector unsigned char __a, __vector unsigned char __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector unsigned char __a, __vector __bool char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs(__a, (__vector unsigned char)__b, &__cc);
return __cc == 3;
}
@@ -2976,23 +2978,23 @@ vec_all_ne(__vector unsigned char __a, __vector __bool char __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector __bool char __a, __vector unsigned char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a, __b, &__cc);
return __cc == 3;
}
static inline __ATTRS_o_ai int
vec_all_ne(__vector __bool char __a, __vector __bool char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc == 3;
}
static inline __ATTRS_o_ai int
vec_all_ne(__vector signed short __a, __vector signed short __b) {
int __cc;
- __builtin_s390_vceqhs(__a, __b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc == 3;
}
@@ -3000,7 +3002,8 @@ vec_all_ne(__vector signed short __a, __vector signed short __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector signed short __a, __vector __bool short __b) {
int __cc;
- __builtin_s390_vceqhs(__a, (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc == 3;
}
@@ -3008,15 +3011,15 @@ vec_all_ne(__vector signed short __a, __vector __bool short __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector __bool short __a, __vector signed short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a, __b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc == 3;
}
static inline __ATTRS_o_ai int
vec_all_ne(__vector unsigned short __a, __vector unsigned short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs(__a, __b, &__cc);
return __cc == 3;
}
@@ -3024,8 +3027,7 @@ vec_all_ne(__vector unsigned short __a, __vector unsigned short __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector unsigned short __a, __vector __bool short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs(__a, (__vector unsigned short)__b, &__cc);
return __cc == 3;
}
@@ -3033,23 +3035,23 @@ vec_all_ne(__vector unsigned short __a, __vector __bool short __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector __bool short __a, __vector unsigned short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a, __b, &__cc);
return __cc == 3;
}
static inline __ATTRS_o_ai int
vec_all_ne(__vector __bool short __a, __vector __bool short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc == 3;
}
static inline __ATTRS_o_ai int
vec_all_ne(__vector signed int __a, __vector signed int __b) {
int __cc;
- __builtin_s390_vceqfs(__a, __b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc == 3;
}
@@ -3057,7 +3059,8 @@ vec_all_ne(__vector signed int __a, __vector signed int __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector signed int __a, __vector __bool int __b) {
int __cc;
- __builtin_s390_vceqfs(__a, (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc == 3;
}
@@ -3065,15 +3068,15 @@ vec_all_ne(__vector signed int __a, __vector __bool int __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector __bool int __a, __vector signed int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a, __b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc == 3;
}
static inline __ATTRS_o_ai int
vec_all_ne(__vector unsigned int __a, __vector unsigned int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs(__a, __b, &__cc);
return __cc == 3;
}
@@ -3081,8 +3084,7 @@ vec_all_ne(__vector unsigned int __a, __vector unsigned int __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector unsigned int __a, __vector __bool int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs(__a, (__vector unsigned int)__b, &__cc);
return __cc == 3;
}
@@ -3090,23 +3092,23 @@ vec_all_ne(__vector unsigned int __a, __vector __bool int __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector __bool int __a, __vector unsigned int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a, __b, &__cc);
return __cc == 3;
}
static inline __ATTRS_o_ai int
vec_all_ne(__vector __bool int __a, __vector __bool int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc == 3;
}
static inline __ATTRS_o_ai int
vec_all_ne(__vector signed long long __a, __vector signed long long __b) {
int __cc;
- __builtin_s390_vceqgs(__a, __b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc == 3;
}
@@ -3114,7 +3116,8 @@ vec_all_ne(__vector signed long long __a, __vector signed long long __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector signed long long __a, __vector __bool long long __b) {
int __cc;
- __builtin_s390_vceqgs(__a, (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc == 3;
}
@@ -3122,15 +3125,15 @@ vec_all_ne(__vector signed long long __a, __vector __bool long long __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector __bool long long __a, __vector signed long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a, __b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc == 3;
}
static inline __ATTRS_o_ai int
vec_all_ne(__vector unsigned long long __a, __vector unsigned long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs(__a, __b, &__cc);
return __cc == 3;
}
@@ -3138,8 +3141,7 @@ vec_all_ne(__vector unsigned long long __a, __vector unsigned long long __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector unsigned long long __a, __vector __bool long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs(__a, (__vector unsigned long long)__b, &__cc);
return __cc == 3;
}
@@ -3147,16 +3149,15 @@ vec_all_ne(__vector unsigned long long __a, __vector __bool long long __b) {
static inline __ATTRS_o_ai int
vec_all_ne(__vector __bool long long __a, __vector unsigned long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a, __b, &__cc);
return __cc == 3;
}
static inline __ATTRS_o_ai int
vec_all_ne(__vector __bool long long __a, __vector __bool long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc == 3;
}
@@ -4241,7 +4242,8 @@ vec_all_numeric(__vector double __a) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector signed char __a, __vector signed char __b) {
int __cc;
- __builtin_s390_vceqbs(__a, __b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc <= 1;
}
@@ -4249,7 +4251,8 @@ vec_any_eq(__vector signed char __a, __vector signed char __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector signed char __a, __vector __bool char __b) {
int __cc;
- __builtin_s390_vceqbs(__a, (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc <= 1;
}
@@ -4257,15 +4260,15 @@ vec_any_eq(__vector signed char __a, __vector __bool char __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector __bool char __a, __vector signed char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a, __b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc <= 1;
}
static inline __ATTRS_o_ai int
vec_any_eq(__vector unsigned char __a, __vector unsigned char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs(__a, __b, &__cc);
return __cc <= 1;
}
@@ -4273,8 +4276,7 @@ vec_any_eq(__vector unsigned char __a, __vector unsigned char __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector unsigned char __a, __vector __bool char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs(__a, (__vector unsigned char)__b, &__cc);
return __cc <= 1;
}
@@ -4282,23 +4284,23 @@ vec_any_eq(__vector unsigned char __a, __vector __bool char __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector __bool char __a, __vector unsigned char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a, __b, &__cc);
return __cc <= 1;
}
static inline __ATTRS_o_ai int
vec_any_eq(__vector __bool char __a, __vector __bool char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc <= 1;
}
static inline __ATTRS_o_ai int
vec_any_eq(__vector signed short __a, __vector signed short __b) {
int __cc;
- __builtin_s390_vceqhs(__a, __b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc <= 1;
}
@@ -4306,7 +4308,8 @@ vec_any_eq(__vector signed short __a, __vector signed short __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector signed short __a, __vector __bool short __b) {
int __cc;
- __builtin_s390_vceqhs(__a, (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc <= 1;
}
@@ -4314,15 +4317,15 @@ vec_any_eq(__vector signed short __a, __vector __bool short __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector __bool short __a, __vector signed short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a, __b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc <= 1;
}
static inline __ATTRS_o_ai int
vec_any_eq(__vector unsigned short __a, __vector unsigned short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs(__a, __b, &__cc);
return __cc <= 1;
}
@@ -4330,8 +4333,7 @@ vec_any_eq(__vector unsigned short __a, __vector unsigned short __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector unsigned short __a, __vector __bool short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs(__a, (__vector unsigned short)__b, &__cc);
return __cc <= 1;
}
@@ -4339,23 +4341,23 @@ vec_any_eq(__vector unsigned short __a, __vector __bool short __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector __bool short __a, __vector unsigned short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a, __b, &__cc);
return __cc <= 1;
}
static inline __ATTRS_o_ai int
vec_any_eq(__vector __bool short __a, __vector __bool short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc <= 1;
}
static inline __ATTRS_o_ai int
vec_any_eq(__vector signed int __a, __vector signed int __b) {
int __cc;
- __builtin_s390_vceqfs(__a, __b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc <= 1;
}
@@ -4363,7 +4365,8 @@ vec_any_eq(__vector signed int __a, __vector signed int __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector signed int __a, __vector __bool int __b) {
int __cc;
- __builtin_s390_vceqfs(__a, (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc <= 1;
}
@@ -4371,15 +4374,15 @@ vec_any_eq(__vector signed int __a, __vector __bool int __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector __bool int __a, __vector signed int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a, __b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc <= 1;
}
static inline __ATTRS_o_ai int
vec_any_eq(__vector unsigned int __a, __vector unsigned int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs(__a, __b, &__cc);
return __cc <= 1;
}
@@ -4387,8 +4390,7 @@ vec_any_eq(__vector unsigned int __a, __vector unsigned int __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector unsigned int __a, __vector __bool int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs(__a, (__vector unsigned int)__b, &__cc);
return __cc <= 1;
}
@@ -4396,23 +4398,23 @@ vec_any_eq(__vector unsigned int __a, __vector __bool int __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector __bool int __a, __vector unsigned int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a, __b, &__cc);
return __cc <= 1;
}
static inline __ATTRS_o_ai int
vec_any_eq(__vector __bool int __a, __vector __bool int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc <= 1;
}
static inline __ATTRS_o_ai int
vec_any_eq(__vector signed long long __a, __vector signed long long __b) {
int __cc;
- __builtin_s390_vceqgs(__a, __b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc <= 1;
}
@@ -4420,7 +4422,8 @@ vec_any_eq(__vector signed long long __a, __vector signed long long __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector signed long long __a, __vector __bool long long __b) {
int __cc;
- __builtin_s390_vceqgs(__a, (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc <= 1;
}
@@ -4428,15 +4431,15 @@ vec_any_eq(__vector signed long long __a, __vector __bool long long __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector __bool long long __a, __vector signed long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a, __b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc <= 1;
}
static inline __ATTRS_o_ai int
vec_any_eq(__vector unsigned long long __a, __vector unsigned long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs(__a, __b, &__cc);
return __cc <= 1;
}
@@ -4444,8 +4447,7 @@ vec_any_eq(__vector unsigned long long __a, __vector unsigned long long __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector unsigned long long __a, __vector __bool long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs(__a, (__vector unsigned long long)__b, &__cc);
return __cc <= 1;
}
@@ -4453,16 +4455,15 @@ vec_any_eq(__vector unsigned long long __a, __vector __bool long long __b) {
static inline __ATTRS_o_ai int
vec_any_eq(__vector __bool long long __a, __vector unsigned long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a, __b, &__cc);
return __cc <= 1;
}
static inline __ATTRS_o_ai int
vec_any_eq(__vector __bool long long __a, __vector __bool long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc <= 1;
}
@@ -4487,7 +4488,8 @@ vec_any_eq(__vector double __a, __vector double __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector signed char __a, __vector signed char __b) {
int __cc;
- __builtin_s390_vceqbs(__a, __b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc != 0;
}
@@ -4495,7 +4497,8 @@ vec_any_ne(__vector signed char __a, __vector signed char __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector signed char __a, __vector __bool char __b) {
int __cc;
- __builtin_s390_vceqbs(__a, (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc != 0;
}
@@ -4503,15 +4506,15 @@ vec_any_ne(__vector signed char __a, __vector __bool char __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector __bool char __a, __vector signed char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a, __b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc != 0;
}
static inline __ATTRS_o_ai int
vec_any_ne(__vector unsigned char __a, __vector unsigned char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs(__a, __b, &__cc);
return __cc != 0;
}
@@ -4519,8 +4522,7 @@ vec_any_ne(__vector unsigned char __a, __vector unsigned char __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector unsigned char __a, __vector __bool char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs(__a, (__vector unsigned char)__b, &__cc);
return __cc != 0;
}
@@ -4528,23 +4530,23 @@ vec_any_ne(__vector unsigned char __a, __vector __bool char __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector __bool char __a, __vector unsigned char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a, __b, &__cc);
return __cc != 0;
}
static inline __ATTRS_o_ai int
vec_any_ne(__vector __bool char __a, __vector __bool char __b) {
int __cc;
- __builtin_s390_vceqbs((__vector signed char)__a,
- (__vector signed char)__b, &__cc);
+ __builtin_s390_vceqbs((__vector unsigned char)__a,
+ (__vector unsigned char)__b, &__cc);
return __cc != 0;
}
static inline __ATTRS_o_ai int
vec_any_ne(__vector signed short __a, __vector signed short __b) {
int __cc;
- __builtin_s390_vceqhs(__a, __b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc != 0;
}
@@ -4552,7 +4554,8 @@ vec_any_ne(__vector signed short __a, __vector signed short __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector signed short __a, __vector __bool short __b) {
int __cc;
- __builtin_s390_vceqhs(__a, (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc != 0;
}
@@ -4560,15 +4563,15 @@ vec_any_ne(__vector signed short __a, __vector __bool short __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector __bool short __a, __vector signed short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a, __b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc != 0;
}
static inline __ATTRS_o_ai int
vec_any_ne(__vector unsigned short __a, __vector unsigned short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs(__a, __b, &__cc);
return __cc != 0;
}
@@ -4576,8 +4579,7 @@ vec_any_ne(__vector unsigned short __a, __vector unsigned short __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector unsigned short __a, __vector __bool short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs(__a, (__vector unsigned short)__b, &__cc);
return __cc != 0;
}
@@ -4585,23 +4587,23 @@ vec_any_ne(__vector unsigned short __a, __vector __bool short __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector __bool short __a, __vector unsigned short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a, __b, &__cc);
return __cc != 0;
}
static inline __ATTRS_o_ai int
vec_any_ne(__vector __bool short __a, __vector __bool short __b) {
int __cc;
- __builtin_s390_vceqhs((__vector signed short)__a,
- (__vector signed short)__b, &__cc);
+ __builtin_s390_vceqhs((__vector unsigned short)__a,
+ (__vector unsigned short)__b, &__cc);
return __cc != 0;
}
static inline __ATTRS_o_ai int
vec_any_ne(__vector signed int __a, __vector signed int __b) {
int __cc;
- __builtin_s390_vceqfs(__a, __b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc != 0;
}
@@ -4609,7 +4611,8 @@ vec_any_ne(__vector signed int __a, __vector signed int __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector signed int __a, __vector __bool int __b) {
int __cc;
- __builtin_s390_vceqfs(__a, (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc != 0;
}
@@ -4617,15 +4620,15 @@ vec_any_ne(__vector signed int __a, __vector __bool int __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector __bool int __a, __vector signed int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a, __b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc != 0;
}
static inline __ATTRS_o_ai int
vec_any_ne(__vector unsigned int __a, __vector unsigned int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs(__a, __b, &__cc);
return __cc != 0;
}
@@ -4633,8 +4636,7 @@ vec_any_ne(__vector unsigned int __a, __vector unsigned int __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector unsigned int __a, __vector __bool int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs(__a, (__vector unsigned int)__b, &__cc);
return __cc != 0;
}
@@ -4642,23 +4644,23 @@ vec_any_ne(__vector unsigned int __a, __vector __bool int __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector __bool int __a, __vector unsigned int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a, __b, &__cc);
return __cc != 0;
}
static inline __ATTRS_o_ai int
vec_any_ne(__vector __bool int __a, __vector __bool int __b) {
int __cc;
- __builtin_s390_vceqfs((__vector signed int)__a,
- (__vector signed int)__b, &__cc);
+ __builtin_s390_vceqfs((__vector unsigned int)__a,
+ (__vector unsigned int)__b, &__cc);
return __cc != 0;
}
static inline __ATTRS_o_ai int
vec_any_ne(__vector signed long long __a, __vector signed long long __b) {
int __cc;
- __builtin_s390_vceqgs(__a, __b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc != 0;
}
@@ -4666,7 +4668,8 @@ vec_any_ne(__vector signed long long __a, __vector signed long long __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector signed long long __a, __vector __bool long long __b) {
int __cc;
- __builtin_s390_vceqgs(__a, (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc != 0;
}
@@ -4674,15 +4677,15 @@ vec_any_ne(__vector signed long long __a, __vector __bool long long __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector __bool long long __a, __vector signed long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a, __b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc != 0;
}
static inline __ATTRS_o_ai int
vec_any_ne(__vector unsigned long long __a, __vector unsigned long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs(__a, __b, &__cc);
return __cc != 0;
}
@@ -4690,8 +4693,7 @@ vec_any_ne(__vector unsigned long long __a, __vector unsigned long long __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector unsigned long long __a, __vector __bool long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs(__a, (__vector unsigned long long)__b, &__cc);
return __cc != 0;
}
@@ -4699,16 +4701,15 @@ vec_any_ne(__vector unsigned long long __a, __vector __bool long long __b) {
static inline __ATTRS_o_ai int
vec_any_ne(__vector __bool long long __a, __vector unsigned long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a, __b, &__cc);
return __cc != 0;
}
static inline __ATTRS_o_ai int
vec_any_ne(__vector __bool long long __a, __vector __bool long long __b) {
int __cc;
- __builtin_s390_vceqgs((__vector signed long long)__a,
- (__vector signed long long)__b, &__cc);
+ __builtin_s390_vceqgs((__vector unsigned long long)__a,
+ (__vector unsigned long long)__b, &__cc);
return __cc != 0;
}
@@ -6565,45 +6566,45 @@ vec_rl(__vector unsigned long long __a, __vector unsigned long long __b) {
static inline __ATTRS_o_ai __vector signed char
vec_rli(__vector signed char __a, unsigned long __b) {
return (__vector signed char)__builtin_s390_verllb(
- (__vector unsigned char)__a, (int)__b);
+ (__vector unsigned char)__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector unsigned char
vec_rli(__vector unsigned char __a, unsigned long __b) {
- return __builtin_s390_verllb(__a, (int)__b);
+ return __builtin_s390_verllb(__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector signed short
vec_rli(__vector signed short __a, unsigned long __b) {
return (__vector signed short)__builtin_s390_verllh(
- (__vector unsigned short)__a, (int)__b);
+ (__vector unsigned short)__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector unsigned short
vec_rli(__vector unsigned short __a, unsigned long __b) {
- return __builtin_s390_verllh(__a, (int)__b);
+ return __builtin_s390_verllh(__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector signed int
vec_rli(__vector signed int __a, unsigned long __b) {
return (__vector signed int)__builtin_s390_verllf(
- (__vector unsigned int)__a, (int)__b);
+ (__vector unsigned int)__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector unsigned int
vec_rli(__vector unsigned int __a, unsigned long __b) {
- return __builtin_s390_verllf(__a, (int)__b);
+ return __builtin_s390_verllf(__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector signed long long
vec_rli(__vector signed long long __a, unsigned long __b) {
return (__vector signed long long)__builtin_s390_verllg(
- (__vector unsigned long long)__a, (int)__b);
+ (__vector unsigned long long)__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector unsigned long long
vec_rli(__vector unsigned long long __a, unsigned long __b) {
- return __builtin_s390_verllg(__a, (int)__b);
+ return __builtin_s390_verllg(__a, (unsigned char)__b);
}
/*-- vec_rl_mask ------------------------------------------------------------*/
@@ -8358,7 +8359,7 @@ vec_min(__vector double __a, __vector double __b) {
static inline __ATTRS_ai __vector unsigned char
vec_add_u128(__vector unsigned char __a, __vector unsigned char __b) {
- return __builtin_s390_vaq(__a, __b);
+ return (__vector unsigned char)((__int128)__a + (__int128)__b);
}
/*-- vec_addc ---------------------------------------------------------------*/
@@ -8387,7 +8388,8 @@ vec_addc(__vector unsigned long long __a, __vector unsigned long long __b) {
static inline __ATTRS_ai __vector unsigned char
vec_addc_u128(__vector unsigned char __a, __vector unsigned char __b) {
- return __builtin_s390_vaccq(__a, __b);
+ return (__vector unsigned char)
+ __builtin_s390_vaccq((unsigned __int128)__a, (unsigned __int128)__b);
}
/*-- vec_adde_u128 ----------------------------------------------------------*/
@@ -8395,7 +8397,9 @@ vec_addc_u128(__vector unsigned char __a, __vector unsigned char __b) {
static inline __ATTRS_ai __vector unsigned char
vec_adde_u128(__vector unsigned char __a, __vector unsigned char __b,
__vector unsigned char __c) {
- return __builtin_s390_vacq(__a, __b, __c);
+ return (__vector unsigned char)
+ __builtin_s390_vacq((unsigned __int128)__a, (unsigned __int128)__b,
+ (unsigned __int128)__c);
}
/*-- vec_addec_u128 ---------------------------------------------------------*/
@@ -8403,7 +8407,9 @@ vec_adde_u128(__vector unsigned char __a, __vector unsigned char __b,
static inline __ATTRS_ai __vector unsigned char
vec_addec_u128(__vector unsigned char __a, __vector unsigned char __b,
__vector unsigned char __c) {
- return __builtin_s390_vacccq(__a, __b, __c);
+ return (__vector unsigned char)
+ __builtin_s390_vacccq((unsigned __int128)__a, (unsigned __int128)__b,
+ (unsigned __int128)__c);
}
/*-- vec_avg ----------------------------------------------------------------*/
@@ -8477,7 +8483,7 @@ vec_gfmsum(__vector unsigned int __a, __vector unsigned int __b) {
static inline __ATTRS_o_ai __vector unsigned char
vec_gfmsum_128(__vector unsigned long long __a,
__vector unsigned long long __b) {
- return __builtin_s390_vgfmg(__a, __b);
+ return (__vector unsigned char)__builtin_s390_vgfmg(__a, __b);
}
/*-- vec_gfmsum_accum -------------------------------------------------------*/
@@ -8506,7 +8512,8 @@ static inline __ATTRS_o_ai __vector unsigned char
vec_gfmsum_accum_128(__vector unsigned long long __a,
__vector unsigned long long __b,
__vector unsigned char __c) {
- return __builtin_s390_vgfmag(__a, __b, __c);
+ return (__vector unsigned char)
+ __builtin_s390_vgfmag(__a, __b, (unsigned __int128)__c);
}
/*-- vec_mladd --------------------------------------------------------------*/
@@ -8796,15 +8803,21 @@ vec_mulo(__vector unsigned int __a, __vector unsigned int __b) {
/*-- vec_msum_u128 ----------------------------------------------------------*/
#if __ARCH__ >= 12
+extern __ATTRS_o __vector unsigned char
+vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b,
+ __vector unsigned char __c, int __d)
+ __constant_range(__d, 0, 15);
+
#define vec_msum_u128(X, Y, Z, W) \
- ((__vector unsigned char)__builtin_s390_vmslg((X), (Y), (Z), (W)));
+ ((__typeof__((vec_msum_u128)((X), (Y), (Z), (W)))) \
+ __builtin_s390_vmslg((X), (Y), (unsigned __int128)(Z), (W)))
#endif
/*-- vec_sub_u128 -----------------------------------------------------------*/
static inline __ATTRS_ai __vector unsigned char
vec_sub_u128(__vector unsigned char __a, __vector unsigned char __b) {
- return __builtin_s390_vsq(__a, __b);
+ return (__vector unsigned char)((__int128)__a - (__int128)__b);
}
/*-- vec_subc ---------------------------------------------------------------*/
@@ -8833,7 +8846,8 @@ vec_subc(__vector unsigned long long __a, __vector unsigned long long __b) {
static inline __ATTRS_ai __vector unsigned char
vec_subc_u128(__vector unsigned char __a, __vector unsigned char __b) {
- return __builtin_s390_vscbiq(__a, __b);
+ return (__vector unsigned char)
+ __builtin_s390_vscbiq((unsigned __int128)__a, (unsigned __int128)__b);
}
/*-- vec_sube_u128 ----------------------------------------------------------*/
@@ -8841,7 +8855,9 @@ vec_subc_u128(__vector unsigned char __a, __vector unsigned char __b) {
static inline __ATTRS_ai __vector unsigned char
vec_sube_u128(__vector unsigned char __a, __vector unsigned char __b,
__vector unsigned char __c) {
- return __builtin_s390_vsbiq(__a, __b, __c);
+ return (__vector unsigned char)
+ __builtin_s390_vsbiq((unsigned __int128)__a, (unsigned __int128)__b,
+ (unsigned __int128)__c);
}
/*-- vec_subec_u128 ---------------------------------------------------------*/
@@ -8849,7 +8865,9 @@ vec_sube_u128(__vector unsigned char __a, __vector unsigned char __b,
static inline __ATTRS_ai __vector unsigned char
vec_subec_u128(__vector unsigned char __a, __vector unsigned char __b,
__vector unsigned char __c) {
- return __builtin_s390_vsbcbiq(__a, __b, __c);
+ return (__vector unsigned char)
+ __builtin_s390_vsbcbiq((unsigned __int128)__a, (unsigned __int128)__b,
+ (unsigned __int128)__c);
}
/*-- vec_sum2 ---------------------------------------------------------------*/
@@ -8868,12 +8886,12 @@ vec_sum2(__vector unsigned int __a, __vector unsigned int __b) {
static inline __ATTRS_o_ai __vector unsigned char
vec_sum_u128(__vector unsigned int __a, __vector unsigned int __b) {
- return __builtin_s390_vsumqf(__a, __b);
+ return (__vector unsigned char)__builtin_s390_vsumqf(__a, __b);
}
static inline __ATTRS_o_ai __vector unsigned char
vec_sum_u128(__vector unsigned long long __a, __vector unsigned long long __b) {
- return __builtin_s390_vsumqg(__a, __b);
+ return (__vector unsigned char)__builtin_s390_vsumqg(__a, __b);
}
/*-- vec_sum4 ---------------------------------------------------------------*/
lib/include/x86gprintrin.h
@@ -20,6 +20,11 @@
#include <uintrintrin.h>
#endif
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
+ defined(__USERMSR__)
+#include <usermsrintrin.h>
+#endif
+
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__CRC32__)
#include <crc32intrin.h>
lib/include/xmmintrin.h
@@ -32,8 +32,12 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16)));
#endif
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse"), __min_vector_width__(64)))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("sse,no-evex512"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS_MMX \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("mmx,sse,no-evex512"), __min_vector_width__(64)))
/// Adds the 32-bit float values in the low-order bits of the operands.
///
@@ -2121,9 +2125,9 @@ _mm_storer_ps(float *__p, __m128 __a)
/// \param __a
/// A 64-bit integer containing the value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS_MMX
-_mm_stream_pi(__m64 *__p, __m64 __a)
+_mm_stream_pi(void *__p, __m64 __a)
{
- __builtin_ia32_movntq(__p, __a);
+ __builtin_ia32_movntq((__m64 *)__p, __a);
}
/// Moves packed float values from a 128-bit vector of [4 x float] to a
@@ -2140,7 +2144,7 @@ _mm_stream_pi(__m64 *__p, __m64 __a)
/// \param __a
/// A 128-bit vector of [4 x float] containing the values to be moved.
static __inline__ void __DEFAULT_FN_ATTRS
-_mm_stream_ps(float *__p, __m128 __a)
+_mm_stream_ps(void *__p, __m128 __a)
{
__builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);
}