zig/lib/libc/include/any-darwin-any/simd/common.h at master

   1/*! @header
   2 *  The interfaces declared in this header provide "common" elementwise
   3 *  operations that are neither math nor logic functions.  These are available
   4 *  only for floating-point vectors and scalars, except for min, max, abs,
   5 *  clamp, and the reduce operations, which also support integer vectors.
   6 *
   7 *      simd_abs(x)             Absolute value of x.  Also available as fabs
   8 *                              for floating-point vectors.  If x is the
   9 *                              smallest signed integer, x is returned.
  10 *
  11 *      simd_max(x,y)           Returns the maximum of x and y.  Also available
  12 *                              as fmax for floating-point vectors.
  13 *
  14 *      simd_min(x,y)           Returns the minimum of x and y.  Also available
  15 *                              as fmin for floating-point vectors.
  16 *
  17 *      simd_clamp(x,min,max)   x clamped to the range [min, max].
  18 *
  19 *      simd_sign(x)            -1 if x is less than zero, 0 if x is zero or
  20 *                              NaN, and +1 if x is greater than zero.
  21 *
  22 *      simd_mix(x,y,t)         If t is not in the range [0,1], the result is
  23 *      simd_lerp(x,y,t)        undefined.  Otherwise the result is x+(y-x)*t,
  24 *                              which linearly interpolates between x and y.
  25 *
  26 *      simd_recip(x)           An approximation to 1/x.  If x is very near the
  27 *                              limits of representable values, or is infinity
  28 *                              or NaN, the result is undefined.  There are
  29 *                              two variants of this function:
  30 *
  31 *                                  simd_precise_recip(x)
  32 *
  33 *                              and
  34 *
  35 *                                  simd_fast_recip(x).
  36 *
  37 *                              The "precise" variant is accurate to a few ULPs,
  38 *                              whereas the "fast" variant may have as little
  39 *                              as 11 bits of accuracy in float and about 22
  40 *                              bits in double.
  41 *
  42 *                              The function simd_recip(x) resolves to
  43 *                              simd_precise_recip(x) ordinarily, but to
  44 *                              simd_fast_recip(x) when used in a translation
  45 *                              unit compiled with -ffast-math (when
  46 *                              -ffast-math is in effect, you may still use the
  47 *                              precise version of this function by calling it
  48 *                              explicitly by name).
  49 *
  50 *      simd_rsqrt(x)           An approximation to 1/sqrt(x).  If x is
  51 *                              infinity or NaN, the result is undefined.
  52 *                              There are two variants of this function:
  53 *
  54 *                                  simd_precise_rsqrt(x)
  55 *
  56 *                              and
  57 *
  58 *                                  simd_fast_rsqrt(x).
  59 *
  60 *                              The "precise" variant is accurate to a few ULPs,
  61 *                              whereas the "fast" variant may have as little
  62 *                              as 11 bits of accuracy in float and about 22
  63 *                              bits in double.
  64 *
  65 *                              The function simd_rsqrt(x) resolves to
  66 *                              simd_precise_rsqrt(x) ordinarily, but to
  67 *                              simd_fast_rsqrt(x) when used in a translation
  68 *                              unit compiled with -ffast-math (when
  69 *                              -ffast-math is in effect, you may still use the
  70 *                              precise version of this function by calling it
  71 *                              explicitly by name).
  72 *
  73 *      simd_fract(x)           The "fractional part" of x, which lies strictly
  74 *                              in the range [0, 0x1.fffffep-1].
  75 *
  76 *      simd_step(edge,x)       0 if x < edge, and 1 otherwise.
  77 *
  78 *      simd_smoothstep(edge0,edge1,x) 0 if x <= edge0, 1 if x >= edge1, and
  79 *                              a Hermite interpolation between 0 and 1 if
  80 *                              edge0 < x < edge1.
  81 *
  82 *      simd_reduce_add(x)      Sum of the elements of x.
  83 *
  84 *      simd_reduce_min(x)      Minimum of the elements of x.
  85 *
  86 *      simd_reduce_max(x)      Maximum of the elements of x.
  87 *
  88 *      simd_equal(x,y)         True if and only if every lane of x is equal
  89 *                              to the corresponding lane of y.
  90 *
  91 *  The following common functions are available in the simd:: namespace:
  92 *
  93 *      C++ Function                    Equivalent C Function
  94 *      --------------------------------------------------------------------
  95 *      simd::abs(x)                    simd_abs(x)
  96 *      simd::max(x,y)                  simd_max(x,y)
  97 *      simd::min(x,y)                  simd_min(x,y)
  98 *      simd::clamp(x,min,max)          simd_clamp(x,min,max)
  99 *      simd::sign(x)                   simd_sign(x)
 100 *      simd::mix(x,y,t)                simd_mix(x,y,t)
 101 *      simd::lerp(x,y,t)               simd_lerp(x,y,t)
 102 *      simd::recip(x)                  simd_recip(x)
 103 *      simd::rsqrt(x)                  simd_rsqrt(x)
 104 *      simd::fract(x)                  simd_fract(x)
 105 *      simd::step(edge,x)              simd_step(edge,x)
 106 *      simd::smoothstep(e0,e1,x)       simd_smoothstep(e0,e1,x)
 107 *      simd::reduce_add(x)             simd_reduce_add(x)
 108 *      simd::reduce_max(x)             simd_reduce_max(x)
 109 *      simd::reduce_min(x)             simd_reduce_min(x)
 110 *      simd::equal(x,y)                simd_equal(x,y)
 111 *
 112 *      simd::precise::recip(x)         simd_precise_recip(x)
 113 *      simd::precise::rsqrt(x)         simd_precise_rsqrt(x)
 114 *
 115 *      simd::fast::recip(x)            simd_fast_recip(x)
 116 *      simd::fast::rsqrt(x)            simd_fast_rsqrt(x)
 117 *
 118 *  @copyright 2014-2017 Apple, Inc. All rights reserved.
 119 *  @unsorted                                                                 */
 120
 121#ifndef SIMD_COMMON_HEADER
 122#define SIMD_COMMON_HEADER
 123
 124#include <simd/base.h>
 125#if SIMD_COMPILER_HAS_REQUIRED_FEATURES
 126#include <simd/vector_make.h>
 127#include <simd/logic.h>
 128#include <simd/math.h>
 129
 130#ifdef __cplusplus
 131extern "C" {
 132#endif
 133
  134/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  135static inline SIMD_CFUNC simd_char2 simd_abs(simd_char2 x);
  136/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  137static inline SIMD_CFUNC simd_char3 simd_abs(simd_char3 x);
  138/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  139static inline SIMD_CFUNC simd_char4 simd_abs(simd_char4 x);
  140/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  141static inline SIMD_CFUNC simd_char8 simd_abs(simd_char8 x);
  142/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  143static inline SIMD_CFUNC simd_char16 simd_abs(simd_char16 x);
  144/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  145static inline SIMD_CFUNC simd_char32 simd_abs(simd_char32 x);
  146/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  147static inline SIMD_CFUNC simd_char64 simd_abs(simd_char64 x);
  148/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  149static inline SIMD_CFUNC simd_short2 simd_abs(simd_short2 x);
  150/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  151static inline SIMD_CFUNC simd_short3 simd_abs(simd_short3 x);
  152/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  153static inline SIMD_CFUNC simd_short4 simd_abs(simd_short4 x);
  154/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  155static inline SIMD_CFUNC simd_short8 simd_abs(simd_short8 x);
  156/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  157static inline SIMD_CFUNC simd_short16 simd_abs(simd_short16 x);
  158/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  159static inline SIMD_CFUNC simd_short32 simd_abs(simd_short32 x);
  160/*! @abstract The elementwise absolute value of x.                            */
  161static inline SIMD_CFUNC simd_half2 simd_abs(simd_half2 x);
  162/*! @abstract The elementwise absolute value of x.                            */
  163static inline SIMD_CFUNC simd_half3 simd_abs(simd_half3 x);
  164/*! @abstract The elementwise absolute value of x.                            */
  165static inline SIMD_CFUNC simd_half4 simd_abs(simd_half4 x);
  166/*! @abstract The elementwise absolute value of x.                            */
  167static inline SIMD_CFUNC simd_half8 simd_abs(simd_half8 x);
  168/*! @abstract The elementwise absolute value of x.                            */
  169static inline SIMD_CFUNC simd_half16 simd_abs(simd_half16 x);
  170/*! @abstract The elementwise absolute value of x.                            */
  171static inline SIMD_CFUNC simd_half32 simd_abs(simd_half32 x);
  172/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  173static inline SIMD_CFUNC simd_int2 simd_abs(simd_int2 x);
  174/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  175static inline SIMD_CFUNC simd_int3 simd_abs(simd_int3 x);
  176/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  177static inline SIMD_CFUNC simd_int4 simd_abs(simd_int4 x);
  178/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  179static inline SIMD_CFUNC simd_int8 simd_abs(simd_int8 x);
  180/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  181static inline SIMD_CFUNC simd_int16 simd_abs(simd_int16 x);
  182/*! @abstract The elementwise absolute value of x.                            */
  183static inline SIMD_CFUNC simd_float2 simd_abs(simd_float2 x);
  184/*! @abstract The elementwise absolute value of x.                            */
  185static inline SIMD_CFUNC simd_float3 simd_abs(simd_float3 x);
  186/*! @abstract The elementwise absolute value of x.                            */
  187static inline SIMD_CFUNC simd_float4 simd_abs(simd_float4 x);
  188/*! @abstract The elementwise absolute value of x.                            */
  189static inline SIMD_CFUNC simd_float8 simd_abs(simd_float8 x);
  190/*! @abstract The elementwise absolute value of x.                            */
  191static inline SIMD_CFUNC simd_float16 simd_abs(simd_float16 x);
  192/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  193static inline SIMD_CFUNC simd_long2 simd_abs(simd_long2 x);
  194/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  195static inline SIMD_CFUNC simd_long3 simd_abs(simd_long3 x);
  196/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  197static inline SIMD_CFUNC simd_long4 simd_abs(simd_long4 x);
  198/*! @abstract The elementwise absolute value of x.
 *  @discussion The smallest signed integer is returned unchanged (per lane). */
  199static inline SIMD_CFUNC simd_long8 simd_abs(simd_long8 x);
  200/*! @abstract The elementwise absolute value of x.                            */
  201static inline SIMD_CFUNC simd_double2 simd_abs(simd_double2 x);
  202/*! @abstract The elementwise absolute value of x.                            */
  203static inline SIMD_CFUNC simd_double3 simd_abs(simd_double3 x);
  204/*! @abstract The elementwise absolute value of x.                            */
  205static inline SIMD_CFUNC simd_double4 simd_abs(simd_double4 x);
  206/*! @abstract The elementwise absolute value of x.                            */
  207static inline SIMD_CFUNC simd_double8 simd_abs(simd_double8 x);
  208/*! @abstract The elementwise absolute value of x.
  209 *  @discussion Deprecated. Use simd_abs(x) instead.  vector_abs is a plain
 *  macro alias that expands to simd_abs.                                     */
  210#define vector_abs simd_abs
 211  
  212/*! @abstract The elementwise maximum of x and y.                             */
  213static inline SIMD_CFUNC simd_char2 simd_max(simd_char2 x, simd_char2 y);
  214/*! @abstract The elementwise maximum of x and y.                             */
  215static inline SIMD_CFUNC simd_char3 simd_max(simd_char3 x, simd_char3 y);
  216/*! @abstract The elementwise maximum of x and y.                             */
  217static inline SIMD_CFUNC simd_char4 simd_max(simd_char4 x, simd_char4 y);
  218/*! @abstract The elementwise maximum of x and y.                             */
  219static inline SIMD_CFUNC simd_char8 simd_max(simd_char8 x, simd_char8 y);
  220/*! @abstract The elementwise maximum of x and y.                             */
  221static inline SIMD_CFUNC simd_char16 simd_max(simd_char16 x, simd_char16 y);
  222/*! @abstract The elementwise maximum of x and y.                             */
  223static inline SIMD_CFUNC simd_char32 simd_max(simd_char32 x, simd_char32 y);
  224/*! @abstract The elementwise maximum of x and y.                             */
  225static inline SIMD_CFUNC simd_char64 simd_max(simd_char64 x, simd_char64 y);
  226/*! @abstract The elementwise maximum of x and y.                             */
  227static inline SIMD_CFUNC simd_uchar2 simd_max(simd_uchar2 x, simd_uchar2 y);
  228/*! @abstract The elementwise maximum of x and y.                             */
  229static inline SIMD_CFUNC simd_uchar3 simd_max(simd_uchar3 x, simd_uchar3 y);
  230/*! @abstract The elementwise maximum of x and y.                             */
  231static inline SIMD_CFUNC simd_uchar4 simd_max(simd_uchar4 x, simd_uchar4 y);
  232/*! @abstract The elementwise maximum of x and y.                             */
  233static inline SIMD_CFUNC simd_uchar8 simd_max(simd_uchar8 x, simd_uchar8 y);
  234/*! @abstract The elementwise maximum of x and y.                             */
  235static inline SIMD_CFUNC simd_uchar16 simd_max(simd_uchar16 x, simd_uchar16 y);
  236/*! @abstract The elementwise maximum of x and y.                             */
  237static inline SIMD_CFUNC simd_uchar32 simd_max(simd_uchar32 x, simd_uchar32 y);
  238/*! @abstract The elementwise maximum of x and y.                             */
  239static inline SIMD_CFUNC simd_uchar64 simd_max(simd_uchar64 x, simd_uchar64 y);
  240/*! @abstract The elementwise maximum of x and y.                             */
  241static inline SIMD_CFUNC simd_short2 simd_max(simd_short2 x, simd_short2 y);
  242/*! @abstract The elementwise maximum of x and y.                             */
  243static inline SIMD_CFUNC simd_short3 simd_max(simd_short3 x, simd_short3 y);
  244/*! @abstract The elementwise maximum of x and y.                             */
  245static inline SIMD_CFUNC simd_short4 simd_max(simd_short4 x, simd_short4 y);
  246/*! @abstract The elementwise maximum of x and y.                             */
  247static inline SIMD_CFUNC simd_short8 simd_max(simd_short8 x, simd_short8 y);
  248/*! @abstract The elementwise maximum of x and y.                             */
  249static inline SIMD_CFUNC simd_short16 simd_max(simd_short16 x, simd_short16 y);
  250/*! @abstract The elementwise maximum of x and y.                             */
  251static inline SIMD_CFUNC simd_short32 simd_max(simd_short32 x, simd_short32 y);
  252/*! @abstract The elementwise maximum of x and y.                             */
  253static inline SIMD_CFUNC simd_ushort2 simd_max(simd_ushort2 x, simd_ushort2 y);
  254/*! @abstract The elementwise maximum of x and y.                             */
  255static inline SIMD_CFUNC simd_ushort3 simd_max(simd_ushort3 x, simd_ushort3 y);
  256/*! @abstract The elementwise maximum of x and y.                             */
  257static inline SIMD_CFUNC simd_ushort4 simd_max(simd_ushort4 x, simd_ushort4 y);
  258/*! @abstract The elementwise maximum of x and y.                             */
  259static inline SIMD_CFUNC simd_ushort8 simd_max(simd_ushort8 x, simd_ushort8 y);
  260/*! @abstract The elementwise maximum of x and y.                             */
  261static inline SIMD_CFUNC simd_ushort16 simd_max(simd_ushort16 x, simd_ushort16 y);
  262/*! @abstract The elementwise maximum of x and y.                             */
  263static inline SIMD_CFUNC simd_ushort32 simd_max(simd_ushort32 x, simd_ushort32 y);
  264/*! @abstract The maximum of the scalars x and y.                             */
  265static inline SIMD_CFUNC _Float16 simd_max(_Float16 x, _Float16 y);
  266/*! @abstract The elementwise maximum of x and y.                             */
  267static inline SIMD_CFUNC simd_half2 simd_max(simd_half2 x, simd_half2 y);
  268/*! @abstract The elementwise maximum of x and y.                             */
  269static inline SIMD_CFUNC simd_half3 simd_max(simd_half3 x, simd_half3 y);
  270/*! @abstract The elementwise maximum of x and y.                             */
  271static inline SIMD_CFUNC simd_half4 simd_max(simd_half4 x, simd_half4 y);
  272/*! @abstract The elementwise maximum of x and y.                             */
  273static inline SIMD_CFUNC simd_half8 simd_max(simd_half8 x, simd_half8 y);
  274/*! @abstract The elementwise maximum of x and y.                             */
  275static inline SIMD_CFUNC simd_half16 simd_max(simd_half16 x, simd_half16 y);
  276/*! @abstract The elementwise maximum of x and y.                             */
  277static inline SIMD_CFUNC simd_half32 simd_max(simd_half32 x, simd_half32 y);
  278/*! @abstract The elementwise maximum of x and y.                             */
  279static inline SIMD_CFUNC simd_int2 simd_max(simd_int2 x, simd_int2 y);
  280/*! @abstract The elementwise maximum of x and y.                             */
  281static inline SIMD_CFUNC simd_int3 simd_max(simd_int3 x, simd_int3 y);
  282/*! @abstract The elementwise maximum of x and y.                             */
  283static inline SIMD_CFUNC simd_int4 simd_max(simd_int4 x, simd_int4 y);
  284/*! @abstract The elementwise maximum of x and y.                             */
  285static inline SIMD_CFUNC simd_int8 simd_max(simd_int8 x, simd_int8 y);
  286/*! @abstract The elementwise maximum of x and y.                             */
  287static inline SIMD_CFUNC simd_int16 simd_max(simd_int16 x, simd_int16 y);
  288/*! @abstract The elementwise maximum of x and y.                             */
  289static inline SIMD_CFUNC simd_uint2 simd_max(simd_uint2 x, simd_uint2 y);
  290/*! @abstract The elementwise maximum of x and y.                             */
  291static inline SIMD_CFUNC simd_uint3 simd_max(simd_uint3 x, simd_uint3 y);
  292/*! @abstract The elementwise maximum of x and y.                             */
  293static inline SIMD_CFUNC simd_uint4 simd_max(simd_uint4 x, simd_uint4 y);
  294/*! @abstract The elementwise maximum of x and y.                             */
  295static inline SIMD_CFUNC simd_uint8 simd_max(simd_uint8 x, simd_uint8 y);
  296/*! @abstract The elementwise maximum of x and y.                             */
  297static inline SIMD_CFUNC simd_uint16 simd_max(simd_uint16 x, simd_uint16 y);
  298/*! @abstract The maximum of the scalars x and y.                             */
  299static inline SIMD_CFUNC float simd_max(float x, float y);
  300/*! @abstract The elementwise maximum of x and y.                             */
  301static inline SIMD_CFUNC simd_float2 simd_max(simd_float2 x, simd_float2 y);
  302/*! @abstract The elementwise maximum of x and y.                             */
  303static inline SIMD_CFUNC simd_float3 simd_max(simd_float3 x, simd_float3 y);
  304/*! @abstract The elementwise maximum of x and y.                             */
  305static inline SIMD_CFUNC simd_float4 simd_max(simd_float4 x, simd_float4 y);
  306/*! @abstract The elementwise maximum of x and y.                             */
  307static inline SIMD_CFUNC simd_float8 simd_max(simd_float8 x, simd_float8 y);
  308/*! @abstract The elementwise maximum of x and y.                             */
  309static inline SIMD_CFUNC simd_float16 simd_max(simd_float16 x, simd_float16 y);
  310/*! @abstract The elementwise maximum of x and y.                             */
  311static inline SIMD_CFUNC simd_long2 simd_max(simd_long2 x, simd_long2 y);
  312/*! @abstract The elementwise maximum of x and y.                             */
  313static inline SIMD_CFUNC simd_long3 simd_max(simd_long3 x, simd_long3 y);
  314/*! @abstract The elementwise maximum of x and y.                             */
  315static inline SIMD_CFUNC simd_long4 simd_max(simd_long4 x, simd_long4 y);
  316/*! @abstract The elementwise maximum of x and y.                             */
  317static inline SIMD_CFUNC simd_long8 simd_max(simd_long8 x, simd_long8 y);
  318/*! @abstract The elementwise maximum of x and y.                             */
  319static inline SIMD_CFUNC simd_ulong2 simd_max(simd_ulong2 x, simd_ulong2 y);
  320/*! @abstract The elementwise maximum of x and y.                             */
  321static inline SIMD_CFUNC simd_ulong3 simd_max(simd_ulong3 x, simd_ulong3 y);
  322/*! @abstract The elementwise maximum of x and y.                             */
  323static inline SIMD_CFUNC simd_ulong4 simd_max(simd_ulong4 x, simd_ulong4 y);
  324/*! @abstract The elementwise maximum of x and y.                             */
  325static inline SIMD_CFUNC simd_ulong8 simd_max(simd_ulong8 x, simd_ulong8 y);
  326/*! @abstract The maximum of the scalars x and y.                             */
  327static inline SIMD_CFUNC double simd_max(double x, double y);
  328/*! @abstract The elementwise maximum of x and y.                             */
  329static inline SIMD_CFUNC simd_double2 simd_max(simd_double2 x, simd_double2 y);
  330/*! @abstract The elementwise maximum of x and y.                             */
  331static inline SIMD_CFUNC simd_double3 simd_max(simd_double3 x, simd_double3 y);
  332/*! @abstract The elementwise maximum of x and y.                             */
  333static inline SIMD_CFUNC simd_double4 simd_max(simd_double4 x, simd_double4 y);
  334/*! @abstract The elementwise maximum of x and y.                             */
  335static inline SIMD_CFUNC simd_double8 simd_max(simd_double8 x, simd_double8 y);
  336/*! @abstract The elementwise maximum of x and y.
  337 *  @discussion Deprecated. Use simd_max(x,y) instead.  vector_max is a plain
 *  macro alias that expands to simd_max.                                     */
  338#define vector_max simd_max
 339
  340/*! @abstract The elementwise minimum of x and y.                             */
  341static inline SIMD_CFUNC simd_char2 simd_min(simd_char2 x, simd_char2 y);
  342/*! @abstract The elementwise minimum of x and y.                             */
  343static inline SIMD_CFUNC simd_char3 simd_min(simd_char3 x, simd_char3 y);
  344/*! @abstract The elementwise minimum of x and y.                             */
  345static inline SIMD_CFUNC simd_char4 simd_min(simd_char4 x, simd_char4 y);
  346/*! @abstract The elementwise minimum of x and y.                             */
  347static inline SIMD_CFUNC simd_char8 simd_min(simd_char8 x, simd_char8 y);
  348/*! @abstract The elementwise minimum of x and y.                             */
  349static inline SIMD_CFUNC simd_char16 simd_min(simd_char16 x, simd_char16 y);
  350/*! @abstract The elementwise minimum of x and y.                             */
  351static inline SIMD_CFUNC simd_char32 simd_min(simd_char32 x, simd_char32 y);
  352/*! @abstract The elementwise minimum of x and y.                             */
  353static inline SIMD_CFUNC simd_char64 simd_min(simd_char64 x, simd_char64 y);
  354/*! @abstract The elementwise minimum of x and y.                             */
  355static inline SIMD_CFUNC simd_uchar2 simd_min(simd_uchar2 x, simd_uchar2 y);
  356/*! @abstract The elementwise minimum of x and y.                             */
  357static inline SIMD_CFUNC simd_uchar3 simd_min(simd_uchar3 x, simd_uchar3 y);
  358/*! @abstract The elementwise minimum of x and y.                             */
  359static inline SIMD_CFUNC simd_uchar4 simd_min(simd_uchar4 x, simd_uchar4 y);
  360/*! @abstract The elementwise minimum of x and y.                             */
  361static inline SIMD_CFUNC simd_uchar8 simd_min(simd_uchar8 x, simd_uchar8 y);
  362/*! @abstract The elementwise minimum of x and y.                             */
  363static inline SIMD_CFUNC simd_uchar16 simd_min(simd_uchar16 x, simd_uchar16 y);
  364/*! @abstract The elementwise minimum of x and y.                             */
  365static inline SIMD_CFUNC simd_uchar32 simd_min(simd_uchar32 x, simd_uchar32 y);
  366/*! @abstract The elementwise minimum of x and y.                             */
  367static inline SIMD_CFUNC simd_uchar64 simd_min(simd_uchar64 x, simd_uchar64 y);
  368/*! @abstract The elementwise minimum of x and y.                             */
  369static inline SIMD_CFUNC simd_short2 simd_min(simd_short2 x, simd_short2 y);
  370/*! @abstract The elementwise minimum of x and y.                             */
  371static inline SIMD_CFUNC simd_short3 simd_min(simd_short3 x, simd_short3 y);
  372/*! @abstract The elementwise minimum of x and y.                             */
  373static inline SIMD_CFUNC simd_short4 simd_min(simd_short4 x, simd_short4 y);
  374/*! @abstract The elementwise minimum of x and y.                             */
  375static inline SIMD_CFUNC simd_short8 simd_min(simd_short8 x, simd_short8 y);
  376/*! @abstract The elementwise minimum of x and y.                             */
  377static inline SIMD_CFUNC simd_short16 simd_min(simd_short16 x, simd_short16 y);
  378/*! @abstract The elementwise minimum of x and y.                             */
  379static inline SIMD_CFUNC simd_short32 simd_min(simd_short32 x, simd_short32 y);
  380/*! @abstract The elementwise minimum of x and y.                             */
  381static inline SIMD_CFUNC simd_ushort2 simd_min(simd_ushort2 x, simd_ushort2 y);
  382/*! @abstract The elementwise minimum of x and y.                             */
  383static inline SIMD_CFUNC simd_ushort3 simd_min(simd_ushort3 x, simd_ushort3 y);
  384/*! @abstract The elementwise minimum of x and y.                             */
  385static inline SIMD_CFUNC simd_ushort4 simd_min(simd_ushort4 x, simd_ushort4 y);
  386/*! @abstract The elementwise minimum of x and y.                             */
  387static inline SIMD_CFUNC simd_ushort8 simd_min(simd_ushort8 x, simd_ushort8 y);
  388/*! @abstract The elementwise minimum of x and y.                             */
  389static inline SIMD_CFUNC simd_ushort16 simd_min(simd_ushort16 x, simd_ushort16 y);
  390/*! @abstract The elementwise minimum of x and y.                             */
  391static inline SIMD_CFUNC simd_ushort32 simd_min(simd_ushort32 x, simd_ushort32 y);
  392/*! @abstract The minimum of the scalars x and y.                             */
  393static inline SIMD_CFUNC _Float16 simd_min(_Float16 x, _Float16 y);
  394/*! @abstract The elementwise minimum of x and y.                             */
  395static inline SIMD_CFUNC simd_half2 simd_min(simd_half2 x, simd_half2 y);
  396/*! @abstract The elementwise minimum of x and y.                             */
  397static inline SIMD_CFUNC simd_half3 simd_min(simd_half3 x, simd_half3 y);
  398/*! @abstract The elementwise minimum of x and y.                             */
  399static inline SIMD_CFUNC simd_half4 simd_min(simd_half4 x, simd_half4 y);
  400/*! @abstract The elementwise minimum of x and y.                             */
  401static inline SIMD_CFUNC simd_half8 simd_min(simd_half8 x, simd_half8 y);
  402/*! @abstract The elementwise minimum of x and y.                             */
  403static inline SIMD_CFUNC simd_half16 simd_min(simd_half16 x, simd_half16 y);
  404/*! @abstract The elementwise minimum of x and y.                             */
  405static inline SIMD_CFUNC simd_half32 simd_min(simd_half32 x, simd_half32 y);
  406/*! @abstract The elementwise minimum of x and y.                             */
  407static inline SIMD_CFUNC simd_int2 simd_min(simd_int2 x, simd_int2 y);
  408/*! @abstract The elementwise minimum of x and y.                             */
  409static inline SIMD_CFUNC simd_int3 simd_min(simd_int3 x, simd_int3 y);
  410/*! @abstract The elementwise minimum of x and y.                             */
  411static inline SIMD_CFUNC simd_int4 simd_min(simd_int4 x, simd_int4 y);
  412/*! @abstract The elementwise minimum of x and y.                             */
  413static inline SIMD_CFUNC simd_int8 simd_min(simd_int8 x, simd_int8 y);
  414/*! @abstract The elementwise minimum of x and y.                             */
  415static inline SIMD_CFUNC simd_int16 simd_min(simd_int16 x, simd_int16 y);
  416/*! @abstract The elementwise minimum of x and y.                             */
  417static inline SIMD_CFUNC simd_uint2 simd_min(simd_uint2 x, simd_uint2 y);
  418/*! @abstract The elementwise minimum of x and y.                             */
  419static inline SIMD_CFUNC simd_uint3 simd_min(simd_uint3 x, simd_uint3 y);
  420/*! @abstract The elementwise minimum of x and y.                             */
  421static inline SIMD_CFUNC simd_uint4 simd_min(simd_uint4 x, simd_uint4 y);
  422/*! @abstract The elementwise minimum of x and y.                             */
  423static inline SIMD_CFUNC simd_uint8 simd_min(simd_uint8 x, simd_uint8 y);
  424/*! @abstract The elementwise minimum of x and y.                             */
  425static inline SIMD_CFUNC simd_uint16 simd_min(simd_uint16 x, simd_uint16 y);
  426/*! @abstract The minimum of the scalars x and y.                             */
  427static inline SIMD_CFUNC float simd_min(float x, float y);
  428/*! @abstract The elementwise minimum of x and y.                             */
  429static inline SIMD_CFUNC simd_float2 simd_min(simd_float2 x, simd_float2 y);
  430/*! @abstract The elementwise minimum of x and y.                             */
  431static inline SIMD_CFUNC simd_float3 simd_min(simd_float3 x, simd_float3 y);
  432/*! @abstract The elementwise minimum of x and y.                             */
  433static inline SIMD_CFUNC simd_float4 simd_min(simd_float4 x, simd_float4 y);
  434/*! @abstract The elementwise minimum of x and y.                             */
  435static inline SIMD_CFUNC simd_float8 simd_min(simd_float8 x, simd_float8 y);
  436/*! @abstract The elementwise minimum of x and y.                             */
  437static inline SIMD_CFUNC simd_float16 simd_min(simd_float16 x, simd_float16 y);
  438/*! @abstract The elementwise minimum of x and y.                             */
  439static inline SIMD_CFUNC simd_long2 simd_min(simd_long2 x, simd_long2 y);
  440/*! @abstract The elementwise minimum of x and y.                             */
  441static inline SIMD_CFUNC simd_long3 simd_min(simd_long3 x, simd_long3 y);
  442/*! @abstract The elementwise minimum of x and y.                             */
  443static inline SIMD_CFUNC simd_long4 simd_min(simd_long4 x, simd_long4 y);
  444/*! @abstract The elementwise minimum of x and y.                             */
  445static inline SIMD_CFUNC simd_long8 simd_min(simd_long8 x, simd_long8 y);
  446/*! @abstract The elementwise minimum of x and y.                             */
  447static inline SIMD_CFUNC simd_ulong2 simd_min(simd_ulong2 x, simd_ulong2 y);
  448/*! @abstract The elementwise minimum of x and y.                             */
  449static inline SIMD_CFUNC simd_ulong3 simd_min(simd_ulong3 x, simd_ulong3 y);
  450/*! @abstract The elementwise minimum of x and y.                             */
  451static inline SIMD_CFUNC simd_ulong4 simd_min(simd_ulong4 x, simd_ulong4 y);
  452/*! @abstract The elementwise minimum of x and y.                             */
  453static inline SIMD_CFUNC simd_ulong8 simd_min(simd_ulong8 x, simd_ulong8 y);
  454/*! @abstract The minimum of the scalars x and y.                             */
  455static inline SIMD_CFUNC double simd_min(double x, double y);
  456/*! @abstract The elementwise minimum of x and y.                             */
  457static inline SIMD_CFUNC simd_double2 simd_min(simd_double2 x, simd_double2 y);
  458/*! @abstract The elementwise minimum of x and y.                             */
  459static inline SIMD_CFUNC simd_double3 simd_min(simd_double3 x, simd_double3 y);
  460/*! @abstract The elementwise minimum of x and y.                             */
  461static inline SIMD_CFUNC simd_double4 simd_min(simd_double4 x, simd_double4 y);
  462/*! @abstract The elementwise minimum of x and y.                             */
  463static inline SIMD_CFUNC simd_double8 simd_min(simd_double8 x, simd_double8 y);
 464/*! @abstract The elementwise minimum of x and y.
 465 *  @discussion Deprecated. Use simd_min(x,y) instead.                        */
 466#define vector_min simd_min
 467
 468  
 469/*! @abstract x clamped to the range [min, max].
 470 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 471 *  you can use a scalar value for min and max.                               */
 472static inline SIMD_CFUNC simd_char2 simd_clamp(simd_char2 x, simd_char2 min, simd_char2 max);
 473/*! @abstract x clamped to the range [min, max].
 474 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 475 *  you can use a scalar value for min and max.                               */
 476static inline SIMD_CFUNC simd_char3 simd_clamp(simd_char3 x, simd_char3 min, simd_char3 max);
 477/*! @abstract x clamped to the range [min, max].
 478 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 479 *  you can use a scalar value for min and max.                               */
 480static inline SIMD_CFUNC simd_char4 simd_clamp(simd_char4 x, simd_char4 min, simd_char4 max);
 481/*! @abstract x clamped to the range [min, max].
 482 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 483 *  you can use a scalar value for min and max.                               */
 484static inline SIMD_CFUNC simd_char8 simd_clamp(simd_char8 x, simd_char8 min, simd_char8 max);
 485/*! @abstract x clamped to the range [min, max].
 486 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 487 *  you can use a scalar value for min and max.                               */
 488static inline SIMD_CFUNC simd_char16 simd_clamp(simd_char16 x, simd_char16 min, simd_char16 max);
 489/*! @abstract x clamped to the range [min, max].
 490 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 491 *  you can use a scalar value for min and max.                               */
 492static inline SIMD_CFUNC simd_char32 simd_clamp(simd_char32 x, simd_char32 min, simd_char32 max);
 493/*! @abstract x clamped to the range [min, max].
 494 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 495 *  you can use a scalar value for min and max.                               */
 496static inline SIMD_CFUNC simd_char64 simd_clamp(simd_char64 x, simd_char64 min, simd_char64 max);
 497/*! @abstract x clamped to the range [min, max].
 498 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 499 *  you can use a scalar value for min and max.                               */
 500static inline SIMD_CFUNC simd_uchar2 simd_clamp(simd_uchar2 x, simd_uchar2 min, simd_uchar2 max);
 501/*! @abstract x clamped to the range [min, max].
 502 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 503 *  you can use a scalar value for min and max.                               */
 504static inline SIMD_CFUNC simd_uchar3 simd_clamp(simd_uchar3 x, simd_uchar3 min, simd_uchar3 max);
 505/*! @abstract x clamped to the range [min, max].
 506 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 507 *  you can use a scalar value for min and max.                               */
 508static inline SIMD_CFUNC simd_uchar4 simd_clamp(simd_uchar4 x, simd_uchar4 min, simd_uchar4 max);
 509/*! @abstract x clamped to the range [min, max].
 510 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 511 *  you can use a scalar value for min and max.                               */
 512static inline SIMD_CFUNC simd_uchar8 simd_clamp(simd_uchar8 x, simd_uchar8 min, simd_uchar8 max);
 513/*! @abstract x clamped to the range [min, max].
 514 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 515 *  you can use a scalar value for min and max.                               */
 516static inline SIMD_CFUNC simd_uchar16 simd_clamp(simd_uchar16 x, simd_uchar16 min, simd_uchar16 max);
 517/*! @abstract x clamped to the range [min, max].
 518 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 519 *  you can use a scalar value for min and max.                               */
 520static inline SIMD_CFUNC simd_uchar32 simd_clamp(simd_uchar32 x, simd_uchar32 min, simd_uchar32 max);
 521/*! @abstract x clamped to the range [min, max].
 522 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 523 *  you can use a scalar value for min and max.                               */
 524static inline SIMD_CFUNC simd_uchar64 simd_clamp(simd_uchar64 x, simd_uchar64 min, simd_uchar64 max);
 525/*! @abstract x clamped to the range [min, max].
 526 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 527 *  you can use a scalar value for min and max.                               */
 528static inline SIMD_CFUNC simd_short2 simd_clamp(simd_short2 x, simd_short2 min, simd_short2 max);
 529/*! @abstract x clamped to the range [min, max].
 530 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 531 *  you can use a scalar value for min and max.                               */
 532static inline SIMD_CFUNC simd_short3 simd_clamp(simd_short3 x, simd_short3 min, simd_short3 max);
 533/*! @abstract x clamped to the range [min, max].
 534 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 535 *  you can use a scalar value for min and max.                               */
 536static inline SIMD_CFUNC simd_short4 simd_clamp(simd_short4 x, simd_short4 min, simd_short4 max);
 537/*! @abstract x clamped to the range [min, max].
 538 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 539 *  you can use a scalar value for min and max.                               */
 540static inline SIMD_CFUNC simd_short8 simd_clamp(simd_short8 x, simd_short8 min, simd_short8 max);
 541/*! @abstract x clamped to the range [min, max].
 542 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 543 *  you can use a scalar value for min and max.                               */
 544static inline SIMD_CFUNC simd_short16 simd_clamp(simd_short16 x, simd_short16 min, simd_short16 max);
 545/*! @abstract x clamped to the range [min, max].
 546 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 547 *  you can use a scalar value for min and max.                               */
 548static inline SIMD_CFUNC simd_short32 simd_clamp(simd_short32 x, simd_short32 min, simd_short32 max);
 549/*! @abstract x clamped to the range [min, max].
 550 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 551 *  you can use a scalar value for min and max.                               */
 552static inline SIMD_CFUNC simd_ushort2 simd_clamp(simd_ushort2 x, simd_ushort2 min, simd_ushort2 max);
 553/*! @abstract x clamped to the range [min, max].
 554 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 555 *  you can use a scalar value for min and max.                               */
 556static inline SIMD_CFUNC simd_ushort3 simd_clamp(simd_ushort3 x, simd_ushort3 min, simd_ushort3 max);
 557/*! @abstract x clamped to the range [min, max].
 558 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 559 *  you can use a scalar value for min and max.                               */
 560static inline SIMD_CFUNC simd_ushort4 simd_clamp(simd_ushort4 x, simd_ushort4 min, simd_ushort4 max);
 561/*! @abstract x clamped to the range [min, max].
 562 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 563 *  you can use a scalar value for min and max.                               */
 564static inline SIMD_CFUNC simd_ushort8 simd_clamp(simd_ushort8 x, simd_ushort8 min, simd_ushort8 max);
 565/*! @abstract x clamped to the range [min, max].
 566 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 567 *  you can use a scalar value for min and max.                               */
 568static inline SIMD_CFUNC simd_ushort16 simd_clamp(simd_ushort16 x, simd_ushort16 min, simd_ushort16 max);
 569/*! @abstract x clamped to the range [min, max].
 570 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 571 *  you can use a scalar value for min and max.                               */
 572static inline SIMD_CFUNC simd_ushort32 simd_clamp(simd_ushort32 x, simd_ushort32 min, simd_ushort32 max);
 573/*! @abstract x clamped to the range [min, max].
 574 *  @discussion Scalar overload: x, min and max are single values rather
 575 *  than vectors, so there are no lanes to clamp separately.                  */
 576static inline SIMD_CFUNC _Float16 simd_clamp(_Float16 x, _Float16 min, _Float16 max);
 577/*! @abstract x clamped to the range [min, max].
 578 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 579 *  you can use a scalar value for min and max.                               */
 580static inline SIMD_CFUNC simd_half2 simd_clamp(simd_half2 x, simd_half2 min, simd_half2 max);
 581/*! @abstract x clamped to the range [min, max].
 582 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 583 *  you can use a scalar value for min and max.                               */
 584static inline SIMD_CFUNC simd_half3 simd_clamp(simd_half3 x, simd_half3 min, simd_half3 max);
 585/*! @abstract x clamped to the range [min, max].
 586 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 587 *  you can use a scalar value for min and max.                               */
 588static inline SIMD_CFUNC simd_half4 simd_clamp(simd_half4 x, simd_half4 min, simd_half4 max);
 589/*! @abstract x clamped to the range [min, max].
 590 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 591 *  you can use a scalar value for min and max.                               */
 592static inline SIMD_CFUNC simd_half8 simd_clamp(simd_half8 x, simd_half8 min, simd_half8 max);
 593/*! @abstract x clamped to the range [min, max].
 594 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 595 *  you can use a scalar value for min and max.                               */
 596static inline SIMD_CFUNC simd_half16 simd_clamp(simd_half16 x, simd_half16 min, simd_half16 max);
 597/*! @abstract x clamped to the range [min, max].
 598 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 599 *  you can use a scalar value for min and max.                               */
 600static inline SIMD_CFUNC simd_half32 simd_clamp(simd_half32 x, simd_half32 min, simd_half32 max);
 601/*! @abstract x clamped to the range [min, max].
 602 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 603 *  you can use a scalar value for min and max.                               */
 604static inline SIMD_CFUNC simd_int2 simd_clamp(simd_int2 x, simd_int2 min, simd_int2 max);
 605/*! @abstract x clamped to the range [min, max].
 606 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 607 *  you can use a scalar value for min and max.                               */
 608static inline SIMD_CFUNC simd_int3 simd_clamp(simd_int3 x, simd_int3 min, simd_int3 max);
 609/*! @abstract x clamped to the range [min, max].
 610 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 611 *  you can use a scalar value for min and max.                               */
 612static inline SIMD_CFUNC simd_int4 simd_clamp(simd_int4 x, simd_int4 min, simd_int4 max);
 613/*! @abstract x clamped to the range [min, max].
 614 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 615 *  you can use a scalar value for min and max.                               */
 616static inline SIMD_CFUNC simd_int8 simd_clamp(simd_int8 x, simd_int8 min, simd_int8 max);
 617/*! @abstract x clamped to the range [min, max].
 618 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 619 *  you can use a scalar value for min and max.                               */
 620static inline SIMD_CFUNC simd_int16 simd_clamp(simd_int16 x, simd_int16 min, simd_int16 max);
 621/*! @abstract x clamped to the range [min, max].
 622 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 623 *  you can use a scalar value for min and max.                               */
 624static inline SIMD_CFUNC simd_uint2 simd_clamp(simd_uint2 x, simd_uint2 min, simd_uint2 max);
 625/*! @abstract x clamped to the range [min, max].
 626 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 627 *  you can use a scalar value for min and max.                               */
 628static inline SIMD_CFUNC simd_uint3 simd_clamp(simd_uint3 x, simd_uint3 min, simd_uint3 max);
 629/*! @abstract x clamped to the range [min, max].
 630 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 631 *  you can use a scalar value for min and max.                               */
 632static inline SIMD_CFUNC simd_uint4 simd_clamp(simd_uint4 x, simd_uint4 min, simd_uint4 max);
 633/*! @abstract x clamped to the range [min, max].
 634 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 635 *  you can use a scalar value for min and max.                               */
 636static inline SIMD_CFUNC simd_uint8 simd_clamp(simd_uint8 x, simd_uint8 min, simd_uint8 max);
 637/*! @abstract x clamped to the range [min, max].
 638 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 639 *  you can use a scalar value for min and max.                               */
 640static inline SIMD_CFUNC simd_uint16 simd_clamp(simd_uint16 x, simd_uint16 min, simd_uint16 max);
 641/*! @abstract x clamped to the range [min, max].
 642 *  @discussion Scalar overload: x, min and max are single values rather
 643 *  than vectors, so there are no lanes to clamp separately.                  */
 644static inline SIMD_CFUNC float simd_clamp(float x, float min, float max);
 645/*! @abstract x clamped to the range [min, max].
 646 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 647 *  you can use a scalar value for min and max.                               */
 648static inline SIMD_CFUNC simd_float2 simd_clamp(simd_float2 x, simd_float2 min, simd_float2 max);
 649/*! @abstract x clamped to the range [min, max].
 650 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 651 *  you can use a scalar value for min and max.                               */
 652static inline SIMD_CFUNC simd_float3 simd_clamp(simd_float3 x, simd_float3 min, simd_float3 max);
 653/*! @abstract x clamped to the range [min, max].
 654 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 655 *  you can use a scalar value for min and max.                               */
 656static inline SIMD_CFUNC simd_float4 simd_clamp(simd_float4 x, simd_float4 min, simd_float4 max);
 657/*! @abstract x clamped to the range [min, max].
 658 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 659 *  you can use a scalar value for min and max.                               */
 660static inline SIMD_CFUNC simd_float8 simd_clamp(simd_float8 x, simd_float8 min, simd_float8 max);
 661/*! @abstract x clamped to the range [min, max].
 662 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 663 *  you can use a scalar value for min and max.                               */
 664static inline SIMD_CFUNC simd_float16 simd_clamp(simd_float16 x, simd_float16 min, simd_float16 max);
 665/*! @abstract x clamped to the range [min, max].
 666 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 667 *  you can use a scalar value for min and max.                               */
 668static inline SIMD_CFUNC simd_long2 simd_clamp(simd_long2 x, simd_long2 min, simd_long2 max);
 669/*! @abstract x clamped to the range [min, max].
 670 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 671 *  you can use a scalar value for min and max.                               */
 672static inline SIMD_CFUNC simd_long3 simd_clamp(simd_long3 x, simd_long3 min, simd_long3 max);
 673/*! @abstract x clamped to the range [min, max].
 674 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 675 *  you can use a scalar value for min and max.                               */
 676static inline SIMD_CFUNC simd_long4 simd_clamp(simd_long4 x, simd_long4 min, simd_long4 max);
 677/*! @abstract x clamped to the range [min, max].
 678 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 679 *  you can use a scalar value for min and max.                               */
 680static inline SIMD_CFUNC simd_long8 simd_clamp(simd_long8 x, simd_long8 min, simd_long8 max);
 681/*! @abstract x clamped to the range [min, max].
 682 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 683 *  you can use a scalar value for min and max.                               */
 684static inline SIMD_CFUNC simd_ulong2 simd_clamp(simd_ulong2 x, simd_ulong2 min, simd_ulong2 max);
 685/*! @abstract x clamped to the range [min, max].
 686 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 687 *  you can use a scalar value for min and max.                               */
 688static inline SIMD_CFUNC simd_ulong3 simd_clamp(simd_ulong3 x, simd_ulong3 min, simd_ulong3 max);
 689/*! @abstract x clamped to the range [min, max].
 690 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 691 *  you can use a scalar value for min and max.                               */
 692static inline SIMD_CFUNC simd_ulong4 simd_clamp(simd_ulong4 x, simd_ulong4 min, simd_ulong4 max);
 693/*! @abstract x clamped to the range [min, max].
 694 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 695 *  you can use a scalar value for min and max.                               */
 696static inline SIMD_CFUNC simd_ulong8 simd_clamp(simd_ulong8 x, simd_ulong8 min, simd_ulong8 max);
 697/*! @abstract x clamped to the range [min, max].
 698 *  @discussion Scalar overload: x, min and max are single values rather
 699 *  than vectors, so there are no lanes to clamp separately.                  */
 700static inline SIMD_CFUNC double simd_clamp(double x, double min, double max);
 701/*! @abstract x clamped to the range [min, max].
 702 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 703 *  you can use a scalar value for min and max.                               */
 704static inline SIMD_CFUNC simd_double2 simd_clamp(simd_double2 x, simd_double2 min, simd_double2 max);
 705/*! @abstract x clamped to the range [min, max].
 706 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 707 *  you can use a scalar value for min and max.                               */
 708static inline SIMD_CFUNC simd_double3 simd_clamp(simd_double3 x, simd_double3 min, simd_double3 max);
 709/*! @abstract x clamped to the range [min, max].
 710 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 711 *  you can use a scalar value for min and max.                               */
 712static inline SIMD_CFUNC simd_double4 simd_clamp(simd_double4 x, simd_double4 min, simd_double4 max);
 713/*! @abstract x clamped to the range [min, max].
 714 *  @discussion Applied lane by lane.  To clamp every lane to the same range,
 715 *  you can use a scalar value for min and max.                               */
 716static inline SIMD_CFUNC simd_double8 simd_clamp(simd_double8 x, simd_double8 min, simd_double8 max);
 717/*! @abstract x clamped to the range [min, max].
 718 *  @discussion Deprecated alias for simd_clamp; use simd_clamp(x,min,max).   */
 719#define vector_clamp simd_clamp
 720  
 721/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 722static inline SIMD_CFUNC _Float16 simd_sign(_Float16 x);
 723/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 724static inline SIMD_CFUNC simd_half2 simd_sign(simd_half2 x);
 725/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 726static inline SIMD_CFUNC simd_half3 simd_sign(simd_half3 x);
 727/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 728static inline SIMD_CFUNC simd_half4 simd_sign(simd_half4 x);
 729/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 730static inline SIMD_CFUNC simd_half8 simd_sign(simd_half8 x);
 731/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 732static inline SIMD_CFUNC simd_half16 simd_sign(simd_half16 x);
 733/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 734static inline SIMD_CFUNC simd_half32 simd_sign(simd_half32 x);
 735/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 736static inline SIMD_CFUNC float simd_sign(float x);
 737/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 738static inline SIMD_CFUNC simd_float2 simd_sign(simd_float2 x);
 739/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 740static inline SIMD_CFUNC simd_float3 simd_sign(simd_float3 x);
 741/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 742static inline SIMD_CFUNC simd_float4 simd_sign(simd_float4 x);
 743/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 744static inline SIMD_CFUNC simd_float8 simd_sign(simd_float8 x);
 745/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 746static inline SIMD_CFUNC simd_float16 simd_sign(simd_float16 x);
 747/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 748static inline SIMD_CFUNC double simd_sign(double x);
 749/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 750static inline SIMD_CFUNC simd_double2 simd_sign(simd_double2 x);
 751/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 752static inline SIMD_CFUNC simd_double3 simd_sign(simd_double3 x);
 753/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 754static inline SIMD_CFUNC simd_double4 simd_sign(simd_double4 x);
 755/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.            */
 756static inline SIMD_CFUNC simd_double8 simd_sign(simd_double8 x);
 757/*! @abstract -1 if x < 0, +1 if x > 0, and 0 if x is zero or NaN.
 758 *  @discussion Deprecated alias for simd_sign.  Use simd_sign(x) instead.    */
 759#define vector_sign simd_sign
 760
 761/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 762 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 763static inline SIMD_CFUNC _Float16 simd_mix(_Float16 x, _Float16 y, _Float16 t);
 764/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 765 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 766static inline SIMD_CFUNC simd_half2 simd_mix(simd_half2 x, simd_half2 y, simd_half2 t);
 767/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 768 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 769static inline SIMD_CFUNC simd_half3 simd_mix(simd_half3 x, simd_half3 y, simd_half3 t);
 770/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 771 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 772static inline SIMD_CFUNC simd_half4 simd_mix(simd_half4 x, simd_half4 y, simd_half4 t);
 773/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 774 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 775static inline SIMD_CFUNC simd_half8 simd_mix(simd_half8 x, simd_half8 y, simd_half8 t);
 776/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 777 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 778static inline SIMD_CFUNC simd_half16 simd_mix(simd_half16 x, simd_half16 y, simd_half16 t);
 779/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 780 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 781static inline SIMD_CFUNC simd_half32 simd_mix(simd_half32 x, simd_half32 y, simd_half32 t);
 782/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 783 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 784static inline SIMD_CFUNC float simd_mix(float x, float y, float t);
 785/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 786 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 787static inline SIMD_CFUNC simd_float2 simd_mix(simd_float2 x, simd_float2 y, simd_float2 t);
 788/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 789 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 790static inline SIMD_CFUNC simd_float3 simd_mix(simd_float3 x, simd_float3 y, simd_float3 t);
 791/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 792 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 793static inline SIMD_CFUNC simd_float4 simd_mix(simd_float4 x, simd_float4 y, simd_float4 t);
 794/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 795 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 796static inline SIMD_CFUNC simd_float8 simd_mix(simd_float8 x, simd_float8 y, simd_float8 t);
 797/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 798 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 799static inline SIMD_CFUNC simd_float16 simd_mix(simd_float16 x, simd_float16 y, simd_float16 t);
 800/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 801 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 802static inline SIMD_CFUNC double simd_mix(double x, double y, double t);
 803/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 804 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 805static inline SIMD_CFUNC simd_double2 simd_mix(simd_double2 x, simd_double2 y, simd_double2 t);
 806/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 807 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 808static inline SIMD_CFUNC simd_double3 simd_mix(simd_double3 x, simd_double3 y, simd_double3 t);
 809/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 810 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 811static inline SIMD_CFUNC simd_double4 simd_mix(simd_double4 x, simd_double4 y, simd_double4 t);
 812/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 813 *  t=1).  The result is undefined if t is not in the range [0,1].            */
 814static inline SIMD_CFUNC simd_double8 simd_mix(simd_double8 x, simd_double8 y, simd_double8 t);
 815/*! @abstract Linear interpolation x+(y-x)*t between x (at t=0) and y (at
 816 *  t=1).  The result is undefined if t is not in the range [0,1].
 817 *  @discussion Deprecated alias; use simd_mix(x, y, t) instead.              */
 818#define vector_mix simd_mix
    /*! @abstract simd_lerp(x, y, t) expands to simd_mix(x, y, t).                */
 819#define simd_lerp simd_mix
 820
 821/*! @abstract A good approximation to 1/x.
 822 *  @discussion Accurate to a few units in the last place (ULPs), but the
 823 *  result may overflow or underflow if x is very close to the limits of
 824 *  representation, and is undefined if x is infinity or NaN.                 */
 825static inline SIMD_CFUNC _Float16 simd_precise_recip(_Float16 x);
 826/*! @abstract A good approximation to 1/x.
 827 *  @discussion Accurate to a few units in the last place (ULPs), but the
 828 *  result may overflow or underflow if x is very close to the limits of
 829 *  representation, and is undefined if x is infinity or NaN.                 */
 830static inline SIMD_CFUNC simd_half2 simd_precise_recip(simd_half2 x);
 831/*! @abstract A good approximation to 1/x.
 832 *  @discussion Accurate to a few units in the last place (ULPs), but the
 833 *  result may overflow or underflow if x is very close to the limits of
 834 *  representation, and is undefined if x is infinity or NaN.                 */
 835static inline SIMD_CFUNC simd_half3 simd_precise_recip(simd_half3 x);
 836/*! @abstract A good approximation to 1/x.
 837 *  @discussion Accurate to a few units in the last place (ULPs), but the
 838 *  result may overflow or underflow if x is very close to the limits of
 839 *  representation, and is undefined if x is infinity or NaN.                 */
 840static inline SIMD_CFUNC simd_half4 simd_precise_recip(simd_half4 x);
 841/*! @abstract A good approximation to 1/x.
 842 *  @discussion Accurate to a few units in the last place (ULPs), but the
 843 *  result may overflow or underflow if x is very close to the limits of
 844 *  representation, and is undefined if x is infinity or NaN.                 */
 845static inline SIMD_CFUNC simd_half8 simd_precise_recip(simd_half8 x);
 846/*! @abstract A good approximation to 1/x.
 847 *  @discussion Accurate to a few units in the last place (ULPs), but the
 848 *  result may overflow or underflow if x is very close to the limits of
 849 *  representation, and is undefined if x is infinity or NaN.                 */
 850static inline SIMD_CFUNC simd_half16 simd_precise_recip(simd_half16 x);
 851/*! @abstract A good approximation to 1/x.
 852 *  @discussion Accurate to a few units in the last place (ULPs), but the
 853 *  result may overflow or underflow if x is very close to the limits of
 854 *  representation, and is undefined if x is infinity or NaN.                 */
 855static inline SIMD_CFUNC simd_half32 simd_precise_recip(simd_half32 x);
 856/*! @abstract A good approximation to 1/x.
 857 *  @discussion Accurate to a few units in the last place (ULPs), but the
 858 *  result may overflow or underflow if x is very close to the limits of
 859 *  representation, and is undefined if x is infinity or NaN.                 */
 860static inline SIMD_CFUNC float simd_precise_recip(float x);
 861/*! @abstract A good approximation to 1/x.
 862 *  @discussion Accurate to a few units in the last place (ULPs), but the
 863 *  result may overflow or underflow if x is very close to the limits of
 864 *  representation, and is undefined if x is infinity or NaN.                 */
 865static inline SIMD_CFUNC simd_float2 simd_precise_recip(simd_float2 x);
 866/*! @abstract A good approximation to 1/x.
 867 *  @discussion Accurate to a few units in the last place (ULPs), but the
 868 *  result may overflow or underflow if x is very close to the limits of
 869 *  representation, and is undefined if x is infinity or NaN.                 */
 870static inline SIMD_CFUNC simd_float3 simd_precise_recip(simd_float3 x);
 871/*! @abstract A good approximation to 1/x.
 872 *  @discussion Accurate to a few units in the last place (ULPs), but the
 873 *  result may overflow or underflow if x is very close to the limits of
 874 *  representation, and is undefined if x is infinity or NaN.                 */
 875static inline SIMD_CFUNC simd_float4 simd_precise_recip(simd_float4 x);
 876/*! @abstract A good approximation to 1/x.
 877 *  @discussion Accurate to a few units in the last place (ULPs), but the
 878 *  result may overflow or underflow if x is very close to the limits of
 879 *  representation, and is undefined if x is infinity or NaN.                 */
 880static inline SIMD_CFUNC simd_float8 simd_precise_recip(simd_float8 x);
 881/*! @abstract A good approximation to 1/x.
 882 *  @discussion Accurate to a few units in the last place (ULPs), but the
 883 *  result may overflow or underflow if x is very close to the limits of
 884 *  representation, and is undefined if x is infinity or NaN.                 */
 885static inline SIMD_CFUNC simd_float16 simd_precise_recip(simd_float16 x);
 886/*! @abstract A good approximation to 1/x.
 887 *  @discussion If x is very close to the limits of representation, the
 888 *  result may overflow or underflow; otherwise this function is accurate to
 889 *  a few units in the last place (ULPs).                                     */
 890static inline SIMD_CFUNC double simd_precise_recip(double x);
 891/*! @abstract A good approximation to 1/x.
 892 *  @discussion If x is very close to the limits of representation, the
 893 *  result may overflow or underflow; otherwise this function is accurate to
 894 *  a few units in the last place (ULPs).                                     */
 895static inline SIMD_CFUNC simd_double2 simd_precise_recip(simd_double2 x);
 896/*! @abstract A good approximation to 1/x.
 897 *  @discussion If x is very close to the limits of representation, the
 898 *  result may overflow or underflow; otherwise this function is accurate to
 899 *  a few units in the last place (ULPs).                                     */
 900static inline SIMD_CFUNC simd_double3 simd_precise_recip(simd_double3 x);
 901/*! @abstract A good approximation to 1/x.
 902 *  @discussion If x is very close to the limits of representation, the
 903 *  result may overflow or underflow; otherwise this function is accurate to
 904 *  a few units in the last place (ULPs).                                     */
 905static inline SIMD_CFUNC simd_double4 simd_precise_recip(simd_double4 x);
 906/*! @abstract A good approximation to 1/x.
 907 *  @discussion If x is very close to the limits of representation, the
 908 *  result may overflow or underflow; otherwise this function is accurate to
 909 *  a few units in the last place (ULPs).                                     */
 910static inline SIMD_CFUNC simd_double8 simd_precise_recip(simd_double8 x);
 911/*! @abstract A good approximation to 1/x.
 912 *  @discussion Deprecated. Use simd_precise_recip(x) instead.                */
 913#define vector_precise_recip simd_precise_recip
 914
 915/*! @abstract A fast approximation to 1/x.
 916 *  @discussion If x is very close to the limits of representation, the
 917 *  result may overflow or underflow; otherwise this function is accurate to
 918 *  at least 11 bits for float and 22 bits for double.                        */
 919static inline SIMD_CFUNC _Float16 simd_fast_recip(_Float16 x);
 920/*! @abstract A fast approximation to 1/x.
 921 *  @discussion If x is very close to the limits of representation, the
 922 *  result may overflow or underflow; otherwise this function is accurate to
 923 *  at least 11 bits for float and 22 bits for double.                        */
 924static inline SIMD_CFUNC simd_half2 simd_fast_recip(simd_half2 x);
 925/*! @abstract A fast approximation to 1/x.
 926 *  @discussion If x is very close to the limits of representation, the
 927 *  result may overflow or underflow; otherwise this function is accurate to
 928 *  at least 11 bits for float and 22 bits for double.                        */
 929static inline SIMD_CFUNC simd_half3 simd_fast_recip(simd_half3 x);
 930/*! @abstract A fast approximation to 1/x.
 931 *  @discussion If x is very close to the limits of representation, the
 932 *  result may overflow or underflow; otherwise this function is accurate to
 933 *  at least 11 bits for float and 22 bits for double.                        */
 934static inline SIMD_CFUNC simd_half4 simd_fast_recip(simd_half4 x);
 935/*! @abstract A fast approximation to 1/x.
 936 *  @discussion If x is very close to the limits of representation, the
 937 *  result may overflow or underflow; otherwise this function is accurate to
 938 *  at least 11 bits for float and 22 bits for double.                        */
 939static inline SIMD_CFUNC simd_half8 simd_fast_recip(simd_half8 x);
 940/*! @abstract A fast approximation to 1/x.
 941 *  @discussion If x is very close to the limits of representation, the
 942 *  result may overflow or underflow; otherwise this function is accurate to
 943 *  at least 11 bits for float and 22 bits for double.                        */
 944static inline SIMD_CFUNC simd_half16 simd_fast_recip(simd_half16 x);
 945/*! @abstract A fast approximation to 1/x.
 946 *  @discussion If x is very close to the limits of representation, the
 947 *  result may overflow or underflow; otherwise this function is accurate to
 948 *  at least 11 bits for float and 22 bits for double.                        */
 949static inline SIMD_CFUNC simd_half32 simd_fast_recip(simd_half32 x);
 950/*! @abstract A fast approximation to 1/x.
 951 *  @discussion If x is very close to the limits of representation, the
 952 *  result may overflow or underflow; otherwise this function is accurate to
 953 *  at least 11 bits for float and 22 bits for double.                        */
 954static inline SIMD_CFUNC float simd_fast_recip(float x);
 955/*! @abstract A fast approximation to 1/x.
 956 *  @discussion If x is very close to the limits of representation, the
 957 *  result may overflow or underflow; otherwise this function is accurate to
 958 *  at least 11 bits for float and 22 bits for double.                        */
 959static inline SIMD_CFUNC simd_float2 simd_fast_recip(simd_float2 x);
 960/*! @abstract A fast approximation to 1/x.
 961 *  @discussion If x is very close to the limits of representation, the
 962 *  result may overflow or underflow; otherwise this function is accurate to
 963 *  at least 11 bits for float and 22 bits for double.                        */
 964static inline SIMD_CFUNC simd_float3 simd_fast_recip(simd_float3 x);
 965/*! @abstract A fast approximation to 1/x.
 966 *  @discussion If x is very close to the limits of representation, the
 967 *  result may overflow or underflow; otherwise this function is accurate to
 968 *  at least 11 bits for float and 22 bits for double.                        */
 969static inline SIMD_CFUNC simd_float4 simd_fast_recip(simd_float4 x);
 970/*! @abstract A fast approximation to 1/x.
 971 *  @discussion If x is very close to the limits of representation, the
 972 *  result may overflow or underflow; otherwise this function is accurate to
 973 *  at least 11 bits for float and 22 bits for double.                        */
 974static inline SIMD_CFUNC simd_float8 simd_fast_recip(simd_float8 x);
 975/*! @abstract A fast approximation to 1/x.
 976 *  @discussion If x is very close to the limits of representation, the
 977 *  result may overflow or underflow; otherwise this function is accurate to
 978 *  at least 11 bits for float and 22 bits for double.                        */
 979static inline SIMD_CFUNC simd_float16 simd_fast_recip(simd_float16 x);
 980/*! @abstract A fast approximation to 1/x.
 981 *  @discussion If x is very close to the limits of representation, the
 982 *  result may overflow or underflow; otherwise this function is accurate to
 983 *  at least 11 bits for float and 22 bits for double.                        */
 984static inline SIMD_CFUNC double simd_fast_recip(double x);
 985/*! @abstract A fast approximation to 1/x.
 986 *  @discussion If x is very close to the limits of representation, the
 987 *  result may overflow or underflow; otherwise this function is accurate to
 988 *  at least 11 bits for float and 22 bits for double.                        */
 989static inline SIMD_CFUNC simd_double2 simd_fast_recip(simd_double2 x);
 990/*! @abstract A fast approximation to 1/x.
 991 *  @discussion If x is very close to the limits of representation, the
 992 *  result may overflow or underflow; otherwise this function is accurate to
 993 *  at least 11 bits for float and 22 bits for double.                        */
 994static inline SIMD_CFUNC simd_double3 simd_fast_recip(simd_double3 x);
 995/*! @abstract A fast approximation to 1/x.
 996 *  @discussion If x is very close to the limits of representation, the
 997 *  result may overflow or underflow; otherwise this function is accurate to
 998 *  at least 11 bits for float and 22 bits for double.                        */
 999static inline SIMD_CFUNC simd_double4 simd_fast_recip(simd_double4 x);
1000/*! @abstract A fast approximation to 1/x.
1001 *  @discussion If x is very close to the limits of representation, the
1002 *  result may overflow or underflow; otherwise this function is accurate to
1003 *  at least 11 bits for float and 22 bits for double.                        */
1004static inline SIMD_CFUNC simd_double8 simd_fast_recip(simd_double8 x);
1005/*! @abstract A fast approximation to 1/x.
1006 *  @discussion Deprecated. Use simd_fast_recip(x) instead.                   */
1007#define vector_fast_recip simd_fast_recip
1008
1009/*! @abstract An approximation to 1/x.
1010 *  @discussion If x is very close to the limits of representation, the
1011 *  result may overflow or underflow. This function maps to
1012 *  simd_fast_recip(x) if -ffast-math is specified, and to
1013 *  simd_precise_recip(x) otherwise.                                          */
1014static inline SIMD_CFUNC _Float16 simd_recip(_Float16 x);
1015/*! @abstract An approximation to 1/x.
1016 *  @discussion If x is very close to the limits of representation, the
1017 *  result may overflow or underflow. This function maps to
1018 *  simd_fast_recip(x) if -ffast-math is specified, and to
1019 *  simd_precise_recip(x) otherwise.                                          */
1020static inline SIMD_CFUNC simd_half2 simd_recip(simd_half2 x);
1021/*! @abstract An approximation to 1/x.
1022 *  @discussion If x is very close to the limits of representation, the
1023 *  result may overflow or underflow. This function maps to
1024 *  simd_fast_recip(x) if -ffast-math is specified, and to
1025 *  simd_precise_recip(x) otherwise.                                          */
1026static inline SIMD_CFUNC simd_half3 simd_recip(simd_half3 x);
1027/*! @abstract An approximation to 1/x.
1028 *  @discussion If x is very close to the limits of representation, the
1029 *  result may overflow or underflow. This function maps to
1030 *  simd_fast_recip(x) if -ffast-math is specified, and to
1031 *  simd_precise_recip(x) otherwise.                                          */
1032static inline SIMD_CFUNC simd_half4 simd_recip(simd_half4 x);
1033/*! @abstract An approximation to 1/x.
1034 *  @discussion If x is very close to the limits of representation, the
1035 *  result may overflow or underflow. This function maps to
1036 *  simd_fast_recip(x) if -ffast-math is specified, and to
1037 *  simd_precise_recip(x) otherwise.                                          */
1038static inline SIMD_CFUNC simd_half8 simd_recip(simd_half8 x);
1039/*! @abstract An approximation to 1/x.
1040 *  @discussion If x is very close to the limits of representation, the
1041 *  result may overflow or underflow. This function maps to
1042 *  simd_fast_recip(x) if -ffast-math is specified, and to
1043 *  simd_precise_recip(x) otherwise.                                          */
1044static inline SIMD_CFUNC simd_half16 simd_recip(simd_half16 x);
1045/*! @abstract An approximation to 1/x.
1046 *  @discussion If x is very close to the limits of representation, the
1047 *  result may overflow or underflow. This function maps to
1048 *  simd_fast_recip(x) if -ffast-math is specified, and to
1049 *  simd_precise_recip(x) otherwise.                                          */
1050static inline SIMD_CFUNC simd_half32 simd_recip(simd_half32 x);
1051/*! @abstract An approximation to 1/x.
1052 *  @discussion If x is very close to the limits of representation, the
1053 *  result may overflow or underflow. This function maps to
1054 *  simd_fast_recip(x) if -ffast-math is specified, and to
1055 *  simd_precise_recip(x) otherwise.                                          */
1056static inline SIMD_CFUNC float simd_recip(float x);
1057/*! @abstract An approximation to 1/x.
1058 *  @discussion If x is very close to the limits of representation, the
1059 *  result may overflow or underflow. This function maps to
1060 *  simd_fast_recip(x) if -ffast-math is specified, and to
1061 *  simd_precise_recip(x) otherwise.                                          */
1062static inline SIMD_CFUNC simd_float2 simd_recip(simd_float2 x);
1063/*! @abstract An approximation to 1/x.
1064 *  @discussion If x is very close to the limits of representation, the
1065 *  result may overflow or underflow. This function maps to
1066 *  simd_fast_recip(x) if -ffast-math is specified, and to
1067 *  simd_precise_recip(x) otherwise.                                          */
1068static inline SIMD_CFUNC simd_float3 simd_recip(simd_float3 x);
1069/*! @abstract An approximation to 1/x.
1070 *  @discussion If x is very close to the limits of representation, the
1071 *  result may overflow or underflow. This function maps to
1072 *  simd_fast_recip(x) if -ffast-math is specified, and to
1073 *  simd_precise_recip(x) otherwise.                                          */
1074static inline SIMD_CFUNC simd_float4 simd_recip(simd_float4 x);
1075/*! @abstract An approximation to 1/x.
1076 *  @discussion If x is very close to the limits of representation, the
1077 *  result may overflow or underflow. This function maps to
1078 *  simd_fast_recip(x) if -ffast-math is specified, and to
1079 *  simd_precise_recip(x) otherwise.                                          */
1080static inline SIMD_CFUNC simd_float8 simd_recip(simd_float8 x);
1081/*! @abstract An approximation to 1/x.
1082 *  @discussion If x is very close to the limits of representation, the
1083 *  result may overflow or underflow. This function maps to
1084 *  simd_fast_recip(x) if -ffast-math is specified, and to
1085 *  simd_precise_recip(x) otherwise.                                          */
1086static inline SIMD_CFUNC simd_float16 simd_recip(simd_float16 x);
1087/*! @abstract An approximation to 1/x.
1088 *  @discussion If x is very close to the limits of representation, the
1089 *  result may overflow or underflow. This function maps to
1090 *  simd_fast_recip(x) if -ffast-math is specified, and to
1091 *  simd_precise_recip(x) otherwise.                                          */
1092static inline SIMD_CFUNC double simd_recip(double x);
1093/*! @abstract An approximation to 1/x.
1094 *  @discussion If x is very close to the limits of representation, the
1095 *  result may overflow or underflow. This function maps to
1096 *  simd_fast_recip(x) if -ffast-math is specified, and to
1097 *  simd_precise_recip(x) otherwise.                                          */
1098static inline SIMD_CFUNC simd_double2 simd_recip(simd_double2 x);
1099/*! @abstract An approximation to 1/x.
1100 *  @discussion If x is very close to the limits of representation, the
1101 *  result may overflow or underflow. This function maps to
1102 *  simd_fast_recip(x) if -ffast-math is specified, and to
1103 *  simd_precise_recip(x) otherwise.                                          */
1104static inline SIMD_CFUNC simd_double3 simd_recip(simd_double3 x);
1105/*! @abstract An approximation to 1/x.
1106 *  @discussion If x is very close to the limits of representation, the
1107 *  result may overflow or underflow. This function maps to
1108 *  simd_fast_recip(x) if -ffast-math is specified, and to
1109 *  simd_precise_recip(x) otherwise.                                          */
1110static inline SIMD_CFUNC simd_double4 simd_recip(simd_double4 x);
1111/*! @abstract An approximation to 1/x.
1112 *  @discussion If x is very close to the limits of representation, the
1113 *  result may overflow or underflow. This function maps to
1114 *  simd_fast_recip(x) if -ffast-math is specified, and to
1115 *  simd_precise_recip(x) otherwise.                                          */
1116static inline SIMD_CFUNC simd_double8 simd_recip(simd_double8 x);
1117/*! @abstract An approximation to 1/x.
1118 *  @discussion Deprecated. Use simd_recip(x) instead.                        */
1119#define vector_recip simd_recip
1120
1121/*! @abstract A good approximation to 1/sqrt(x).
1122 *  @discussion This function is accurate to a few units in the last place
1123 *  (ULPs).                                                                   */
1124static inline SIMD_CFUNC _Float16 simd_precise_rsqrt(_Float16 x);
1125/*! @abstract A good approximation to 1/sqrt(x).
1126 *  @discussion This function is accurate to a few units in the last place
1127 *  (ULPs).                                                                   */
1128static inline SIMD_CFUNC simd_half2 simd_precise_rsqrt(simd_half2 x);
1129/*! @abstract A good approximation to 1/sqrt(x).
1130 *  @discussion This function is accurate to a few units in the last place
1131 *  (ULPs).                                                                   */
1132static inline SIMD_CFUNC simd_half3 simd_precise_rsqrt(simd_half3 x);
1133/*! @abstract A good approximation to 1/sqrt(x).
1134 *  @discussion This function is accurate to a few units in the last place
1135 *  (ULPs).                                                                   */
1136static inline SIMD_CFUNC simd_half4 simd_precise_rsqrt(simd_half4 x);
1137/*! @abstract A good approximation to 1/sqrt(x).
1138 *  @discussion This function is accurate to a few units in the last place
1139 *  (ULPs).                                                                   */
1140static inline SIMD_CFUNC simd_half8 simd_precise_rsqrt(simd_half8 x);
1141/*! @abstract A good approximation to 1/sqrt(x).
1142 *  @discussion This function is accurate to a few units in the last place
1143 *  (ULPs).                                                                   */
1144static inline SIMD_CFUNC simd_half16 simd_precise_rsqrt(simd_half16 x);
1145/*! @abstract A good approximation to 1/sqrt(x).
1146 *  @discussion This function is accurate to a few units in the last place
1147 *  (ULPs).                                                                   */
1148static inline SIMD_CFUNC simd_half32 simd_precise_rsqrt(simd_half32 x);
1149/*! @abstract A good approximation to 1/sqrt(x).
1150 *  @discussion This function is accurate to a few units in the last place
1151 *  (ULPs).                                                                   */
1152static inline SIMD_CFUNC float simd_precise_rsqrt(float x);
1153/*! @abstract A good approximation to 1/sqrt(x).
1154 *  @discussion This function is accurate to a few units in the last place
1155 *  (ULPs).                                                                   */
1156static inline SIMD_CFUNC simd_float2 simd_precise_rsqrt(simd_float2 x);
1157/*! @abstract A good approximation to 1/sqrt(x).
1158 *  @discussion This function is accurate to a few units in the last place
1159 *  (ULPs).                                                                   */
1160static inline SIMD_CFUNC simd_float3 simd_precise_rsqrt(simd_float3 x);
1161/*! @abstract A good approximation to 1/sqrt(x).
1162 *  @discussion This function is accurate to a few units in the last place
1163 *  (ULPs).                                                                   */
1164static inline SIMD_CFUNC simd_float4 simd_precise_rsqrt(simd_float4 x);
1165/*! @abstract A good approximation to 1/sqrt(x).
1166 *  @discussion This function is accurate to a few units in the last place
1167 *  (ULPs).                                                                   */
1168static inline SIMD_CFUNC simd_float8 simd_precise_rsqrt(simd_float8 x);
1169/*! @abstract A good approximation to 1/sqrt(x).
1170 *  @discussion This function is accurate to a few units in the last place
1171 *  (ULPs).                                                                   */
1172static inline SIMD_CFUNC simd_float16 simd_precise_rsqrt(simd_float16 x);
1173/*! @abstract A good approximation to 1/sqrt(x).
1174 *  @discussion This function is accurate to a few units in the last place
1175 *  (ULPs).                                                                   */
1176static inline SIMD_CFUNC double simd_precise_rsqrt(double x);
1177/*! @abstract A good approximation to 1/sqrt(x).
1178 *  @discussion This function is accurate to a few units in the last place
1179 *  (ULPs).                                                                   */
1180static inline SIMD_CFUNC simd_double2 simd_precise_rsqrt(simd_double2 x);
1181/*! @abstract A good approximation to 1/sqrt(x).
1182 *  @discussion This function is accurate to a few units in the last place
1183 *  (ULPs).                                                                   */
1184static inline SIMD_CFUNC simd_double3 simd_precise_rsqrt(simd_double3 x);
1185/*! @abstract A good approximation to 1/sqrt(x).
1186 *  @discussion This function is accurate to a few units in the last place
1187 *  (ULPs).                                                                   */
1188static inline SIMD_CFUNC simd_double4 simd_precise_rsqrt(simd_double4 x);
1189/*! @abstract A good approximation to 1/sqrt(x).
1190 *  @discussion This function is accurate to a few units in the last place
1191 *  (ULPs).                                                                   */
1192static inline SIMD_CFUNC simd_double8 simd_precise_rsqrt(simd_double8 x);
1193/*! @abstract A good approximation to 1/sqrt(x).
1194 *  @discussion Deprecated. Use simd_precise_rsqrt(x) instead.                */
1195#define vector_precise_rsqrt simd_precise_rsqrt
1196
1197/*! @abstract A fast approximation to 1/sqrt(x).
1198 *  @discussion This function is accurate to at least 11 bits for float and
1199 *  22 bits for double.                                                       */
1200static inline SIMD_CFUNC _Float16 simd_fast_rsqrt(_Float16 x);
1201/*! @abstract A fast approximation to 1/sqrt(x).
1202 *  @discussion This function is accurate to at least 11 bits for float and
1203 *  22 bits for double.                                                       */
1204static inline SIMD_CFUNC simd_half2 simd_fast_rsqrt(simd_half2 x);
1205/*! @abstract A fast approximation to 1/sqrt(x).
1206 *  @discussion This function is accurate to at least 11 bits for float and
1207 *  22 bits for double.                                                       */
1208static inline SIMD_CFUNC simd_half3 simd_fast_rsqrt(simd_half3 x);
1209/*! @abstract A fast approximation to 1/sqrt(x).
1210 *  @discussion This function is accurate to at least 11 bits for float and
1211 *  22 bits for double.                                                       */
1212static inline SIMD_CFUNC simd_half4 simd_fast_rsqrt(simd_half4 x);
1213/*! @abstract A fast approximation to 1/sqrt(x).
1214 *  @discussion This function is accurate to at least 11 bits for float and
1215 *  22 bits for double.                                                       */
1216static inline SIMD_CFUNC simd_half8 simd_fast_rsqrt(simd_half8 x);
1217/*! @abstract A fast approximation to 1/sqrt(x).
1218 *  @discussion This function is accurate to at least 11 bits for float and
1219 *  22 bits for double.                                                       */
1220static inline SIMD_CFUNC simd_half16 simd_fast_rsqrt(simd_half16 x);
1221/*! @abstract A fast approximation to 1/sqrt(x).
1222 *  @discussion This function is accurate to at least 11 bits for float and
1223 *  22 bits for double.                                                       */
1224static inline SIMD_CFUNC simd_half32 simd_fast_rsqrt(simd_half32 x);
1225/*! @abstract A fast approximation to 1/sqrt(x).
1226 *  @discussion This function is accurate to at least 11 bits for float and
1227 *  22 bits for double.                                                       */
1228static inline SIMD_CFUNC float simd_fast_rsqrt(float x);
1229/*! @abstract A fast approximation to 1/sqrt(x).
1230 *  @discussion This function is accurate to at least 11 bits for float and
1231 *  22 bits for double.                                                       */
1232static inline SIMD_CFUNC simd_float2 simd_fast_rsqrt(simd_float2 x);
1233/*! @abstract A fast approximation to 1/sqrt(x).
1234 *  @discussion This function is accurate to at least 11 bits for float and
1235 *  22 bits for double.                                                       */
1236static inline SIMD_CFUNC simd_float3 simd_fast_rsqrt(simd_float3 x);
1237/*! @abstract A fast approximation to 1/sqrt(x).
1238 *  @discussion This function is accurate to at least 11 bits for float and
1239 *  22 bits for double.                                                       */
1240static inline SIMD_CFUNC simd_float4 simd_fast_rsqrt(simd_float4 x);
1241/*! @abstract A fast approximation to 1/sqrt(x).
1242 *  @discussion This function is accurate to at least 11 bits for float and
1243 *  22 bits for double.                                                       */
1244static inline SIMD_CFUNC simd_float8 simd_fast_rsqrt(simd_float8 x);
1245/*! @abstract A fast approximation to 1/sqrt(x).
1246 *  @discussion This function is accurate to at least 11 bits for float and
1247 *  22 bits for double.                                                       */
1248static inline SIMD_CFUNC simd_float16 simd_fast_rsqrt(simd_float16 x);
1249/*! @abstract A fast approximation to 1/sqrt(x).
1250 *  @discussion This function is accurate to at least 11 bits for float and
1251 *  22 bits for double.                                                       */
1252static inline SIMD_CFUNC double simd_fast_rsqrt(double x);
1253/*! @abstract A fast approximation to 1/sqrt(x).
1254 *  @discussion This function is accurate to at least 11 bits for float and
1255 *  22 bits for double.                                                       */
1256static inline SIMD_CFUNC simd_double2 simd_fast_rsqrt(simd_double2 x);
1257/*! @abstract A fast approximation to 1/sqrt(x).
1258 *  @discussion This function is accurate to at least 11 bits for float and
1259 *  22 bits for double.                                                       */
1260static inline SIMD_CFUNC simd_double3 simd_fast_rsqrt(simd_double3 x);
1261/*! @abstract A fast approximation to 1/sqrt(x).
1262 *  @discussion This function is accurate to at least 11 bits for float and
1263 *  22 bits for double.                                                       */
1264static inline SIMD_CFUNC simd_double4 simd_fast_rsqrt(simd_double4 x);
1265/*! @abstract A fast approximation to 1/sqrt(x).
1266 *  @discussion This function is accurate to at least 11 bits for float and
1267 *  22 bits for double.                                                       */
1268static inline SIMD_CFUNC simd_double8 simd_fast_rsqrt(simd_double8 x);
1269/*! @abstract A fast approximation to 1/sqrt(x).
1270 *  @discussion Deprecated. Use simd_fast_rsqrt(x) instead.                   */
1271#define vector_fast_rsqrt simd_fast_rsqrt
1272
1273/*! @abstract An approximation to 1/sqrt(x).
1274 *  @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
1275 *  specified, and to simd_precise_rsqrt(x) otherwise.                        */
1276static inline SIMD_CFUNC _Float16 simd_rsqrt(_Float16 x);
1277/*! @abstract An approximation to 1/sqrt(x).
1278 *  @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
1279 *  specified, and to simd_precise_rsqrt(x) otherwise.                        */
1280static inline SIMD_CFUNC simd_half2 simd_rsqrt(simd_half2 x);
1281/*! @abstract An approximation to 1/sqrt(x).
1282 *  @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
1283 *  specified, and to simd_precise_rsqrt(x) otherwise.                        */
1284static inline SIMD_CFUNC simd_half3 simd_rsqrt(simd_half3 x);
1285/*! @abstract An approximation to 1/sqrt(x).
1286 *  @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
1287 *  specified, and to simd_precise_rsqrt(x) otherwise.                        */
1288static inline SIMD_CFUNC simd_half4 simd_rsqrt(simd_half4 x);
1289/*! @abstract An approximation to 1/sqrt(x).
1290 *  @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
1291 *  specified, and to simd_precise_rsqrt(x) otherwise.                        */
1292static inline SIMD_CFUNC simd_half8 simd_rsqrt(simd_half8 x);
1293/*! @abstract An approximation to 1/sqrt(x).
1294 *  @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
1295 *  specified, and to simd_precise_rsqrt(x) otherwise.                        */
1296static inline SIMD_CFUNC simd_half16 simd_rsqrt(simd_half16 x);
1297/*! @abstract An approximation to 1/sqrt(x).
1298 *  @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
1299 *  specified, and to simd_precise_rsqrt(x) otherwise.                        */
1300static inline SIMD_CFUNC simd_half32 simd_rsqrt(simd_half32 x);
1301/*! @abstract An approximation to 1/sqrt(x).
1302 *  @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
1303 *  specified, and to simd_precise_rsqrt(x) otherwise.                        */
1304static inline SIMD_CFUNC float simd_rsqrt(float x);
1305/*! @abstract An approximation to 1/sqrt(x).
1306 *  @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
1307 *  specified, and to simd_precise_rsqrt(x) otherwise.                        */
1308static inline SIMD_CFUNC simd_float2 simd_rsqrt(simd_float2 x);
1309/*! @abstract An approximation to 1/sqrt(x).
1310 *  @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
1311 *  specified, and to simd_precise_rsqrt(x) otherwise.                        */
1312static inline SIMD_CFUNC simd_float3 simd_rsqrt(simd_float3 x);
1313/*! @abstract An approximation to 1/sqrt(x).
1314 *  @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
1315 *  specified, and to simd_precise_rsqrt(x) otherwise.                        */
1316static inline SIMD_CFUNC simd_float4 simd_rsqrt(simd_float4 x);
1317/*! @abstract An approximation to 1/sqrt(x).
1318 *  @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
1319 *  specified, and to simd_precise_rsqrt(x) otherwise.                        */
1320static inline SIMD_CFUNC simd_float8 simd_rsqrt(simd_float8 x);
1321/*! @abstract An approximation to 1/sqrt(x).
1322 *  @discussion This function maps to simd_fast_recip(x) if -ffast-math is
1323 *  specified, and to simd_precise_recip(x) otherwise.                        */
1324static inline SIMD_CFUNC simd_float16 simd_rsqrt(simd_float16 x);
1325/*! @abstract An approximation to 1/sqrt(x).
1326 *  @discussion This function maps to simd_fast_recip(x) if -ffast-math is
1327 *  specified, and to simd_precise_recip(x) otherwise.                        */
1328static inline SIMD_CFUNC double simd_rsqrt(double x);
1329/*! @abstract An approximation to 1/sqrt(x).
1330 *  @discussion This function maps to simd_fast_recip(x) if -ffast-math is
1331 *  specified, and to simd_precise_recip(x) otherwise.                        */
1332static inline SIMD_CFUNC simd_double2 simd_rsqrt(simd_double2 x);
1333/*! @abstract An approximation to 1/sqrt(x).
1334 *  @discussion This function maps to simd_fast_recip(x) if -ffast-math is
1335 *  specified, and to simd_precise_recip(x) otherwise.                        */
1336static inline SIMD_CFUNC simd_double3 simd_rsqrt(simd_double3 x);
1337/*! @abstract An approximation to 1/sqrt(x).
1338 *  @discussion This function maps to simd_fast_recip(x) if -ffast-math is
1339 *  specified, and to simd_precise_recip(x) otherwise.                        */
1340static inline SIMD_CFUNC simd_double4 simd_rsqrt(simd_double4 x);
1341/*! @abstract An approximation to 1/sqrt(x).
1342 *  @discussion This function maps to simd_fast_recip(x) if -ffast-math is
1343 *  specified, and to simd_precise_recip(x) otherwise.                        */
1344static inline SIMD_CFUNC simd_double8 simd_rsqrt(simd_double8 x);
1345/*! @abstract An approximation to 1/sqrt(x).
1346 *  @discussion Deprecated. Use simd_rsqrt(x) instead.                        */
1347#define vector_rsqrt simd_rsqrt
1348
1349/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1350 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1351 *  positive and finite, then the two values are exactly equal.               */
1352static inline SIMD_CFUNC _Float16 simd_fract(_Float16 x);
1353/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1354 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1355 *  positive and finite, then the two values are exactly equal.               */
1356static inline SIMD_CFUNC simd_half2 simd_fract(simd_half2 x);
1357/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1358 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1359 *  positive and finite, then the two values are exactly equal.               */
1360static inline SIMD_CFUNC simd_half3 simd_fract(simd_half3 x);
1361/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1362 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1363 *  positive and finite, then the two values are exactly equal.               */
1364static inline SIMD_CFUNC simd_half4 simd_fract(simd_half4 x);
1365/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1366 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1367 *  positive and finite, then the two values are exactly equal.               */
1368static inline SIMD_CFUNC simd_half8 simd_fract(simd_half8 x);
1369/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1370 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1371 *  positive and finite, then the two values are exactly equal.               */
1372static inline SIMD_CFUNC simd_half16 simd_fract(simd_half16 x);
1373/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1374 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1375 *  positive and finite, then the two values are exactly equal.               */
1376static inline SIMD_CFUNC simd_half32 simd_fract(simd_half32 x);
1377/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1378 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1379 *  positive and finite, then the two values are exactly equal.               */
1380static inline SIMD_CFUNC float simd_fract(float x);
1381/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1382 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1383 *  positive and finite, then the two values are exactly equal.               */
1384static inline SIMD_CFUNC simd_float2 simd_fract(simd_float2 x);
1385/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1386 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1387 *  positive and finite, then the two values are exactly equal.               */
1388static inline SIMD_CFUNC simd_float3 simd_fract(simd_float3 x);
1389/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1390 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1391 *  positive and finite, then the two values are exactly equal.               */
1392static inline SIMD_CFUNC simd_float4 simd_fract(simd_float4 x);
1393/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1394 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1395 *  positive and finite, then the two values are exactly equal.               */
1396static inline SIMD_CFUNC simd_float8 simd_fract(simd_float8 x);
1397/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1398 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1399 *  positive and finite, then the two values are exactly equal.               */
1400static inline SIMD_CFUNC simd_float16 simd_fract(simd_float16 x);
1401/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1402 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1403 *  positive and finite, then the two values are exactly equal.               */
1404static inline SIMD_CFUNC double simd_fract(double x);
1405/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1406 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1407 *  positive and finite, then the two values are exactly equal.               */
1408static inline SIMD_CFUNC simd_double2 simd_fract(simd_double2 x);
1409/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1410 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1411 *  positive and finite, then the two values are exactly equal.               */
1412static inline SIMD_CFUNC simd_double3 simd_fract(simd_double3 x);
1413/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1414 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1415 *  positive and finite, then the two values are exactly equal.               */
1416static inline SIMD_CFUNC simd_double4 simd_fract(simd_double4 x);
1417/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1418 *  @discussion floor(x) + simd_fract(x) is *approximately* equal to x. If x is
1419 *  positive and finite, then the two values are exactly equal.               */
1420static inline SIMD_CFUNC simd_double8 simd_fract(simd_double8 x);
1421/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1422 *  @discussion Deprecated. Use simd_fract(x) instead.                        */
1423#define vector_fract simd_fract
1424
1425/*! @abstract 0 if x < edge, and 1 otherwise.
1426 *  @discussion Use a scalar value for edge if you want to apply the same
1427 *  threshold to all lanes. The result has the same type as x.                */
1428static inline SIMD_CFUNC _Float16 simd_step(_Float16 edge, _Float16 x);
1429/*! @abstract 0 if x < edge, and 1 otherwise.
1430 *  @discussion Use a scalar value for edge if you want to apply the same
1431 *  threshold to all lanes. The result has the same type as x.                */
1432static inline SIMD_CFUNC simd_half2 simd_step(simd_half2 edge, simd_half2 x);
1433/*! @abstract 0 if x < edge, and 1 otherwise.
1434 *  @discussion Use a scalar value for edge if you want to apply the same
1435 *  threshold to all lanes. The result has the same type as x.                */
1436static inline SIMD_CFUNC simd_half3 simd_step(simd_half3 edge, simd_half3 x);
1437/*! @abstract 0 if x < edge, and 1 otherwise.
1438 *  @discussion Use a scalar value for edge if you want to apply the same
1439 *  threshold to all lanes. The result has the same type as x.                */
1440static inline SIMD_CFUNC simd_half4 simd_step(simd_half4 edge, simd_half4 x);
1441/*! @abstract 0 if x < edge, and 1 otherwise.
1442 *  @discussion Use a scalar value for edge if you want to apply the same
1443 *  threshold to all lanes. The result has the same type as x.                */
1444static inline SIMD_CFUNC simd_half8 simd_step(simd_half8 edge, simd_half8 x);
1445/*! @abstract 0 if x < edge, and 1 otherwise.
1446 *  @discussion Use a scalar value for edge if you want to apply the same
1447 *  threshold to all lanes. The result has the same type as x.                */
1448static inline SIMD_CFUNC simd_half16 simd_step(simd_half16 edge, simd_half16 x);
1449/*! @abstract 0 if x < edge, and 1 otherwise.
1450 *  @discussion Use a scalar value for edge if you want to apply the same
1451 *  threshold to all lanes. The result has the same type as x.                */
1452static inline SIMD_CFUNC simd_half32 simd_step(simd_half32 edge, simd_half32 x);
1453/*! @abstract 0 if x < edge, and 1 otherwise.
1454 *  @discussion Use a scalar value for edge if you want to apply the same
1455 *  threshold to all lanes. The result has the same type as x.                */
1456static inline SIMD_CFUNC float simd_step(float edge, float x);
1457/*! @abstract 0 if x < edge, and 1 otherwise.
1458 *  @discussion Use a scalar value for edge if you want to apply the same
1459 *  threshold to all lanes. The result has the same type as x.                */
1460static inline SIMD_CFUNC simd_float2 simd_step(simd_float2 edge, simd_float2 x);
1461/*! @abstract 0 if x < edge, and 1 otherwise.
1462 *  @discussion Use a scalar value for edge if you want to apply the same
1463 *  threshold to all lanes. The result has the same type as x.                */
1464static inline SIMD_CFUNC simd_float3 simd_step(simd_float3 edge, simd_float3 x);
1465/*! @abstract 0 if x < edge, and 1 otherwise.
1466 *  @discussion Use a scalar value for edge if you want to apply the same
1467 *  threshold to all lanes. The result has the same type as x.                */
1468static inline SIMD_CFUNC simd_float4 simd_step(simd_float4 edge, simd_float4 x);
1469/*! @abstract 0 if x < edge, and 1 otherwise.
1470 *  @discussion Use a scalar value for edge if you want to apply the same
1471 *  threshold to all lanes. The result has the same type as x.                */
1472static inline SIMD_CFUNC simd_float8 simd_step(simd_float8 edge, simd_float8 x);
1473/*! @abstract 0 if x < edge, and 1 otherwise.
1474 *  @discussion Use a scalar value for edge if you want to apply the same
1475 *  threshold to all lanes. The result has the same type as x.                */
1476static inline SIMD_CFUNC simd_float16 simd_step(simd_float16 edge, simd_float16 x);
1477/*! @abstract 0 if x < edge, and 1 otherwise.
1478 *  @discussion Use a scalar value for edge if you want to apply the same
1479 *  threshold to all lanes. The result has the same type as x.                */
1480static inline SIMD_CFUNC double simd_step(double edge, double x);
1481/*! @abstract 0 if x < edge, and 1 otherwise.
1482 *  @discussion Use a scalar value for edge if you want to apply the same
1483 *  threshold to all lanes. The result has the same type as x.                */
1484static inline SIMD_CFUNC simd_double2 simd_step(simd_double2 edge, simd_double2 x);
1485/*! @abstract 0 if x < edge, and 1 otherwise.
1486 *  @discussion Use a scalar value for edge if you want to apply the same
1487 *  threshold to all lanes. The result has the same type as x.                */
1488static inline SIMD_CFUNC simd_double3 simd_step(simd_double3 edge, simd_double3 x);
1489/*! @abstract 0 if x < edge, and 1 otherwise.
1490 *  @discussion Use a scalar value for edge if you want to apply the same
1491 *  threshold to all lanes. The result has the same type as x.                */
1492static inline SIMD_CFUNC simd_double4 simd_step(simd_double4 edge, simd_double4 x);
1493/*! @abstract 0 if x < edge, and 1 otherwise.
1494 *  @discussion Use a scalar value for edge if you want to apply the same
1495 *  threshold to all lanes. The result has the same type as x.                */
1496static inline SIMD_CFUNC simd_double8 simd_step(simd_double8 edge, simd_double8 x);
1497/*! @abstract 0 if x < edge, and 1 otherwise.
1498 *  @discussion Deprecated. Use simd_step(edge, x) instead.                   */
1499#define vector_step simd_step
1500
1501/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1502 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1503 *  to clamp all lanes at the same points.                                    */
1504static inline SIMD_CFUNC _Float16 simd_smoothstep(_Float16 edge0, _Float16 edge1, _Float16 x);
1505/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1506 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1507 *  to clamp all lanes at the same points.                                    */
1508static inline SIMD_CFUNC simd_half2 simd_smoothstep(simd_half2 edge0, simd_half2 edge1, simd_half2 x);
1509/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1510 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1511 *  to clamp all lanes at the same points.                                    */
1512static inline SIMD_CFUNC simd_half3 simd_smoothstep(simd_half3 edge0, simd_half3 edge1, simd_half3 x);
1513/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1514 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1515 *  to clamp all lanes at the same points.                                    */
1516static inline SIMD_CFUNC simd_half4 simd_smoothstep(simd_half4 edge0, simd_half4 edge1, simd_half4 x);
1517/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1518 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1519 *  to clamp all lanes at the same points.                                    */
1520static inline SIMD_CFUNC simd_half8 simd_smoothstep(simd_half8 edge0, simd_half8 edge1, simd_half8 x);
1521/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1522 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1523 *  to clamp all lanes at the same points.                                    */
1524static inline SIMD_CFUNC simd_half16 simd_smoothstep(simd_half16 edge0, simd_half16 edge1, simd_half16 x);
1525/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1526 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1527 *  to clamp all lanes at the same points.                                    */
1528static inline SIMD_CFUNC simd_half32 simd_smoothstep(simd_half32 edge0, simd_half32 edge1, simd_half32 x);
1529/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1530 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1531 *  to clamp all lanes at the same points.                                    */
1532static inline SIMD_CFUNC float simd_smoothstep(float edge0, float edge1, float x);
1533/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1534 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1535 *  to clamp all lanes at the same points.                                    */
1536static inline SIMD_CFUNC simd_float2 simd_smoothstep(simd_float2 edge0, simd_float2 edge1, simd_float2 x);
1537/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1538 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1539 *  to clamp all lanes at the same points.                                    */
1540static inline SIMD_CFUNC simd_float3 simd_smoothstep(simd_float3 edge0, simd_float3 edge1, simd_float3 x);
1541/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1542 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1543 *  to clamp all lanes at the same points.                                    */
1544static inline SIMD_CFUNC simd_float4 simd_smoothstep(simd_float4 edge0, simd_float4 edge1, simd_float4 x);
1545/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1546 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1547 *  to clamp all lanes at the same points.                                    */
1548static inline SIMD_CFUNC simd_float8 simd_smoothstep(simd_float8 edge0, simd_float8 edge1, simd_float8 x);
1549/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1550 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1551 *  to clamp all lanes at the same points.                                    */
1552static inline SIMD_CFUNC simd_float16 simd_smoothstep(simd_float16 edge0, simd_float16 edge1, simd_float16 x);
1553/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1554 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1555 *  to clamp all lanes at the same points.                                    */
1556static inline SIMD_CFUNC double simd_smoothstep(double edge0, double edge1, double x);
1557/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1558 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1559 *  to clamp all lanes at the same points.                                    */
1560static inline SIMD_CFUNC simd_double2 simd_smoothstep(simd_double2 edge0, simd_double2 edge1, simd_double2 x);
1561/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1562 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1563 *  to clamp all lanes at the same points.                                    */
1564static inline SIMD_CFUNC simd_double3 simd_smoothstep(simd_double3 edge0, simd_double3 edge1, simd_double3 x);
1565/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1566 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1567 *  to clamp all lanes at the same points.                                    */
1568static inline SIMD_CFUNC simd_double4 simd_smoothstep(simd_double4 edge0, simd_double4 edge1, simd_double4 x);
1569/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1570 *  @discussion You can use a scalar value for edge0 and edge1 if you want
1571 *  to clamp all lanes at the same points.                                    */
1572static inline SIMD_CFUNC simd_double8 simd_smoothstep(simd_double8 edge0, simd_double8 edge1, simd_double8 x);
1573/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
1574 *  @discussion Deprecated. Use simd_smoothstep(edge0, edge1, x) instead.     */
1575#define vector_smoothstep simd_smoothstep
1576
1577/*! @abstract Sum of elements in x.
1578 *  @discussion This computation may overflow; especially for 8-bit types you
1579 *  may need to convert to a wider type before reducing.                      */
1580static inline SIMD_CFUNC char simd_reduce_add(simd_char2 x);
1581/*! @abstract Sum of elements in x.
1582 *  @discussion This computation may overflow; especially for 8-bit types you
1583 *  may need to convert to a wider type before reducing.                      */
1584static inline SIMD_CFUNC char simd_reduce_add(simd_char3 x);
1585/*! @abstract Sum of elements in x.
1586 *  @discussion This computation may overflow; especially for 8-bit types you
1587 *  may need to convert to a wider type before reducing.                      */
1588static inline SIMD_CFUNC char simd_reduce_add(simd_char4 x);
1589/*! @abstract Sum of elements in x.
1590 *  @discussion This computation may overflow; especially for 8-bit types you
1591 *  may need to convert to a wider type before reducing.                      */
1592static inline SIMD_CFUNC char simd_reduce_add(simd_char8 x);
1593/*! @abstract Sum of elements in x.
1594 *  @discussion This computation may overflow; especially for 8-bit types you
1595 *  may need to convert to a wider type before reducing.                      */
1596static inline SIMD_CFUNC char simd_reduce_add(simd_char16 x);
1597/*! @abstract Sum of elements in x.
1598 *  @discussion This computation may overflow; especially for 8-bit types you
1599 *  may need to convert to a wider type before reducing.                      */
1600static inline SIMD_CFUNC char simd_reduce_add(simd_char32 x);
1601/*! @abstract Sum of elements in x.
1602 *  @discussion This computation may overflow; especially for 8-bit types you
1603 *  may need to convert to a wider type before reducing.                      */
1604static inline SIMD_CFUNC char simd_reduce_add(simd_char64 x);
1605/*! @abstract Sum of elements in x.
1606 *  @discussion This computation may overflow; especially for 8-bit types you
1607 *  may need to convert to a wider type before reducing.                      */
1608static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar2 x);
1609/*! @abstract Sum of elements in x.
1610 *  @discussion This computation may overflow; especially for 8-bit types you
1611 *  may need to convert to a wider type before reducing.                      */
1612static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar3 x);
1613/*! @abstract Sum of elements in x.
1614 *  @discussion This computation may overflow; especially for 8-bit types you
1615 *  may need to convert to a wider type before reducing.                      */
1616static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar4 x);
1617/*! @abstract Sum of elements in x.
1618 *  @discussion This computation may overflow; especially for 8-bit types you
1619 *  may need to convert to a wider type before reducing.                      */
1620static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar8 x);
1621/*! @abstract Sum of elements in x.
1622 *  @discussion This computation may overflow; especially for 8-bit types you
1623 *  may need to convert to a wider type before reducing.                      */
1624static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar16 x);
1625/*! @abstract Sum of elements in x.
1626 *  @discussion This computation may overflow; especially for 8-bit types you
1627 *  may need to convert to a wider type before reducing.                      */
1628static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar32 x);
1629/*! @abstract Sum of elements in x.
1630 *  @discussion This computation may overflow; especially for 8-bit types you
1631 *  may need to convert to a wider type before reducing.                      */
1632static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar64 x);
1633/*! @abstract Sum of elements in x.
1634 *  @discussion This computation may overflow; especially for 8-bit types you
1635 *  may need to convert to a wider type before reducing.                      */
1636static inline SIMD_CFUNC short simd_reduce_add(simd_short2 x);
1637/*! @abstract Sum of elements in x.
1638 *  @discussion This computation may overflow; especially for 8-bit types you
1639 *  may need to convert to a wider type before reducing.                      */
1640static inline SIMD_CFUNC short simd_reduce_add(simd_short3 x);
1641/*! @abstract Sum of elements in x.
1642 *  @discussion This computation may overflow; especially for 8-bit types you
1643 *  may need to convert to a wider type before reducing.                      */
1644static inline SIMD_CFUNC short simd_reduce_add(simd_short4 x);
1645/*! @abstract Sum of elements in x.
1646 *  @discussion This computation may overflow; especially for 8-bit types you
1647 *  may need to convert to a wider type before reducing.                      */
1648static inline SIMD_CFUNC short simd_reduce_add(simd_short8 x);
1649/*! @abstract Sum of elements in x.
1650 *  @discussion This computation may overflow; especially for 8-bit types you
1651 *  may need to convert to a wider type before reducing.                      */
1652static inline SIMD_CFUNC short simd_reduce_add(simd_short16 x);
1653/*! @abstract Sum of elements in x.
1654 *  @discussion This computation may overflow; especially for 8-bit types you
1655 *  may need to convert to a wider type before reducing.                      */
1656static inline SIMD_CFUNC short simd_reduce_add(simd_short32 x);
1657/*! @abstract Sum of elements in x.
1658 *  @discussion This computation may overflow; especially for 8-bit types you
1659 *  may need to convert to a wider type before reducing.                      */
1660static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort2 x);
1661/*! @abstract Sum of elements in x.
1662 *  @discussion This computation may overflow; especially for 8-bit types you
1663 *  may need to convert to a wider type before reducing.                      */
1664static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort3 x);
1665/*! @abstract Sum of elements in x.
1666 *  @discussion This computation may overflow; especially for 8-bit types you
1667 *  may need to convert to a wider type before reducing.                      */
1668static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort4 x);
1669/*! @abstract Sum of elements in x.
1670 *  @discussion This computation may overflow; especially for 8-bit types you
1671 *  may need to convert to a wider type before reducing.                      */
1672static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort8 x);
1673/*! @abstract Sum of elements in x.
1674 *  @discussion This computation may overflow; especially for 8-bit types you
1675 *  may need to convert to a wider type before reducing.                      */
1676static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort16 x);
1677/*! @abstract Sum of elements in x.
1678 *  @discussion This computation may overflow; especially for 8-bit types you
1679 *  may need to convert to a wider type before reducing.                      */
1680static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort32 x);
1681/*! @abstract Sum of elements in x.
1682 *  @discussion This computation may overflow; especially for 8-bit types you
1683 *  may need to convert to a wider type before reducing.                      */
1684static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half2 x);
1685/*! @abstract Sum of elements in x.
1686 *  @discussion This computation may overflow; especially for 8-bit types you
1687 *  may need to convert to a wider type before reducing.                      */
1688static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half3 x);
1689/*! @abstract Sum of elements in x.
1690 *  @discussion This computation may overflow; especially for 8-bit types you
1691 *  may need to convert to a wider type before reducing.                      */
1692static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half4 x);
1693/*! @abstract Sum of elements in x.
1694 *  @discussion This computation may overflow; especially for 8-bit types you
1695 *  may need to convert to a wider type before reducing.                      */
1696static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half8 x);
1697/*! @abstract Sum of elements in x.
1698 *  @discussion This computation may overflow; especially for 8-bit types you
1699 *  may need to convert to a wider type before reducing.                      */
1700static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half16 x);
1701/*! @abstract Sum of elements in x.
1702 *  @discussion This computation may overflow; especially for 8-bit types you
1703 *  may need to convert to a wider type before reducing.                      */
1704static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half32 x);
1705/*! @abstract Sum of elements in x.
1706 *  @discussion This computation may overflow; especially for 8-bit types you
1707 *  may need to convert to a wider type before reducing.                      */
1708static inline SIMD_CFUNC int simd_reduce_add(simd_int2 x);
1709/*! @abstract Sum of elements in x.
1710 *  @discussion This computation may overflow; especially for 8-bit types you
1711 *  may need to convert to a wider type before reducing.                      */
1712static inline SIMD_CFUNC int simd_reduce_add(simd_int3 x);
1713/*! @abstract Sum of elements in x.
1714 *  @discussion This computation may overflow; especially for 8-bit types you
1715 *  may need to convert to a wider type before reducing.                      */
1716static inline SIMD_CFUNC int simd_reduce_add(simd_int4 x);
1717/*! @abstract Sum of elements in x.
1718 *  @discussion This computation may overflow; especially for 8-bit types you
1719 *  may need to convert to a wider type before reducing.                      */
1720static inline SIMD_CFUNC int simd_reduce_add(simd_int8 x);
1721/*! @abstract Sum of elements in x.
1722 *  @discussion This computation may overflow; especially for 8-bit types you
1723 *  may need to convert to a wider type before reducing.                      */
1724static inline SIMD_CFUNC int simd_reduce_add(simd_int16 x);
1725/*! @abstract Sum of elements in x.
1726 *  @discussion This computation may overflow; especially for 8-bit types you
1727 *  may need to convert to a wider type before reducing.                      */
1728static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint2 x);
1729/*! @abstract Sum of elements in x.
1730 *  @discussion This computation may overflow; especially for 8-bit types you
1731 *  may need to convert to a wider type before reducing.                      */
1732static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint3 x);
1733/*! @abstract Sum of elements in x.
1734 *  @discussion This computation may overflow; especially for 8-bit types you
1735 *  may need to convert to a wider type before reducing.                      */
1736static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint4 x);
1737/*! @abstract Sum of elements in x.
1738 *  @discussion This computation may overflow; especially for 8-bit types you
1739 *  may need to convert to a wider type before reducing.                      */
1740static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint8 x);
1741/*! @abstract Sum of elements in x.
1742 *  @discussion This computation may overflow; especially for 8-bit types you
1743 *  may need to convert to a wider type before reducing.                      */
1744static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint16 x);
1745/*! @abstract Sum of elements in x.
1746 *  @discussion This computation may overflow; especially for 8-bit types you
1747 *  may need to convert to a wider type before reducing.                      */
1748static inline SIMD_CFUNC float simd_reduce_add(simd_float2 x);
1749/*! @abstract Sum of elements in x.
1750 *  @discussion This computation may overflow; especially for 8-bit types you
1751 *  may need to convert to a wider type before reducing.                      */
1752static inline SIMD_CFUNC float simd_reduce_add(simd_float3 x);
1753/*! @abstract Sum of elements in x.
1754 *  @discussion This computation may overflow; especially for 8-bit types you
1755 *  may need to convert to a wider type before reducing.                      */
1756static inline SIMD_CFUNC float simd_reduce_add(simd_float4 x);
1757/*! @abstract Sum of elements in x.
1758 *  @discussion This computation may overflow; especially for 8-bit types you
1759 *  may need to convert to a wider type before reducing.                      */
1760static inline SIMD_CFUNC float simd_reduce_add(simd_float8 x);
1761/*! @abstract Sum of elements in x.
1762 *  @discussion This computation may overflow; especially for 8-bit types you
1763 *  may need to convert to a wider type before reducing.                      */
1764static inline SIMD_CFUNC float simd_reduce_add(simd_float16 x);
1765/*! @abstract Sum of elements in x.
1766 *  @discussion This computation may overflow; especial for 8-bit types you
1767 *  may need to convert to a wider type before reducing.                      */
1768static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long2 x);
1769/*! @abstract Sum of elements in x.
1770 *  @discussion This computation may overflow; especial for 8-bit types you
1771 *  may need to convert to a wider type before reducing.                      */
1772static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long3 x);
1773/*! @abstract Sum of elements in x.
1774 *  @discussion This computation may overflow; especial for 8-bit types you
1775 *  may need to convert to a wider type before reducing.                      */
1776static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long4 x);
1777/*! @abstract Sum of elements in x.
1778 *  @discussion This computation may overflow; especial for 8-bit types you
1779 *  may need to convert to a wider type before reducing.                      */
1780static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long8 x);
1781/*! @abstract Sum of elements in x.
1782 *  @discussion This computation may overflow; especial for 8-bit types you
1783 *  may need to convert to a wider type before reducing.                      */
1784static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong2 x);
1785/*! @abstract Sum of elements in x.
1786 *  @discussion This computation may overflow; especial for 8-bit types you
1787 *  may need to convert to a wider type before reducing.                      */
1788static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong3 x);
1789/*! @abstract Sum of elements in x.
1790 *  @discussion This computation may overflow; especial for 8-bit types you
1791 *  may need to convert to a wider type before reducing.                      */
1792static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong4 x);
1793/*! @abstract Sum of elements in x.
1794 *  @discussion This computation may overflow; especial for 8-bit types you
1795 *  may need to convert to a wider type before reducing.                      */
1796static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong8 x);
1797/*! @abstract Sum of elements in x.
1798 *  @discussion This computation may overflow; especial for 8-bit types you
1799 *  may need to convert to a wider type before reducing.                      */
1800static inline SIMD_CFUNC double simd_reduce_add(simd_double2 x);
1801/*! @abstract Sum of elements in x.
1802 *  @discussion This computation may overflow; especial for 8-bit types you
1803 *  may need to convert to a wider type before reducing.                      */
1804static inline SIMD_CFUNC double simd_reduce_add(simd_double3 x);
1805/*! @abstract Sum of elements in x.
1806 *  @discussion This computation may overflow; especial for 8-bit types you
1807 *  may need to convert to a wider type before reducing.                      */
1808static inline SIMD_CFUNC double simd_reduce_add(simd_double4 x);
1809/*! @abstract Sum of elements in x.
1810 *  @discussion This computation may overflow; especial for 8-bit types you
1811 *  may need to convert to a wider type before reducing.                      */
1812static inline SIMD_CFUNC double simd_reduce_add(simd_double8 x);
/*! @abstract Sum of elements in x.
 *  @discussion Deprecated. Use simd_reduce_add(x) instead; this name is a
 *  plain textual alias for it.                                               */
#define vector_reduce_add simd_reduce_add
1816  
1817/*! @abstract Minimum of elements in x.                                       */
1818static inline SIMD_CFUNC char simd_reduce_min(simd_char2 x);
1819/*! @abstract Minimum of elements in x.                                       */
1820static inline SIMD_CFUNC char simd_reduce_min(simd_char3 x);
1821/*! @abstract Minimum of elements in x.                                       */
1822static inline SIMD_CFUNC char simd_reduce_min(simd_char4 x);
1823/*! @abstract Minimum of elements in x.                                       */
1824static inline SIMD_CFUNC char simd_reduce_min(simd_char8 x);
1825/*! @abstract Minimum of elements in x.                                       */
1826static inline SIMD_CFUNC char simd_reduce_min(simd_char16 x);
1827/*! @abstract Minimum of elements in x.                                       */
1828static inline SIMD_CFUNC char simd_reduce_min(simd_char32 x);
1829/*! @abstract Minimum of elements in x.                                       */
1830static inline SIMD_CFUNC char simd_reduce_min(simd_char64 x);
1831/*! @abstract Minimum of elements in x.                                       */
1832static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar2 x);
1833/*! @abstract Minimum of elements in x.                                       */
1834static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar3 x);
1835/*! @abstract Minimum of elements in x.                                       */
1836static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar4 x);
1837/*! @abstract Minimum of elements in x.                                       */
1838static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar8 x);
1839/*! @abstract Minimum of elements in x.                                       */
1840static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar16 x);
1841/*! @abstract Minimum of elements in x.                                       */
1842static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar32 x);
1843/*! @abstract Minimum of elements in x.                                       */
1844static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar64 x);
1845/*! @abstract Minimum of elements in x.                                       */
1846static inline SIMD_CFUNC short simd_reduce_min(simd_short2 x);
1847/*! @abstract Minimum of elements in x.                                       */
1848static inline SIMD_CFUNC short simd_reduce_min(simd_short3 x);
1849/*! @abstract Minimum of elements in x.                                       */
1850static inline SIMD_CFUNC short simd_reduce_min(simd_short4 x);
1851/*! @abstract Minimum of elements in x.                                       */
1852static inline SIMD_CFUNC short simd_reduce_min(simd_short8 x);
1853/*! @abstract Minimum of elements in x.                                       */
1854static inline SIMD_CFUNC short simd_reduce_min(simd_short16 x);
1855/*! @abstract Minimum of elements in x.                                       */
1856static inline SIMD_CFUNC short simd_reduce_min(simd_short32 x);
1857/*! @abstract Minimum of elements in x.                                       */
1858static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort2 x);
1859/*! @abstract Minimum of elements in x.                                       */
1860static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort3 x);
1861/*! @abstract Minimum of elements in x.                                       */
1862static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort4 x);
1863/*! @abstract Minimum of elements in x.                                       */
1864static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort8 x);
1865/*! @abstract Minimum of elements in x.                                       */
1866static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort16 x);
1867/*! @abstract Minimum of elements in x.                                       */
1868static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort32 x);
1869/*! @abstract Minimum of elements in x.                                       */
1870static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half2 x);
1871/*! @abstract Minimum of elements in x.                                       */
1872static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half3 x);
1873/*! @abstract Minimum of elements in x.                                       */
1874static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half4 x);
1875/*! @abstract Minimum of elements in x.                                       */
1876static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half8 x);
1877/*! @abstract Minimum of elements in x.                                       */
1878static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half16 x);
1879/*! @abstract Minimum of elements in x.                                       */
1880static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half32 x);
1881/*! @abstract Minimum of elements in x.                                       */
1882static inline SIMD_CFUNC int simd_reduce_min(simd_int2 x);
1883/*! @abstract Minimum of elements in x.                                       */
1884static inline SIMD_CFUNC int simd_reduce_min(simd_int3 x);
1885/*! @abstract Minimum of elements in x.                                       */
1886static inline SIMD_CFUNC int simd_reduce_min(simd_int4 x);
1887/*! @abstract Minimum of elements in x.                                       */
1888static inline SIMD_CFUNC int simd_reduce_min(simd_int8 x);
1889/*! @abstract Minimum of elements in x.                                       */
1890static inline SIMD_CFUNC int simd_reduce_min(simd_int16 x);
1891/*! @abstract Minimum of elements in x.                                       */
1892static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint2 x);
1893/*! @abstract Minimum of elements in x.                                       */
1894static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint3 x);
1895/*! @abstract Minimum of elements in x.                                       */
1896static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint4 x);
1897/*! @abstract Minimum of elements in x.                                       */
1898static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint8 x);
1899/*! @abstract Minimum of elements in x.                                       */
1900static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint16 x);
1901/*! @abstract Minimum of elements in x.                                       */
1902static inline SIMD_CFUNC float simd_reduce_min(simd_float2 x);
1903/*! @abstract Minimum of elements in x.                                       */
1904static inline SIMD_CFUNC float simd_reduce_min(simd_float3 x);
1905/*! @abstract Minimum of elements in x.                                       */
1906static inline SIMD_CFUNC float simd_reduce_min(simd_float4 x);
1907/*! @abstract Minimum of elements in x.                                       */
1908static inline SIMD_CFUNC float simd_reduce_min(simd_float8 x);
1909/*! @abstract Minimum of elements in x.                                       */
1910static inline SIMD_CFUNC float simd_reduce_min(simd_float16 x);
1911/*! @abstract Minimum of elements in x.                                       */
1912static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long2 x);
1913/*! @abstract Minimum of elements in x.                                       */
1914static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long3 x);
1915/*! @abstract Minimum of elements in x.                                       */
1916static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long4 x);
1917/*! @abstract Minimum of elements in x.                                       */
1918static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long8 x);
1919/*! @abstract Minimum of elements in x.                                       */
1920static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong2 x);
1921/*! @abstract Minimum of elements in x.                                       */
1922static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong3 x);
1923/*! @abstract Minimum of elements in x.                                       */
1924static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong4 x);
1925/*! @abstract Minimum of elements in x.                                       */
1926static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong8 x);
1927/*! @abstract Minimum of elements in x.                                       */
1928static inline SIMD_CFUNC double simd_reduce_min(simd_double2 x);
1929/*! @abstract Minimum of elements in x.                                       */
1930static inline SIMD_CFUNC double simd_reduce_min(simd_double3 x);
1931/*! @abstract Minimum of elements in x.                                       */
1932static inline SIMD_CFUNC double simd_reduce_min(simd_double4 x);
1933/*! @abstract Minimum of elements in x.                                       */
1934static inline SIMD_CFUNC double simd_reduce_min(simd_double8 x);
/*! @abstract Minimum of elements in x.
 *  @discussion Deprecated. Use simd_reduce_min(x) instead; this name is a
 *  plain textual alias for it.                                               */
#define vector_reduce_min simd_reduce_min
1938  
1939/*! @abstract Maximum of elements in x.                                       */
1940static inline SIMD_CFUNC char simd_reduce_max(simd_char2 x);
1941/*! @abstract Maximum of elements in x.                                       */
1942static inline SIMD_CFUNC char simd_reduce_max(simd_char3 x);
1943/*! @abstract Maximum of elements in x.                                       */
1944static inline SIMD_CFUNC char simd_reduce_max(simd_char4 x);
1945/*! @abstract Maximum of elements in x.                                       */
1946static inline SIMD_CFUNC char simd_reduce_max(simd_char8 x);
1947/*! @abstract Maximum of elements in x.                                       */
1948static inline SIMD_CFUNC char simd_reduce_max(simd_char16 x);
1949/*! @abstract Maximum of elements in x.                                       */
1950static inline SIMD_CFUNC char simd_reduce_max(simd_char32 x);
1951/*! @abstract Maximum of elements in x.                                       */
1952static inline SIMD_CFUNC char simd_reduce_max(simd_char64 x);
1953/*! @abstract Maximum of elements in x.                                       */
1954static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar2 x);
1955/*! @abstract Maximum of elements in x.                                       */
1956static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar3 x);
1957/*! @abstract Maximum of elements in x.                                       */
1958static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar4 x);
1959/*! @abstract Maximum of elements in x.                                       */
1960static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar8 x);
1961/*! @abstract Maximum of elements in x.                                       */
1962static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar16 x);
1963/*! @abstract Maximum of elements in x.                                       */
1964static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar32 x);
1965/*! @abstract Maximum of elements in x.                                       */
1966static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar64 x);
1967/*! @abstract Maximum of elements in x.                                       */
1968static inline SIMD_CFUNC short simd_reduce_max(simd_short2 x);
1969/*! @abstract Maximum of elements in x.                                       */
1970static inline SIMD_CFUNC short simd_reduce_max(simd_short3 x);
1971/*! @abstract Maximum of elements in x.                                       */
1972static inline SIMD_CFUNC short simd_reduce_max(simd_short4 x);
1973/*! @abstract Maximum of elements in x.                                       */
1974static inline SIMD_CFUNC short simd_reduce_max(simd_short8 x);
1975/*! @abstract Maximum of elements in x.                                       */
1976static inline SIMD_CFUNC short simd_reduce_max(simd_short16 x);
1977/*! @abstract Maximum of elements in x.                                       */
1978static inline SIMD_CFUNC short simd_reduce_max(simd_short32 x);
1979/*! @abstract Maximum of elements in x.                                       */
1980static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort2 x);
1981/*! @abstract Maximum of elements in x.                                       */
1982static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort3 x);
1983/*! @abstract Maximum of elements in x.                                       */
1984static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort4 x);
1985/*! @abstract Maximum of elements in x.                                       */
1986static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort8 x);
1987/*! @abstract Maximum of elements in x.                                       */
1988static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort16 x);
1989/*! @abstract Maximum of elements in x.                                       */
1990static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort32 x);
1991/*! @abstract Maximum of elements in x.                                       */
1992static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half2 x);
1993/*! @abstract Maximum of elements in x.                                       */
1994static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half3 x);
1995/*! @abstract Maximum of elements in x.                                       */
1996static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half4 x);
1997/*! @abstract Maximum of elements in x.                                       */
1998static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half8 x);
1999/*! @abstract Maximum of elements in x.                                       */
2000static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half16 x);
2001/*! @abstract Maximum of elements in x.                                       */
2002static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half32 x);
2003/*! @abstract Maximum of elements in x.                                       */
2004static inline SIMD_CFUNC int simd_reduce_max(simd_int2 x);
2005/*! @abstract Maximum of elements in x.                                       */
2006static inline SIMD_CFUNC int simd_reduce_max(simd_int3 x);
2007/*! @abstract Maximum of elements in x.                                       */
2008static inline SIMD_CFUNC int simd_reduce_max(simd_int4 x);
2009/*! @abstract Maximum of elements in x.                                       */
2010static inline SIMD_CFUNC int simd_reduce_max(simd_int8 x);
2011/*! @abstract Maximum of elements in x.                                       */
2012static inline SIMD_CFUNC int simd_reduce_max(simd_int16 x);
2013/*! @abstract Maximum of elements in x.                                       */
2014static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint2 x);
2015/*! @abstract Maximum of elements in x.                                       */
2016static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint3 x);
2017/*! @abstract Maximum of elements in x.                                       */
2018static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint4 x);
2019/*! @abstract Maximum of elements in x.                                       */
2020static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint8 x);
2021/*! @abstract Maximum of elements in x.                                       */
2022static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint16 x);
2023/*! @abstract Maximum of elements in x.                                       */
2024static inline SIMD_CFUNC float simd_reduce_max(simd_float2 x);
2025/*! @abstract Maximum of elements in x.                                       */
2026static inline SIMD_CFUNC float simd_reduce_max(simd_float3 x);
2027/*! @abstract Maximum of elements in x.                                       */
2028static inline SIMD_CFUNC float simd_reduce_max(simd_float4 x);
2029/*! @abstract Maximum of elements in x.                                       */
2030static inline SIMD_CFUNC float simd_reduce_max(simd_float8 x);
2031/*! @abstract Maximum of elements in x.                                       */
2032static inline SIMD_CFUNC float simd_reduce_max(simd_float16 x);
2033/*! @abstract Maximum of elements in x.                                       */
2034static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long2 x);
2035/*! @abstract Maximum of elements in x.                                       */
2036static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long3 x);
2037/*! @abstract Maximum of elements in x.                                       */
2038static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long4 x);
2039/*! @abstract Maximum of elements in x.                                       */
2040static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long8 x);
2041/*! @abstract Maximum of elements in x.                                       */
2042static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong2 x);
2043/*! @abstract Maximum of elements in x.                                       */
2044static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong3 x);
2045/*! @abstract Maximum of elements in x.                                       */
2046static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong4 x);
2047/*! @abstract Maximum of elements in x.                                       */
2048static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong8 x);
2049/*! @abstract Maximum of elements in x.                                       */
2050static inline SIMD_CFUNC double simd_reduce_max(simd_double2 x);
2051/*! @abstract Maximum of elements in x.                                       */
2052static inline SIMD_CFUNC double simd_reduce_max(simd_double3 x);
2053/*! @abstract Maximum of elements in x.                                       */
2054static inline SIMD_CFUNC double simd_reduce_max(simd_double4 x);
2055/*! @abstract Maximum of elements in x.                                       */
2056static inline SIMD_CFUNC double simd_reduce_max(simd_double8 x);
/*! @abstract Maximum of elements in x.
 *  @discussion Deprecated. Use simd_reduce_max(x) instead; this name is a
 *  plain textual alias for it.                                               */
#define vector_reduce_max simd_reduce_max
2060  
2061/*! @abstract True if and only if each lane of x is equal to the
2062 *  corresponding lane of y.                                                  */
2063static inline SIMD_CFUNC simd_bool simd_equal(simd_char2 x, simd_char2 y) {
2064  return simd_all(x == y);
2065}
2066/*! @abstract True if and only if each lane of x is equal to the
2067 *  corresponding lane of y.                                                  */
2068static inline SIMD_CFUNC simd_bool simd_equal(simd_char3 x, simd_char3 y) {
2069  return simd_all(x == y);
2070}
2071/*! @abstract True if and only if each lane of x is equal to the
2072 *  corresponding lane of y.                                                  */
2073static inline SIMD_CFUNC simd_bool simd_equal(simd_char4 x, simd_char4 y) {
2074  return simd_all(x == y);
2075}
2076/*! @abstract True if and only if each lane of x is equal to the
2077 *  corresponding lane of y.                                                  */
2078static inline SIMD_CFUNC simd_bool simd_equal(simd_char8 x, simd_char8 y) {
2079  return simd_all(x == y);
2080}
2081/*! @abstract True if and only if each lane of x is equal to the
2082 *  corresponding lane of y.                                                  */
2083static inline SIMD_CFUNC simd_bool simd_equal(simd_char16 x, simd_char16 y) {
2084  return simd_all(x == y);
2085}
2086/*! @abstract True if and only if each lane of x is equal to the
2087 *  corresponding lane of y.                                                  */
2088static inline SIMD_CFUNC simd_bool simd_equal(simd_char32 x, simd_char32 y) {
2089  return simd_all(x == y);
2090}
2091/*! @abstract True if and only if each lane of x is equal to the
2092 *  corresponding lane of y.                                                  */
2093static inline SIMD_CFUNC simd_bool simd_equal(simd_char64 x, simd_char64 y) {
2094  return simd_all(x == y);
2095}
2096/*! @abstract True if and only if each lane of x is equal to the
2097 *  corresponding lane of y.                                                  */
2098static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar2 x, simd_uchar2 y) {
2099  return simd_all(x == y);
2100}
2101/*! @abstract True if and only if each lane of x is equal to the
2102 *  corresponding lane of y.                                                  */
2103static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar3 x, simd_uchar3 y) {
2104  return simd_all(x == y);
2105}
2106/*! @abstract True if and only if each lane of x is equal to the
2107 *  corresponding lane of y.                                                  */
2108static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar4 x, simd_uchar4 y) {
2109  return simd_all(x == y);
2110}
2111/*! @abstract True if and only if each lane of x is equal to the
2112 *  corresponding lane of y.                                                  */
2113static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar8 x, simd_uchar8 y) {
2114  return simd_all(x == y);
2115}
2116/*! @abstract True if and only if each lane of x is equal to the
2117 *  corresponding lane of y.                                                  */
2118static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar16 x, simd_uchar16 y) {
2119  return simd_all(x == y);
2120}
2121/*! @abstract True if and only if each lane of x is equal to the
2122 *  corresponding lane of y.                                                  */
2123static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar32 x, simd_uchar32 y) {
2124  return simd_all(x == y);
2125}
2126/*! @abstract True if and only if each lane of x is equal to the
2127 *  corresponding lane of y.                                                  */
2128static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar64 x, simd_uchar64 y) {
2129  return simd_all(x == y);
2130}
2131/*! @abstract True if and only if each lane of x is equal to the
2132 *  corresponding lane of y.                                                  */
2133static inline SIMD_CFUNC simd_bool simd_equal(simd_short2 x, simd_short2 y) {
2134  return simd_all(x == y);
2135}
2136/*! @abstract True if and only if each lane of x is equal to the
2137 *  corresponding lane of y.                                                  */
2138static inline SIMD_CFUNC simd_bool simd_equal(simd_short3 x, simd_short3 y) {
2139  return simd_all(x == y);
2140}
2141/*! @abstract True if and only if each lane of x is equal to the
2142 *  corresponding lane of y.                                                  */
2143static inline SIMD_CFUNC simd_bool simd_equal(simd_short4 x, simd_short4 y) {
2144  return simd_all(x == y);
2145}
2146/*! @abstract True if and only if each lane of x is equal to the
2147 *  corresponding lane of y.                                                  */
2148static inline SIMD_CFUNC simd_bool simd_equal(simd_short8 x, simd_short8 y) {
2149  return simd_all(x == y);
2150}
2151/*! @abstract True if and only if each lane of x is equal to the
2152 *  corresponding lane of y.                                                  */
2153static inline SIMD_CFUNC simd_bool simd_equal(simd_short16 x, simd_short16 y) {
2154  return simd_all(x == y);
2155}
2156/*! @abstract True if and only if each lane of x is equal to the
2157 *  corresponding lane of y.                                                  */
2158static inline SIMD_CFUNC simd_bool simd_equal(simd_short32 x, simd_short32 y) {
2159  return simd_all(x == y);
2160}
2161/*! @abstract True if and only if each lane of x is equal to the
2162 *  corresponding lane of y.                                                  */
2163static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort2 x, simd_ushort2 y) {
2164  return simd_all(x == y);
2165}
2166/*! @abstract True if and only if each lane of x is equal to the
2167 *  corresponding lane of y.                                                  */
2168static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort3 x, simd_ushort3 y) {
2169  return simd_all(x == y);
2170}
2171/*! @abstract True if and only if each lane of x is equal to the
2172 *  corresponding lane of y.                                                  */
2173static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort4 x, simd_ushort4 y) {
2174  return simd_all(x == y);
2175}
2176/*! @abstract True if and only if each lane of x is equal to the
2177 *  corresponding lane of y.                                                  */
2178static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort8 x, simd_ushort8 y) {
2179  return simd_all(x == y);
2180}
2181/*! @abstract True if and only if each lane of x is equal to the
2182 *  corresponding lane of y.                                                  */
2183static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort16 x, simd_ushort16 y) {
2184  return simd_all(x == y);
2185}
2186/*! @abstract True if and only if each lane of x is equal to the
2187 *  corresponding lane of y.                                                  */
2188static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort32 x, simd_ushort32 y) {
2189  return simd_all(x == y);
2190}
2191/*! @abstract True if and only if each lane of x is equal to the
2192 *  corresponding lane of y.                                                  */
2193static inline SIMD_CFUNC simd_bool simd_equal(simd_half2 x, simd_half2 y) {
2194  return simd_all(x == y);
2195}
2196/*! @abstract True if and only if each lane of x is equal to the
2197 *  corresponding lane of y.                                                  */
2198static inline SIMD_CFUNC simd_bool simd_equal(simd_half3 x, simd_half3 y) {
2199  return simd_all(x == y);
2200}
2201/*! @abstract True if and only if each lane of x is equal to the
2202 *  corresponding lane of y.                                                  */
2203static inline SIMD_CFUNC simd_bool simd_equal(simd_half4 x, simd_half4 y) {
2204  return simd_all(x == y);
2205}
2206/*! @abstract True if and only if each lane of x is equal to the
2207 *  corresponding lane of y.                                                  */
2208static inline SIMD_CFUNC simd_bool simd_equal(simd_half8 x, simd_half8 y) {
2209  return simd_all(x == y);
2210}
2211/*! @abstract True if and only if each lane of x is equal to the
2212 *  corresponding lane of y.                                                  */
2213static inline SIMD_CFUNC simd_bool simd_equal(simd_half16 x, simd_half16 y) {
2214  return simd_all(x == y);
2215}
2216/*! @abstract True if and only if each lane of x is equal to the
2217 *  corresponding lane of y.                                                  */
2218static inline SIMD_CFUNC simd_bool simd_equal(simd_half32 x, simd_half32 y) {
2219  return simd_all(x == y);
2220}
2221/*! @abstract True if and only if each lane of x is equal to the
2222 *  corresponding lane of y.                                                  */
2223static inline SIMD_CFUNC simd_bool simd_equal(simd_int2 x, simd_int2 y) {
2224  return simd_all(x == y);
2225}
2226/*! @abstract True if and only if each lane of x is equal to the
2227 *  corresponding lane of y.                                                  */
2228static inline SIMD_CFUNC simd_bool simd_equal(simd_int3 x, simd_int3 y) {
2229  return simd_all(x == y);
2230}
2231/*! @abstract True if and only if each lane of x is equal to the
2232 *  corresponding lane of y.                                                  */
2233static inline SIMD_CFUNC simd_bool simd_equal(simd_int4 x, simd_int4 y) {
2234  return simd_all(x == y);
2235}
2236/*! @abstract True if and only if each lane of x is equal to the
2237 *  corresponding lane of y.                                                  */
2238static inline SIMD_CFUNC simd_bool simd_equal(simd_int8 x, simd_int8 y) {
2239  return simd_all(x == y);
2240}
2241/*! @abstract True if and only if each lane of x is equal to the
2242 *  corresponding lane of y.                                                  */
2243static inline SIMD_CFUNC simd_bool simd_equal(simd_int16 x, simd_int16 y) {
2244  return simd_all(x == y);
2245}
2246/*! @abstract True if and only if each lane of x is equal to the
2247 *  corresponding lane of y.                                                  */
2248static inline SIMD_CFUNC simd_bool simd_equal(simd_uint2 x, simd_uint2 y) {
2249  return simd_all(x == y);
2250}
2251/*! @abstract True if and only if each lane of x is equal to the
2252 *  corresponding lane of y.                                                  */
2253static inline SIMD_CFUNC simd_bool simd_equal(simd_uint3 x, simd_uint3 y) {
2254  return simd_all(x == y);
2255}
2256/*! @abstract True if and only if each lane of x is equal to the
2257 *  corresponding lane of y.                                                  */
2258static inline SIMD_CFUNC simd_bool simd_equal(simd_uint4 x, simd_uint4 y) {
2259  return simd_all(x == y);
2260}
2261/*! @abstract True if and only if each lane of x is equal to the
2262 *  corresponding lane of y.                                                  */
2263static inline SIMD_CFUNC simd_bool simd_equal(simd_uint8 x, simd_uint8 y) {
2264  return simd_all(x == y);
2265}
2266/*! @abstract True if and only if each lane of x is equal to the
2267 *  corresponding lane of y.                                                  */
2268static inline SIMD_CFUNC simd_bool simd_equal(simd_uint16 x, simd_uint16 y) {
2269  return simd_all(x == y);
2270}
2271/*! @abstract True if and only if each lane of x is equal to the
2272 *  corresponding lane of y.                                                  */
2273static inline SIMD_CFUNC simd_bool simd_equal(simd_float2 x, simd_float2 y) {
2274  return simd_all(x == y);
2275}
2276/*! @abstract True if and only if each lane of x is equal to the
2277 *  corresponding lane of y.                                                  */
2278static inline SIMD_CFUNC simd_bool simd_equal(simd_float3 x, simd_float3 y) {
2279  return simd_all(x == y);
2280}
2281/*! @abstract True if and only if each lane of x is equal to the
2282 *  corresponding lane of y.                                                  */
2283static inline SIMD_CFUNC simd_bool simd_equal(simd_float4 x, simd_float4 y) {
2284  return simd_all(x == y);
2285}
2286/*! @abstract True if and only if each lane of x is equal to the
2287 *  corresponding lane of y.                                                  */
2288static inline SIMD_CFUNC simd_bool simd_equal(simd_float8 x, simd_float8 y) {
2289  return simd_all(x == y);
2290}
2291/*! @abstract True if and only if each lane of x is equal to the
2292 *  corresponding lane of y.                                                  */
2293static inline SIMD_CFUNC simd_bool simd_equal(simd_float16 x, simd_float16 y) {
2294  return simd_all(x == y);
2295}
2296/*! @abstract True if and only if each lane of x is equal to the
2297 *  corresponding lane of y.                                                  */
2298static inline SIMD_CFUNC simd_bool simd_equal(simd_long2 x, simd_long2 y) {
2299  return simd_all(x == y);
2300}
2301/*! @abstract True if and only if each lane of x is equal to the
2302 *  corresponding lane of y.                                                  */
2303static inline SIMD_CFUNC simd_bool simd_equal(simd_long3 x, simd_long3 y) {
2304  return simd_all(x == y);
2305}
2306/*! @abstract True if and only if each lane of x is equal to the
2307 *  corresponding lane of y.                                                  */
2308static inline SIMD_CFUNC simd_bool simd_equal(simd_long4 x, simd_long4 y) {
2309  return simd_all(x == y);
2310}
2311/*! @abstract True if and only if each lane of x is equal to the
2312 *  corresponding lane of y.                                                  */
2313static inline SIMD_CFUNC simd_bool simd_equal(simd_long8 x, simd_long8 y) {
2314  return simd_all(x == y);
2315}
2316/*! @abstract True if and only if each lane of x is equal to the
2317 *  corresponding lane of y.                                                  */
2318static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong2 x, simd_ulong2 y) {
2319  return simd_all(x == y);
2320}
2321/*! @abstract True if and only if each lane of x is equal to the
2322 *  corresponding lane of y.                                                  */
2323static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong3 x, simd_ulong3 y) {
2324  return simd_all(x == y);
2325}
2326/*! @abstract True if and only if each lane of x is equal to the
2327 *  corresponding lane of y.                                                  */
2328static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong4 x, simd_ulong4 y) {
2329  return simd_all(x == y);
2330}
2331/*! @abstract True if and only if each lane of x is equal to the
2332 *  corresponding lane of y.                                                  */
2333static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong8 x, simd_ulong8 y) {
2334  return simd_all(x == y);
2335}
2336/*! @abstract True if and only if each lane of x is equal to the
2337 *  corresponding lane of y.                                                  */
2338static inline SIMD_CFUNC simd_bool simd_equal(simd_double2 x, simd_double2 y) {
2339  return simd_all(x == y);
2340}
2341/*! @abstract True if and only if each lane of x is equal to the
2342 *  corresponding lane of y.                                                  */
2343static inline SIMD_CFUNC simd_bool simd_equal(simd_double3 x, simd_double3 y) {
2344  return simd_all(x == y);
2345}
2346/*! @abstract True if and only if each lane of x is equal to the
2347 *  corresponding lane of y.                                                  */
2348static inline SIMD_CFUNC simd_bool simd_equal(simd_double4 x, simd_double4 y) {
2349  return simd_all(x == y);
2350}
2351/*! @abstract True if and only if each lane of x is equal to the
2352 *  corresponding lane of y.                                                  */
2353static inline SIMD_CFUNC simd_bool simd_equal(simd_double8 x, simd_double8 y) {
2354  return simd_all(x == y);
2355}
2356  
2357#ifdef __cplusplus
2358} /* extern "C" */
2359
2360namespace simd {
2361  /*! @abstract The lanewise absolute value of x.                             */
2362  template <typename typeN> static SIMD_CPPFUNC typeN abs(const typeN x) { return ::simd_abs(x); }
2363  /*! @abstract The lanewise maximum of x and y.                              */
2364  template <typename typeN> static SIMD_CPPFUNC typeN max(const typeN x, const typeN y) { return ::simd_max(x,y); }
2365  /*! @abstract The lanewise minimum of x and y.                              */
2366  template <typename typeN> static SIMD_CPPFUNC typeN min(const typeN x, const typeN y) { return ::simd_min(x,y); }
2367  /*! @abstract x clamped to the interval [min, max].                         */
2368  template <typename typeN> static SIMD_CPPFUNC typeN clamp(const typeN x, const typeN min, const typeN max) { return ::simd_clamp(x,min,max); }
2369  /*! @abstract -1 if x < 0, +1 if x > 0, and 0 otherwise.                    */
2370  template <typename fptypeN> static SIMD_CPPFUNC fptypeN sign(const fptypeN x) { return ::simd_sign(x); }
2371  /*! @abstract Linearly interpolates between x and y, taking the value x when t=0 and y when t=1 */
2372  template <typename fptypeN> static SIMD_CPPFUNC fptypeN mix(const fptypeN x, const fptypeN y, const fptypeN t) { return ::simd_mix(x,y,t); }
2373  template <typename fptypeN> static SIMD_CPPFUNC fptypeN lerp(const fptypeN x, const fptypeN y, const fptypeN t) { return ::simd_mix(x,y,t); }
2374  /*! @abstract An approximation to 1/x.                                      */
2375  template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return simd_recip(x); }
2376  /*! @abstract An approximation to 1/sqrt(x).                                */
2377  template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return simd_rsqrt(x); }
2378  /*! @abstract The "fracional part" of x, in the range [0,1).                */
2379  template <typename fptypeN> static SIMD_CPPFUNC fptypeN fract(const fptypeN x) { return ::simd_fract(x); }
2380  /*! @abstract 0 if x < edge, 1 otherwise.                                   */
2381  template <typename fptypeN> static SIMD_CPPFUNC fptypeN step(const fptypeN edge, const fptypeN x) { return ::simd_step(edge,x); }
2382  /*! @abstract smoothly interpolates from 0 at edge0 to 1 at edge1.          */
2383  template <typename fptypeN> static SIMD_CPPFUNC fptypeN smoothstep(const fptypeN edge0, const fptypeN edge1, const fptypeN x) { return ::simd_smoothstep(edge0,edge1,x); }
2384  /*! @abstract True if and only if each lane of x is equal to the
2385   *  corresponding lane of y.
2386   *
2387   *  @discussion This isn't operator== because that's already defined by
2388   *  the compiler to return a lane mask.                                     */
2389  template <typename fptypeN> static SIMD_CPPFUNC simd_bool equal(const fptypeN x, const fptypeN y) { return ::simd_equal(x, y); }
2390#if __cpp_decltype_auto
2391  /*  If you are targeting an earlier version of the C++ standard that lacks
2392   decltype_auto support, you may use the C-style simd_reduce_* functions
2393   instead.                                                                   */
2394  /*! @abstract The sum of the elements in x. May overflow.                   */
2395  template <typename typeN> static SIMD_CPPFUNC auto reduce_add(typeN x) { return ::simd_reduce_add(x); }
2396  /*! @abstract The least element in x.                                       */
2397  template <typename typeN> static SIMD_CPPFUNC auto reduce_min(typeN x) { return ::simd_reduce_min(x); }
2398  /*! @abstract The greatest element in x.                                    */
2399  template <typename typeN> static SIMD_CPPFUNC auto reduce_max(typeN x) { return ::simd_reduce_max(x); }
2400#endif
2401  namespace precise {
2402    /*! @abstract An approximation to 1/x.                                      */
2403    template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_precise_recip(x); }
2404    /*! @abstract An approximation to 1/sqrt(x).                                */
2405    template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_precise_rsqrt(x); }
2406  }
2407  namespace fast {
2408    /*! @abstract An approximation to 1/x.                                      */
2409    template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_fast_recip(x); }
2410    /*! @abstract An approximation to 1/sqrt(x).                                */
2411    template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_fast_rsqrt(x); }
2412  }
2413}
2414
2415extern "C" {
2416#endif /* __cplusplus */
2417
2418#pragma mark - Implementation
2419
2420static inline SIMD_CFUNC simd_char2 simd_abs(simd_char2 x) {
2421  return simd_make_char2(simd_abs(simd_make_char8_undef(x)));
2422}
2423
2424static inline SIMD_CFUNC simd_char3 simd_abs(simd_char3 x) {
2425  return simd_make_char3(simd_abs(simd_make_char8_undef(x)));
2426}
2427
2428static inline SIMD_CFUNC simd_char4 simd_abs(simd_char4 x) {
2429  return simd_make_char4(simd_abs(simd_make_char8_undef(x)));
2430}
2431
2432static inline SIMD_CFUNC simd_char8 simd_abs(simd_char8 x) {
2433#if defined __arm__ || defined __arm64__ || defined __aarch64__
2434  return vabs_s8(x);
2435#else
2436  return simd_make_char8(simd_abs(simd_make_char16_undef(x)));
2437#endif
2438}
2439
2440static inline SIMD_CFUNC simd_char16 simd_abs(simd_char16 x) {
2441#if defined __arm__ || defined __arm64__ || defined __aarch64__
2442  return vabsq_s8(x);
2443#elif defined __SSE4_1__
2444  return (simd_char16) _mm_abs_epi8((__m128i)x);
2445#else
2446  simd_char16 mask = x >> 7; return (x ^ mask) - mask;
2447#endif
2448}
2449
2450static inline SIMD_CFUNC simd_char32 simd_abs(simd_char32 x) {
2451#if defined __AVX2__
2452  return _mm256_abs_epi8(x);
2453#else
2454  return simd_make_char32(simd_abs(x.lo), simd_abs(x.hi));
2455#endif
2456}
2457
2458static inline SIMD_CFUNC simd_char64 simd_abs(simd_char64 x) {
2459#if defined __AVX512BW__
2460  return _mm512_abs_epi8(x);
2461#else
2462  return simd_make_char64(simd_abs(x.lo), simd_abs(x.hi));
2463#endif
2464}
2465
2466static inline SIMD_CFUNC simd_short2 simd_abs(simd_short2 x) {
2467  return simd_make_short2(simd_abs(simd_make_short4_undef(x)));
2468}
2469
2470static inline SIMD_CFUNC simd_short3 simd_abs(simd_short3 x) {
2471  return simd_make_short3(simd_abs(simd_make_short4_undef(x)));
2472}
2473
2474static inline SIMD_CFUNC simd_short4 simd_abs(simd_short4 x) {
2475#if defined __arm__ || defined __arm64__ || defined __aarch64__
2476  return vabs_s16(x);
2477#else
2478  return simd_make_short4(simd_abs(simd_make_short8_undef(x)));
2479#endif
2480}
2481
2482static inline SIMD_CFUNC simd_short8 simd_abs(simd_short8 x) {
2483#if defined __arm__ || defined __arm64__ || defined __aarch64__
2484  return vabsq_s16(x);
2485#elif defined __SSE4_1__
2486  return (simd_short8) _mm_abs_epi16((__m128i)x);
2487#else
2488  simd_short8 mask = x >> 15; return (x ^ mask) - mask;
2489#endif
2490}
2491
2492static inline SIMD_CFUNC simd_short16 simd_abs(simd_short16 x) {
2493#if defined __AVX2__
2494  return _mm256_abs_epi16(x);
2495#else
2496  return simd_make_short16(simd_abs(x.lo), simd_abs(x.hi));
2497#endif
2498}
2499
2500static inline SIMD_CFUNC simd_short32 simd_abs(simd_short32 x) {
2501#if defined __AVX512BW__
2502  return _mm512_abs_epi16(x);
2503#else
2504  return simd_make_short32(simd_abs(x.lo), simd_abs(x.hi));
2505#endif
2506}
2507
2508static inline SIMD_CFUNC simd_half2 simd_abs(simd_half2 x) {
2509  return __tg_fabs(x);
2510}
2511
2512static inline SIMD_CFUNC simd_half3 simd_abs(simd_half3 x) {
2513  return __tg_fabs(x);
2514}
2515
2516static inline SIMD_CFUNC simd_half4 simd_abs(simd_half4 x) {
2517  return __tg_fabs(x);
2518}
2519
2520static inline SIMD_CFUNC simd_half8 simd_abs(simd_half8 x) {
2521  return __tg_fabs(x);
2522}
2523
2524static inline SIMD_CFUNC simd_half16 simd_abs(simd_half16 x) {
2525  return __tg_fabs(x);
2526}
2527
2528static inline SIMD_CFUNC simd_half32 simd_abs(simd_half32 x) {
2529  return __tg_fabs(x);
2530}
2531
2532static inline SIMD_CFUNC simd_int2 simd_abs(simd_int2 x) {
2533#if defined __arm__ || defined __arm64__ || defined __aarch64__
2534  return vabs_s32(x);
2535#else
2536  return simd_make_int2(simd_abs(simd_make_int4_undef(x)));
2537#endif
2538}
2539
2540static inline SIMD_CFUNC simd_int3 simd_abs(simd_int3 x) {
2541  return simd_make_int3(simd_abs(simd_make_int4_undef(x)));
2542}
2543
2544static inline SIMD_CFUNC simd_int4 simd_abs(simd_int4 x) {
2545#if defined __arm__ || defined __arm64__ || defined __aarch64__
2546  return vabsq_s32(x);
2547#elif defined __SSE4_1__
2548  return (simd_int4) _mm_abs_epi32((__m128i)x);
2549#else
2550  simd_int4 mask = x >> 31; return (x ^ mask) - mask;
2551#endif
2552}
2553
2554static inline SIMD_CFUNC simd_int8 simd_abs(simd_int8 x) {
2555#if defined __AVX2__
2556  return _mm256_abs_epi32(x);
2557#else
2558  return simd_make_int8(simd_abs(x.lo), simd_abs(x.hi));
2559#endif
2560}
2561
2562static inline SIMD_CFUNC simd_int16 simd_abs(simd_int16 x) {
2563#if defined __AVX512F__
2564  return _mm512_abs_epi32(x);
2565#else
2566  return simd_make_int16(simd_abs(x.lo), simd_abs(x.hi));
2567#endif
2568}
2569
2570static inline SIMD_CFUNC simd_float2 simd_abs(simd_float2 x) {
2571  return __tg_fabs(x);
2572}
2573
2574static inline SIMD_CFUNC simd_float3 simd_abs(simd_float3 x) {
2575  return __tg_fabs(x);
2576}
2577
2578static inline SIMD_CFUNC simd_float4 simd_abs(simd_float4 x) {
2579  return __tg_fabs(x);
2580}
2581
2582static inline SIMD_CFUNC simd_float8 simd_abs(simd_float8 x) {
2583  return __tg_fabs(x);
2584}
2585
2586static inline SIMD_CFUNC simd_float16 simd_abs(simd_float16 x) {
2587  return __tg_fabs(x);
2588}
2589
2590static inline SIMD_CFUNC simd_long2 simd_abs(simd_long2 x) {
2591#if defined __arm64__ || defined __aarch64__
2592  return vabsq_s64(x);
2593#elif defined __AVX512VL__
2594  return (simd_long2) _mm_abs_epi64((__m128i)x);
2595#else
2596  simd_long2 mask = x >> 63; return (x ^ mask) - mask;
2597#endif
2598}
2599
2600static inline SIMD_CFUNC simd_long3 simd_abs(simd_long3 x) {
2601  return simd_make_long3(simd_abs(simd_make_long4_undef(x)));
2602}
2603
2604static inline SIMD_CFUNC simd_long4 simd_abs(simd_long4 x) {
2605#if defined __AVX512VL__
2606  return _mm256_abs_epi64(x);
2607#else
2608  return simd_make_long4(simd_abs(x.lo), simd_abs(x.hi));
2609#endif
2610}
2611
2612static inline SIMD_CFUNC simd_long8 simd_abs(simd_long8 x) {
2613#if defined __AVX512F__
2614  return _mm512_abs_epi64(x);
2615#else
2616  return simd_make_long8(simd_abs(x.lo), simd_abs(x.hi));
2617#endif
2618}
2619
2620static inline SIMD_CFUNC simd_double2 simd_abs(simd_double2 x) {
2621  return __tg_fabs(x);
2622}
2623
2624static inline SIMD_CFUNC simd_double3 simd_abs(simd_double3 x) {
2625  return __tg_fabs(x);
2626}
2627
2628static inline SIMD_CFUNC simd_double4 simd_abs(simd_double4 x) {
2629  return __tg_fabs(x);
2630}
2631
2632static inline SIMD_CFUNC simd_double8 simd_abs(simd_double8 x) {
2633  return __tg_fabs(x);
2634}
2635
2636static inline SIMD_CFUNC simd_char2 simd_min(simd_char2 x, simd_char2 y) {
2637  return simd_make_char2(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y)));
2638}
2639
2640static inline SIMD_CFUNC simd_char3 simd_min(simd_char3 x, simd_char3 y) {
2641  return simd_make_char3(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y)));
2642}
2643
2644static inline SIMD_CFUNC simd_char4 simd_min(simd_char4 x, simd_char4 y) {
2645  return simd_make_char4(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y)));
2646}
2647
2648static inline SIMD_CFUNC simd_char8 simd_min(simd_char8 x, simd_char8 y) {
2649#if defined __arm__ || defined __arm64__ || defined __aarch64__
2650  return vmin_s8(x, y);
2651#else
2652  return simd_make_char8(simd_min(simd_make_char16_undef(x), simd_make_char16_undef(y)));
2653#endif
2654
2655}
2656
2657static inline SIMD_CFUNC simd_char16 simd_min(simd_char16 x, simd_char16 y) {
2658#if defined __arm__ || defined __arm64__ || defined __aarch64__
2659  return vminq_s8(x, y);
2660#elif defined __SSE4_1__
2661  return (simd_char16) _mm_min_epi8((__m128i)x, (__m128i)y);
2662#else
2663  return simd_bitselect(x, y, y < x);
2664#endif
2665}
2666
2667static inline SIMD_CFUNC simd_char32 simd_min(simd_char32 x, simd_char32 y) {
2668#if defined __AVX2__
2669  return _mm256_min_epi8(x, y);
2670#else
2671  return simd_bitselect(x, y, y < x);
2672#endif
2673}
2674
2675static inline SIMD_CFUNC simd_char64 simd_min(simd_char64 x, simd_char64 y) {
2676#if defined __AVX512BW__
2677  return _mm512_min_epi8(x, y);
2678#else
2679  return simd_bitselect(x, y, y < x);
2680#endif
2681}
2682
2683static inline SIMD_CFUNC simd_uchar2 simd_min(simd_uchar2 x, simd_uchar2 y) {
2684  return simd_make_uchar2(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
2685}
2686
2687static inline SIMD_CFUNC simd_uchar3 simd_min(simd_uchar3 x, simd_uchar3 y) {
2688  return simd_make_uchar3(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
2689}
2690
2691static inline SIMD_CFUNC simd_uchar4 simd_min(simd_uchar4 x, simd_uchar4 y) {
2692  return simd_make_uchar4(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
2693}
2694
2695static inline SIMD_CFUNC simd_uchar8 simd_min(simd_uchar8 x, simd_uchar8 y) {
2696#if defined __arm__ || defined __arm64__ || defined __aarch64__
2697  return vmin_u8(x, y);
2698#else
2699  return simd_make_uchar8(simd_min(simd_make_uchar16_undef(x), simd_make_uchar16_undef(y)));
2700#endif
2701
2702}
2703
2704static inline SIMD_CFUNC simd_uchar16 simd_min(simd_uchar16 x, simd_uchar16 y) {
2705#if defined __arm__ || defined __arm64__ || defined __aarch64__
2706  return vminq_u8(x, y);
2707#elif defined __SSE4_1__
2708  return (simd_uchar16) _mm_min_epu8((__m128i)x, (__m128i)y);
2709#else
2710  return simd_bitselect(x, y, y < x);
2711#endif
2712}
2713
2714static inline SIMD_CFUNC simd_uchar32 simd_min(simd_uchar32 x, simd_uchar32 y) {
2715#if defined __AVX2__
2716  return _mm256_min_epu8(x, y);
2717#else
2718  return simd_bitselect(x, y, y < x);
2719#endif
2720}
2721
2722static inline SIMD_CFUNC simd_uchar64 simd_min(simd_uchar64 x, simd_uchar64 y) {
2723#if defined __AVX512BW__
2724  return _mm512_min_epu8(x, y);
2725#else
2726  return simd_bitselect(x, y, y < x);
2727#endif
2728}
2729
2730static inline SIMD_CFUNC simd_short2 simd_min(simd_short2 x, simd_short2 y) {
2731  return simd_make_short2(simd_min(simd_make_short4_undef(x), simd_make_short4_undef(y)));
2732}
2733
2734static inline SIMD_CFUNC simd_short3 simd_min(simd_short3 x, simd_short3 y) {
2735  return simd_make_short3(simd_min(simd_make_short4_undef(x), simd_make_short4_undef(y)));
2736}
2737
2738static inline SIMD_CFUNC simd_short4 simd_min(simd_short4 x, simd_short4 y) {
2739#if defined __arm__ || defined __arm64__ || defined __aarch64__
2740  return vmin_s16(x, y);
2741#else
2742  return simd_make_short4(simd_min(simd_make_short8_undef(x), simd_make_short8_undef(y)));
2743#endif
2744
2745}
2746
2747static inline SIMD_CFUNC simd_short8 simd_min(simd_short8 x, simd_short8 y) {
2748#if defined __arm__ || defined __arm64__ || defined __aarch64__
2749  return vminq_s16(x, y);
2750#elif defined __SSE4_1__
2751  return (simd_short8) _mm_min_epi16((__m128i)x, (__m128i)y);
2752#else
2753  return simd_bitselect(x, y, y < x);
2754#endif
2755}
2756
2757static inline SIMD_CFUNC simd_short16 simd_min(simd_short16 x, simd_short16 y) {
2758#if defined __AVX2__
2759  return _mm256_min_epi16(x, y);
2760#else
2761  return simd_bitselect(x, y, y < x);
2762#endif
2763}
2764
2765static inline SIMD_CFUNC simd_short32 simd_min(simd_short32 x, simd_short32 y) {
2766#if defined __AVX512BW__
2767  return _mm512_min_epi16(x, y);
2768#else
2769  return simd_bitselect(x, y, y < x);
2770#endif
2771}
2772
2773static inline SIMD_CFUNC simd_ushort2 simd_min(simd_ushort2 x, simd_ushort2 y) {
2774  return simd_make_ushort2(simd_min(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y)));
2775}
2776
2777static inline SIMD_CFUNC simd_ushort3 simd_min(simd_ushort3 x, simd_ushort3 y) {
2778  return simd_make_ushort3(simd_min(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y)));
2779}
2780
2781static inline SIMD_CFUNC simd_ushort4 simd_min(simd_ushort4 x, simd_ushort4 y) {
2782#if defined __arm__ || defined __arm64__ || defined __aarch64__
2783  return vmin_u16(x, y);
2784#else
2785  return simd_make_ushort4(simd_min(simd_make_ushort8_undef(x), simd_make_ushort8_undef(y)));
2786#endif
2787
2788}
2789
2790static inline SIMD_CFUNC simd_ushort8 simd_min(simd_ushort8 x, simd_ushort8 y) {
2791#if defined __arm__ || defined __arm64__ || defined __aarch64__
2792  return vminq_u16(x, y);
2793#elif defined __SSE4_1__
2794  return (simd_ushort8) _mm_min_epu16((__m128i)x, (__m128i)y);
2795#else
2796  return simd_bitselect(x, y, y < x);
2797#endif
2798}
2799
2800static inline SIMD_CFUNC simd_ushort16 simd_min(simd_ushort16 x, simd_ushort16 y) {
2801#if defined __AVX2__
2802  return _mm256_min_epu16(x, y);
2803#else
2804  return simd_bitselect(x, y, y < x);
2805#endif
2806}
2807
2808static inline SIMD_CFUNC simd_ushort32 simd_min(simd_ushort32 x, simd_ushort32 y) {
2809#if defined __AVX512BW__
2810  return _mm512_min_epu16(x, y);
2811#else
2812  return simd_bitselect(x, y, y < x);
2813#endif
2814}
2815
2816static inline SIMD_CFUNC _Float16 simd_min(_Float16 x, _Float16 y) {
2817  return __fminf16(x,y);
2818}
2819
2820static inline SIMD_CFUNC simd_half2 simd_min(simd_half2 x, simd_half2 y) {
2821  return __tg_fmin(x,y);
2822}
2823
2824static inline SIMD_CFUNC simd_half3 simd_min(simd_half3 x, simd_half3 y) {
2825  return __tg_fmin(x,y);
2826}
2827
2828static inline SIMD_CFUNC simd_half4 simd_min(simd_half4 x, simd_half4 y) {
2829  return __tg_fmin(x,y);
2830}
2831
2832static inline SIMD_CFUNC simd_half8 simd_min(simd_half8 x, simd_half8 y) {
2833  return __tg_fmin(x,y);
2834}
2835
2836static inline SIMD_CFUNC simd_half16 simd_min(simd_half16 x, simd_half16 y) {
2837  return __tg_fmin(x,y);
2838}
2839
2840static inline SIMD_CFUNC simd_half32 simd_min(simd_half32 x, simd_half32 y) {
2841  return __tg_fmin(x,y);
2842}
2843
2844static inline SIMD_CFUNC simd_int2 simd_min(simd_int2 x, simd_int2 y) {
2845#if defined __arm__ || defined __arm64__ || defined __aarch64__
2846  return vmin_s32(x, y);
2847#else
2848  return simd_make_int2(simd_min(simd_make_int4_undef(x), simd_make_int4_undef(y)));
2849#endif
2850
2851}
2852
2853static inline SIMD_CFUNC simd_int3 simd_min(simd_int3 x, simd_int3 y) {
2854  return simd_make_int3(simd_min(simd_make_int4_undef(x), simd_make_int4_undef(y)));
2855}
2856
2857static inline SIMD_CFUNC simd_int4 simd_min(simd_int4 x, simd_int4 y) {
2858#if defined __arm__ || defined __arm64__ || defined __aarch64__
2859  return vminq_s32(x, y);
2860#elif defined __SSE4_1__
2861  return (simd_int4) _mm_min_epi32((__m128i)x, (__m128i)y);
2862#else
2863  return simd_bitselect(x, y, y < x);
2864#endif
2865}
2866
2867static inline SIMD_CFUNC simd_int8 simd_min(simd_int8 x, simd_int8 y) {
2868#if defined __AVX2__
2869  return _mm256_min_epi32(x, y);
2870#else
2871  return simd_bitselect(x, y, y < x);
2872#endif
2873}
2874
2875static inline SIMD_CFUNC simd_int16 simd_min(simd_int16 x, simd_int16 y) {
2876#if defined __AVX512F__
2877  return _mm512_min_epi32(x, y);
2878#else
2879  return simd_bitselect(x, y, y < x);
2880#endif
2881}
2882
2883static inline SIMD_CFUNC simd_uint2 simd_min(simd_uint2 x, simd_uint2 y) {
2884#if defined __arm__ || defined __arm64__ || defined __aarch64__
2885  return vmin_u32(x, y);
2886#else
2887  return simd_make_uint2(simd_min(simd_make_uint4_undef(x), simd_make_uint4_undef(y)));
2888#endif
2889
2890}
2891
2892static inline SIMD_CFUNC simd_uint3 simd_min(simd_uint3 x, simd_uint3 y) {
2893  return simd_make_uint3(simd_min(simd_make_uint4_undef(x), simd_make_uint4_undef(y)));
2894}
2895
2896static inline SIMD_CFUNC simd_uint4 simd_min(simd_uint4 x, simd_uint4 y) {
2897#if defined __arm__ || defined __arm64__ || defined __aarch64__
2898  return vminq_u32(x, y);
2899#elif defined __SSE4_1__
2900  return (simd_uint4) _mm_min_epu32((__m128i)x, (__m128i)y);
2901#else
2902  return simd_bitselect(x, y, y < x);
2903#endif
2904}
2905
2906static inline SIMD_CFUNC simd_uint8 simd_min(simd_uint8 x, simd_uint8 y) {
2907#if defined __AVX2__
2908  return _mm256_min_epu32(x, y);
2909#else
2910  return simd_bitselect(x, y, y < x);
2911#endif
2912}
2913
2914static inline SIMD_CFUNC simd_uint16 simd_min(simd_uint16 x, simd_uint16 y) {
2915#if defined __AVX512F__
2916  return _mm512_min_epu32(x, y);
2917#else
2918  return simd_bitselect(x, y, y < x);
2919#endif
2920}
2921
2922static inline SIMD_CFUNC float simd_min(float x, float y) {
2923  return __tg_fmin(x,y);
2924}
2925
2926static inline SIMD_CFUNC simd_float2 simd_min(simd_float2 x, simd_float2 y) {
2927  return __tg_fmin(x,y);
2928}
2929
2930static inline SIMD_CFUNC simd_float3 simd_min(simd_float3 x, simd_float3 y) {
2931  return __tg_fmin(x,y);
2932}
2933
2934static inline SIMD_CFUNC simd_float4 simd_min(simd_float4 x, simd_float4 y) {
2935  return __tg_fmin(x,y);
2936}
2937
2938static inline SIMD_CFUNC simd_float8 simd_min(simd_float8 x, simd_float8 y) {
2939  return __tg_fmin(x,y);
2940}
2941
2942static inline SIMD_CFUNC simd_float16 simd_min(simd_float16 x, simd_float16 y) {
2943  return __tg_fmin(x,y);
2944}
2945
2946static inline SIMD_CFUNC simd_long2 simd_min(simd_long2 x, simd_long2 y) {
2947#if defined __AVX512VL__
2948  return _mm_min_epi64(x, y);
2949#else
2950  return simd_bitselect(x, y, y < x);
2951#endif
2952}
2953
2954static inline SIMD_CFUNC simd_long3 simd_min(simd_long3 x, simd_long3 y) {
2955  return simd_make_long3(simd_min(simd_make_long4_undef(x), simd_make_long4_undef(y)));
2956}
2957
2958static inline SIMD_CFUNC simd_long4 simd_min(simd_long4 x, simd_long4 y) {
2959#if defined __AVX512VL__
2960  return _mm256_min_epi64(x, y);
2961#else
2962  return simd_bitselect(x, y, y < x);
2963#endif
2964}
2965
2966static inline SIMD_CFUNC simd_long8 simd_min(simd_long8 x, simd_long8 y) {
2967#if defined __AVX512F__
2968  return _mm512_min_epi64(x, y);
2969#else
2970  return simd_bitselect(x, y, y < x);
2971#endif
2972}
2973
2974static inline SIMD_CFUNC simd_ulong2 simd_min(simd_ulong2 x, simd_ulong2 y) {
2975#if defined __AVX512VL__
2976  return _mm_min_epu64(x, y);
2977#else
2978  return simd_bitselect(x, y, y < x);
2979#endif
2980}
2981
2982static inline SIMD_CFUNC simd_ulong3 simd_min(simd_ulong3 x, simd_ulong3 y) {
2983  return simd_make_ulong3(simd_min(simd_make_ulong4_undef(x), simd_make_ulong4_undef(y)));
2984}
2985
2986static inline SIMD_CFUNC simd_ulong4 simd_min(simd_ulong4 x, simd_ulong4 y) {
2987#if defined __AVX512VL__
2988  return _mm256_min_epu64(x, y);
2989#else
2990  return simd_bitselect(x, y, y < x);
2991#endif
2992}
2993
2994static inline SIMD_CFUNC simd_ulong8 simd_min(simd_ulong8 x, simd_ulong8 y) {
2995#if defined __AVX512F__
2996  return _mm512_min_epu64(x, y);
2997#else
2998  return simd_bitselect(x, y, y < x);
2999#endif
3000}
3001
3002static inline SIMD_CFUNC double simd_min(double x, double y) {
3003  return __tg_fmin(x,y);
3004}
3005
3006static inline SIMD_CFUNC simd_double2 simd_min(simd_double2 x, simd_double2 y) {
3007  return __tg_fmin(x,y);
3008}
3009
3010static inline SIMD_CFUNC simd_double3 simd_min(simd_double3 x, simd_double3 y) {
3011  return __tg_fmin(x,y);
3012}
3013
3014static inline SIMD_CFUNC simd_double4 simd_min(simd_double4 x, simd_double4 y) {
3015  return __tg_fmin(x,y);
3016}
3017
3018static inline SIMD_CFUNC simd_double8 simd_min(simd_double8 x, simd_double8 y) {
3019  return __tg_fmin(x,y);
3020}
3021
3022static inline SIMD_CFUNC simd_char2 simd_max(simd_char2 x, simd_char2 y) {
3023  return simd_make_char2(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y)));
3024}
3025
3026static inline SIMD_CFUNC simd_char3 simd_max(simd_char3 x, simd_char3 y) {
3027  return simd_make_char3(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y)));
3028}
3029
3030static inline SIMD_CFUNC simd_char4 simd_max(simd_char4 x, simd_char4 y) {
3031  return simd_make_char4(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y)));
3032}
3033
3034static inline SIMD_CFUNC simd_char8 simd_max(simd_char8 x, simd_char8 y) {
3035#if defined __arm__ || defined __arm64__ || defined __aarch64__
3036  return vmax_s8(x, y);
3037#else
3038  return simd_make_char8(simd_max(simd_make_char16_undef(x), simd_make_char16_undef(y)));
3039#endif
3040
3041}
3042
3043static inline SIMD_CFUNC simd_char16 simd_max(simd_char16 x, simd_char16 y) {
3044#if defined __arm__ || defined __arm64__ || defined __aarch64__
3045  return vmaxq_s8(x, y);
3046#elif defined __SSE4_1__
3047  return (simd_char16) _mm_max_epi8((__m128i)x, (__m128i)y);
3048#else
3049  return simd_bitselect(x, y, x < y);
3050#endif
3051}
3052
3053static inline SIMD_CFUNC simd_char32 simd_max(simd_char32 x, simd_char32 y) {
3054#if defined __AVX2__
3055  return _mm256_max_epi8(x, y);
3056#else
3057  return simd_bitselect(x, y, x < y);
3058#endif
3059}
3060
3061static inline SIMD_CFUNC simd_char64 simd_max(simd_char64 x, simd_char64 y) {
3062#if defined __AVX512BW__
3063  return _mm512_max_epi8(x, y);
3064#else
3065  return simd_bitselect(x, y, x < y);
3066#endif
3067}
3068
3069static inline SIMD_CFUNC simd_uchar2 simd_max(simd_uchar2 x, simd_uchar2 y) {
3070  return simd_make_uchar2(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
3071}
3072
3073static inline SIMD_CFUNC simd_uchar3 simd_max(simd_uchar3 x, simd_uchar3 y) {
3074  return simd_make_uchar3(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
3075}
3076
3077static inline SIMD_CFUNC simd_uchar4 simd_max(simd_uchar4 x, simd_uchar4 y) {
3078  return simd_make_uchar4(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
3079}
3080
3081static inline SIMD_CFUNC simd_uchar8 simd_max(simd_uchar8 x, simd_uchar8 y) {
3082#if defined __arm__ || defined __arm64__ || defined __aarch64__
3083  return vmax_u8(x, y);
3084#else
3085  return simd_make_uchar8(simd_max(simd_make_uchar16_undef(x), simd_make_uchar16_undef(y)));
3086#endif
3087
3088}
3089
3090static inline SIMD_CFUNC simd_uchar16 simd_max(simd_uchar16 x, simd_uchar16 y) {
3091#if defined __arm__ || defined __arm64__ || defined __aarch64__
3092  return vmaxq_u8(x, y);
3093#elif defined __SSE4_1__
3094  return (simd_uchar16) _mm_max_epu8((__m128i)x, (__m128i)y);
3095#else
3096  return simd_bitselect(x, y, x < y);
3097#endif
3098}
3099
3100static inline SIMD_CFUNC simd_uchar32 simd_max(simd_uchar32 x, simd_uchar32 y) {
3101#if defined __AVX2__
3102  return _mm256_max_epu8(x, y);
3103#else
3104  return simd_bitselect(x, y, x < y);
3105#endif
3106}
3107
3108static inline SIMD_CFUNC simd_uchar64 simd_max(simd_uchar64 x, simd_uchar64 y) {
3109#if defined __AVX512BW__
3110  return _mm512_max_epu8(x, y);
3111#else
3112  return simd_bitselect(x, y, x < y);
3113#endif
3114}
3115
3116static inline SIMD_CFUNC simd_short2 simd_max(simd_short2 x, simd_short2 y) {
3117  return simd_make_short2(simd_max(simd_make_short4_undef(x), simd_make_short4_undef(y)));
3118}
3119
3120static inline SIMD_CFUNC simd_short3 simd_max(simd_short3 x, simd_short3 y) {
3121  return simd_make_short3(simd_max(simd_make_short4_undef(x), simd_make_short4_undef(y)));
3122}
3123
3124static inline SIMD_CFUNC simd_short4 simd_max(simd_short4 x, simd_short4 y) {
3125#if defined __arm__ || defined __arm64__ || defined __aarch64__
3126  return vmax_s16(x, y);
3127#else
3128  return simd_make_short4(simd_max(simd_make_short8_undef(x), simd_make_short8_undef(y)));
3129#endif
3130
3131}
3132
3133static inline SIMD_CFUNC simd_short8 simd_max(simd_short8 x, simd_short8 y) {
3134#if defined __arm__ || defined __arm64__ || defined __aarch64__
3135  return vmaxq_s16(x, y);
3136#elif defined __SSE4_1__
3137  return (simd_short8) _mm_max_epi16((__m128i)x, (__m128i)y);
3138#else
3139  return simd_bitselect(x, y, x < y);
3140#endif
3141}
3142
3143static inline SIMD_CFUNC simd_short16 simd_max(simd_short16 x, simd_short16 y) {
3144#if defined __AVX2__
3145  return _mm256_max_epi16(x, y);
3146#else
3147  return simd_bitselect(x, y, x < y);
3148#endif
3149}
3150
3151static inline SIMD_CFUNC simd_short32 simd_max(simd_short32 x, simd_short32 y) {
3152#if defined __AVX512BW__
3153  return _mm512_max_epi16(x, y);
3154#else
3155  return simd_bitselect(x, y, x < y);
3156#endif
3157}
3158
3159static inline SIMD_CFUNC simd_ushort2 simd_max(simd_ushort2 x, simd_ushort2 y) {
3160  return simd_make_ushort2(simd_max(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y)));
3161}
3162
3163static inline SIMD_CFUNC simd_ushort3 simd_max(simd_ushort3 x, simd_ushort3 y) {
3164  return simd_make_ushort3(simd_max(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y)));
3165}
3166
3167static inline SIMD_CFUNC simd_ushort4 simd_max(simd_ushort4 x, simd_ushort4 y) {
3168#if defined __arm__ || defined __arm64__ || defined __aarch64__
3169  return vmax_u16(x, y);
3170#else
3171  return simd_make_ushort4(simd_max(simd_make_ushort8_undef(x), simd_make_ushort8_undef(y)));
3172#endif
3173
3174}
3175
3176static inline SIMD_CFUNC simd_ushort8 simd_max(simd_ushort8 x, simd_ushort8 y) {
3177#if defined __arm__ || defined __arm64__ || defined __aarch64__
3178  return vmaxq_u16(x, y);
3179#elif defined __SSE4_1__
3180  return (simd_ushort8) _mm_max_epu16((__m128i)x, (__m128i)y);
3181#else
3182  return simd_bitselect(x, y, x < y);
3183#endif
3184}
3185
3186static inline SIMD_CFUNC simd_ushort16 simd_max(simd_ushort16 x, simd_ushort16 y) {
3187#if defined __AVX2__
3188  return _mm256_max_epu16(x, y);
3189#else
3190  return simd_bitselect(x, y, x < y);
3191#endif
3192}
3193
3194static inline SIMD_CFUNC simd_ushort32 simd_max(simd_ushort32 x, simd_ushort32 y) {
3195#if defined __AVX512BW__
3196  return _mm512_max_epu16(x, y);
3197#else
3198  return simd_bitselect(x, y, x < y);
3199#endif
3200}
3201
3202static inline SIMD_CFUNC _Float16 simd_max(_Float16 x, _Float16 y) {
3203  return __fmaxf16(x,y);
3204}
3205
3206static inline SIMD_CFUNC simd_half2 simd_max(simd_half2 x, simd_half2 y) {
3207  return __tg_fmax(x,y);
3208}
3209
3210static inline SIMD_CFUNC simd_half3 simd_max(simd_half3 x, simd_half3 y) {
3211  return __tg_fmax(x,y);
3212}
3213
3214static inline SIMD_CFUNC simd_half4 simd_max(simd_half4 x, simd_half4 y) {
3215  return __tg_fmax(x,y);
3216}
3217
3218static inline SIMD_CFUNC simd_half8 simd_max(simd_half8 x, simd_half8 y) {
3219  return __tg_fmax(x,y);
3220}
3221
3222static inline SIMD_CFUNC simd_half16 simd_max(simd_half16 x, simd_half16 y) {
3223  return __tg_fmax(x,y);
3224}
3225
3226static inline SIMD_CFUNC simd_half32 simd_max(simd_half32 x, simd_half32 y) {
3227  return __tg_fmax(x,y);
3228}
3229
3230static inline SIMD_CFUNC simd_int2 simd_max(simd_int2 x, simd_int2 y) {
3231#if defined __arm__ || defined __arm64__ || defined __aarch64__
3232  return vmax_s32(x, y);
3233#else
3234  return simd_make_int2(simd_max(simd_make_int4_undef(x), simd_make_int4_undef(y)));
3235#endif
3236
3237}
3238
3239static inline SIMD_CFUNC simd_int3 simd_max(simd_int3 x, simd_int3 y) {
3240  return simd_make_int3(simd_max(simd_make_int4_undef(x), simd_make_int4_undef(y)));
3241}
3242
3243static inline SIMD_CFUNC simd_int4 simd_max(simd_int4 x, simd_int4 y) {
3244#if defined __arm__ || defined __arm64__ || defined __aarch64__
3245  return vmaxq_s32(x, y);
3246#elif defined __SSE4_1__
3247  return (simd_int4) _mm_max_epi32((__m128i)x, (__m128i)y);
3248#else
3249  return simd_bitselect(x, y, x < y);
3250#endif
3251}
3252
3253static inline SIMD_CFUNC simd_int8 simd_max(simd_int8 x, simd_int8 y) {
3254#if defined __AVX2__
3255  return _mm256_max_epi32(x, y);
3256#else
3257  return simd_bitselect(x, y, x < y);
3258#endif
3259}
3260
3261static inline SIMD_CFUNC simd_int16 simd_max(simd_int16 x, simd_int16 y) {
3262#if defined __AVX512F__
3263  return _mm512_max_epi32(x, y);
3264#else
3265  return simd_bitselect(x, y, x < y);
3266#endif
3267}
3268
3269static inline SIMD_CFUNC simd_uint2 simd_max(simd_uint2 x, simd_uint2 y) {
3270#if defined __arm__ || defined __arm64__ || defined __aarch64__
3271  return vmax_u32(x, y);
3272#else
3273  return simd_make_uint2(simd_max(simd_make_uint4_undef(x), simd_make_uint4_undef(y)));
3274#endif
3275
3276}
3277
3278static inline SIMD_CFUNC simd_uint3 simd_max(simd_uint3 x, simd_uint3 y) {
3279  return simd_make_uint3(simd_max(simd_make_uint4_undef(x), simd_make_uint4_undef(y)));
3280}
3281
3282static inline SIMD_CFUNC simd_uint4 simd_max(simd_uint4 x, simd_uint4 y) {
3283#if defined __arm__ || defined __arm64__ || defined __aarch64__
3284  return vmaxq_u32(x, y);
3285#elif defined __SSE4_1__
3286  return (simd_uint4) _mm_max_epu32((__m128i)x, (__m128i)y);
3287#else
3288  return simd_bitselect(x, y, x < y);
3289#endif
3290}
3291
3292static inline SIMD_CFUNC simd_uint8 simd_max(simd_uint8 x, simd_uint8 y) {
3293#if defined __AVX2__
3294  return _mm256_max_epu32(x, y);
3295#else
3296  return simd_bitselect(x, y, x < y);
3297#endif
3298}
3299
3300static inline SIMD_CFUNC simd_uint16 simd_max(simd_uint16 x, simd_uint16 y) {
3301#if defined __AVX512F__
3302  return _mm512_max_epu32(x, y);
3303#else
3304  return simd_bitselect(x, y, x < y);
3305#endif
3306}
3307
3308static inline SIMD_CFUNC float simd_max(float x, float y) {
3309  return __tg_fmax(x,y);
3310}
3311
3312static inline SIMD_CFUNC simd_float2 simd_max(simd_float2 x, simd_float2 y) {
3313  return __tg_fmax(x,y);
3314}
3315
3316static inline SIMD_CFUNC simd_float3 simd_max(simd_float3 x, simd_float3 y) {
3317  return __tg_fmax(x,y);
3318}
3319
3320static inline SIMD_CFUNC simd_float4 simd_max(simd_float4 x, simd_float4 y) {
3321  return __tg_fmax(x,y);
3322}
3323
3324static inline SIMD_CFUNC simd_float8 simd_max(simd_float8 x, simd_float8 y) {
3325  return __tg_fmax(x,y);
3326}
3327
3328static inline SIMD_CFUNC simd_float16 simd_max(simd_float16 x, simd_float16 y) {
3329  return __tg_fmax(x,y);
3330}
3331
3332static inline SIMD_CFUNC simd_long2 simd_max(simd_long2 x, simd_long2 y) {
3333#if defined __AVX512VL__
3334  return _mm_max_epi64(x, y);
3335#else
3336  return simd_bitselect(x, y, x < y);
3337#endif
3338}
3339
3340static inline SIMD_CFUNC simd_long3 simd_max(simd_long3 x, simd_long3 y) {
3341  return simd_make_long3(simd_max(simd_make_long4_undef(x), simd_make_long4_undef(y)));
3342}
3343
3344static inline SIMD_CFUNC simd_long4 simd_max(simd_long4 x, simd_long4 y) {
3345#if defined __AVX512VL__
3346  return _mm256_max_epi64(x, y);
3347#else
3348  return simd_bitselect(x, y, x < y);
3349#endif
3350}
3351
3352static inline SIMD_CFUNC simd_long8 simd_max(simd_long8 x, simd_long8 y) {
3353#if defined __AVX512F__
3354  return _mm512_max_epi64(x, y);
3355#else
3356  return simd_bitselect(x, y, x < y);
3357#endif
3358}
3359
3360static inline SIMD_CFUNC simd_ulong2 simd_max(simd_ulong2 x, simd_ulong2 y) {
3361#if defined __AVX512VL__
3362  return _mm_max_epu64(x, y);
3363#else
3364  return simd_bitselect(x, y, x < y);
3365#endif
3366}
3367
3368static inline SIMD_CFUNC simd_ulong3 simd_max(simd_ulong3 x, simd_ulong3 y) {
3369  return simd_make_ulong3(simd_max(simd_make_ulong4_undef(x), simd_make_ulong4_undef(y)));
3370}
3371
3372static inline SIMD_CFUNC simd_ulong4 simd_max(simd_ulong4 x, simd_ulong4 y) {
3373#if defined __AVX512VL__
3374  return _mm256_max_epu64(x, y);
3375#else
3376  return simd_bitselect(x, y, x < y);
3377#endif
3378}
3379
3380static inline SIMD_CFUNC simd_ulong8 simd_max(simd_ulong8 x, simd_ulong8 y) {
3381#if defined __AVX512F__
3382  return _mm512_max_epu64(x, y);
3383#else
3384  return simd_bitselect(x, y, x < y);
3385#endif
3386}
3387
3388static inline SIMD_CFUNC double simd_max(double x, double y) {
3389  return __tg_fmax(x,y);
3390}
3391
3392static inline SIMD_CFUNC simd_double2 simd_max(simd_double2 x, simd_double2 y) {
3393  return __tg_fmax(x,y);
3394}
3395
3396static inline SIMD_CFUNC simd_double3 simd_max(simd_double3 x, simd_double3 y) {
3397  return __tg_fmax(x,y);
3398}
3399
3400static inline SIMD_CFUNC simd_double4 simd_max(simd_double4 x, simd_double4 y) {
3401  return __tg_fmax(x,y);
3402}
3403
3404static inline SIMD_CFUNC simd_double8 simd_max(simd_double8 x, simd_double8 y) {
3405  return __tg_fmax(x,y);
3406}
3407
3408static inline SIMD_CFUNC simd_char2 simd_clamp(simd_char2 x, simd_char2 min, simd_char2 max) {
3409  return simd_min(simd_max(x, min), max);
3410}
3411
3412static inline SIMD_CFUNC simd_char3 simd_clamp(simd_char3 x, simd_char3 min, simd_char3 max) {
3413  return simd_min(simd_max(x, min), max);
3414}
3415
3416static inline SIMD_CFUNC simd_char4 simd_clamp(simd_char4 x, simd_char4 min, simd_char4 max) {
3417  return simd_min(simd_max(x, min), max);
3418}
3419
3420static inline SIMD_CFUNC simd_char8 simd_clamp(simd_char8 x, simd_char8 min, simd_char8 max) {
3421  return simd_min(simd_max(x, min), max);
3422}
3423
3424static inline SIMD_CFUNC simd_char16 simd_clamp(simd_char16 x, simd_char16 min, simd_char16 max) {
3425  return simd_min(simd_max(x, min), max);
3426}
3427
3428static inline SIMD_CFUNC simd_char32 simd_clamp(simd_char32 x, simd_char32 min, simd_char32 max) {
3429  return simd_min(simd_max(x, min), max);
3430}
3431
3432static inline SIMD_CFUNC simd_char64 simd_clamp(simd_char64 x, simd_char64 min, simd_char64 max) {
3433  return simd_min(simd_max(x, min), max);
3434}
3435
3436static inline SIMD_CFUNC simd_uchar2 simd_clamp(simd_uchar2 x, simd_uchar2 min, simd_uchar2 max) {
3437  return simd_min(simd_max(x, min), max);
3438}
3439
3440static inline SIMD_CFUNC simd_uchar3 simd_clamp(simd_uchar3 x, simd_uchar3 min, simd_uchar3 max) {
3441  return simd_min(simd_max(x, min), max);
3442}
3443
3444static inline SIMD_CFUNC simd_uchar4 simd_clamp(simd_uchar4 x, simd_uchar4 min, simd_uchar4 max) {
3445  return simd_min(simd_max(x, min), max);
3446}
3447
3448static inline SIMD_CFUNC simd_uchar8 simd_clamp(simd_uchar8 x, simd_uchar8 min, simd_uchar8 max) {
3449  return simd_min(simd_max(x, min), max);
3450}
3451
3452static inline SIMD_CFUNC simd_uchar16 simd_clamp(simd_uchar16 x, simd_uchar16 min, simd_uchar16 max) {
3453  return simd_min(simd_max(x, min), max);
3454}
3455
3456static inline SIMD_CFUNC simd_uchar32 simd_clamp(simd_uchar32 x, simd_uchar32 min, simd_uchar32 max) {
3457  return simd_min(simd_max(x, min), max);
3458}
3459
3460static inline SIMD_CFUNC simd_uchar64 simd_clamp(simd_uchar64 x, simd_uchar64 min, simd_uchar64 max) {
3461  return simd_min(simd_max(x, min), max);
3462}
3463
3464static inline SIMD_CFUNC simd_short2 simd_clamp(simd_short2 x, simd_short2 min, simd_short2 max) {
3465  return simd_min(simd_max(x, min), max);
3466}
3467
3468static inline SIMD_CFUNC simd_short3 simd_clamp(simd_short3 x, simd_short3 min, simd_short3 max) {
3469  return simd_min(simd_max(x, min), max);
3470}
3471
3472static inline SIMD_CFUNC simd_short4 simd_clamp(simd_short4 x, simd_short4 min, simd_short4 max) {
3473  return simd_min(simd_max(x, min), max);
3474}
3475
3476static inline SIMD_CFUNC simd_short8 simd_clamp(simd_short8 x, simd_short8 min, simd_short8 max) {
3477  return simd_min(simd_max(x, min), max);
3478}
3479
3480static inline SIMD_CFUNC simd_short16 simd_clamp(simd_short16 x, simd_short16 min, simd_short16 max) {
3481  return simd_min(simd_max(x, min), max);
3482}
3483
3484static inline SIMD_CFUNC simd_short32 simd_clamp(simd_short32 x, simd_short32 min, simd_short32 max) {
3485  return simd_min(simd_max(x, min), max);
3486}
3487
3488static inline SIMD_CFUNC simd_ushort2 simd_clamp(simd_ushort2 x, simd_ushort2 min, simd_ushort2 max) {
3489  return simd_min(simd_max(x, min), max);
3490}
3491
3492static inline SIMD_CFUNC simd_ushort3 simd_clamp(simd_ushort3 x, simd_ushort3 min, simd_ushort3 max) {
3493  return simd_min(simd_max(x, min), max);
3494}
3495
3496static inline SIMD_CFUNC simd_ushort4 simd_clamp(simd_ushort4 x, simd_ushort4 min, simd_ushort4 max) {
3497  return simd_min(simd_max(x, min), max);
3498}
3499
3500static inline SIMD_CFUNC simd_ushort8 simd_clamp(simd_ushort8 x, simd_ushort8 min, simd_ushort8 max) {
3501  return simd_min(simd_max(x, min), max);
3502}
3503
3504static inline SIMD_CFUNC simd_ushort16 simd_clamp(simd_ushort16 x, simd_ushort16 min, simd_ushort16 max) {
3505  return simd_min(simd_max(x, min), max);
3506}
3507
3508static inline SIMD_CFUNC simd_ushort32 simd_clamp(simd_ushort32 x, simd_ushort32 min, simd_ushort32 max) {
3509  return simd_min(simd_max(x, min), max);
3510}
3511
3512static inline SIMD_CFUNC _Float16 simd_clamp(_Float16 x, _Float16 min, _Float16 max) {
3513  return simd_min(simd_max(x, min), max);
3514}
3515
3516static inline SIMD_CFUNC simd_half2 simd_clamp(simd_half2 x, simd_half2 min, simd_half2 max) {
3517  return simd_min(simd_max(x, min), max);
3518}
3519
3520static inline SIMD_CFUNC simd_half3 simd_clamp(simd_half3 x, simd_half3 min, simd_half3 max) {
3521  return simd_min(simd_max(x, min), max);
3522}
3523
3524static inline SIMD_CFUNC simd_half4 simd_clamp(simd_half4 x, simd_half4 min, simd_half4 max) {
3525  return simd_min(simd_max(x, min), max);
3526}
3527
3528static inline SIMD_CFUNC simd_half8 simd_clamp(simd_half8 x, simd_half8 min, simd_half8 max) {
3529  return simd_min(simd_max(x, min), max);
3530}
3531
3532static inline SIMD_CFUNC simd_half16 simd_clamp(simd_half16 x, simd_half16 min, simd_half16 max) {
3533  return simd_min(simd_max(x, min), max);
3534}
3535
3536static inline SIMD_CFUNC simd_half32 simd_clamp(simd_half32 x, simd_half32 min, simd_half32 max) {
3537  return simd_min(simd_max(x, min), max);
3538}
3539
3540static inline SIMD_CFUNC simd_int2 simd_clamp(simd_int2 x, simd_int2 min, simd_int2 max) {
3541  return simd_min(simd_max(x, min), max);
3542}
3543
3544static inline SIMD_CFUNC simd_int3 simd_clamp(simd_int3 x, simd_int3 min, simd_int3 max) {
3545  return simd_min(simd_max(x, min), max);
3546}
3547
3548static inline SIMD_CFUNC simd_int4 simd_clamp(simd_int4 x, simd_int4 min, simd_int4 max) {
3549  return simd_min(simd_max(x, min), max);
3550}
3551
3552static inline SIMD_CFUNC simd_int8 simd_clamp(simd_int8 x, simd_int8 min, simd_int8 max) {
3553  return simd_min(simd_max(x, min), max);
3554}
3555
3556static inline SIMD_CFUNC simd_int16 simd_clamp(simd_int16 x, simd_int16 min, simd_int16 max) {
3557  return simd_min(simd_max(x, min), max);
3558}
3559
3560static inline SIMD_CFUNC simd_uint2 simd_clamp(simd_uint2 x, simd_uint2 min, simd_uint2 max) {
3561  return simd_min(simd_max(x, min), max);
3562}
3563
3564static inline SIMD_CFUNC simd_uint3 simd_clamp(simd_uint3 x, simd_uint3 min, simd_uint3 max) {
3565  return simd_min(simd_max(x, min), max);
3566}
3567
3568static inline SIMD_CFUNC simd_uint4 simd_clamp(simd_uint4 x, simd_uint4 min, simd_uint4 max) {
3569  return simd_min(simd_max(x, min), max);
3570}
3571
3572static inline SIMD_CFUNC simd_uint8 simd_clamp(simd_uint8 x, simd_uint8 min, simd_uint8 max) {
3573  return simd_min(simd_max(x, min), max);
3574}
3575
3576static inline SIMD_CFUNC simd_uint16 simd_clamp(simd_uint16 x, simd_uint16 min, simd_uint16 max) {
3577  return simd_min(simd_max(x, min), max);
3578}
3579
3580static inline SIMD_CFUNC float simd_clamp(float x, float min, float max) {
3581  return simd_min(simd_max(x, min), max);
3582}
3583
3584static inline SIMD_CFUNC simd_float2 simd_clamp(simd_float2 x, simd_float2 min, simd_float2 max) {
3585  return simd_min(simd_max(x, min), max);
3586}
3587
3588static inline SIMD_CFUNC simd_float3 simd_clamp(simd_float3 x, simd_float3 min, simd_float3 max) {
3589  return simd_min(simd_max(x, min), max);
3590}
3591
3592static inline SIMD_CFUNC simd_float4 simd_clamp(simd_float4 x, simd_float4 min, simd_float4 max) {
3593  return simd_min(simd_max(x, min), max);
3594}
3595
3596static inline SIMD_CFUNC simd_float8 simd_clamp(simd_float8 x, simd_float8 min, simd_float8 max) {
3597  return simd_min(simd_max(x, min), max);
3598}
3599
3600static inline SIMD_CFUNC simd_float16 simd_clamp(simd_float16 x, simd_float16 min, simd_float16 max) {
3601  return simd_min(simd_max(x, min), max);
3602}
3603
3604static inline SIMD_CFUNC simd_long2 simd_clamp(simd_long2 x, simd_long2 min, simd_long2 max) {
3605  return simd_min(simd_max(x, min), max);
3606}
3607
3608static inline SIMD_CFUNC simd_long3 simd_clamp(simd_long3 x, simd_long3 min, simd_long3 max) {
3609  return simd_min(simd_max(x, min), max);
3610}
3611
3612static inline SIMD_CFUNC simd_long4 simd_clamp(simd_long4 x, simd_long4 min, simd_long4 max) {
3613  return simd_min(simd_max(x, min), max);
3614}
3615
3616static inline SIMD_CFUNC simd_long8 simd_clamp(simd_long8 x, simd_long8 min, simd_long8 max) {
3617  return simd_min(simd_max(x, min), max);
3618}
3619
3620static inline SIMD_CFUNC simd_ulong2 simd_clamp(simd_ulong2 x, simd_ulong2 min, simd_ulong2 max) {
3621  return simd_min(simd_max(x, min), max);
3622}
3623
3624static inline SIMD_CFUNC simd_ulong3 simd_clamp(simd_ulong3 x, simd_ulong3 min, simd_ulong3 max) {
3625  return simd_min(simd_max(x, min), max);
3626}
3627
3628static inline SIMD_CFUNC simd_ulong4 simd_clamp(simd_ulong4 x, simd_ulong4 min, simd_ulong4 max) {
3629  return simd_min(simd_max(x, min), max);
3630}
3631
3632static inline SIMD_CFUNC simd_ulong8 simd_clamp(simd_ulong8 x, simd_ulong8 min, simd_ulong8 max) {
3633  return simd_min(simd_max(x, min), max);
3634}
3635
3636static inline SIMD_CFUNC double simd_clamp(double x, double min, double max) {
3637  return simd_min(simd_max(x, min), max);
3638}
3639
3640static inline SIMD_CFUNC simd_double2 simd_clamp(simd_double2 x, simd_double2 min, simd_double2 max) {
3641  return simd_min(simd_max(x, min), max);
3642}
3643
3644static inline SIMD_CFUNC simd_double3 simd_clamp(simd_double3 x, simd_double3 min, simd_double3 max) {
3645  return simd_min(simd_max(x, min), max);
3646}
3647
3648static inline SIMD_CFUNC simd_double4 simd_clamp(simd_double4 x, simd_double4 min, simd_double4 max) {
3649  return simd_min(simd_max(x, min), max);
3650}
3651
3652static inline SIMD_CFUNC simd_double8 simd_clamp(simd_double8 x, simd_double8 min, simd_double8 max) {
3653  return simd_min(simd_max(x, min), max);
3654}
3655
3656
3657static inline SIMD_CFUNC _Float16 simd_sign(_Float16 x) {
3658  return (x == 0 | x != x) ? 0 : __copysignf16(1,x);
3659}
3660
3661static inline SIMD_CFUNC simd_half2 simd_sign(simd_half2 x) {
3662  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3663}
3664
3665static inline SIMD_CFUNC simd_half3 simd_sign(simd_half3 x) {
3666  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3667}
3668
3669static inline SIMD_CFUNC simd_half4 simd_sign(simd_half4 x) {
3670  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3671}
3672
3673static inline SIMD_CFUNC simd_half8 simd_sign(simd_half8 x) {
3674  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3675}
3676
3677static inline SIMD_CFUNC simd_half16 simd_sign(simd_half16 x) {
3678  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3679}
3680
3681static inline SIMD_CFUNC simd_half32 simd_sign(simd_half32 x) {
3682  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3683}
3684
3685static inline SIMD_CFUNC float simd_sign(float x) {
3686  return (x == 0 | x != x) ? 0 : copysignf(1,x);
3687}
3688
3689static inline SIMD_CFUNC simd_float2 simd_sign(simd_float2 x) {
3690  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3691}
3692
3693static inline SIMD_CFUNC simd_float3 simd_sign(simd_float3 x) {
3694  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3695}
3696
3697static inline SIMD_CFUNC simd_float4 simd_sign(simd_float4 x) {
3698  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3699}
3700
3701static inline SIMD_CFUNC simd_float8 simd_sign(simd_float8 x) {
3702  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3703}
3704
3705static inline SIMD_CFUNC simd_float16 simd_sign(simd_float16 x) {
3706  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3707}
3708
3709static inline SIMD_CFUNC double simd_sign(double x) {
3710  return (x == 0 | x != x) ? 0 : copysign(1,x);
3711}
3712
3713static inline SIMD_CFUNC simd_double2 simd_sign(simd_double2 x) {
3714  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3715}
3716
3717static inline SIMD_CFUNC simd_double3 simd_sign(simd_double3 x) {
3718  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3719}
3720
3721static inline SIMD_CFUNC simd_double4 simd_sign(simd_double4 x) {
3722  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3723}
3724
3725static inline SIMD_CFUNC simd_double8 simd_sign(simd_double8 x) {
3726  return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3727}
3728
3729static inline SIMD_CFUNC _Float16 simd_mix(_Float16 x, _Float16 y, _Float16 t) {
3730  return x + t*(y - x);
3731}
3732  
3733static inline SIMD_CFUNC simd_half2 simd_mix(simd_half2 x, simd_half2 y, simd_half2 t) {
3734  return x + t*(y - x);
3735}
3736  
3737static inline SIMD_CFUNC simd_half3 simd_mix(simd_half3 x, simd_half3 y, simd_half3 t) {
3738  return x + t*(y - x);
3739}
3740  
3741static inline SIMD_CFUNC simd_half4 simd_mix(simd_half4 x, simd_half4 y, simd_half4 t) {
3742  return x + t*(y - x);
3743}
3744  
3745static inline SIMD_CFUNC simd_half8 simd_mix(simd_half8 x, simd_half8 y, simd_half8 t) {
3746  return x + t*(y - x);
3747}
3748  
3749static inline SIMD_CFUNC simd_half16 simd_mix(simd_half16 x, simd_half16 y, simd_half16 t) {
3750  return x + t*(y - x);
3751}
3752  
3753static inline SIMD_CFUNC simd_half32 simd_mix(simd_half32 x, simd_half32 y, simd_half32 t) {
3754  return x + t*(y - x);
3755}
3756  
3757static inline SIMD_CFUNC float simd_mix(float x, float y, float t) {
3758  return x + t*(y - x);
3759}
3760  
3761static inline SIMD_CFUNC simd_float2 simd_mix(simd_float2 x, simd_float2 y, simd_float2 t) {
3762  return x + t*(y - x);
3763}
3764  
3765static inline SIMD_CFUNC simd_float3 simd_mix(simd_float3 x, simd_float3 y, simd_float3 t) {
3766  return x + t*(y - x);
3767}
3768  
3769static inline SIMD_CFUNC simd_float4 simd_mix(simd_float4 x, simd_float4 y, simd_float4 t) {
3770  return x + t*(y - x);
3771}
3772  
3773static inline SIMD_CFUNC simd_float8 simd_mix(simd_float8 x, simd_float8 y, simd_float8 t) {
3774  return x + t*(y - x);
3775}
3776  
3777static inline SIMD_CFUNC simd_float16 simd_mix(simd_float16 x, simd_float16 y, simd_float16 t) {
3778  return x + t*(y - x);
3779}
3780  
3781static inline SIMD_CFUNC double simd_mix(double x, double y, double t) {
3782  return x + t*(y - x);
3783}
3784  
3785static inline SIMD_CFUNC simd_double2 simd_mix(simd_double2 x, simd_double2 y, simd_double2 t) {
3786  return x + t*(y - x);
3787}
3788  
3789static inline SIMD_CFUNC simd_double3 simd_mix(simd_double3 x, simd_double3 y, simd_double3 t) {
3790  return x + t*(y - x);
3791}
3792  
3793static inline SIMD_CFUNC simd_double4 simd_mix(simd_double4 x, simd_double4 y, simd_double4 t) {
3794  return x + t*(y - x);
3795}
3796  
3797static inline SIMD_CFUNC simd_double8 simd_mix(simd_double8 x, simd_double8 y, simd_double8 t) {
3798  return x + t*(y - x);
3799}
3800  
3801static inline SIMD_CFUNC _Float16 simd_recip(_Float16 x) {
3802#if __FAST_MATH__
3803  return simd_fast_recip(x);
3804#else
3805  return simd_precise_recip(x);
3806#endif
3807}
3808
3809static inline SIMD_CFUNC simd_half2 simd_recip(simd_half2 x) {
3810#if __FAST_MATH__
3811  return simd_fast_recip(x);
3812#else
3813  return simd_precise_recip(x);
3814#endif
3815}
3816
3817static inline SIMD_CFUNC simd_half3 simd_recip(simd_half3 x) {
3818#if __FAST_MATH__
3819  return simd_fast_recip(x);
3820#else
3821  return simd_precise_recip(x);
3822#endif
3823}
3824
3825static inline SIMD_CFUNC simd_half4 simd_recip(simd_half4 x) {
3826#if __FAST_MATH__
3827  return simd_fast_recip(x);
3828#else
3829  return simd_precise_recip(x);
3830#endif
3831}
3832
3833static inline SIMD_CFUNC simd_half8 simd_recip(simd_half8 x) {
3834#if __FAST_MATH__
3835  return simd_fast_recip(x);
3836#else
3837  return simd_precise_recip(x);
3838#endif
3839}
3840
3841static inline SIMD_CFUNC simd_half16 simd_recip(simd_half16 x) {
3842#if __FAST_MATH__
3843  return simd_fast_recip(x);
3844#else
3845  return simd_precise_recip(x);
3846#endif
3847}
3848
3849static inline SIMD_CFUNC simd_half32 simd_recip(simd_half32 x) {
3850#if __FAST_MATH__
3851  return simd_fast_recip(x);
3852#else
3853  return simd_precise_recip(x);
3854#endif
3855}
3856
3857static inline SIMD_CFUNC float simd_recip(float x) {
3858#if __FAST_MATH__
3859  return simd_fast_recip(x);
3860#else
3861  return simd_precise_recip(x);
3862#endif
3863}
3864
3865static inline SIMD_CFUNC simd_float2 simd_recip(simd_float2 x) {
3866#if __FAST_MATH__
3867  return simd_fast_recip(x);
3868#else
3869  return simd_precise_recip(x);
3870#endif
3871}
3872
3873static inline SIMD_CFUNC simd_float3 simd_recip(simd_float3 x) {
3874#if __FAST_MATH__
3875  return simd_fast_recip(x);
3876#else
3877  return simd_precise_recip(x);
3878#endif
3879}
3880
3881static inline SIMD_CFUNC simd_float4 simd_recip(simd_float4 x) {
3882#if __FAST_MATH__
3883  return simd_fast_recip(x);
3884#else
3885  return simd_precise_recip(x);
3886#endif
3887}
3888
3889static inline SIMD_CFUNC simd_float8 simd_recip(simd_float8 x) {
3890#if __FAST_MATH__
3891  return simd_fast_recip(x);
3892#else
3893  return simd_precise_recip(x);
3894#endif
3895}
3896
3897static inline SIMD_CFUNC simd_float16 simd_recip(simd_float16 x) {
3898#if __FAST_MATH__
3899  return simd_fast_recip(x);
3900#else
3901  return simd_precise_recip(x);
3902#endif
3903}
3904
3905static inline SIMD_CFUNC double simd_recip(double x) {
3906#if __FAST_MATH__
3907  return simd_fast_recip(x);
3908#else
3909  return simd_precise_recip(x);
3910#endif
3911}
3912
3913static inline SIMD_CFUNC simd_double2 simd_recip(simd_double2 x) {
3914#if __FAST_MATH__
3915  return simd_fast_recip(x);
3916#else
3917  return simd_precise_recip(x);
3918#endif
3919}
3920
3921static inline SIMD_CFUNC simd_double3 simd_recip(simd_double3 x) {
3922#if __FAST_MATH__
3923  return simd_fast_recip(x);
3924#else
3925  return simd_precise_recip(x);
3926#endif
3927}
3928
3929static inline SIMD_CFUNC simd_double4 simd_recip(simd_double4 x) {
3930#if __FAST_MATH__
3931  return simd_fast_recip(x);
3932#else
3933  return simd_precise_recip(x);
3934#endif
3935}
3936
3937static inline SIMD_CFUNC simd_double8 simd_recip(simd_double8 x) {
3938#if __FAST_MATH__
3939  return simd_fast_recip(x);
3940#else
3941  return simd_precise_recip(x);
3942#endif
3943}
3944
3945static inline SIMD_CFUNC _Float16 simd_fast_recip(_Float16 x) {
3946#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
3947  return simd_fast_recip(simd_make_half4_undef(x)).x;
3948#else
3949  return simd_precise_recip(x);
3950#endif
3951}
3952
3953static inline SIMD_CFUNC simd_half2 simd_fast_recip(simd_half2 x) {
3954  return simd_fast_recip(simd_make_half4_undef(x)).lo;
3955}
3956
3957static inline SIMD_CFUNC simd_half3 simd_fast_recip(simd_half3 x) {
3958  return simd_make_half3(simd_fast_recip(simd_make_half4_undef(x)));
3959}
3960
3961static inline SIMD_CFUNC simd_half4 simd_fast_recip(simd_half4 x) {
3962#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
3963  simd_half4 r = vrecpe_f16(x);
3964  return r * vrecps_f16(x, r);
3965#else
3966  return simd_precise_recip(x);
3967#endif
3968}
3969
3970static inline SIMD_CFUNC simd_half8 simd_fast_recip(simd_half8 x) {
3971#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
3972  simd_half8 r = vrecpeq_f16(x);
3973  return r * vrecpsq_f16(x, r);
3974#else
3975  return simd_precise_recip(x);
3976#endif
3977}
3978
3979static inline SIMD_CFUNC simd_half16 simd_fast_recip(simd_half16 x) {
3980  return simd_make_half16(simd_fast_recip(x.lo), simd_fast_recip(x.hi));
3981}
3982
3983static inline SIMD_CFUNC simd_half32 simd_fast_recip(simd_half32 x) {
3984  return simd_make_half32(simd_fast_recip(x.lo), simd_fast_recip(x.hi));
3985}
3986
3987static inline SIMD_CFUNC float simd_fast_recip(float x) {
3988#if defined __AVX512VL__
3989  simd_float4 x4 = simd_make_float4(x);
3990  return ((simd_float4)_mm_rcp14_ss(x4, x4)).x;
3991#elif defined __SSE__
3992  return ((simd_float4)_mm_rcp_ss(simd_make_float4(x))).x;
3993#elif defined __ARM_NEON
3994  return simd_fast_recip(simd_make_float2_undef(x)).x;
3995#else
3996  return simd_precise_recip(x);
3997#endif
3998}
3999
4000static inline SIMD_CFUNC simd_float2 simd_fast_recip(simd_float2 x) {
4001#if defined __SSE__
4002  return simd_make_float2(simd_fast_recip(simd_make_float4_undef(x)));
4003#elif defined __ARM_NEON
4004  simd_float2 r = vrecpe_f32(x);
4005  return r * vrecps_f32(x, r);
4006#else
4007  return simd_precise_recip(x);
4008#endif
4009}
4010
4011static inline SIMD_CFUNC simd_float3 simd_fast_recip(simd_float3 x) {
4012  return simd_make_float3(simd_fast_recip(simd_make_float4_undef(x)));
4013}
4014
4015static inline SIMD_CFUNC simd_float4 simd_fast_recip(simd_float4 x) {
4016#if defined __AVX512VL__
4017  return _mm_rcp14_ps(x);
4018#elif defined __SSE__
4019  return _mm_rcp_ps(x);
4020#elif defined __ARM_NEON
4021  simd_float4 r = vrecpeq_f32(x);
4022  return r * vrecpsq_f32(x, r);
4023#else
4024  return simd_precise_recip(x);
4025#endif
4026}
4027
4028static inline SIMD_CFUNC simd_float8 simd_fast_recip(simd_float8 x) {
4029#if defined __AVX512VL__
4030  return _mm256_rcp14_ps(x);
4031#elif defined __AVX__
4032  return _mm256_rcp_ps(x);
4033#else
4034  return simd_make_float8(simd_fast_recip(x.lo), simd_fast_recip(x.hi));
4035#endif
4036}
4037
4038static inline SIMD_CFUNC simd_float16 simd_fast_recip(simd_float16 x) {
4039#if defined __AVX512F__
4040  return _mm512_rcp14_ps(x);
4041#else
4042  return simd_make_float16(simd_fast_recip(x.lo), simd_fast_recip(x.hi));
4043#endif
4044}
4045
4046static inline SIMD_CFUNC double simd_fast_recip(double x) {
4047  return simd_precise_recip(x);
4048}
4049
4050static inline SIMD_CFUNC simd_double2 simd_fast_recip(simd_double2 x) {
4051  return simd_precise_recip(x);
4052}
4053
4054static inline SIMD_CFUNC simd_double3 simd_fast_recip(simd_double3 x) {
4055  return simd_precise_recip(x);
4056}
4057
4058static inline SIMD_CFUNC simd_double4 simd_fast_recip(simd_double4 x) {
4059  return simd_precise_recip(x);
4060}
4061
4062static inline SIMD_CFUNC simd_double8 simd_fast_recip(simd_double8 x) {
4063  return simd_precise_recip(x);
4064}
4065
4066static inline SIMD_CFUNC _Float16 simd_precise_recip(_Float16 x) {
4067#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4068  return simd_precise_recip(simd_make_half4_undef(x)).x;
4069#else
4070  return 1/x;
4071#endif
4072}
4073
4074static inline SIMD_CFUNC simd_half2 simd_precise_recip(simd_half2 x) {
4075  return simd_precise_recip(simd_make_half4_undef(x)).lo;
4076}
4077
4078static inline SIMD_CFUNC simd_half3 simd_precise_recip(simd_half3 x) {
4079  return simd_make_half3(simd_precise_recip(simd_make_half4_undef(x)));
4080}
4081
4082static inline SIMD_CFUNC simd_half4 simd_precise_recip(simd_half4 x) {
4083#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4084  simd_half4 r = simd_fast_recip(x);
4085  return r*vrecps_f16(x, r);
4086#else
4087  return 1/x;
4088#endif
4089}
4090
4091static inline SIMD_CFUNC simd_half8 simd_precise_recip(simd_half8 x) {
4092#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4093  simd_half8 r = simd_fast_recip(x);
4094  return r*vrecpsq_f16(x, r);
4095#else
4096  return 1/x;
4097#endif
4098}
4099
4100static inline SIMD_CFUNC simd_half16 simd_precise_recip(simd_half16 x) {
4101  return simd_make_half16(simd_precise_recip(x.lo), simd_precise_recip(x.hi));
4102}
4103
4104static inline SIMD_CFUNC simd_half32 simd_precise_recip(simd_half32 x) {
4105  return simd_make_half32(simd_precise_recip(x.lo), simd_precise_recip(x.hi));
4106}
4107
4108static inline SIMD_CFUNC float simd_precise_recip(float x) {
4109#if defined __SSE__
4110  float r = simd_fast_recip(x);
4111  return r*(2 - (x == 0 ? -INFINITY : x)*r);
4112#elif defined __ARM_NEON
4113  return simd_precise_recip(simd_make_float2_undef(x)).x;
4114#else
4115  return 1/x;
4116#endif
4117}
4118
4119static inline SIMD_CFUNC simd_float2 simd_precise_recip(simd_float2 x) {
4120#if defined __SSE__
4121  return simd_make_float2(simd_precise_recip(simd_make_float4_undef(x)));
4122#elif defined __ARM_NEON
4123  simd_float2 r = simd_fast_recip(x);
4124  return r*vrecps_f32(x, r);
4125#else
4126  return 1/x;
4127#endif
4128}
4129
4130static inline SIMD_CFUNC simd_float3 simd_precise_recip(simd_float3 x) {
4131  return simd_make_float3(simd_precise_recip(simd_make_float4_undef(x)));
4132}
4133
4134static inline SIMD_CFUNC simd_float4 simd_precise_recip(simd_float4 x) {
4135#if defined __SSE__
4136  simd_float4 r = simd_fast_recip(x);
4137  return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r);
4138#elif defined __ARM_NEON
4139  simd_float4 r = simd_fast_recip(x);
4140  return r*vrecpsq_f32(x, r);
4141#else
4142  return 1/x;
4143#endif
4144}
4145
4146static inline SIMD_CFUNC simd_float8 simd_precise_recip(simd_float8 x) {
4147#if defined __AVX__
4148  simd_float8 r = simd_fast_recip(x);
4149  return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r);
4150#else
4151  return simd_make_float8(simd_precise_recip(x.lo), simd_precise_recip(x.hi));
4152#endif
4153}
4154
4155static inline SIMD_CFUNC simd_float16 simd_precise_recip(simd_float16 x) {
4156#if defined __AVX512F__
4157  simd_float16 r = simd_fast_recip(x);
4158  return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r);
4159#else
4160  return simd_make_float16(simd_precise_recip(x.lo), simd_precise_recip(x.hi));
4161#endif
4162}
4163
4164static inline SIMD_CFUNC double simd_precise_recip(double x) {
4165  return 1/x;
4166}
4167
4168static inline SIMD_CFUNC simd_double2 simd_precise_recip(simd_double2 x) {
4169  return 1/x;
4170}
4171
4172static inline SIMD_CFUNC simd_double3 simd_precise_recip(simd_double3 x) {
4173  return 1/x;
4174}
4175
4176static inline SIMD_CFUNC simd_double4 simd_precise_recip(simd_double4 x) {
4177  return 1/x;
4178}
4179
4180static inline SIMD_CFUNC simd_double8 simd_precise_recip(simd_double8 x) {
4181  return 1/x;
4182}
4183
4184static inline SIMD_CFUNC _Float16 simd_rsqrt(_Float16 x) {
4185#if __FAST_MATH__
4186  return simd_fast_rsqrt(x);
4187#else
4188  return simd_precise_rsqrt(x);
4189#endif
4190}
4191  
4192static inline SIMD_CFUNC simd_half2 simd_rsqrt(simd_half2 x) {
4193#if __FAST_MATH__
4194  return simd_fast_rsqrt(x);
4195#else
4196  return simd_precise_rsqrt(x);
4197#endif
4198}
4199  
4200static inline SIMD_CFUNC simd_half3 simd_rsqrt(simd_half3 x) {
4201#if __FAST_MATH__
4202  return simd_fast_rsqrt(x);
4203#else
4204  return simd_precise_rsqrt(x);
4205#endif
4206}
4207  
4208static inline SIMD_CFUNC simd_half4 simd_rsqrt(simd_half4 x) {
4209#if __FAST_MATH__
4210  return simd_fast_rsqrt(x);
4211#else
4212  return simd_precise_rsqrt(x);
4213#endif
4214}
4215  
4216static inline SIMD_CFUNC simd_half8 simd_rsqrt(simd_half8 x) {
4217#if __FAST_MATH__
4218  return simd_fast_rsqrt(x);
4219#else
4220  return simd_precise_rsqrt(x);
4221#endif
4222}
4223  
4224static inline SIMD_CFUNC simd_half16 simd_rsqrt(simd_half16 x) {
4225#if __FAST_MATH__
4226  return simd_fast_rsqrt(x);
4227#else
4228  return simd_precise_rsqrt(x);
4229#endif
4230}
4231  
4232static inline SIMD_CFUNC simd_half32 simd_rsqrt(simd_half32 x) {
4233#if __FAST_MATH__
4234  return simd_fast_rsqrt(x);
4235#else
4236  return simd_precise_rsqrt(x);
4237#endif
4238}
4239  
4240static inline SIMD_CFUNC float simd_rsqrt(float x) {
4241#if __FAST_MATH__
4242  return simd_fast_rsqrt(x);
4243#else
4244  return simd_precise_rsqrt(x);
4245#endif
4246}
4247  
4248static inline SIMD_CFUNC simd_float2 simd_rsqrt(simd_float2 x) {
4249#if __FAST_MATH__
4250  return simd_fast_rsqrt(x);
4251#else
4252  return simd_precise_rsqrt(x);
4253#endif
4254}
4255  
4256static inline SIMD_CFUNC simd_float3 simd_rsqrt(simd_float3 x) {
4257#if __FAST_MATH__
4258  return simd_fast_rsqrt(x);
4259#else
4260  return simd_precise_rsqrt(x);
4261#endif
4262}
4263  
4264static inline SIMD_CFUNC simd_float4 simd_rsqrt(simd_float4 x) {
4265#if __FAST_MATH__
4266  return simd_fast_rsqrt(x);
4267#else
4268  return simd_precise_rsqrt(x);
4269#endif
4270}
4271  
4272static inline SIMD_CFUNC simd_float8 simd_rsqrt(simd_float8 x) {
4273#if __FAST_MATH__
4274  return simd_fast_rsqrt(x);
4275#else
4276  return simd_precise_rsqrt(x);
4277#endif
4278}
4279  
4280static inline SIMD_CFUNC simd_float16 simd_rsqrt(simd_float16 x) {
4281#if __FAST_MATH__
4282  return simd_fast_rsqrt(x);
4283#else
4284  return simd_precise_rsqrt(x);
4285#endif
4286}
4287  
4288static inline SIMD_CFUNC double simd_rsqrt(double x) {
4289#if __FAST_MATH__
4290  return simd_fast_rsqrt(x);
4291#else
4292  return simd_precise_rsqrt(x);
4293#endif
4294}
4295  
4296static inline SIMD_CFUNC simd_double2 simd_rsqrt(simd_double2 x) {
4297#if __FAST_MATH__
4298  return simd_fast_rsqrt(x);
4299#else
4300  return simd_precise_rsqrt(x);
4301#endif
4302}
4303  
4304static inline SIMD_CFUNC simd_double3 simd_rsqrt(simd_double3 x) {
4305#if __FAST_MATH__
4306  return simd_fast_rsqrt(x);
4307#else
4308  return simd_precise_rsqrt(x);
4309#endif
4310}
4311  
4312static inline SIMD_CFUNC simd_double4 simd_rsqrt(simd_double4 x) {
4313#if __FAST_MATH__
4314  return simd_fast_rsqrt(x);
4315#else
4316  return simd_precise_rsqrt(x);
4317#endif
4318}
4319  
4320static inline SIMD_CFUNC simd_double8 simd_rsqrt(simd_double8 x) {
4321#if __FAST_MATH__
4322  return simd_fast_rsqrt(x);
4323#else
4324  return simd_precise_rsqrt(x);
4325#endif
4326}
4327  
4328static inline SIMD_CFUNC _Float16 simd_fast_rsqrt(_Float16 x) {
4329#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4330  return simd_fast_rsqrt(simd_make_half4_undef(x)).x;
4331#else
4332  return simd_precise_rsqrt(x);
4333#endif
4334}
4335
4336static inline SIMD_CFUNC simd_half2 simd_fast_rsqrt(simd_half2 x) {
4337  return simd_fast_rsqrt(simd_make_half4_undef(x)).lo;
4338}
4339
4340static inline SIMD_CFUNC simd_half3 simd_fast_rsqrt(simd_half3 x) {
4341  return simd_make_half3(simd_fast_rsqrt(simd_make_half4_undef(x)));
4342}
4343
4344static inline SIMD_CFUNC simd_half4 simd_fast_rsqrt(simd_half4 x) {
4345#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4346  simd_half4 r = vrsqrte_f16(x);
4347  return r * vrsqrts_f16(x, r*r);
4348#else
4349  return simd_precise_rsqrt(x);
4350#endif
4351}
4352
4353static inline SIMD_CFUNC simd_half8 simd_fast_rsqrt(simd_half8 x) {
4354#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4355  simd_half8 r = vrsqrteq_f16(x);
4356  return r * vrsqrtsq_f16(x, r*r);
4357#else
4358  return simd_precise_rsqrt(x);
4359#endif
4360}
4361
4362static inline SIMD_CFUNC simd_half16 simd_fast_rsqrt(simd_half16 x) {
4363  return simd_make_half16(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi));
4364}
4365
4366static inline SIMD_CFUNC simd_half32 simd_fast_rsqrt(simd_half32 x) {
4367  return simd_make_half32(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi));
4368}
4369
4370static inline SIMD_CFUNC float simd_fast_rsqrt(float x) {
4371#if defined __AVX512VL__
4372  simd_float4 x4 = simd_make_float4(x);
4373  return ((simd_float4)_mm_rsqrt14_ss(x4, x4)).x;
4374#elif defined __SSE__
4375  return ((simd_float4)_mm_rsqrt_ss(simd_make_float4(x))).x;
4376#elif defined __ARM_NEON
4377  return simd_fast_rsqrt(simd_make_float2_undef(x)).x;
4378#else
4379  return simd_precise_rsqrt(x);
4380#endif
4381}
4382
4383static inline SIMD_CFUNC simd_float2 simd_fast_rsqrt(simd_float2 x) {
4384#if defined __SSE__
4385  return simd_make_float2(simd_fast_rsqrt(simd_make_float4_undef(x)));
4386#elif defined __ARM_NEON
4387  simd_float2 r = vrsqrte_f32(x);
4388  return r * vrsqrts_f32(x, r*r);
4389#else
4390  return simd_precise_rsqrt(x);
4391#endif
4392}
4393
4394static inline SIMD_CFUNC simd_float3 simd_fast_rsqrt(simd_float3 x) {
4395  return simd_make_float3(simd_fast_rsqrt(simd_make_float4_undef(x)));
4396}
4397
4398static inline SIMD_CFUNC simd_float4 simd_fast_rsqrt(simd_float4 x) {
4399#if defined __AVX512VL__
4400  return _mm_rsqrt14_ps(x);
4401#elif defined __SSE__
4402  return _mm_rsqrt_ps(x);
4403#elif defined __ARM_NEON
4404  simd_float4 r = vrsqrteq_f32(x);
4405  return r * vrsqrtsq_f32(x, r*r);
4406#else
4407  return simd_precise_rsqrt(x);
4408#endif
4409}
4410
4411static inline SIMD_CFUNC simd_float8 simd_fast_rsqrt(simd_float8 x) {
4412#if defined __AVX512VL__
4413  return _mm256_rsqrt14_ps(x);
4414#elif defined __AVX__
4415  return _mm256_rsqrt_ps(x);
4416#else
4417  return simd_make_float8(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi));
4418#endif
4419}
4420
4421static inline SIMD_CFUNC simd_float16 simd_fast_rsqrt(simd_float16 x) {
4422#if defined __AVX512F__
4423  return _mm512_rsqrt14_ps(x);
4424#else
4425  return simd_make_float16(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi));
4426#endif
4427}
4428
4429static inline SIMD_CFUNC double simd_fast_rsqrt(double x) {
4430  return simd_precise_rsqrt(x);
4431}
4432
4433static inline SIMD_CFUNC simd_double2 simd_fast_rsqrt(simd_double2 x) {
4434  return simd_precise_rsqrt(x);
4435}
4436
4437static inline SIMD_CFUNC simd_double3 simd_fast_rsqrt(simd_double3 x) {
4438  return simd_precise_rsqrt(x);
4439}
4440
4441static inline SIMD_CFUNC simd_double4 simd_fast_rsqrt(simd_double4 x) {
4442  return simd_precise_rsqrt(x);
4443}
4444
4445static inline SIMD_CFUNC simd_double8 simd_fast_rsqrt(simd_double8 x) {
4446  return simd_precise_rsqrt(x);
4447}
4448
4449static inline SIMD_CFUNC _Float16 simd_precise_rsqrt(_Float16 x) {
4450#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4451  return simd_precise_rsqrt(simd_make_half4_undef(x)).x;
4452#else
4453  return 1/__sqrtf16(x);
4454#endif
4455}
4456  
4457static inline SIMD_CFUNC simd_half2 simd_precise_rsqrt(simd_half2 x) {
4458  return simd_precise_rsqrt(simd_make_half4_undef(x)).lo;
4459}
4460  
4461static inline SIMD_CFUNC simd_half3 simd_precise_rsqrt(simd_half3 x) {
4462  return simd_make_half3(simd_precise_rsqrt(simd_make_half4_undef(x)));
4463}
4464  
4465static inline SIMD_CFUNC simd_half4 simd_precise_rsqrt(simd_half4 x) {
4466#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4467  simd_half4 r = simd_fast_rsqrt(x);
4468  return r*vrsqrts_f16(x, r*r);
4469#else
4470  return 1/__tg_sqrt(x);
4471#endif
4472}
4473  
4474static inline SIMD_CFUNC simd_half8 simd_precise_rsqrt(simd_half8 x) {
4475#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4476  simd_half8 r = simd_fast_rsqrt(x);
4477  return r*vrsqrtsq_f16(x, r*r);
4478#else
4479  return 1/__tg_sqrt(x);
4480#endif
4481}
4482  
4483static inline SIMD_CFUNC simd_half16 simd_precise_rsqrt(simd_half16 x) {
4484  return simd_make_half16(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi));
4485}
4486  
4487static inline SIMD_CFUNC simd_half32 simd_precise_rsqrt(simd_half32 x) {
4488  return simd_make_half32(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi));
4489}
4490  
4491static inline SIMD_CFUNC float simd_precise_rsqrt(float x) {
4492#if defined __SSE__
4493  float r = simd_fast_rsqrt(x);
4494  return r*(1.5f - 0.5f*(r == INFINITY ? -INFINITY : x)*r*r);
4495#elif defined __ARM_NEON
4496  return simd_precise_rsqrt(simd_make_float2_undef(x)).x;
4497#else
4498  return 1/sqrtf(x);
4499#endif
4500}
4501  
4502static inline SIMD_CFUNC simd_float2 simd_precise_rsqrt(simd_float2 x) {
4503#if defined __SSE__
4504  return simd_make_float2(simd_precise_rsqrt(simd_make_float4_undef(x)));
4505#elif defined __ARM_NEON
4506  simd_float2 r = simd_fast_rsqrt(x);
4507  return r*vrsqrts_f32(x, r*r);
4508#else
4509  return 1/__tg_sqrt(x);
4510#endif
4511}
4512  
4513static inline SIMD_CFUNC simd_float3 simd_precise_rsqrt(simd_float3 x) {
4514  return simd_make_float3(simd_precise_rsqrt(simd_make_float4_undef(x)));
4515}
4516  
4517static inline SIMD_CFUNC simd_float4 simd_precise_rsqrt(simd_float4 x) {
4518#if defined __SSE__
4519  simd_float4 r = simd_fast_rsqrt(x);
4520  return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r);
4521#elif defined __ARM_NEON
4522  simd_float4 r = simd_fast_rsqrt(x);
4523  return r*vrsqrtsq_f32(x, r*r);
4524#else
4525  return 1/__tg_sqrt(x);
4526#endif
4527}
4528  
4529static inline SIMD_CFUNC simd_float8 simd_precise_rsqrt(simd_float8 x) {
4530#if defined __AVX__
4531  simd_float8 r = simd_fast_rsqrt(x);
4532  return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r);
4533#else
4534  return simd_make_float8(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi));
4535#endif
4536}
4537  
4538static inline SIMD_CFUNC simd_float16 simd_precise_rsqrt(simd_float16 x) {
4539#if defined __AVX512F__
4540  simd_float16 r = simd_fast_rsqrt(x);
4541  return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r);
4542#else
4543  return simd_make_float16(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi));
4544#endif
4545}
4546  
4547static inline SIMD_CFUNC double simd_precise_rsqrt(double x) {
4548  return 1/sqrt(x);
4549}
4550  
4551static inline SIMD_CFUNC simd_double2 simd_precise_rsqrt(simd_double2 x) {
4552  return 1/__tg_sqrt(x);
4553}
4554  
4555static inline SIMD_CFUNC simd_double3 simd_precise_rsqrt(simd_double3 x) {
4556  return 1/__tg_sqrt(x);
4557}
4558  
4559static inline SIMD_CFUNC simd_double4 simd_precise_rsqrt(simd_double4 x) {
4560  return 1/__tg_sqrt(x);
4561}
4562  
4563static inline SIMD_CFUNC simd_double8 simd_precise_rsqrt(simd_double8 x) {
4564  return 1/__tg_sqrt(x);
4565}
4566  
4567static inline SIMD_CFUNC _Float16 simd_fract(_Float16 x) {
4568  return __fminf16(x - __floorf16(x), 0x1.ffcp-1f16);
4569}
4570
4571static inline SIMD_CFUNC simd_half2 simd_fract(simd_half2 x) {
4572  return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16);
4573}
4574
4575static inline SIMD_CFUNC simd_half3 simd_fract(simd_half3 x) {
4576  return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16);
4577}
4578
4579static inline SIMD_CFUNC simd_half4 simd_fract(simd_half4 x) {
4580  return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16);
4581}
4582
4583static inline SIMD_CFUNC simd_half8 simd_fract(simd_half8 x) {
4584  return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16);
4585}
4586
4587static inline SIMD_CFUNC simd_half16 simd_fract(simd_half16 x) {
4588  return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16);
4589}
4590
4591static inline SIMD_CFUNC simd_half32 simd_fract(simd_half32 x) {
4592  return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16);
4593}
4594
4595static inline SIMD_CFUNC float simd_fract(float x) {
4596  return fminf(x - floorf(x), 0x1.fffffep-1f);
4597}
4598
4599static inline SIMD_CFUNC simd_float2 simd_fract(simd_float2 x) {
4600  return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
4601}
4602
4603static inline SIMD_CFUNC simd_float3 simd_fract(simd_float3 x) {
4604  return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
4605}
4606
4607static inline SIMD_CFUNC simd_float4 simd_fract(simd_float4 x) {
4608  return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
4609}
4610
4611static inline SIMD_CFUNC simd_float8 simd_fract(simd_float8 x) {
4612  return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
4613}
4614
4615static inline SIMD_CFUNC simd_float16 simd_fract(simd_float16 x) {
4616  return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
4617}
4618
4619static inline SIMD_CFUNC double simd_fract(double x) {
4620  return fmin(x - floor(x), 0x1.fffffffffffffp-1);
4621}
4622
4623static inline SIMD_CFUNC simd_double2 simd_fract(simd_double2 x) {
4624  return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1);
4625}
4626
4627static inline SIMD_CFUNC simd_double3 simd_fract(simd_double3 x) {
4628  return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1);
4629}
4630
4631static inline SIMD_CFUNC simd_double4 simd_fract(simd_double4 x) {
4632  return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1);
4633}
4634
4635static inline SIMD_CFUNC simd_double8 simd_fract(simd_double8 x) {
4636  return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1);
4637}
4638
4639static inline SIMD_CFUNC _Float16 simd_step(_Float16 edge, _Float16 x) {
4640  return !(x < edge);
4641}
4642
4643static inline SIMD_CFUNC simd_half2 simd_step(simd_half2 edge, simd_half2 x) {
4644  return simd_bitselect((simd_half2)1, 0, x < edge);
4645}
4646
4647static inline SIMD_CFUNC simd_half3 simd_step(simd_half3 edge, simd_half3 x) {
4648  return simd_bitselect((simd_half3)1, 0, x < edge);
4649}
4650
4651static inline SIMD_CFUNC simd_half4 simd_step(simd_half4 edge, simd_half4 x) {
4652  return simd_bitselect((simd_half4)1, 0, x < edge);
4653}
4654
4655static inline SIMD_CFUNC simd_half8 simd_step(simd_half8 edge, simd_half8 x) {
4656  return simd_bitselect((simd_half8)1, 0, x < edge);
4657}
4658
4659static inline SIMD_CFUNC simd_half16 simd_step(simd_half16 edge, simd_half16 x) {
4660  return simd_bitselect((simd_half16)1, 0, x < edge);
4661}
4662
4663static inline SIMD_CFUNC simd_half32 simd_step(simd_half32 edge, simd_half32 x) {
4664  return simd_bitselect((simd_half32)1, 0, x < edge);
4665}
4666
4667static inline SIMD_CFUNC float simd_step(float edge, float x) {
4668  return !(x < edge);
4669}
4670
4671static inline SIMD_CFUNC simd_float2 simd_step(simd_float2 edge, simd_float2 x) {
4672  return simd_bitselect((simd_float2)1, 0, x < edge);
4673}
4674
4675static inline SIMD_CFUNC simd_float3 simd_step(simd_float3 edge, simd_float3 x) {
4676  return simd_bitselect((simd_float3)1, 0, x < edge);
4677}
4678
4679static inline SIMD_CFUNC simd_float4 simd_step(simd_float4 edge, simd_float4 x) {
4680  return simd_bitselect((simd_float4)1, 0, x < edge);
4681}
4682
4683static inline SIMD_CFUNC simd_float8 simd_step(simd_float8 edge, simd_float8 x) {
4684  return simd_bitselect((simd_float8)1, 0, x < edge);
4685}
4686
4687static inline SIMD_CFUNC simd_float16 simd_step(simd_float16 edge, simd_float16 x) {
4688  return simd_bitselect((simd_float16)1, 0, x < edge);
4689}
4690
4691static inline SIMD_CFUNC double simd_step(double edge, double x) {
4692  return !(x < edge);
4693}
4694
4695static inline SIMD_CFUNC simd_double2 simd_step(simd_double2 edge, simd_double2 x) {
4696  return simd_bitselect((simd_double2)1, 0, x < edge);
4697}
4698
4699static inline SIMD_CFUNC simd_double3 simd_step(simd_double3 edge, simd_double3 x) {
4700  return simd_bitselect((simd_double3)1, 0, x < edge);
4701}
4702
4703static inline SIMD_CFUNC simd_double4 simd_step(simd_double4 edge, simd_double4 x) {
4704  return simd_bitselect((simd_double4)1, 0, x < edge);
4705}
4706
4707static inline SIMD_CFUNC simd_double8 simd_step(simd_double8 edge, simd_double8 x) {
4708  return simd_bitselect((simd_double8)1, 0, x < edge);
4709}
4710
4711static inline SIMD_CFUNC _Float16 simd_smoothstep(_Float16 edge0, _Float16 edge1, _Float16 x) {
4712  _Float16 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4713  return t*t*(3 - 2*t);
4714}
4715
4716static inline SIMD_CFUNC simd_half2 simd_smoothstep(simd_half2 edge0, simd_half2 edge1, simd_half2 x) {
4717  simd_half2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4718  return t*t*(3 - 2*t);
4719}
4720
4721static inline SIMD_CFUNC simd_half3 simd_smoothstep(simd_half3 edge0, simd_half3 edge1, simd_half3 x) {
4722  simd_half3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4723  return t*t*(3 - 2*t);
4724}
4725
4726static inline SIMD_CFUNC simd_half4 simd_smoothstep(simd_half4 edge0, simd_half4 edge1, simd_half4 x) {
4727  simd_half4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4728  return t*t*(3 - 2*t);
4729}
4730
4731static inline SIMD_CFUNC simd_half8 simd_smoothstep(simd_half8 edge0, simd_half8 edge1, simd_half8 x) {
4732  simd_half8 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4733  return t*t*(3 - 2*t);
4734}
4735
4736static inline SIMD_CFUNC simd_half16 simd_smoothstep(simd_half16 edge0, simd_half16 edge1, simd_half16 x) {
4737  simd_half16 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4738  return t*t*(3 - 2*t);
4739}
4740
4741static inline SIMD_CFUNC simd_half32 simd_smoothstep(simd_half32 edge0, simd_half32 edge1, simd_half32 x) {
4742  simd_half32 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4743  return t*t*(3 - 2*t);
4744}
4745
4746static inline SIMD_CFUNC float simd_smoothstep(float edge0, float edge1, float x) {
4747  float t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4748  return t*t*(3 - 2*t);
4749}
4750
4751static inline SIMD_CFUNC simd_float2 simd_smoothstep(simd_float2 edge0, simd_float2 edge1, simd_float2 x) {
4752  simd_float2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4753  return t*t*(3 - 2*t);
4754}
4755
4756static inline SIMD_CFUNC simd_float3 simd_smoothstep(simd_float3 edge0, simd_float3 edge1, simd_float3 x) {
4757  simd_float3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4758  return t*t*(3 - 2*t);
4759}
4760
4761static inline SIMD_CFUNC simd_float4 simd_smoothstep(simd_float4 edge0, simd_float4 edge1, simd_float4 x) {
4762  simd_float4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4763  return t*t*(3 - 2*t);
4764}
4765
4766static inline SIMD_CFUNC simd_float8 simd_smoothstep(simd_float8 edge0, simd_float8 edge1, simd_float8 x) {
4767  simd_float8 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4768  return t*t*(3 - 2*t);
4769}
4770
4771static inline SIMD_CFUNC simd_float16 simd_smoothstep(simd_float16 edge0, simd_float16 edge1, simd_float16 x) {
4772  simd_float16 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4773  return t*t*(3 - 2*t);
4774}
4775
4776static inline SIMD_CFUNC double simd_smoothstep(double edge0, double edge1, double x) {
4777  double t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4778  return t*t*(3 - 2*t);
4779}
4780
4781static inline SIMD_CFUNC simd_double2 simd_smoothstep(simd_double2 edge0, simd_double2 edge1, simd_double2 x) {
4782  simd_double2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4783  return t*t*(3 - 2*t);
4784}
4785
4786static inline SIMD_CFUNC simd_double3 simd_smoothstep(simd_double3 edge0, simd_double3 edge1, simd_double3 x) {
4787  simd_double3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4788  return t*t*(3 - 2*t);
4789}
4790
4791static inline SIMD_CFUNC simd_double4 simd_smoothstep(simd_double4 edge0, simd_double4 edge1, simd_double4 x) {
4792  simd_double4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4793  return t*t*(3 - 2*t);
4794}
4795
4796static inline SIMD_CFUNC simd_double8 simd_smoothstep(simd_double8 edge0, simd_double8 edge1, simd_double8 x) {
4797  simd_double8 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4798  return t*t*(3 - 2*t);
4799}
4800
4801static inline SIMD_CFUNC char simd_reduce_add(simd_char2 x) {
4802  return x.x + x.y;
4803}
4804
4805static inline SIMD_CFUNC char simd_reduce_add(simd_char3 x) {
4806  return x.x + x.y + x.z;
4807}
4808
4809static inline SIMD_CFUNC char simd_reduce_add(simd_char4 x) {
4810  return simd_reduce_add(x.lo + x.hi);
4811}
4812
4813static inline SIMD_CFUNC char simd_reduce_add(simd_char8 x) {
4814  return simd_reduce_add(x.lo + x.hi);
4815}
4816
4817static inline SIMD_CFUNC char simd_reduce_add(simd_char16 x) {
4818#if defined __arm64__ || defined __aarch64__
4819  return vaddvq_s8(x);
4820#else
4821  return simd_reduce_add(x.lo + x.hi);
4822#endif
4823}
4824
4825static inline SIMD_CFUNC char simd_reduce_add(simd_char32 x) {
4826  return simd_reduce_add(x.lo + x.hi);
4827}
4828
4829static inline SIMD_CFUNC char simd_reduce_add(simd_char64 x) {
4830  return simd_reduce_add(x.lo + x.hi);
4831}
4832
4833static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar2 x) {
4834  return x.x + x.y;
4835}
4836
4837static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar3 x) {
4838  return x.x + x.y + x.z;
4839}
4840
4841static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar4 x) {
4842  return simd_reduce_add(x.lo + x.hi);
4843}
4844
4845static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar8 x) {
4846  return simd_reduce_add(x.lo + x.hi);
4847}
4848
4849static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar16 x) {
4850#if defined __arm64__ || defined __aarch64__
4851  return vaddvq_u8(x);
4852#else
4853  return simd_reduce_add(x.lo + x.hi);
4854#endif
4855}
4856
4857static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar32 x) {
4858  return simd_reduce_add(x.lo + x.hi);
4859}
4860
4861static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar64 x) {
4862  return simd_reduce_add(x.lo + x.hi);
4863}
4864
4865static inline SIMD_CFUNC short simd_reduce_add(simd_short2 x) {
4866  return x.x + x.y;
4867}
4868
4869static inline SIMD_CFUNC short simd_reduce_add(simd_short3 x) {
4870  return x.x + x.y + x.z;
4871}
4872
4873static inline SIMD_CFUNC short simd_reduce_add(simd_short4 x) {
4874  return simd_reduce_add(x.lo + x.hi);
4875}
4876
4877static inline SIMD_CFUNC short simd_reduce_add(simd_short8 x) {
4878#if defined __arm64__ || defined __aarch64__
4879  return vaddvq_s16(x);
4880#else
4881  return simd_reduce_add(x.lo + x.hi);
4882#endif
4883}
4884
4885static inline SIMD_CFUNC short simd_reduce_add(simd_short16 x) {
4886  return simd_reduce_add(x.lo + x.hi);
4887}
4888
4889static inline SIMD_CFUNC short simd_reduce_add(simd_short32 x) {
4890  return simd_reduce_add(x.lo + x.hi);
4891}
4892
4893static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort2 x) {
4894  return x.x + x.y;
4895}
4896
4897static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort3 x) {
4898  return x.x + x.y + x.z;
4899}
4900
4901static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort4 x) {
4902  return simd_reduce_add(x.lo + x.hi);
4903}
4904
4905static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort8 x) {
4906#if defined __arm64__ || defined __aarch64__
4907  return vaddvq_u16(x);
4908#else
4909  return simd_reduce_add(x.lo + x.hi);
4910#endif
4911}
4912
4913static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort16 x) {
4914  return simd_reduce_add(x.lo + x.hi);
4915}
4916
4917static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort32 x) {
4918  return simd_reduce_add(x.lo + x.hi);
4919}
4920
4921static inline SIMD_CFUNC int simd_reduce_add(simd_int2 x) {
4922  return x.x + x.y;
4923}
4924
4925static inline SIMD_CFUNC int simd_reduce_add(simd_int3 x) {
4926  return x.x + x.y + x.z;
4927}
4928
4929static inline SIMD_CFUNC int simd_reduce_add(simd_int4 x) {
4930#if defined __arm64__ || defined __aarch64__
4931  return vaddvq_s32(x);
4932#else
4933  return simd_reduce_add(x.lo + x.hi);
4934#endif
4935}
4936
4937static inline SIMD_CFUNC int simd_reduce_add(simd_int8 x) {
4938  return simd_reduce_add(x.lo + x.hi);
4939}
4940
4941static inline SIMD_CFUNC int simd_reduce_add(simd_int16 x) {
4942  return simd_reduce_add(x.lo + x.hi);
4943}
4944
4945static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint2 x) {
4946  return x.x + x.y;
4947}
4948
4949static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint3 x) {
4950  return x.x + x.y + x.z;
4951}
4952
4953static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint4 x) {
4954#if defined __arm64__ || defined __aarch64__
4955  return vaddvq_u32(x);
4956#else
4957  return simd_reduce_add(x.lo + x.hi);
4958#endif
4959}
4960
4961static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint8 x) {
4962  return simd_reduce_add(x.lo + x.hi);
4963}
4964
4965static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint16 x) {
4966  return simd_reduce_add(x.lo + x.hi);
4967}
4968
4969static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long2 x) {
4970  return x.x + x.y;
4971}
4972
4973static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long3 x) {
4974  return x.x + x.y + x.z;
4975}
4976
4977static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long4 x) {
4978  return simd_reduce_add(x.lo + x.hi);
4979}
4980
4981static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long8 x) {
4982  return simd_reduce_add(x.lo + x.hi);
4983}
4984
4985static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong2 x) {
4986  return x.x + x.y;
4987}
4988
4989static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong3 x) {
4990  return x.x + x.y + x.z;
4991}
4992
4993static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong4 x) {
4994  return simd_reduce_add(x.lo + x.hi);
4995}
4996
4997static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong8 x) {
4998  return simd_reduce_add(x.lo + x.hi);
4999}
5000
5001static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half2 x) {
5002  return x.x + x.y;
5003}
5004
5005static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half3 x) {
5006  return x.x + x.y + x.z;
5007}
5008
5009static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half4 x) {
5010  return simd_reduce_add(x.lo + x.hi);
5011}
5012
5013static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half8 x) {
5014  return simd_reduce_add(x.lo + x.hi);
5015}
5016
5017static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half16 x) {
5018  return simd_reduce_add(x.lo + x.hi);
5019}
5020
5021static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half32 x) {
5022  return simd_reduce_add(x.lo + x.hi);
5023}
5024
5025static inline SIMD_CFUNC float simd_reduce_add(simd_float2 x) {
5026  return x.x + x.y;
5027}
5028
5029static inline SIMD_CFUNC float simd_reduce_add(simd_float3 x) {
5030  return x.x + x.y + x.z;
5031}
5032
5033static inline SIMD_CFUNC float simd_reduce_add(simd_float4 x) {
5034  return simd_reduce_add(x.lo + x.hi);
5035}
5036
5037static inline SIMD_CFUNC float simd_reduce_add(simd_float8 x) {
5038  return simd_reduce_add(x.lo + x.hi);
5039}
5040
5041static inline SIMD_CFUNC float simd_reduce_add(simd_float16 x) {
5042  return simd_reduce_add(x.lo + x.hi);
5043}
5044
5045static inline SIMD_CFUNC double simd_reduce_add(simd_double2 x) {
5046  return x.x + x.y;
5047}
5048
5049static inline SIMD_CFUNC double simd_reduce_add(simd_double3 x) {
5050  return x.x + x.y + x.z;
5051}
5052
5053static inline SIMD_CFUNC double simd_reduce_add(simd_double4 x) {
5054  return simd_reduce_add(x.lo + x.hi);
5055}
5056
5057static inline SIMD_CFUNC double simd_reduce_add(simd_double8 x) {
5058  return simd_reduce_add(x.lo + x.hi);
5059}
5060
5061static inline SIMD_CFUNC char simd_reduce_min(simd_char2 x) {
5062  return x.y < x.x ? x.y : x.x;
5063}
5064
5065static inline SIMD_CFUNC char simd_reduce_min(simd_char3 x) {
5066  char t = x.z < x.x ? x.z : x.x;
5067  return x.y < t ? x.y : t;
5068}
5069
5070static inline SIMD_CFUNC char simd_reduce_min(simd_char4 x) {
5071  return simd_reduce_min(simd_min(x.lo, x.hi));
5072}
5073
5074static inline SIMD_CFUNC char simd_reduce_min(simd_char8 x) {
5075  return simd_reduce_min(simd_min(x.lo, x.hi));
5076}
5077
5078static inline SIMD_CFUNC char simd_reduce_min(simd_char16 x) {
5079#if defined __arm64__ || defined __aarch64__
5080  return vminvq_s8(x);
5081#else
5082  return simd_reduce_min(simd_min(x.lo, x.hi));
5083#endif
5084}
5085
5086static inline SIMD_CFUNC char simd_reduce_min(simd_char32 x) {
5087  return simd_reduce_min(simd_min(x.lo, x.hi));
5088}
5089
5090static inline SIMD_CFUNC char simd_reduce_min(simd_char64 x) {
5091  return simd_reduce_min(simd_min(x.lo, x.hi));
5092}
5093
5094static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar2 x) {
5095  return x.y < x.x ? x.y : x.x;
5096}
5097
5098static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar3 x) {
5099  unsigned char t = x.z < x.x ? x.z : x.x;
5100  return x.y < t ? x.y : t;
5101}
5102
5103static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar4 x) {
5104  return simd_reduce_min(simd_min(x.lo, x.hi));
5105}
5106
5107static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar8 x) {
5108  return simd_reduce_min(simd_min(x.lo, x.hi));
5109}
5110
5111static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar16 x) {
5112#if defined __arm64__ || defined __aarch64__
5113  return vminvq_u8(x);
5114#else
5115  return simd_reduce_min(simd_min(x.lo, x.hi));
5116#endif
5117}
5118
5119static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar32 x) {
5120  return simd_reduce_min(simd_min(x.lo, x.hi));
5121}
5122
5123static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar64 x) {
5124  return simd_reduce_min(simd_min(x.lo, x.hi));
5125}
5126
5127static inline SIMD_CFUNC short simd_reduce_min(simd_short2 x) {
5128  return x.y < x.x ? x.y : x.x;
5129}
5130
5131static inline SIMD_CFUNC short simd_reduce_min(simd_short3 x) {
5132  short t = x.z < x.x ? x.z : x.x;
5133  return x.y < t ? x.y : t;
5134}
5135
5136static inline SIMD_CFUNC short simd_reduce_min(simd_short4 x) {
5137  return simd_reduce_min(simd_min(x.lo, x.hi));
5138}
5139
5140static inline SIMD_CFUNC short simd_reduce_min(simd_short8 x) {
5141#if defined __arm64__ || defined __aarch64__
5142  return vminvq_s16(x);
5143#else
5144  return simd_reduce_min(simd_min(x.lo, x.hi));
5145#endif
5146}
5147
5148static inline SIMD_CFUNC short simd_reduce_min(simd_short16 x) {
5149  return simd_reduce_min(simd_min(x.lo, x.hi));
5150}
5151
5152static inline SIMD_CFUNC short simd_reduce_min(simd_short32 x) {
5153  return simd_reduce_min(simd_min(x.lo, x.hi));
5154}
5155
5156static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort2 x) {
5157  return x.y < x.x ? x.y : x.x;
5158}
5159
5160static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort3 x) {
5161  unsigned short t = x.z < x.x ? x.z : x.x;
5162  return x.y < t ? x.y : t;
5163}
5164
5165static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort4 x) {
5166  return simd_reduce_min(simd_min(x.lo, x.hi));
5167}
5168
5169static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort8 x) {
5170#if defined __arm64__ || defined __aarch64__
5171  return vminvq_u16(x);
5172#else
5173  return simd_reduce_min(simd_min(x.lo, x.hi));
5174#endif
5175}
5176
5177static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort16 x) {
5178  return simd_reduce_min(simd_min(x.lo, x.hi));
5179}
5180
5181static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort32 x) {
5182  return simd_reduce_min(simd_min(x.lo, x.hi));
5183}
5184
5185static inline SIMD_CFUNC int simd_reduce_min(simd_int2 x) {
5186  return x.y < x.x ? x.y : x.x;
5187}
5188
5189static inline SIMD_CFUNC int simd_reduce_min(simd_int3 x) {
5190  int t = x.z < x.x ? x.z : x.x;
5191  return x.y < t ? x.y : t;
5192}
5193
5194static inline SIMD_CFUNC int simd_reduce_min(simd_int4 x) {
5195#if defined __arm64__ || defined __aarch64__
5196  return vminvq_s32(x);
5197#else
5198  return simd_reduce_min(simd_min(x.lo, x.hi));
5199#endif
5200}
5201
5202static inline SIMD_CFUNC int simd_reduce_min(simd_int8 x) {
5203  return simd_reduce_min(simd_min(x.lo, x.hi));
5204}
5205
5206static inline SIMD_CFUNC int simd_reduce_min(simd_int16 x) {
5207  return simd_reduce_min(simd_min(x.lo, x.hi));
5208}
5209
5210static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint2 x) {
5211  return x.y < x.x ? x.y : x.x;
5212}
5213
5214static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint3 x) {
5215  unsigned int t = x.z < x.x ? x.z : x.x;
5216  return x.y < t ? x.y : t;
5217}
5218
5219static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint4 x) {
5220#if defined __arm64__ || defined __aarch64__
5221  return vminvq_u32(x);
5222#else
5223  return simd_reduce_min(simd_min(x.lo, x.hi));
5224#endif
5225}
5226
5227static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint8 x) {
5228  return simd_reduce_min(simd_min(x.lo, x.hi));
5229}
5230
5231static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint16 x) {
5232  return simd_reduce_min(simd_min(x.lo, x.hi));
5233}
5234
5235static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long2 x) {
5236  return x.y < x.x ? x.y : x.x;
5237}
5238
5239static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long3 x) {
5240  simd_long1 t = x.z < x.x ? x.z : x.x;
5241  return x.y < t ? x.y : t;
5242}
5243
5244static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long4 x) {
5245  return simd_reduce_min(simd_min(x.lo, x.hi));
5246}
5247
5248static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long8 x) {
5249  return simd_reduce_min(simd_min(x.lo, x.hi));
5250}
5251
5252static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong2 x) {
5253  return x.y < x.x ? x.y : x.x;
5254}
5255
5256static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong3 x) {
5257  simd_ulong1 t = x.z < x.x ? x.z : x.x;
5258  return x.y < t ? x.y : t;
5259}
5260
5261static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong4 x) {
5262  return simd_reduce_min(simd_min(x.lo, x.hi));
5263}
5264
5265static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong8 x) {
5266  return simd_reduce_min(simd_min(x.lo, x.hi));
5267}
5268
5269static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half2 x) {
5270  return __fminf16(x.x, x.y);
5271}
5272
5273static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half3 x) {
5274  return __fminf16(__fminf16(x.x, x.z), x.y);
5275}
5276
5277static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half4 x) {
5278  return simd_reduce_min(simd_min(x.lo, x.hi));
5279}
5280
5281static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half8 x) {
5282  return simd_reduce_min(simd_min(x.lo, x.hi));
5283}
5284
5285static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half16 x) {
5286  return simd_reduce_min(simd_min(x.lo, x.hi));
5287}
5288
5289static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half32 x) {
5290  return simd_reduce_min(simd_min(x.lo, x.hi));
5291}
5292
5293static inline SIMD_CFUNC float simd_reduce_min(simd_float2 x) {
5294  return fmin(x.x, x.y);
5295}
5296
5297static inline SIMD_CFUNC float simd_reduce_min(simd_float3 x) {
5298  return fmin(fmin(x.x, x.z), x.y);
5299}
5300
5301static inline SIMD_CFUNC float simd_reduce_min(simd_float4 x) {
5302#if defined __arm64__ || defined __aarch64__
5303  return vminvq_f32(x);
5304#else
5305  return simd_reduce_min(simd_min(x.lo, x.hi));
5306#endif
5307}
5308
5309static inline SIMD_CFUNC float simd_reduce_min(simd_float8 x) {
5310  return simd_reduce_min(simd_min(x.lo, x.hi));
5311}
5312
5313static inline SIMD_CFUNC float simd_reduce_min(simd_float16 x) {
5314  return simd_reduce_min(simd_min(x.lo, x.hi));
5315}
5316
5317static inline SIMD_CFUNC double simd_reduce_min(simd_double2 x) {
5318#if defined __arm64__ || defined __aarch64__
5319  return vminvq_f64(x);
5320#else
5321  return fmin(x.x, x.y);
5322#endif
5323}
5324
5325static inline SIMD_CFUNC double simd_reduce_min(simd_double3 x) {
5326  return fmin(fmin(x.x, x.z), x.y);
5327}
5328
5329static inline SIMD_CFUNC double simd_reduce_min(simd_double4 x) {
5330  return simd_reduce_min(simd_min(x.lo, x.hi));
5331}
5332
5333static inline SIMD_CFUNC double simd_reduce_min(simd_double8 x) {
5334  return simd_reduce_min(simd_min(x.lo, x.hi));
5335}
5336
5337static inline SIMD_CFUNC char simd_reduce_max(simd_char2 x) {
5338  return x.y > x.x ? x.y : x.x;
5339}
5340
5341static inline SIMD_CFUNC char simd_reduce_max(simd_char3 x) {
5342  char t = x.z > x.x ? x.z : x.x;
5343  return x.y > t ? x.y : t;
5344}
5345
5346static inline SIMD_CFUNC char simd_reduce_max(simd_char4 x) {
5347  return simd_reduce_max(simd_max(x.lo, x.hi));
5348}
5349
5350static inline SIMD_CFUNC char simd_reduce_max(simd_char8 x) {
5351  return simd_reduce_max(simd_max(x.lo, x.hi));
5352}
5353
5354static inline SIMD_CFUNC char simd_reduce_max(simd_char16 x) {
5355#if defined __arm64__ || defined __aarch64__
5356  return vmaxvq_s8(x);
5357#else
5358  return simd_reduce_max(simd_max(x.lo, x.hi));
5359#endif
5360}
5361
5362static inline SIMD_CFUNC char simd_reduce_max(simd_char32 x) {
5363  return simd_reduce_max(simd_max(x.lo, x.hi));
5364}
5365
5366static inline SIMD_CFUNC char simd_reduce_max(simd_char64 x) {
5367  return simd_reduce_max(simd_max(x.lo, x.hi));
5368}
5369
5370static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar2 x) {
5371  return x.y > x.x ? x.y : x.x;
5372}
5373
5374static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar3 x) {
5375  unsigned char t = x.z > x.x ? x.z : x.x;
5376  return x.y > t ? x.y : t;
5377}
5378
5379static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar4 x) {
5380  return simd_reduce_max(simd_max(x.lo, x.hi));
5381}
5382
5383static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar8 x) {
5384  return simd_reduce_max(simd_max(x.lo, x.hi));
5385}
5386
5387static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar16 x) {
5388#if defined __arm64__ || defined __aarch64__
5389  return vmaxvq_u8(x);
5390#else
5391  return simd_reduce_max(simd_max(x.lo, x.hi));
5392#endif
5393}
5394
5395static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar32 x) {
5396  return simd_reduce_max(simd_max(x.lo, x.hi));
5397}
5398
5399static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar64 x) {
5400  return simd_reduce_max(simd_max(x.lo, x.hi));
5401}
5402
5403static inline SIMD_CFUNC short simd_reduce_max(simd_short2 x) {
5404  return x.y > x.x ? x.y : x.x;
5405}
5406
5407static inline SIMD_CFUNC short simd_reduce_max(simd_short3 x) {
5408  short t = x.z > x.x ? x.z : x.x;
5409  return x.y > t ? x.y : t;
5410}
5411
5412static inline SIMD_CFUNC short simd_reduce_max(simd_short4 x) {
5413  return simd_reduce_max(simd_max(x.lo, x.hi));
5414}
5415
5416static inline SIMD_CFUNC short simd_reduce_max(simd_short8 x) {
5417#if defined __arm64__ || defined __aarch64__
5418  return vmaxvq_s16(x);
5419#else
5420  return simd_reduce_max(simd_max(x.lo, x.hi));
5421#endif
5422}
5423
5424static inline SIMD_CFUNC short simd_reduce_max(simd_short16 x) {
5425  return simd_reduce_max(simd_max(x.lo, x.hi));
5426}
5427
5428static inline SIMD_CFUNC short simd_reduce_max(simd_short32 x) {
5429  return simd_reduce_max(simd_max(x.lo, x.hi));
5430}
5431
5432static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort2 x) {
5433  return x.y > x.x ? x.y : x.x;
5434}
5435
5436static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort3 x) {
5437  unsigned short t = x.z > x.x ? x.z : x.x;
5438  return x.y > t ? x.y : t;
5439}
5440
5441static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort4 x) {
5442  return simd_reduce_max(simd_max(x.lo, x.hi));
5443}
5444
5445static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort8 x) {
5446#if defined __arm64__ || defined __aarch64__
5447  return vmaxvq_u16(x);
5448#else
5449  return simd_reduce_max(simd_max(x.lo, x.hi));
5450#endif
5451}
5452
5453static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort16 x) {
5454  return simd_reduce_max(simd_max(x.lo, x.hi));
5455}
5456
5457static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort32 x) {
5458  return simd_reduce_max(simd_max(x.lo, x.hi));
5459}
5460
5461static inline SIMD_CFUNC int simd_reduce_max(simd_int2 x) {
5462  return x.y > x.x ? x.y : x.x;
5463}
5464
5465static inline SIMD_CFUNC int simd_reduce_max(simd_int3 x) {
5466  int t = x.z > x.x ? x.z : x.x;
5467  return x.y > t ? x.y : t;
5468}
5469
5470static inline SIMD_CFUNC int simd_reduce_max(simd_int4 x) {
5471#if defined __arm64__ || defined __aarch64__
5472  return vmaxvq_s32(x);
5473#else
5474  return simd_reduce_max(simd_max(x.lo, x.hi));
5475#endif
5476}
5477
5478static inline SIMD_CFUNC int simd_reduce_max(simd_int8 x) {
5479  return simd_reduce_max(simd_max(x.lo, x.hi));
5480}
5481
5482static inline SIMD_CFUNC int simd_reduce_max(simd_int16 x) {
5483  return simd_reduce_max(simd_max(x.lo, x.hi));
5484}
5485
5486static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint2 x) {
5487  return x.y > x.x ? x.y : x.x;
5488}
5489
5490static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint3 x) {
5491  unsigned int t = x.z > x.x ? x.z : x.x;
5492  return x.y > t ? x.y : t;
5493}
5494
5495static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint4 x) {
5496#if defined __arm64__ || defined __aarch64__
5497  return vmaxvq_u32(x);
5498#else
5499  return simd_reduce_max(simd_max(x.lo, x.hi));
5500#endif
5501}
5502
5503static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint8 x) {
5504  return simd_reduce_max(simd_max(x.lo, x.hi));
5505}
5506
5507static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint16 x) {
5508  return simd_reduce_max(simd_max(x.lo, x.hi));
5509}
5510
5511static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long2 x) {
5512  return x.y > x.x ? x.y : x.x;
5513}
5514
5515static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long3 x) {
5516  simd_long1 t = x.z > x.x ? x.z : x.x;
5517  return x.y > t ? x.y : t;
5518}
5519
5520static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long4 x) {
5521  return simd_reduce_max(simd_max(x.lo, x.hi));
5522}
5523
5524static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long8 x) {
5525  return simd_reduce_max(simd_max(x.lo, x.hi));
5526}
5527
5528static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong2 x) {
5529  return x.y > x.x ? x.y : x.x;
5530}
5531
5532static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong3 x) {
5533  simd_ulong1 t = x.z > x.x ? x.z : x.x;
5534  return x.y > t ? x.y : t;
5535}
5536
5537static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong4 x) {
5538  return simd_reduce_max(simd_max(x.lo, x.hi));
5539}
5540
5541static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong8 x) {
5542  return simd_reduce_max(simd_max(x.lo, x.hi));
5543}
5544
5545static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half2 x) {
5546  return __fmaxf16(x.x, x.y);
5547}
5548
5549static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half3 x) {
5550  return __fmaxf16(__fmaxf16(x.x, x.z), x.y);
5551}
5552
5553static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half4 x) {
5554  return simd_reduce_max(simd_max(x.lo, x.hi));
5555}
5556
5557static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half8 x) {
5558  return simd_reduce_max(simd_max(x.lo, x.hi));
5559}
5560
5561static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half16 x) {
5562  return simd_reduce_max(simd_max(x.lo, x.hi));
5563}
5564
5565static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half32 x) {
5566  return simd_reduce_max(simd_max(x.lo, x.hi));
5567}
5568
5569static inline SIMD_CFUNC float simd_reduce_max(simd_float2 x) {
5570  return fmax(x.x, x.y);
5571}
5572
5573static inline SIMD_CFUNC float simd_reduce_max(simd_float3 x) {
5574  return fmax(fmax(x.x, x.z), x.y);
5575}
5576
5577static inline SIMD_CFUNC float simd_reduce_max(simd_float4 x) {
5578#if defined __arm64__ || defined __aarch64__
5579  return vmaxvq_f32(x);
5580#else
5581  return simd_reduce_max(simd_max(x.lo, x.hi));
5582#endif
5583}
5584
5585static inline SIMD_CFUNC float simd_reduce_max(simd_float8 x) {
5586  return simd_reduce_max(simd_max(x.lo, x.hi));
5587}
5588
5589static inline SIMD_CFUNC float simd_reduce_max(simd_float16 x) {
5590  return simd_reduce_max(simd_max(x.lo, x.hi));
5591}
5592
5593static inline SIMD_CFUNC double simd_reduce_max(simd_double2 x) {
5594#if defined __arm64__ || defined __aarch64__
5595  return vmaxvq_f64(x);
5596#else
5597  return fmax(x.x, x.y);
5598#endif
5599}
5600
5601static inline SIMD_CFUNC double simd_reduce_max(simd_double3 x) {
5602  return fmax(fmax(x.x, x.z), x.y);
5603}
5604
5605static inline SIMD_CFUNC double simd_reduce_max(simd_double4 x) {
5606  return simd_reduce_max(simd_max(x.lo, x.hi));
5607}
5608
5609static inline SIMD_CFUNC double simd_reduce_max(simd_double8 x) {
5610  return simd_reduce_max(simd_max(x.lo, x.hi));
5611}
5612
5613#ifdef __cplusplus
5614}
5615#endif
5616#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */
5617#endif /* SIMD_COMMON_HEADER */