master
   1/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------===
   2 *
   3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 * See https://llvm.org/LICENSE.txt for license information.
   5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 *
   7 *===-----------------------------------------------------------------------===
   8 */
   9
  10#ifndef __IMMINTRIN_H
  11#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
  12#endif
  13
  14#ifndef __AVX512VLBWINTRIN_H
  15#define __AVX512VLBWINTRIN_H
  16
  17/* Define the default attributes for the functions in this file. */
  18#define __DEFAULT_FN_ATTRS128                                                  \
  19  __attribute__((__always_inline__, __nodebug__,                               \
  20                 __target__("avx512vl,avx512bw,no-evex512"),                   \
  21                 __min_vector_width__(128)))
  22#define __DEFAULT_FN_ATTRS256                                                  \
  23  __attribute__((__always_inline__, __nodebug__,                               \
  24                 __target__("avx512vl,avx512bw,no-evex512"),                   \
  25                 __min_vector_width__(256)))
  26
  27/* Integer compare */
  28
  29#define _mm_cmp_epi8_mask(a, b, p) \
  30  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
  31                                          (__v16qi)(__m128i)(b), (int)(p), \
  32                                          (__mmask16)-1))
  33
  34#define _mm_mask_cmp_epi8_mask(m, a, b, p) \
  35  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
  36                                          (__v16qi)(__m128i)(b), (int)(p), \
  37                                          (__mmask16)(m)))
  38
  39#define _mm_cmp_epu8_mask(a, b, p) \
  40  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
  41                                           (__v16qi)(__m128i)(b), (int)(p), \
  42                                           (__mmask16)-1))
  43
  44#define _mm_mask_cmp_epu8_mask(m, a, b, p) \
  45  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
  46                                           (__v16qi)(__m128i)(b), (int)(p), \
  47                                           (__mmask16)(m)))
  48
  49#define _mm256_cmp_epi8_mask(a, b, p) \
  50  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
  51                                          (__v32qi)(__m256i)(b), (int)(p), \
  52                                          (__mmask32)-1))
  53
  54#define _mm256_mask_cmp_epi8_mask(m, a, b, p) \
  55  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
  56                                          (__v32qi)(__m256i)(b), (int)(p), \
  57                                          (__mmask32)(m)))
  58
  59#define _mm256_cmp_epu8_mask(a, b, p) \
  60  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
  61                                           (__v32qi)(__m256i)(b), (int)(p), \
  62                                           (__mmask32)-1))
  63
  64#define _mm256_mask_cmp_epu8_mask(m, a, b, p) \
  65  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
  66                                           (__v32qi)(__m256i)(b), (int)(p), \
  67                                           (__mmask32)(m)))
  68
  69#define _mm_cmp_epi16_mask(a, b, p) \
  70  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
  71                                         (__v8hi)(__m128i)(b), (int)(p), \
  72                                         (__mmask8)-1))
  73
  74#define _mm_mask_cmp_epi16_mask(m, a, b, p) \
  75  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
  76                                         (__v8hi)(__m128i)(b), (int)(p), \
  77                                         (__mmask8)(m)))
  78
  79#define _mm_cmp_epu16_mask(a, b, p) \
  80  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
  81                                          (__v8hi)(__m128i)(b), (int)(p), \
  82                                          (__mmask8)-1))
  83
  84#define _mm_mask_cmp_epu16_mask(m, a, b, p) \
  85  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
  86                                          (__v8hi)(__m128i)(b), (int)(p), \
  87                                          (__mmask8)(m)))
  88
  89#define _mm256_cmp_epi16_mask(a, b, p) \
  90  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
  91                                          (__v16hi)(__m256i)(b), (int)(p), \
  92                                          (__mmask16)-1))
  93
  94#define _mm256_mask_cmp_epi16_mask(m, a, b, p) \
  95  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
  96                                          (__v16hi)(__m256i)(b), (int)(p), \
  97                                          (__mmask16)(m)))
  98
  99#define _mm256_cmp_epu16_mask(a, b, p) \
 100  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
 101                                           (__v16hi)(__m256i)(b), (int)(p), \
 102                                           (__mmask16)-1))
 103
 104#define _mm256_mask_cmp_epu16_mask(m, a, b, p) \
 105  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
 106                                           (__v16hi)(__m256i)(b), (int)(p), \
 107                                           (__mmask16)(m)))
 108
 109#define _mm_cmpeq_epi8_mask(A, B) \
 110    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
 111#define _mm_mask_cmpeq_epi8_mask(k, A, B) \
 112    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
 113#define _mm_cmpge_epi8_mask(A, B) \
 114    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
 115#define _mm_mask_cmpge_epi8_mask(k, A, B) \
 116    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
 117#define _mm_cmpgt_epi8_mask(A, B) \
 118    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
 119#define _mm_mask_cmpgt_epi8_mask(k, A, B) \
 120    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
 121#define _mm_cmple_epi8_mask(A, B) \
 122    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
 123#define _mm_mask_cmple_epi8_mask(k, A, B) \
 124    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
 125#define _mm_cmplt_epi8_mask(A, B) \
 126    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
 127#define _mm_mask_cmplt_epi8_mask(k, A, B) \
 128    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
 129#define _mm_cmpneq_epi8_mask(A, B) \
 130    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
 131#define _mm_mask_cmpneq_epi8_mask(k, A, B) \
 132    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
 133
 134#define _mm256_cmpeq_epi8_mask(A, B) \
 135    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
 136#define _mm256_mask_cmpeq_epi8_mask(k, A, B) \
 137    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
 138#define _mm256_cmpge_epi8_mask(A, B) \
 139    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
 140#define _mm256_mask_cmpge_epi8_mask(k, A, B) \
 141    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
 142#define _mm256_cmpgt_epi8_mask(A, B) \
 143    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
 144#define _mm256_mask_cmpgt_epi8_mask(k, A, B) \
 145    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
 146#define _mm256_cmple_epi8_mask(A, B) \
 147    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
 148#define _mm256_mask_cmple_epi8_mask(k, A, B) \
 149    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
 150#define _mm256_cmplt_epi8_mask(A, B) \
 151    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
 152#define _mm256_mask_cmplt_epi8_mask(k, A, B) \
 153    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
 154#define _mm256_cmpneq_epi8_mask(A, B) \
 155    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
 156#define _mm256_mask_cmpneq_epi8_mask(k, A, B) \
 157    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
 158
 159#define _mm_cmpeq_epu8_mask(A, B) \
 160    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
 161#define _mm_mask_cmpeq_epu8_mask(k, A, B) \
 162    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
 163#define _mm_cmpge_epu8_mask(A, B) \
 164    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
 165#define _mm_mask_cmpge_epu8_mask(k, A, B) \
 166    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
 167#define _mm_cmpgt_epu8_mask(A, B) \
 168    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
 169#define _mm_mask_cmpgt_epu8_mask(k, A, B) \
 170    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
 171#define _mm_cmple_epu8_mask(A, B) \
 172    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
 173#define _mm_mask_cmple_epu8_mask(k, A, B) \
 174    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
 175#define _mm_cmplt_epu8_mask(A, B) \
 176    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
 177#define _mm_mask_cmplt_epu8_mask(k, A, B) \
 178    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
 179#define _mm_cmpneq_epu8_mask(A, B) \
 180    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
 181#define _mm_mask_cmpneq_epu8_mask(k, A, B) \
 182    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
 183
 184#define _mm256_cmpeq_epu8_mask(A, B) \
 185    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
 186#define _mm256_mask_cmpeq_epu8_mask(k, A, B) \
 187    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
 188#define _mm256_cmpge_epu8_mask(A, B) \
 189    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
 190#define _mm256_mask_cmpge_epu8_mask(k, A, B) \
 191    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
 192#define _mm256_cmpgt_epu8_mask(A, B) \
 193    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
 194#define _mm256_mask_cmpgt_epu8_mask(k, A, B) \
 195    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
 196#define _mm256_cmple_epu8_mask(A, B) \
 197    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
 198#define _mm256_mask_cmple_epu8_mask(k, A, B) \
 199    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
 200#define _mm256_cmplt_epu8_mask(A, B) \
 201    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
 202#define _mm256_mask_cmplt_epu8_mask(k, A, B) \
 203    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
 204#define _mm256_cmpneq_epu8_mask(A, B) \
 205    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
 206#define _mm256_mask_cmpneq_epu8_mask(k, A, B) \
 207    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
 208
 209#define _mm_cmpeq_epi16_mask(A, B) \
 210    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
 211#define _mm_mask_cmpeq_epi16_mask(k, A, B) \
 212    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
 213#define _mm_cmpge_epi16_mask(A, B) \
 214    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
 215#define _mm_mask_cmpge_epi16_mask(k, A, B) \
 216    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
 217#define _mm_cmpgt_epi16_mask(A, B) \
 218    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
 219#define _mm_mask_cmpgt_epi16_mask(k, A, B) \
 220    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
 221#define _mm_cmple_epi16_mask(A, B) \
 222    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
 223#define _mm_mask_cmple_epi16_mask(k, A, B) \
 224    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
 225#define _mm_cmplt_epi16_mask(A, B) \
 226    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
 227#define _mm_mask_cmplt_epi16_mask(k, A, B) \
 228    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
 229#define _mm_cmpneq_epi16_mask(A, B) \
 230    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
 231#define _mm_mask_cmpneq_epi16_mask(k, A, B) \
 232    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
 233
 234#define _mm256_cmpeq_epi16_mask(A, B) \
 235    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
 236#define _mm256_mask_cmpeq_epi16_mask(k, A, B) \
 237    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
 238#define _mm256_cmpge_epi16_mask(A, B) \
 239    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
 240#define _mm256_mask_cmpge_epi16_mask(k, A, B) \
 241    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
 242#define _mm256_cmpgt_epi16_mask(A, B) \
 243    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
 244#define _mm256_mask_cmpgt_epi16_mask(k, A, B) \
 245    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
 246#define _mm256_cmple_epi16_mask(A, B) \
 247    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
 248#define _mm256_mask_cmple_epi16_mask(k, A, B) \
 249    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
 250#define _mm256_cmplt_epi16_mask(A, B) \
 251    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
 252#define _mm256_mask_cmplt_epi16_mask(k, A, B) \
 253    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
 254#define _mm256_cmpneq_epi16_mask(A, B) \
 255    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
 256#define _mm256_mask_cmpneq_epi16_mask(k, A, B) \
 257    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
 258
 259#define _mm_cmpeq_epu16_mask(A, B) \
 260    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
 261#define _mm_mask_cmpeq_epu16_mask(k, A, B) \
 262    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
 263#define _mm_cmpge_epu16_mask(A, B) \
 264    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
 265#define _mm_mask_cmpge_epu16_mask(k, A, B) \
 266    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
 267#define _mm_cmpgt_epu16_mask(A, B) \
 268    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
 269#define _mm_mask_cmpgt_epu16_mask(k, A, B) \
 270    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
 271#define _mm_cmple_epu16_mask(A, B) \
 272    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
 273#define _mm_mask_cmple_epu16_mask(k, A, B) \
 274    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
 275#define _mm_cmplt_epu16_mask(A, B) \
 276    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
 277#define _mm_mask_cmplt_epu16_mask(k, A, B) \
 278    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
 279#define _mm_cmpneq_epu16_mask(A, B) \
 280    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
 281#define _mm_mask_cmpneq_epu16_mask(k, A, B) \
 282    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
 283
 284#define _mm256_cmpeq_epu16_mask(A, B) \
 285    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
 286#define _mm256_mask_cmpeq_epu16_mask(k, A, B) \
 287    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
 288#define _mm256_cmpge_epu16_mask(A, B) \
 289    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
 290#define _mm256_mask_cmpge_epu16_mask(k, A, B) \
 291    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
 292#define _mm256_cmpgt_epu16_mask(A, B) \
 293    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
 294#define _mm256_mask_cmpgt_epu16_mask(k, A, B) \
 295    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
 296#define _mm256_cmple_epu16_mask(A, B) \
 297    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
 298#define _mm256_mask_cmple_epu16_mask(k, A, B) \
 299    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
 300#define _mm256_cmplt_epu16_mask(A, B) \
 301    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
 302#define _mm256_mask_cmplt_epu16_mask(k, A, B) \
 303    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
 304#define _mm256_cmpneq_epu16_mask(A, B) \
 305    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
 306#define _mm256_mask_cmpneq_epu16_mask(k, A, B) \
 307    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
 308
 309static __inline__ __m256i __DEFAULT_FN_ATTRS256
 310_mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){
 311  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
 312                                             (__v32qi)_mm256_add_epi8(__A, __B),
 313                                             (__v32qi)__W);
 314}
 315
 316static __inline__ __m256i __DEFAULT_FN_ATTRS256
 317_mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
 318  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
 319                                             (__v32qi)_mm256_add_epi8(__A, __B),
 320                                             (__v32qi)_mm256_setzero_si256());
 321}
 322
 323static __inline__ __m256i __DEFAULT_FN_ATTRS256
 324_mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
 325  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 326                                             (__v16hi)_mm256_add_epi16(__A, __B),
 327                                             (__v16hi)__W);
 328}
 329
 330static __inline__ __m256i __DEFAULT_FN_ATTRS256
 331_mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
 332  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 333                                             (__v16hi)_mm256_add_epi16(__A, __B),
 334                                             (__v16hi)_mm256_setzero_si256());
 335}
 336
 337static __inline__ __m256i __DEFAULT_FN_ATTRS256
 338_mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
 339  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
 340                                             (__v32qi)_mm256_sub_epi8(__A, __B),
 341                                             (__v32qi)__W);
 342}
 343
 344static __inline__ __m256i __DEFAULT_FN_ATTRS256
 345_mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
 346  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
 347                                             (__v32qi)_mm256_sub_epi8(__A, __B),
 348                                             (__v32qi)_mm256_setzero_si256());
 349}
 350
 351static __inline__ __m256i __DEFAULT_FN_ATTRS256
 352_mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
 353  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 354                                             (__v16hi)_mm256_sub_epi16(__A, __B),
 355                                             (__v16hi)__W);
 356}
 357
 358static __inline__ __m256i __DEFAULT_FN_ATTRS256
 359_mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
 360  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 361                                             (__v16hi)_mm256_sub_epi16(__A, __B),
 362                                             (__v16hi)_mm256_setzero_si256());
 363}
 364
 365static __inline__ __m128i __DEFAULT_FN_ATTRS128
 366_mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
 367  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
 368                                             (__v16qi)_mm_add_epi8(__A, __B),
 369                                             (__v16qi)__W);
 370}
 371
 372static __inline__ __m128i __DEFAULT_FN_ATTRS128
 373_mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
 374  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
 375                                             (__v16qi)_mm_add_epi8(__A, __B),
 376                                             (__v16qi)_mm_setzero_si128());
 377}
 378
 379static __inline__ __m128i __DEFAULT_FN_ATTRS128
 380_mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
 381  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 382                                             (__v8hi)_mm_add_epi16(__A, __B),
 383                                             (__v8hi)__W);
 384}
 385
 386static __inline__ __m128i __DEFAULT_FN_ATTRS128
 387_mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
 388  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 389                                             (__v8hi)_mm_add_epi16(__A, __B),
 390                                             (__v8hi)_mm_setzero_si128());
 391}
 392
 393static __inline__ __m128i __DEFAULT_FN_ATTRS128
 394_mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
 395  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
 396                                             (__v16qi)_mm_sub_epi8(__A, __B),
 397                                             (__v16qi)__W);
 398}
 399
 400static __inline__ __m128i __DEFAULT_FN_ATTRS128
 401_mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
 402  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
 403                                             (__v16qi)_mm_sub_epi8(__A, __B),
 404                                             (__v16qi)_mm_setzero_si128());
 405}
 406
 407static __inline__ __m128i __DEFAULT_FN_ATTRS128
 408_mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
 409  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 410                                             (__v8hi)_mm_sub_epi16(__A, __B),
 411                                             (__v8hi)__W);
 412}
 413
 414static __inline__ __m128i __DEFAULT_FN_ATTRS128
 415_mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
 416  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 417                                             (__v8hi)_mm_sub_epi16(__A, __B),
 418                                             (__v8hi)_mm_setzero_si128());
 419}
 420
 421static __inline__ __m256i __DEFAULT_FN_ATTRS256
 422_mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
 423  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 424                                             (__v16hi)_mm256_mullo_epi16(__A, __B),
 425                                             (__v16hi)__W);
 426}
 427
 428static __inline__ __m256i __DEFAULT_FN_ATTRS256
 429_mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
 430  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 431                                             (__v16hi)_mm256_mullo_epi16(__A, __B),
 432                                             (__v16hi)_mm256_setzero_si256());
 433}
 434
 435static __inline__ __m128i __DEFAULT_FN_ATTRS128
 436_mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
 437  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 438                                             (__v8hi)_mm_mullo_epi16(__A, __B),
 439                                             (__v8hi)__W);
 440}
 441
 442static __inline__ __m128i __DEFAULT_FN_ATTRS128
 443_mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
 444  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 445                                             (__v8hi)_mm_mullo_epi16(__A, __B),
 446                                             (__v8hi)_mm_setzero_si128());
 447}
 448
 449static __inline__ __m128i __DEFAULT_FN_ATTRS128
 450_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
 451{
 452  return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
 453              (__v16qi) __W,
 454              (__v16qi) __A);
 455}
 456
 457static __inline__ __m256i __DEFAULT_FN_ATTRS256
 458_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
 459{
 460  return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
 461               (__v32qi) __W,
 462               (__v32qi) __A);
 463}
 464
 465static __inline__ __m128i __DEFAULT_FN_ATTRS128
 466_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
 467{
 468  return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
 469               (__v8hi) __W,
 470               (__v8hi) __A);
 471}
 472
 473static __inline__ __m256i __DEFAULT_FN_ATTRS256
 474_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
 475{
 476  return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
 477               (__v16hi) __W,
 478               (__v16hi) __A);
 479}
 480
 481static __inline__ __m128i __DEFAULT_FN_ATTRS128
 482_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A)
 483{
 484  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
 485                                             (__v16qi)_mm_abs_epi8(__A),
 486                                             (__v16qi)__W);
 487}
 488
 489static __inline__ __m128i __DEFAULT_FN_ATTRS128
 490_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A)
 491{
 492  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
 493                                             (__v16qi)_mm_abs_epi8(__A),
 494                                             (__v16qi)_mm_setzero_si128());
 495}
 496
 497static __inline__ __m256i __DEFAULT_FN_ATTRS256
 498_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A)
 499{
 500  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
 501                                             (__v32qi)_mm256_abs_epi8(__A),
 502                                             (__v32qi)__W);
 503}
 504
 505static __inline__ __m256i __DEFAULT_FN_ATTRS256
 506_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
 507{
 508  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
 509                                             (__v32qi)_mm256_abs_epi8(__A),
 510                                             (__v32qi)_mm256_setzero_si256());
 511}
 512
 513static __inline__ __m128i __DEFAULT_FN_ATTRS128
 514_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A)
 515{
 516  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 517                                             (__v8hi)_mm_abs_epi16(__A),
 518                                             (__v8hi)__W);
 519}
 520
 521static __inline__ __m128i __DEFAULT_FN_ATTRS128
 522_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A)
 523{
 524  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 525                                             (__v8hi)_mm_abs_epi16(__A),
 526                                             (__v8hi)_mm_setzero_si128());
 527}
 528
 529static __inline__ __m256i __DEFAULT_FN_ATTRS256
 530_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A)
 531{
 532  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 533                                             (__v16hi)_mm256_abs_epi16(__A),
 534                                             (__v16hi)__W);
 535}
 536
 537static __inline__ __m256i __DEFAULT_FN_ATTRS256
 538_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A)
 539{
 540  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 541                                             (__v16hi)_mm256_abs_epi16(__A),
 542                                             (__v16hi)_mm256_setzero_si256());
 543}
 544
 545static __inline__ __m128i __DEFAULT_FN_ATTRS128
 546_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
 547  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
 548                                             (__v8hi)_mm_packs_epi32(__A, __B),
 549                                             (__v8hi)_mm_setzero_si128());
 550}
 551
 552static __inline__ __m128i __DEFAULT_FN_ATTRS128
 553_mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 554{
 555  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
 556                                             (__v8hi)_mm_packs_epi32(__A, __B),
 557                                             (__v8hi)__W);
 558}
 559
 560static __inline__ __m256i __DEFAULT_FN_ATTRS256
 561_mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
 562{
 563  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
 564                                          (__v16hi)_mm256_packs_epi32(__A, __B),
 565                                          (__v16hi)_mm256_setzero_si256());
 566}
 567
 568static __inline__ __m256i __DEFAULT_FN_ATTRS256
 569_mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
 570{
 571  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
 572                                          (__v16hi)_mm256_packs_epi32(__A, __B),
 573                                          (__v16hi)__W);
 574}
 575
 576static __inline__ __m128i __DEFAULT_FN_ATTRS128
 577_mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
 578{
 579  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
 580                                             (__v16qi)_mm_packs_epi16(__A, __B),
 581                                             (__v16qi)_mm_setzero_si128());
 582}
 583
 584static __inline__ __m128i __DEFAULT_FN_ATTRS128
 585_mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
 586{
 587  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
 588                                             (__v16qi)_mm_packs_epi16(__A, __B),
 589                                             (__v16qi)__W);
 590}
 591
 592static __inline__ __m256i __DEFAULT_FN_ATTRS256
 593_mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
 594{
 595  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
 596                                          (__v32qi)_mm256_packs_epi16(__A, __B),
 597                                          (__v32qi)_mm256_setzero_si256());
 598}
 599
 600static __inline__ __m256i __DEFAULT_FN_ATTRS256
 601_mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
 602{
 603  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
 604                                          (__v32qi)_mm256_packs_epi16(__A, __B),
 605                                          (__v32qi)__W);
 606}
 607
 608static __inline__ __m128i __DEFAULT_FN_ATTRS128
 609_mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
 610{
 611  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
 612                                             (__v8hi)_mm_packus_epi32(__A, __B),
 613                                             (__v8hi)_mm_setzero_si128());
 614}
 615
 616static __inline__ __m128i __DEFAULT_FN_ATTRS128
 617_mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 618{
 619  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
 620                                             (__v8hi)_mm_packus_epi32(__A, __B),
 621                                             (__v8hi)__W);
 622}
 623
 624static __inline__ __m256i __DEFAULT_FN_ATTRS256
 625_mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
 626{
 627  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
 628                                         (__v16hi)_mm256_packus_epi32(__A, __B),
 629                                         (__v16hi)_mm256_setzero_si256());
 630}
 631
 632static __inline__ __m256i __DEFAULT_FN_ATTRS256
 633_mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
 634{
 635  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
 636                                         (__v16hi)_mm256_packus_epi32(__A, __B),
 637                                         (__v16hi)__W);
 638}
 639
 640static __inline__ __m128i __DEFAULT_FN_ATTRS128
 641_mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
 642{
 643  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
 644                                            (__v16qi)_mm_packus_epi16(__A, __B),
 645                                            (__v16qi)_mm_setzero_si128());
 646}
 647
 648static __inline__ __m128i __DEFAULT_FN_ATTRS128
 649_mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
 650{
 651  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
 652                                            (__v16qi)_mm_packus_epi16(__A, __B),
 653                                            (__v16qi)__W);
 654}
 655
 656static __inline__ __m256i __DEFAULT_FN_ATTRS256
 657_mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
 658{
 659  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
 660                                         (__v32qi)_mm256_packus_epi16(__A, __B),
 661                                         (__v32qi)_mm256_setzero_si256());
 662}
 663
 664static __inline__ __m256i __DEFAULT_FN_ATTRS256
 665_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
 666{
 667  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
 668                                         (__v32qi)_mm256_packus_epi16(__A, __B),
 669                                         (__v32qi)__W);
 670}
 671
 672static __inline__ __m128i __DEFAULT_FN_ATTRS128
 673_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
 674{
 675  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
 676                                             (__v16qi)_mm_adds_epi8(__A, __B),
 677                                             (__v16qi)__W);
 678}
 679
 680static __inline__ __m128i __DEFAULT_FN_ATTRS128
 681_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
 682{
 683  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
 684                                             (__v16qi)_mm_adds_epi8(__A, __B),
 685                                             (__v16qi)_mm_setzero_si128());
 686}
 687
 688static __inline__ __m256i __DEFAULT_FN_ATTRS256
 689_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
 690{
 691  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
 692                                            (__v32qi)_mm256_adds_epi8(__A, __B),
 693                                            (__v32qi)__W);
 694}
 695
 696static __inline__ __m256i __DEFAULT_FN_ATTRS256
 697_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
 698{
 699  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
 700                                            (__v32qi)_mm256_adds_epi8(__A, __B),
 701                                            (__v32qi)_mm256_setzero_si256());
 702}
 703
 704static __inline__ __m128i __DEFAULT_FN_ATTRS128
 705_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 706{
 707  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 708                                             (__v8hi)_mm_adds_epi16(__A, __B),
 709                                             (__v8hi)__W);
 710}
 711
 712static __inline__ __m128i __DEFAULT_FN_ATTRS128
 713_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
 714{
 715  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 716                                             (__v8hi)_mm_adds_epi16(__A, __B),
 717                                             (__v8hi)_mm_setzero_si128());
 718}
 719
 720static __inline__ __m256i __DEFAULT_FN_ATTRS256
 721_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 722{
 723  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 724                                           (__v16hi)_mm256_adds_epi16(__A, __B),
 725                                           (__v16hi)__W);
 726}
 727
 728static __inline__ __m256i __DEFAULT_FN_ATTRS256
 729_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
 730{
 731  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 732                                           (__v16hi)_mm256_adds_epi16(__A, __B),
 733                                           (__v16hi)_mm256_setzero_si256());
 734}
 735
 736static __inline__ __m128i __DEFAULT_FN_ATTRS128
 737_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
 738{
 739  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
 740                                             (__v16qi)_mm_adds_epu8(__A, __B),
 741                                             (__v16qi)__W);
 742}
 743
 744static __inline__ __m128i __DEFAULT_FN_ATTRS128
 745_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
 746{
 747  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
 748                                             (__v16qi)_mm_adds_epu8(__A, __B),
 749                                             (__v16qi)_mm_setzero_si128());
 750}
 751
 752static __inline__ __m256i __DEFAULT_FN_ATTRS256
 753_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
 754{
 755  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
 756                                            (__v32qi)_mm256_adds_epu8(__A, __B),
 757                                            (__v32qi)__W);
 758}
 759
 760static __inline__ __m256i __DEFAULT_FN_ATTRS256
 761_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
 762{
 763  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
 764                                            (__v32qi)_mm256_adds_epu8(__A, __B),
 765                                            (__v32qi)_mm256_setzero_si256());
 766}
 767
 768static __inline__ __m128i __DEFAULT_FN_ATTRS128
 769_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 770{
 771  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 772                                             (__v8hi)_mm_adds_epu16(__A, __B),
 773                                             (__v8hi)__W);
 774}
 775
 776static __inline__ __m128i __DEFAULT_FN_ATTRS128
 777_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
 778{
 779  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 780                                             (__v8hi)_mm_adds_epu16(__A, __B),
 781                                             (__v8hi)_mm_setzero_si128());
 782}
 783
 784static __inline__ __m256i __DEFAULT_FN_ATTRS256
 785_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 786{
 787  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 788                                           (__v16hi)_mm256_adds_epu16(__A, __B),
 789                                           (__v16hi)__W);
 790}
 791
 792static __inline__ __m256i __DEFAULT_FN_ATTRS256
 793_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
 794{
 795  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 796                                           (__v16hi)_mm256_adds_epu16(__A, __B),
 797                                           (__v16hi)_mm256_setzero_si256());
 798}
 799
 800static __inline__ __m128i __DEFAULT_FN_ATTRS128
 801_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
 802{
 803  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
 804                                             (__v16qi)_mm_avg_epu8(__A, __B),
 805                                             (__v16qi)__W);
 806}
 807
 808static __inline__ __m128i __DEFAULT_FN_ATTRS128
 809_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
 810{
 811  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
 812                                             (__v16qi)_mm_avg_epu8(__A, __B),
 813                                             (__v16qi)_mm_setzero_si128());
 814}
 815
 816static __inline__ __m256i __DEFAULT_FN_ATTRS256
 817_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
 818{
 819  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
 820                                             (__v32qi)_mm256_avg_epu8(__A, __B),
 821                                             (__v32qi)__W);
 822}
 823
 824static __inline__ __m256i __DEFAULT_FN_ATTRS256
 825_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
 826{
 827  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
 828                                             (__v32qi)_mm256_avg_epu8(__A, __B),
 829                                             (__v32qi)_mm256_setzero_si256());
 830}
 831
 832static __inline__ __m128i __DEFAULT_FN_ATTRS128
 833_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 834{
 835  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 836                                             (__v8hi)_mm_avg_epu16(__A, __B),
 837                                             (__v8hi)__W);
 838}
 839
 840static __inline__ __m128i __DEFAULT_FN_ATTRS128
 841_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
 842{
 843  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
 844                                             (__v8hi)_mm_avg_epu16(__A, __B),
 845                                             (__v8hi)_mm_setzero_si128());
 846}
 847
 848static __inline__ __m256i __DEFAULT_FN_ATTRS256
 849_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 850{
 851  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 852                                            (__v16hi)_mm256_avg_epu16(__A, __B),
 853                                            (__v16hi)__W);
 854}
 855
 856static __inline__ __m256i __DEFAULT_FN_ATTRS256
 857_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
 858{
 859  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
 860                                            (__v16hi)_mm256_avg_epu16(__A, __B),
 861                                            (__v16hi)_mm256_setzero_si256());
 862}
 863
 864static __inline__ __m128i __DEFAULT_FN_ATTRS128
 865_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B)
 866{
 867  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
 868                                             (__v16qi)_mm_max_epi8(__A, __B),
 869                                             (__v16qi)_mm_setzero_si128());
 870}
 871
 872static __inline__ __m128i __DEFAULT_FN_ATTRS128
 873_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
 874{
 875  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
 876                                             (__v16qi)_mm_max_epi8(__A, __B),
 877                                             (__v16qi)__W);
 878}
 879
 880static __inline__ __m256i __DEFAULT_FN_ATTRS256
 881_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B)
 882{
 883  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
 884                                             (__v32qi)_mm256_max_epi8(__A, __B),
 885                                             (__v32qi)_mm256_setzero_si256());
 886}
 887
 888static __inline__ __m256i __DEFAULT_FN_ATTRS256
 889_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
 890{
 891  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
 892                                             (__v32qi)_mm256_max_epi8(__A, __B),
 893                                             (__v32qi)__W);
 894}
 895
 896static __inline__ __m128i __DEFAULT_FN_ATTRS128
 897_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B)
 898{
 899  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
 900                                             (__v8hi)_mm_max_epi16(__A, __B),
 901                                             (__v8hi)_mm_setzero_si128());
 902}
 903
 904static __inline__ __m128i __DEFAULT_FN_ATTRS128
 905_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 906{
 907  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
 908                                             (__v8hi)_mm_max_epi16(__A, __B),
 909                                             (__v8hi)__W);
 910}
 911
 912static __inline__ __m256i __DEFAULT_FN_ATTRS256
 913_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B)
 914{
 915  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
 916                                            (__v16hi)_mm256_max_epi16(__A, __B),
 917                                            (__v16hi)_mm256_setzero_si256());
 918}
 919
 920static __inline__ __m256i __DEFAULT_FN_ATTRS256
 921_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
 922{
 923  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
 924                                            (__v16hi)_mm256_max_epi16(__A, __B),
 925                                            (__v16hi)__W);
 926}
 927
 928static __inline__ __m128i __DEFAULT_FN_ATTRS128
 929_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B)
 930{
 931  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
 932                                             (__v16qi)_mm_max_epu8(__A, __B),
 933                                             (__v16qi)_mm_setzero_si128());
 934}
 935
 936static __inline__ __m128i __DEFAULT_FN_ATTRS128
 937_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
 938{
 939  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
 940                                             (__v16qi)_mm_max_epu8(__A, __B),
 941                                             (__v16qi)__W);
 942}
 943
 944static __inline__ __m256i __DEFAULT_FN_ATTRS256
 945_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
 946{
 947  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
 948                                             (__v32qi)_mm256_max_epu8(__A, __B),
 949                                             (__v32qi)_mm256_setzero_si256());
 950}
 951
 952static __inline__ __m256i __DEFAULT_FN_ATTRS256
 953_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
 954{
 955  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
 956                                             (__v32qi)_mm256_max_epu8(__A, __B),
 957                                             (__v32qi)__W);
 958}
 959
 960static __inline__ __m128i __DEFAULT_FN_ATTRS128
 961_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B)
 962{
 963  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
 964                                             (__v8hi)_mm_max_epu16(__A, __B),
 965                                             (__v8hi)_mm_setzero_si128());
 966}
 967
 968static __inline__ __m128i __DEFAULT_FN_ATTRS128
 969_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 970{
 971  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
 972                                             (__v8hi)_mm_max_epu16(__A, __B),
 973                                             (__v8hi)__W);
 974}
 975
 976static __inline__ __m256i __DEFAULT_FN_ATTRS256
 977_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B)
 978{
 979  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
 980                                            (__v16hi)_mm256_max_epu16(__A, __B),
 981                                            (__v16hi)_mm256_setzero_si256());
 982}
 983
 984static __inline__ __m256i __DEFAULT_FN_ATTRS256
 985_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
 986{
 987  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
 988                                            (__v16hi)_mm256_max_epu16(__A, __B),
 989                                            (__v16hi)__W);
 990}
 991
 992static __inline__ __m128i __DEFAULT_FN_ATTRS128
 993_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B)
 994{
 995  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
 996                                             (__v16qi)_mm_min_epi8(__A, __B),
 997                                             (__v16qi)_mm_setzero_si128());
 998}
 999
1000static __inline__ __m128i __DEFAULT_FN_ATTRS128
1001_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
1002{
1003  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1004                                             (__v16qi)_mm_min_epi8(__A, __B),
1005                                             (__v16qi)__W);
1006}
1007
1008static __inline__ __m256i __DEFAULT_FN_ATTRS256
1009_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B)
1010{
1011  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1012                                             (__v32qi)_mm256_min_epi8(__A, __B),
1013                                             (__v32qi)_mm256_setzero_si256());
1014}
1015
1016static __inline__ __m256i __DEFAULT_FN_ATTRS256
1017_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
1018{
1019  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1020                                             (__v32qi)_mm256_min_epi8(__A, __B),
1021                                             (__v32qi)__W);
1022}
1023
1024static __inline__ __m128i __DEFAULT_FN_ATTRS128
1025_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B)
1026{
1027  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1028                                             (__v8hi)_mm_min_epi16(__A, __B),
1029                                             (__v8hi)_mm_setzero_si128());
1030}
1031
1032static __inline__ __m128i __DEFAULT_FN_ATTRS128
1033_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
1034{
1035  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1036                                             (__v8hi)_mm_min_epi16(__A, __B),
1037                                             (__v8hi)__W);
1038}
1039
1040static __inline__ __m256i __DEFAULT_FN_ATTRS256
1041_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B)
1042{
1043  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1044                                            (__v16hi)_mm256_min_epi16(__A, __B),
1045                                            (__v16hi)_mm256_setzero_si256());
1046}
1047
1048static __inline__ __m256i __DEFAULT_FN_ATTRS256
1049_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
1050{
1051  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1052                                            (__v16hi)_mm256_min_epi16(__A, __B),
1053                                            (__v16hi)__W);
1054}
1055
1056static __inline__ __m128i __DEFAULT_FN_ATTRS128
1057_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B)
1058{
1059  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1060                                             (__v16qi)_mm_min_epu8(__A, __B),
1061                                             (__v16qi)_mm_setzero_si128());
1062}
1063
1064static __inline__ __m128i __DEFAULT_FN_ATTRS128
1065_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
1066{
1067  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1068                                             (__v16qi)_mm_min_epu8(__A, __B),
1069                                             (__v16qi)__W);
1070}
1071
1072static __inline__ __m256i __DEFAULT_FN_ATTRS256
1073_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
1074{
1075  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1076                                             (__v32qi)_mm256_min_epu8(__A, __B),
1077                                             (__v32qi)_mm256_setzero_si256());
1078}
1079
1080static __inline__ __m256i __DEFAULT_FN_ATTRS256
1081_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
1082{
1083  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1084                                             (__v32qi)_mm256_min_epu8(__A, __B),
1085                                             (__v32qi)__W);
1086}
1087
1088static __inline__ __m128i __DEFAULT_FN_ATTRS128
1089_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B)
1090{
1091  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1092                                             (__v8hi)_mm_min_epu16(__A, __B),
1093                                             (__v8hi)_mm_setzero_si128());
1094}
1095
1096static __inline__ __m128i __DEFAULT_FN_ATTRS128
1097_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
1098{
1099  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1100                                             (__v8hi)_mm_min_epu16(__A, __B),
1101                                             (__v8hi)__W);
1102}
1103
1104static __inline__ __m256i __DEFAULT_FN_ATTRS256
1105_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B)
1106{
1107  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1108                                            (__v16hi)_mm256_min_epu16(__A, __B),
1109                                            (__v16hi)_mm256_setzero_si256());
1110}
1111
1112static __inline__ __m256i __DEFAULT_FN_ATTRS256
1113_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
1114{
1115  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1116                                            (__v16hi)_mm256_min_epu16(__A, __B),
1117                                            (__v16hi)__W);
1118}
1119
1120static __inline__ __m128i __DEFAULT_FN_ATTRS128
1121_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
1122{
1123  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1124                                            (__v16qi)_mm_shuffle_epi8(__A, __B),
1125                                            (__v16qi)__W);
1126}
1127
1128static __inline__ __m128i __DEFAULT_FN_ATTRS128
1129_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
1130{
1131  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1132                                            (__v16qi)_mm_shuffle_epi8(__A, __B),
1133                                            (__v16qi)_mm_setzero_si128());
1134}
1135
1136static __inline__ __m256i __DEFAULT_FN_ATTRS256
1137_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
1138{
1139  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1140                                         (__v32qi)_mm256_shuffle_epi8(__A, __B),
1141                                         (__v32qi)__W);
1142}
1143
1144static __inline__ __m256i __DEFAULT_FN_ATTRS256
1145_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
1146{
1147  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1148                                         (__v32qi)_mm256_shuffle_epi8(__A, __B),
1149                                         (__v32qi)_mm256_setzero_si256());
1150}
1151
1152static __inline__ __m128i __DEFAULT_FN_ATTRS128
1153_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
1154{
1155  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1156                                             (__v16qi)_mm_subs_epi8(__A, __B),
1157                                             (__v16qi)__W);
1158}
1159
1160static __inline__ __m128i __DEFAULT_FN_ATTRS128
1161_mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
1162{
1163  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1164                                             (__v16qi)_mm_subs_epi8(__A, __B),
1165                                             (__v16qi)_mm_setzero_si128());
1166}
1167
1168static __inline__ __m256i __DEFAULT_FN_ATTRS256
1169_mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
1170{
1171  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1172                                            (__v32qi)_mm256_subs_epi8(__A, __B),
1173                                            (__v32qi)__W);
1174}
1175
1176static __inline__ __m256i __DEFAULT_FN_ATTRS256
1177_mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
1178{
1179  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1180                                            (__v32qi)_mm256_subs_epi8(__A, __B),
1181                                            (__v32qi)_mm256_setzero_si256());
1182}
1183
1184static __inline__ __m128i __DEFAULT_FN_ATTRS128
1185_mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1186{
1187  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1188                                             (__v8hi)_mm_subs_epi16(__A, __B),
1189                                             (__v8hi)__W);
1190}
1191
1192static __inline__ __m128i __DEFAULT_FN_ATTRS128
1193_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
1194{
1195  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1196                                             (__v8hi)_mm_subs_epi16(__A, __B),
1197                                             (__v8hi)_mm_setzero_si128());
1198}
1199
1200static __inline__ __m256i __DEFAULT_FN_ATTRS256
1201_mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
1202{
1203  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1204                                           (__v16hi)_mm256_subs_epi16(__A, __B),
1205                                           (__v16hi)__W);
1206}
1207
1208static __inline__ __m256i __DEFAULT_FN_ATTRS256
1209_mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
1210{
1211  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1212                                           (__v16hi)_mm256_subs_epi16(__A, __B),
1213                                           (__v16hi)_mm256_setzero_si256());
1214}
1215
1216static __inline__ __m128i __DEFAULT_FN_ATTRS128
1217_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
1218{
1219  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1220                                             (__v16qi)_mm_subs_epu8(__A, __B),
1221                                             (__v16qi)__W);
1222}
1223
1224static __inline__ __m128i __DEFAULT_FN_ATTRS128
1225_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
1226{
1227  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1228                                             (__v16qi)_mm_subs_epu8(__A, __B),
1229                                             (__v16qi)_mm_setzero_si128());
1230}
1231
1232static __inline__ __m256i __DEFAULT_FN_ATTRS256
1233_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
1234{
1235  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1236                                            (__v32qi)_mm256_subs_epu8(__A, __B),
1237                                            (__v32qi)__W);
1238}
1239
1240static __inline__ __m256i __DEFAULT_FN_ATTRS256
1241_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
1242{
1243  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1244                                            (__v32qi)_mm256_subs_epu8(__A, __B),
1245                                            (__v32qi)_mm256_setzero_si256());
1246}
1247
1248static __inline__ __m128i __DEFAULT_FN_ATTRS128
1249_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1250{
1251  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1252                                             (__v8hi)_mm_subs_epu16(__A, __B),
1253                                             (__v8hi)__W);
1254}
1255
1256static __inline__ __m128i __DEFAULT_FN_ATTRS128
1257_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
1258{
1259  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1260                                             (__v8hi)_mm_subs_epu16(__A, __B),
1261                                             (__v8hi)_mm_setzero_si128());
1262}
1263
1264static __inline__ __m256i __DEFAULT_FN_ATTRS256
1265_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A,
1266      __m256i __B) {
1267  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1268                                           (__v16hi)_mm256_subs_epu16(__A, __B),
1269                                           (__v16hi)__W);
1270}
1271
1272static __inline__ __m256i __DEFAULT_FN_ATTRS256
1273_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
1274{
1275  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1276                                           (__v16hi)_mm256_subs_epu16(__A, __B),
1277                                           (__v16hi)_mm256_setzero_si256());
1278}
1279
1280static __inline__ __m128i __DEFAULT_FN_ATTRS128
1281_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
1282{
1283  return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
1284                                                 (__v8hi) __B);
1285}
1286
1287static __inline__ __m128i __DEFAULT_FN_ATTRS128
1288_mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I,
1289                            __m128i __B)
1290{
1291  return (__m128i)__builtin_ia32_selectw_128(__U,
1292                                  (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1293                                  (__v8hi)__A);
1294}
1295
1296static __inline__ __m128i __DEFAULT_FN_ATTRS128
1297_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U,
1298                             __m128i __B)
1299{
1300  return (__m128i)__builtin_ia32_selectw_128(__U,
1301                                  (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1302                                  (__v8hi)__I);
1303}
1304
1305static __inline__ __m128i __DEFAULT_FN_ATTRS128
1306_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
1307            __m128i __B)
1308{
1309  return (__m128i)__builtin_ia32_selectw_128(__U,
1310                                  (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1311                                  (__v8hi)_mm_setzero_si128());
1312}
1313
1314static __inline__ __m256i __DEFAULT_FN_ATTRS256
1315_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
1316{
1317  return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
1318                                                 (__v16hi)__B);
1319}
1320
1321static __inline__ __m256i __DEFAULT_FN_ATTRS256
1322_mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I,
1323                               __m256i __B)
1324{
1325  return (__m256i)__builtin_ia32_selectw_256(__U,
1326                              (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1327                              (__v16hi)__A);
1328}
1329
1330static __inline__ __m256i __DEFAULT_FN_ATTRS256
1331_mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U,
1332                                __m256i __B)
1333{
1334  return (__m256i)__builtin_ia32_selectw_256(__U,
1335                              (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1336                              (__v16hi)__I);
1337}
1338
1339static __inline__ __m256i __DEFAULT_FN_ATTRS256
1340_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I,
1341                                 __m256i __B)
1342{
1343  return (__m256i)__builtin_ia32_selectw_256(__U,
1344                              (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1345                              (__v16hi)_mm256_setzero_si256());
1346}
1347
1348static __inline__ __m128i __DEFAULT_FN_ATTRS128
1349_mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
1350  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1351                                            (__v8hi)_mm_maddubs_epi16(__X, __Y),
1352                                            (__v8hi)__W);
1353}
1354
1355static __inline__ __m128i __DEFAULT_FN_ATTRS128
1356_mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
1357  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1358                                            (__v8hi)_mm_maddubs_epi16(__X, __Y),
1359                                            (__v8hi)_mm_setzero_si128());
1360}
1361
1362static __inline__ __m256i __DEFAULT_FN_ATTRS256
1363_mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X,
1364                          __m256i __Y) {
1365  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1366                                        (__v16hi)_mm256_maddubs_epi16(__X, __Y),
1367                                        (__v16hi)__W);
1368}
1369
1370static __inline__ __m256i __DEFAULT_FN_ATTRS256
1371_mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
1372  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1373                                        (__v16hi)_mm256_maddubs_epi16(__X, __Y),
1374                                        (__v16hi)_mm256_setzero_si256());
1375}
1376
1377static __inline__ __m128i __DEFAULT_FN_ATTRS128
1378_mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1379  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1380                                             (__v4si)_mm_madd_epi16(__A, __B),
1381                                             (__v4si)__W);
1382}
1383
1384static __inline__ __m128i __DEFAULT_FN_ATTRS128
1385_mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1386  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1387                                             (__v4si)_mm_madd_epi16(__A, __B),
1388                                             (__v4si)_mm_setzero_si128());
1389}
1390
1391static __inline__ __m256i __DEFAULT_FN_ATTRS256
1392_mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
1393  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1394                                            (__v8si)_mm256_madd_epi16(__A, __B),
1395                                            (__v8si)__W);
1396}
1397
1398static __inline__ __m256i __DEFAULT_FN_ATTRS256
1399_mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) {
1400  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1401                                            (__v8si)_mm256_madd_epi16(__A, __B),
1402                                            (__v8si)_mm256_setzero_si256());
1403}
1404
1405static __inline__ __m128i __DEFAULT_FN_ATTRS128
1406_mm_cvtsepi16_epi8 (__m128i __A) {
1407  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1408               (__v16qi) _mm_setzero_si128(),
1409               (__mmask8) -1);
1410}
1411
1412static __inline__ __m128i __DEFAULT_FN_ATTRS128
1413_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1414  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1415               (__v16qi) __O,
1416                __M);
1417}
1418
1419static __inline__ __m128i __DEFAULT_FN_ATTRS128
1420_mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A) {
1421  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1422               (__v16qi) _mm_setzero_si128(),
1423               __M);
1424}
1425
1426static __inline__ __m128i __DEFAULT_FN_ATTRS256
1427_mm256_cvtsepi16_epi8 (__m256i __A) {
1428  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1429               (__v16qi) _mm_setzero_si128(),
1430               (__mmask16) -1);
1431}
1432
1433static __inline__ __m128i __DEFAULT_FN_ATTRS256
1434_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1435  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1436               (__v16qi) __O,
1437                __M);
1438}
1439
1440static __inline__ __m128i __DEFAULT_FN_ATTRS256
1441_mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A) {
1442  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1443               (__v16qi) _mm_setzero_si128(),
1444               __M);
1445}
1446
1447static __inline__ __m128i __DEFAULT_FN_ATTRS128
1448_mm_cvtusepi16_epi8 (__m128i __A) {
1449  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1450                (__v16qi) _mm_setzero_si128(),
1451                (__mmask8) -1);
1452}
1453
1454static __inline__ __m128i __DEFAULT_FN_ATTRS128
1455_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1456  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1457                (__v16qi) __O,
1458                __M);
1459}
1460
1461static __inline__ __m128i __DEFAULT_FN_ATTRS128
1462_mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A) {
1463  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1464                (__v16qi) _mm_setzero_si128(),
1465                __M);
1466}
1467
1468static __inline__ __m128i __DEFAULT_FN_ATTRS256
1469_mm256_cvtusepi16_epi8 (__m256i __A) {
1470  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1471                (__v16qi) _mm_setzero_si128(),
1472                (__mmask16) -1);
1473}
1474
1475static __inline__ __m128i __DEFAULT_FN_ATTRS256
1476_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1477  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1478                (__v16qi) __O,
1479                __M);
1480}
1481
1482static __inline__ __m128i __DEFAULT_FN_ATTRS256
1483_mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) {
1484  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1485                (__v16qi) _mm_setzero_si128(),
1486                __M);
1487}
1488
1489static __inline__ __m128i __DEFAULT_FN_ATTRS128
1490_mm_cvtepi16_epi8 (__m128i __A) {
1491  return (__m128i)__builtin_shufflevector(
1492      __builtin_convertvector((__v8hi)__A, __v8qi),
1493      (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
1494      12, 13, 14, 15);
1495}
1496
1497static __inline__ __m128i __DEFAULT_FN_ATTRS128
1498_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1499  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1500               (__v16qi) __O,
1501               __M);
1502}
1503
1504static __inline__ __m128i __DEFAULT_FN_ATTRS128
1505_mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) {
1506  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1507               (__v16qi) _mm_setzero_si128(),
1508               __M);
1509}
1510
1511static __inline__ void __DEFAULT_FN_ATTRS128
1512_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1513{
1514  __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1515}
1516
1517
1518static __inline__ void __DEFAULT_FN_ATTRS128
1519_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1520{
1521  __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1522}
1523
1524static __inline__ void __DEFAULT_FN_ATTRS128
1525_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1526{
1527  __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1528}
1529
1530static __inline__ __m128i __DEFAULT_FN_ATTRS256
1531_mm256_cvtepi16_epi8 (__m256i __A) {
1532  return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi);
1533}
1534
1535static __inline__ __m128i __DEFAULT_FN_ATTRS256
1536_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1537  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1538                                             (__v16qi)_mm256_cvtepi16_epi8(__A),
1539                                             (__v16qi)__O);
1540}
1541
1542static __inline__ __m128i __DEFAULT_FN_ATTRS256
1543_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) {
1544  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1545                                             (__v16qi)_mm256_cvtepi16_epi8(__A),
1546                                             (__v16qi)_mm_setzero_si128());
1547}
1548
1549static __inline__ void __DEFAULT_FN_ATTRS256
1550_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
1551{
1552  __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1553}
1554
1555static __inline__ void __DEFAULT_FN_ATTRS256
1556_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
1557{
1558  __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1559}
1560
1561static __inline__ void __DEFAULT_FN_ATTRS256
1562_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
1563{
1564  __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M);
1565}
1566
1567static __inline__ __m128i __DEFAULT_FN_ATTRS128
1568_mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
1569  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1570                                             (__v8hi)_mm_mulhrs_epi16(__X, __Y),
1571                                             (__v8hi)__W);
1572}
1573
1574static __inline__ __m128i __DEFAULT_FN_ATTRS128
1575_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
1576  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1577                                             (__v8hi)_mm_mulhrs_epi16(__X, __Y),
1578                                             (__v8hi)_mm_setzero_si128());
1579}
1580
1581static __inline__ __m256i __DEFAULT_FN_ATTRS256
1582_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) {
1583  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1584                                         (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
1585                                         (__v16hi)__W);
1586}
1587
1588static __inline__ __m256i __DEFAULT_FN_ATTRS256
1589_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
1590  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1591                                         (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
1592                                         (__v16hi)_mm256_setzero_si256());
1593}
1594
1595static __inline__ __m128i __DEFAULT_FN_ATTRS128
1596_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1597  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1598                                             (__v8hi)_mm_mulhi_epu16(__A, __B),
1599                                             (__v8hi)__W);
1600}
1601
1602static __inline__ __m128i __DEFAULT_FN_ATTRS128
1603_mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
1604  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1605                                             (__v8hi)_mm_mulhi_epu16(__A, __B),
1606                                             (__v8hi)_mm_setzero_si128());
1607}
1608
1609static __inline__ __m256i __DEFAULT_FN_ATTRS256
1610_mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1611  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1612                                          (__v16hi)_mm256_mulhi_epu16(__A, __B),
1613                                          (__v16hi)__W);
1614}
1615
1616static __inline__ __m256i __DEFAULT_FN_ATTRS256
1617_mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
1618  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1619                                          (__v16hi)_mm256_mulhi_epu16(__A, __B),
1620                                          (__v16hi)_mm256_setzero_si256());
1621}
1622
1623static __inline__ __m128i __DEFAULT_FN_ATTRS128
1624_mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1625  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1626                                             (__v8hi)_mm_mulhi_epi16(__A, __B),
1627                                             (__v8hi)__W);
1628}
1629
1630static __inline__ __m128i __DEFAULT_FN_ATTRS128
1631_mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1632  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1633                                             (__v8hi)_mm_mulhi_epi16(__A, __B),
1634                                             (__v8hi)_mm_setzero_si128());
1635}
1636
1637static __inline__ __m256i __DEFAULT_FN_ATTRS256
1638_mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1639  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1640                                          (__v16hi)_mm256_mulhi_epi16(__A, __B),
1641                                          (__v16hi)__W);
1642}
1643
1644static __inline__ __m256i __DEFAULT_FN_ATTRS256
1645_mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1646  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1647                                          (__v16hi)_mm256_mulhi_epi16(__A, __B),
1648                                          (__v16hi)_mm256_setzero_si256());
1649}
1650
1651static __inline__ __m128i __DEFAULT_FN_ATTRS128
1652_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
1653  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1654                                           (__v16qi)_mm_unpackhi_epi8(__A, __B),
1655                                           (__v16qi)__W);
1656}
1657
1658static __inline__ __m128i __DEFAULT_FN_ATTRS128
1659_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
1660  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1661                                           (__v16qi)_mm_unpackhi_epi8(__A, __B),
1662                                           (__v16qi)_mm_setzero_si128());
1663}
1664
1665static __inline__ __m256i __DEFAULT_FN_ATTRS256
1666_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
1667  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1668                                        (__v32qi)_mm256_unpackhi_epi8(__A, __B),
1669                                        (__v32qi)__W);
1670}
1671
1672static __inline__ __m256i __DEFAULT_FN_ATTRS256
1673_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
1674  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1675                                        (__v32qi)_mm256_unpackhi_epi8(__A, __B),
1676                                        (__v32qi)_mm256_setzero_si256());
1677}
1678
1679static __inline__ __m128i __DEFAULT_FN_ATTRS128
1680_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1681  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1682                                           (__v8hi)_mm_unpackhi_epi16(__A, __B),
1683                                           (__v8hi)__W);
1684}
1685
1686static __inline__ __m128i __DEFAULT_FN_ATTRS128
1687_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1688  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1689                                           (__v8hi)_mm_unpackhi_epi16(__A, __B),
1690                                           (__v8hi) _mm_setzero_si128());
1691}
1692
1693static __inline__ __m256i __DEFAULT_FN_ATTRS256
1694_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1695  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1696                                       (__v16hi)_mm256_unpackhi_epi16(__A, __B),
1697                                       (__v16hi)__W);
1698}
1699
1700static __inline__ __m256i __DEFAULT_FN_ATTRS256
1701_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1702  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1703                                       (__v16hi)_mm256_unpackhi_epi16(__A, __B),
1704                                       (__v16hi)_mm256_setzero_si256());
1705}
1706
1707static __inline__ __m128i __DEFAULT_FN_ATTRS128
1708_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
1709  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1710                                           (__v16qi)_mm_unpacklo_epi8(__A, __B),
1711                                           (__v16qi)__W);
1712}
1713
1714static __inline__ __m128i __DEFAULT_FN_ATTRS128
1715_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
1716  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1717                                           (__v16qi)_mm_unpacklo_epi8(__A, __B),
1718                                           (__v16qi)_mm_setzero_si128());
1719}
1720
1721static __inline__ __m256i __DEFAULT_FN_ATTRS256
1722_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
1723  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1724                                        (__v32qi)_mm256_unpacklo_epi8(__A, __B),
1725                                        (__v32qi)__W);
1726}
1727
1728static __inline__ __m256i __DEFAULT_FN_ATTRS256
1729_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
1730  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1731                                        (__v32qi)_mm256_unpacklo_epi8(__A, __B),
1732                                        (__v32qi)_mm256_setzero_si256());
1733}
1734
1735static __inline__ __m128i __DEFAULT_FN_ATTRS128
1736_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1737  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1738                                           (__v8hi)_mm_unpacklo_epi16(__A, __B),
1739                                           (__v8hi)__W);
1740}
1741
1742static __inline__ __m128i __DEFAULT_FN_ATTRS128
1743_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1744  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1745                                           (__v8hi)_mm_unpacklo_epi16(__A, __B),
1746                                           (__v8hi) _mm_setzero_si128());
1747}
1748
1749static __inline__ __m256i __DEFAULT_FN_ATTRS256
1750_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1751  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1752                                       (__v16hi)_mm256_unpacklo_epi16(__A, __B),
1753                                       (__v16hi)__W);
1754}
1755
1756static __inline__ __m256i __DEFAULT_FN_ATTRS256
1757_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1758  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1759                                       (__v16hi)_mm256_unpacklo_epi16(__A, __B),
1760                                       (__v16hi)_mm256_setzero_si256());
1761}
1762
1763static __inline__ __m128i __DEFAULT_FN_ATTRS128
1764_mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
1765{
1766  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1767                                             (__v8hi)_mm_cvtepi8_epi16(__A),
1768                                             (__v8hi)__W);
1769}
1770
1771static __inline__ __m128i __DEFAULT_FN_ATTRS128
1772_mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
1773{
1774  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1775                                             (__v8hi)_mm_cvtepi8_epi16(__A),
1776                                             (__v8hi)_mm_setzero_si128());
1777}
1778
1779static __inline__ __m256i __DEFAULT_FN_ATTRS256
1780_mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
1781{
1782  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1783                                             (__v16hi)_mm256_cvtepi8_epi16(__A),
1784                                             (__v16hi)__W);
1785}
1786
1787static __inline__ __m256i __DEFAULT_FN_ATTRS256
1788_mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
1789{
1790  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1791                                             (__v16hi)_mm256_cvtepi8_epi16(__A),
1792                                             (__v16hi)_mm256_setzero_si256());
1793}
1794
1795
1796static __inline__ __m128i __DEFAULT_FN_ATTRS128
1797_mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
1798{
1799  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1800                                             (__v8hi)_mm_cvtepu8_epi16(__A),
1801                                             (__v8hi)__W);
1802}
1803
1804static __inline__ __m128i __DEFAULT_FN_ATTRS128
1805_mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
1806{
1807  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1808                                             (__v8hi)_mm_cvtepu8_epi16(__A),
1809                                             (__v8hi)_mm_setzero_si128());
1810}
1811
1812static __inline__ __m256i __DEFAULT_FN_ATTRS256
1813_mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
1814{
1815  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1816                                             (__v16hi)_mm256_cvtepu8_epi16(__A),
1817                                             (__v16hi)__W);
1818}
1819
1820static __inline__ __m256i __DEFAULT_FN_ATTRS256
1821_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
1822{
1823  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1824                                             (__v16hi)_mm256_cvtepu8_epi16(__A),
1825                                             (__v16hi)_mm256_setzero_si256());
1826}
1827
1828
/* Merge-masked _mm_shufflehi_epi16: expands to __builtin_ia32_selectw_128 over
 * the mask U, the shuffle of A by imm, and the fallback vector W. */
#define _mm_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
      (__v8hi)(__m128i)(W)))
1833
/* Zero-masked _mm_shufflehi_epi16: expands to __builtin_ia32_selectw_128 over
 * the mask U, the shuffle of A by imm, and an all-zero fallback vector. */
#define _mm_maskz_shufflehi_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
      (__v8hi)_mm_setzero_si128()))
1838
/* Merge-masked _mm256_shufflehi_epi16: expands to __builtin_ia32_selectw_256
 * over the mask U, the shuffle of A by imm, and the fallback vector W. */
#define _mm256_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
      (__v16hi)(__m256i)(W)))
1843
/* Zero-masked _mm256_shufflehi_epi16: expands to __builtin_ia32_selectw_256
 * over the mask U, the shuffle of A by imm, and an all-zero fallback vector. */
#define _mm256_maskz_shufflehi_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
1848
/* Merge-masked _mm_shufflelo_epi16: expands to __builtin_ia32_selectw_128 over
 * the mask U, the shuffle of A by imm, and the fallback vector W. */
#define _mm_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
      (__v8hi)(__m128i)(W)))
1853
/* Zero-masked _mm_shufflelo_epi16: expands to __builtin_ia32_selectw_128 over
 * the mask U, the shuffle of A by imm, and an all-zero fallback vector. */
#define _mm_maskz_shufflelo_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
      (__v8hi)_mm_setzero_si128()))
1858
/* Merge-masked _mm256_shufflelo_epi16: expands to __builtin_ia32_selectw_256
 * over the mask U, the shuffle of A by imm, and the fallback vector W. */
#define _mm256_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflelo_epi16((A), (imm)), \
      (__v16hi)(__m256i)(W)))
1864
/* Zero-masked _mm256_shufflelo_epi16: expands to __builtin_ia32_selectw_256
 * over the mask U, the shuffle of A by imm, and an all-zero fallback vector. */
#define _mm256_maskz_shufflelo_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflelo_epi16((A), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
1870
1871static __inline__ __m256i __DEFAULT_FN_ATTRS256
1872_mm256_sllv_epi16(__m256i __A, __m256i __B)
1873{
1874  return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B);
1875}
1876
1877static __inline__ __m256i __DEFAULT_FN_ATTRS256
1878_mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
1879{
1880  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1881                                           (__v16hi)_mm256_sllv_epi16(__A, __B),
1882                                           (__v16hi)__W);
1883}
1884
1885static __inline__ __m256i __DEFAULT_FN_ATTRS256
1886_mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
1887{
1888  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1889                                           (__v16hi)_mm256_sllv_epi16(__A, __B),
1890                                           (__v16hi)_mm256_setzero_si256());
1891}
1892
1893static __inline__ __m128i __DEFAULT_FN_ATTRS128
1894_mm_sllv_epi16(__m128i __A, __m128i __B)
1895{
1896  return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B);
1897}
1898
1899static __inline__ __m128i __DEFAULT_FN_ATTRS128
1900_mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1901{
1902  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1903                                             (__v8hi)_mm_sllv_epi16(__A, __B),
1904                                             (__v8hi)__W);
1905}
1906
1907static __inline__ __m128i __DEFAULT_FN_ATTRS128
1908_mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
1909{
1910  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1911                                             (__v8hi)_mm_sllv_epi16(__A, __B),
1912                                             (__v8hi)_mm_setzero_si128());
1913}
1914
1915static __inline__ __m128i __DEFAULT_FN_ATTRS128
1916_mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1917{
1918  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1919                                             (__v8hi)_mm_sll_epi16(__A, __B),
1920                                             (__v8hi)__W);
1921}
1922
1923static __inline__ __m128i __DEFAULT_FN_ATTRS128
1924_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
1925{
1926  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1927                                             (__v8hi)_mm_sll_epi16(__A, __B),
1928                                             (__v8hi)_mm_setzero_si128());
1929}
1930
1931static __inline__ __m256i __DEFAULT_FN_ATTRS256
1932_mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
1933{
1934  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1935                                          (__v16hi)_mm256_sll_epi16(__A, __B),
1936                                          (__v16hi)__W);
1937}
1938
1939static __inline__ __m256i __DEFAULT_FN_ATTRS256
1940_mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
1941{
1942  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1943                                          (__v16hi)_mm256_sll_epi16(__A, __B),
1944                                          (__v16hi)_mm256_setzero_si256());
1945}
1946
1947static __inline__ __m128i __DEFAULT_FN_ATTRS128
1948_mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
1949{
1950  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1951                                             (__v8hi)_mm_slli_epi16(__A, (int)__B),
1952                                             (__v8hi)__W);
1953}
1954
1955static __inline__ __m128i __DEFAULT_FN_ATTRS128
1956_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
1957{
1958  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1959                                             (__v8hi)_mm_slli_epi16(__A, (int)__B),
1960                                             (__v8hi)_mm_setzero_si128());
1961}
1962
1963static __inline__ __m256i __DEFAULT_FN_ATTRS256
1964_mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A,
1965                       unsigned int __B)
1966{
1967  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1968                                         (__v16hi)_mm256_slli_epi16(__A, (int)__B),
1969                                         (__v16hi)__W);
1970}
1971
1972static __inline__ __m256i __DEFAULT_FN_ATTRS256
1973_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
1974{
1975  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1976                                         (__v16hi)_mm256_slli_epi16(__A, (int)__B),
1977                                         (__v16hi)_mm256_setzero_si256());
1978}
1979
1980static __inline__ __m256i __DEFAULT_FN_ATTRS256
1981_mm256_srlv_epi16(__m256i __A, __m256i __B)
1982{
1983  return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B);
1984}
1985
1986static __inline__ __m256i __DEFAULT_FN_ATTRS256
1987_mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
1988{
1989  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1990                                           (__v16hi)_mm256_srlv_epi16(__A, __B),
1991                                           (__v16hi)__W);
1992}
1993
1994static __inline__ __m256i __DEFAULT_FN_ATTRS256
1995_mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
1996{
1997  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1998                                           (__v16hi)_mm256_srlv_epi16(__A, __B),
1999                                           (__v16hi)_mm256_setzero_si256());
2000}
2001
2002static __inline__ __m128i __DEFAULT_FN_ATTRS128
2003_mm_srlv_epi16(__m128i __A, __m128i __B)
2004{
2005  return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B);
2006}
2007
2008static __inline__ __m128i __DEFAULT_FN_ATTRS128
2009_mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2010{
2011  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2012                                             (__v8hi)_mm_srlv_epi16(__A, __B),
2013                                             (__v8hi)__W);
2014}
2015
2016static __inline__ __m128i __DEFAULT_FN_ATTRS128
2017_mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
2018{
2019  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2020                                             (__v8hi)_mm_srlv_epi16(__A, __B),
2021                                             (__v8hi)_mm_setzero_si128());
2022}
2023
2024static __inline__ __m256i __DEFAULT_FN_ATTRS256
2025_mm256_srav_epi16(__m256i __A, __m256i __B)
2026{
2027  return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B);
2028}
2029
2030static __inline__ __m256i __DEFAULT_FN_ATTRS256
2031_mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
2032{
2033  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2034                                           (__v16hi)_mm256_srav_epi16(__A, __B),
2035                                           (__v16hi)__W);
2036}
2037
2038static __inline__ __m256i __DEFAULT_FN_ATTRS256
2039_mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
2040{
2041  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2042                                           (__v16hi)_mm256_srav_epi16(__A, __B),
2043                                           (__v16hi)_mm256_setzero_si256());
2044}
2045
2046static __inline__ __m128i __DEFAULT_FN_ATTRS128
2047_mm_srav_epi16(__m128i __A, __m128i __B)
2048{
2049  return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B);
2050}
2051
2052static __inline__ __m128i __DEFAULT_FN_ATTRS128
2053_mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2054{
2055  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2056                                             (__v8hi)_mm_srav_epi16(__A, __B),
2057                                             (__v8hi)__W);
2058}
2059
2060static __inline__ __m128i __DEFAULT_FN_ATTRS128
2061_mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B)
2062{
2063  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2064                                             (__v8hi)_mm_srav_epi16(__A, __B),
2065                                             (__v8hi)_mm_setzero_si128());
2066}
2067
2068static __inline__ __m128i __DEFAULT_FN_ATTRS128
2069_mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2070{
2071  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2072                                             (__v8hi)_mm_sra_epi16(__A, __B),
2073                                             (__v8hi)__W);
2074}
2075
2076static __inline__ __m128i __DEFAULT_FN_ATTRS128
2077_mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B)
2078{
2079  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2080                                             (__v8hi)_mm_sra_epi16(__A, __B),
2081                                             (__v8hi)_mm_setzero_si128());
2082}
2083
2084static __inline__ __m256i __DEFAULT_FN_ATTRS256
2085_mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
2086{
2087  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2088                                          (__v16hi)_mm256_sra_epi16(__A, __B),
2089                                          (__v16hi)__W);
2090}
2091
2092static __inline__ __m256i __DEFAULT_FN_ATTRS256
2093_mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
2094{
2095  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2096                                          (__v16hi)_mm256_sra_epi16(__A, __B),
2097                                          (__v16hi)_mm256_setzero_si256());
2098}
2099
2100static __inline__ __m128i __DEFAULT_FN_ATTRS128
2101_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
2102{
2103  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2104                                             (__v8hi)_mm_srai_epi16(__A, (int)__B),
2105                                             (__v8hi)__W);
2106}
2107
2108static __inline__ __m128i __DEFAULT_FN_ATTRS128
2109_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B)
2110{
2111  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2112                                             (__v8hi)_mm_srai_epi16(__A, (int)__B),
2113                                             (__v8hi)_mm_setzero_si128());
2114}
2115
2116static __inline__ __m256i __DEFAULT_FN_ATTRS256
2117_mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A,
2118                       unsigned int __B)
2119{
2120  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2121                                         (__v16hi)_mm256_srai_epi16(__A, (int)__B),
2122                                         (__v16hi)__W);
2123}
2124
2125static __inline__ __m256i __DEFAULT_FN_ATTRS256
2126_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
2127{
2128  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2129                                         (__v16hi)_mm256_srai_epi16(__A, (int)__B),
2130                                         (__v16hi)_mm256_setzero_si256());
2131}
2132
2133static __inline__ __m128i __DEFAULT_FN_ATTRS128
2134_mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2135{
2136  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2137                                             (__v8hi)_mm_srl_epi16(__A, __B),
2138                                             (__v8hi)__W);
2139}
2140
2141static __inline__ __m128i __DEFAULT_FN_ATTRS128
2142_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2143{
2144  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2145                                             (__v8hi)_mm_srl_epi16(__A, __B),
2146                                             (__v8hi)_mm_setzero_si128());
2147}
2148
2149static __inline__ __m256i __DEFAULT_FN_ATTRS256
2150_mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
2151{
2152  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2153                                          (__v16hi)_mm256_srl_epi16(__A, __B),
2154                                          (__v16hi)__W);
2155}
2156
2157static __inline__ __m256i __DEFAULT_FN_ATTRS256
2158_mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
2159{
2160  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2161                                          (__v16hi)_mm256_srl_epi16(__A, __B),
2162                                          (__v16hi)_mm256_setzero_si256());
2163}
2164
2165static __inline__ __m128i __DEFAULT_FN_ATTRS128
2166_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
2167{
2168  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2169                                             (__v8hi)_mm_srli_epi16(__A, __B),
2170                                             (__v8hi)__W);
2171}
2172
2173static __inline__ __m128i __DEFAULT_FN_ATTRS128
2174_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B)
2175{
2176  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2177                                             (__v8hi)_mm_srli_epi16(__A, __B),
2178                                             (__v8hi)_mm_setzero_si128());
2179}
2180
2181static __inline__ __m256i __DEFAULT_FN_ATTRS256
2182_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
2183{
2184  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2185                                         (__v16hi)_mm256_srli_epi16(__A, __B),
2186                                         (__v16hi)__W);
2187}
2188
2189static __inline__ __m256i __DEFAULT_FN_ATTRS256
2190_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
2191{
2192  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2193                                         (__v16hi)_mm256_srli_epi16(__A, __B),
2194                                         (__v16hi)_mm256_setzero_si256());
2195}
2196
2197static __inline__ __m128i __DEFAULT_FN_ATTRS128
2198_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
2199{
2200  return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
2201                (__v8hi) __A,
2202                (__v8hi) __W);
2203}
2204
2205static __inline__ __m128i __DEFAULT_FN_ATTRS128
2206_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
2207{
2208  return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
2209                (__v8hi) __A,
2210                (__v8hi) _mm_setzero_si128 ());
2211}
2212
2213static __inline__ __m256i __DEFAULT_FN_ATTRS256
2214_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
2215{
2216  return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
2217                (__v16hi) __A,
2218                (__v16hi) __W);
2219}
2220
2221static __inline__ __m256i __DEFAULT_FN_ATTRS256
2222_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
2223{
2224  return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
2225                (__v16hi) __A,
2226                (__v16hi) _mm256_setzero_si256 ());
2227}
2228
2229static __inline__ __m128i __DEFAULT_FN_ATTRS128
2230_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
2231{
2232  return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
2233                (__v16qi) __A,
2234                (__v16qi) __W);
2235}
2236
2237static __inline__ __m128i __DEFAULT_FN_ATTRS128
2238_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
2239{
2240  return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
2241                (__v16qi) __A,
2242                (__v16qi) _mm_setzero_si128 ());
2243}
2244
2245static __inline__ __m256i __DEFAULT_FN_ATTRS256
2246_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
2247{
2248  return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
2249                (__v32qi) __A,
2250                (__v32qi) __W);
2251}
2252
2253static __inline__ __m256i __DEFAULT_FN_ATTRS256
2254_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
2255{
2256  return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
2257                (__v32qi) __A,
2258                (__v32qi) _mm256_setzero_si256 ());
2259}
2260
2261
2262static __inline__ __m128i __DEFAULT_FN_ATTRS128
2263_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
2264{
2265  return (__m128i) __builtin_ia32_selectb_128(__M,
2266                                              (__v16qi) _mm_set1_epi8(__A),
2267                                              (__v16qi) __O);
2268}
2269
2270static __inline__ __m128i __DEFAULT_FN_ATTRS128
2271_mm_maskz_set1_epi8 (__mmask16 __M, char __A)
2272{
2273 return (__m128i) __builtin_ia32_selectb_128(__M,
2274                                             (__v16qi) _mm_set1_epi8(__A),
2275                                             (__v16qi) _mm_setzero_si128());
2276}
2277
2278static __inline__ __m256i __DEFAULT_FN_ATTRS256
2279_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
2280{
2281  return (__m256i) __builtin_ia32_selectb_256(__M,
2282                                              (__v32qi) _mm256_set1_epi8(__A),
2283                                              (__v32qi) __O);
2284}
2285
2286static __inline__ __m256i __DEFAULT_FN_ATTRS256
2287_mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
2288{
2289  return (__m256i) __builtin_ia32_selectb_256(__M,
2290                                              (__v32qi) _mm256_set1_epi8(__A),
2291                                              (__v32qi) _mm256_setzero_si256());
2292}
2293
2294static __inline __m128i __DEFAULT_FN_ATTRS128
2295_mm_loadu_epi16 (void const *__P)
2296{
2297  struct __loadu_epi16 {
2298    __m128i_u __v;
2299  } __attribute__((__packed__, __may_alias__));
2300  return ((const struct __loadu_epi16*)__P)->__v;
2301}
2302
2303static __inline__ __m128i __DEFAULT_FN_ATTRS128
2304_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
2305{
2306  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
2307                 (__v8hi) __W,
2308                 (__mmask8) __U);
2309}
2310
2311static __inline__ __m128i __DEFAULT_FN_ATTRS128
2312_mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
2313{
2314  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
2315                 (__v8hi)
2316                 _mm_setzero_si128 (),
2317                 (__mmask8) __U);
2318}
2319
2320static __inline __m256i __DEFAULT_FN_ATTRS256
2321_mm256_loadu_epi16 (void const *__P)
2322{
2323  struct __loadu_epi16 {
2324    __m256i_u __v;
2325  } __attribute__((__packed__, __may_alias__));
2326  return ((const struct __loadu_epi16*)__P)->__v;
2327}
2328
2329static __inline__ __m256i __DEFAULT_FN_ATTRS256
2330_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
2331{
2332  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
2333                 (__v16hi) __W,
2334                 (__mmask16) __U);
2335}
2336
2337static __inline__ __m256i __DEFAULT_FN_ATTRS256
2338_mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
2339{
2340  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
2341                 (__v16hi)
2342                 _mm256_setzero_si256 (),
2343                 (__mmask16) __U);
2344}
2345
2346static __inline __m128i __DEFAULT_FN_ATTRS128
2347_mm_loadu_epi8 (void const *__P)
2348{
2349  struct __loadu_epi8 {
2350    __m128i_u __v;
2351  } __attribute__((__packed__, __may_alias__));
2352  return ((const struct __loadu_epi8*)__P)->__v;
2353}
2354
2355static __inline__ __m128i __DEFAULT_FN_ATTRS128
2356_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
2357{
2358  return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
2359                 (__v16qi) __W,
2360                 (__mmask16) __U);
2361}
2362
2363static __inline__ __m128i __DEFAULT_FN_ATTRS128
2364_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
2365{
2366  return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
2367                 (__v16qi)
2368                 _mm_setzero_si128 (),
2369                 (__mmask16) __U);
2370}
2371
2372static __inline __m256i __DEFAULT_FN_ATTRS256
2373_mm256_loadu_epi8 (void const *__P)
2374{
2375  struct __loadu_epi8 {
2376    __m256i_u __v;
2377  } __attribute__((__packed__, __may_alias__));
2378  return ((const struct __loadu_epi8*)__P)->__v;
2379}
2380
2381static __inline__ __m256i __DEFAULT_FN_ATTRS256
2382_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
2383{
2384  return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P,
2385                 (__v32qi) __W,
2386                 (__mmask32) __U);
2387}
2388
2389static __inline__ __m256i __DEFAULT_FN_ATTRS256
2390_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
2391{
2392  return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P,
2393                 (__v32qi)
2394                 _mm256_setzero_si256 (),
2395                 (__mmask32) __U);
2396}
2397
2398static __inline void __DEFAULT_FN_ATTRS128
2399_mm_storeu_epi16 (void *__P, __m128i __A)
2400{
2401  struct __storeu_epi16 {
2402    __m128i_u __v;
2403  } __attribute__((__packed__, __may_alias__));
2404  ((struct __storeu_epi16*)__P)->__v = __A;
2405}
2406
2407static __inline__ void __DEFAULT_FN_ATTRS128
2408_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
2409{
2410  __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
2411             (__v8hi) __A,
2412             (__mmask8) __U);
2413}
2414
2415static __inline void __DEFAULT_FN_ATTRS256
2416_mm256_storeu_epi16 (void *__P, __m256i __A)
2417{
2418  struct __storeu_epi16 {
2419    __m256i_u __v;
2420  } __attribute__((__packed__, __may_alias__));
2421  ((struct __storeu_epi16*)__P)->__v = __A;
2422}
2423
2424static __inline__ void __DEFAULT_FN_ATTRS256
2425_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
2426{
2427  __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
2428             (__v16hi) __A,
2429             (__mmask16) __U);
2430}
2431
2432static __inline void __DEFAULT_FN_ATTRS128
2433_mm_storeu_epi8 (void *__P, __m128i __A)
2434{
2435  struct __storeu_epi8 {
2436    __m128i_u __v;
2437  } __attribute__((__packed__, __may_alias__));
2438  ((struct __storeu_epi8*)__P)->__v = __A;
2439}
2440
2441static __inline__ void __DEFAULT_FN_ATTRS128
2442_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
2443{
2444  __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
2445             (__v16qi) __A,
2446             (__mmask16) __U);
2447}
2448
2449static __inline void __DEFAULT_FN_ATTRS256
2450_mm256_storeu_epi8 (void *__P, __m256i __A)
2451{
2452  struct __storeu_epi8 {
2453    __m256i_u __v;
2454  } __attribute__((__packed__, __may_alias__));
2455  ((struct __storeu_epi8*)__P)->__v = __A;
2456}
2457
2458static __inline__ void __DEFAULT_FN_ATTRS256
2459_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
2460{
2461  __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
2462             (__v32qi) __A,
2463             (__mmask32) __U);
2464}
2465
2466static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2467_mm_test_epi8_mask (__m128i __A, __m128i __B)
2468{
2469  return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128());
2470}
2471
2472static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2473_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
2474{
2475  return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
2476                                    _mm_setzero_si128());
2477}
2478
2479static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2480_mm256_test_epi8_mask (__m256i __A, __m256i __B)
2481{
2482  return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
2483                                  _mm256_setzero_si256());
2484}
2485
2486static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2487_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
2488{
2489  return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B),
2490                                       _mm256_setzero_si256());
2491}
2492
2493static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2494_mm_test_epi16_mask (__m128i __A, __m128i __B)
2495{
2496  return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
2497}
2498
2499static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2500_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
2501{
2502  return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B),
2503                                     _mm_setzero_si128());
2504}
2505
2506static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2507_mm256_test_epi16_mask (__m256i __A, __m256i __B)
2508{
2509  return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B),
2510                                   _mm256_setzero_si256 ());
2511}
2512
2513static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2514_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
2515{
2516  return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B),
2517                                        _mm256_setzero_si256());
2518}
2519
2520static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2521_mm_testn_epi8_mask (__m128i __A, __m128i __B)
2522{
2523  return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
2524}
2525
2526static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2527_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
2528{
2529  return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B),
2530                                  _mm_setzero_si128());
2531}
2532
2533static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2534_mm256_testn_epi8_mask (__m256i __A, __m256i __B)
2535{
2536  return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B),
2537                                 _mm256_setzero_si256());
2538}
2539
2540static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2541_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
2542{
2543  return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B),
2544                                      _mm256_setzero_si256());
2545}
2546
2547static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2548_mm_testn_epi16_mask (__m128i __A, __m128i __B)
2549{
2550  return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
2551}
2552
2553static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2554_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
2555{
2556  return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_si128());
2557}
2558
2559static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2560_mm256_testn_epi16_mask (__m256i __A, __m256i __B)
2561{
2562  return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B),
2563                                  _mm256_setzero_si256());
2564}
2565
2566static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2567_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
2568{
2569  return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B),
2570                                       _mm256_setzero_si256());
2571}
2572
2573static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2574_mm_movepi8_mask (__m128i __A)
2575{
2576  return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
2577}
2578
2579static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2580_mm256_movepi8_mask (__m256i __A)
2581{
2582  return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
2583}
2584
2585static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2586_mm_movepi16_mask (__m128i __A)
2587{
2588  return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
2589}
2590
2591static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2592_mm256_movepi16_mask (__m256i __A)
2593{
2594  return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
2595}
2596
2597static __inline__ __m128i __DEFAULT_FN_ATTRS128
2598_mm_movm_epi8 (__mmask16 __A)
2599{
2600  return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
2601}
2602
2603static __inline__ __m256i __DEFAULT_FN_ATTRS256
2604_mm256_movm_epi8 (__mmask32 __A)
2605{
2606  return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
2607}
2608
2609static __inline__ __m128i __DEFAULT_FN_ATTRS128
2610_mm_movm_epi16 (__mmask8 __A)
2611{
2612  return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
2613}
2614
2615static __inline__ __m256i __DEFAULT_FN_ATTRS256
2616_mm256_movm_epi16 (__mmask16 __A)
2617{
2618  return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
2619}
2620
2621static __inline__ __m128i __DEFAULT_FN_ATTRS128
2622_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
2623{
2624  return (__m128i)__builtin_ia32_selectb_128(__M,
2625                                             (__v16qi) _mm_broadcastb_epi8(__A),
2626                                             (__v16qi) __O);
2627}
2628
2629static __inline__ __m128i __DEFAULT_FN_ATTRS128
2630_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
2631{
2632  return (__m128i)__builtin_ia32_selectb_128(__M,
2633                                             (__v16qi) _mm_broadcastb_epi8(__A),
2634                                             (__v16qi) _mm_setzero_si128());
2635}
2636
2637static __inline__ __m256i __DEFAULT_FN_ATTRS256
2638_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
2639{
2640  return (__m256i)__builtin_ia32_selectb_256(__M,
2641                                             (__v32qi) _mm256_broadcastb_epi8(__A),
2642                                             (__v32qi) __O);
2643}
2644
2645static __inline__ __m256i __DEFAULT_FN_ATTRS256
2646_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
2647{
2648  return (__m256i)__builtin_ia32_selectb_256(__M,
2649                                             (__v32qi) _mm256_broadcastb_epi8(__A),
2650                                             (__v32qi) _mm256_setzero_si256());
2651}
2652
2653static __inline__ __m128i __DEFAULT_FN_ATTRS128
2654_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2655{
2656  return (__m128i)__builtin_ia32_selectw_128(__M,
2657                                             (__v8hi) _mm_broadcastw_epi16(__A),
2658                                             (__v8hi) __O);
2659}
2660
2661static __inline__ __m128i __DEFAULT_FN_ATTRS128
2662_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
2663{
2664  return (__m128i)__builtin_ia32_selectw_128(__M,
2665                                             (__v8hi) _mm_broadcastw_epi16(__A),
2666                                             (__v8hi) _mm_setzero_si128());
2667}
2668
2669static __inline__ __m256i __DEFAULT_FN_ATTRS256
2670_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
2671{
2672  return (__m256i)__builtin_ia32_selectw_256(__M,
2673                                             (__v16hi) _mm256_broadcastw_epi16(__A),
2674                                             (__v16hi) __O);
2675}
2676
2677static __inline__ __m256i __DEFAULT_FN_ATTRS256
2678_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
2679{
2680  return (__m256i)__builtin_ia32_selectw_256(__M,
2681                                             (__v16hi) _mm256_broadcastw_epi16(__A),
2682                                             (__v16hi) _mm256_setzero_si256());
2683}
2684
2685static __inline__ __m256i __DEFAULT_FN_ATTRS256
2686_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
2687{
2688  return (__m256i) __builtin_ia32_selectw_256 (__M,
2689                                               (__v16hi) _mm256_set1_epi16(__A),
2690                                               (__v16hi) __O);
2691}
2692
2693static __inline__ __m256i __DEFAULT_FN_ATTRS256
2694_mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
2695{
2696  return (__m256i) __builtin_ia32_selectw_256(__M,
2697                                              (__v16hi)_mm256_set1_epi16(__A),
2698                                              (__v16hi) _mm256_setzero_si256());
2699}
2700
2701static __inline__ __m128i __DEFAULT_FN_ATTRS128
2702_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
2703{
2704  return (__m128i) __builtin_ia32_selectw_128(__M,
2705                                              (__v8hi) _mm_set1_epi16(__A),
2706                                              (__v8hi) __O);
2707}
2708
2709static __inline__ __m128i __DEFAULT_FN_ATTRS128
2710_mm_maskz_set1_epi16 (__mmask8 __M, short __A)
2711{
2712  return (__m128i) __builtin_ia32_selectw_128(__M,
2713                                              (__v8hi) _mm_set1_epi16(__A),
2714                                              (__v8hi) _mm_setzero_si128());
2715}
2716
2717static __inline__ __m128i __DEFAULT_FN_ATTRS128
2718_mm_permutexvar_epi16 (__m128i __A, __m128i __B)
2719{
2720  return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A);
2721}
2722
2723static __inline__ __m128i __DEFAULT_FN_ATTRS128
2724_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
2725{
2726  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
2727                                        (__v8hi)_mm_permutexvar_epi16(__A, __B),
2728                                        (__v8hi) _mm_setzero_si128());
2729}
2730
2731static __inline__ __m128i __DEFAULT_FN_ATTRS128
2732_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
2733          __m128i __B)
2734{
2735  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
2736                                        (__v8hi)_mm_permutexvar_epi16(__A, __B),
2737                                        (__v8hi)__W);
2738}
2739
2740static __inline__ __m256i __DEFAULT_FN_ATTRS256
2741_mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
2742{
2743  return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A);
2744}
2745
2746static __inline__ __m256i __DEFAULT_FN_ATTRS256
2747_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
2748        __m256i __B)
2749{
2750  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
2751                                    (__v16hi)_mm256_permutexvar_epi16(__A, __B),
2752                                    (__v16hi)_mm256_setzero_si256());
2753}
2754
2755static __inline__ __m256i __DEFAULT_FN_ATTRS256
2756_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
2757             __m256i __B)
2758{
2759  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
2760                                    (__v16hi)_mm256_permutexvar_epi16(__A, __B),
2761                                    (__v16hi)__W);
2762}
2763
/* Merge-masked _mm_alignr_epi8(A, B, N): bytes whose bit in U is set take the
 * aligned result; the remaining bytes are copied from W. */
#define _mm_mask_alignr_epi8(W, U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128( \
      (__mmask16)(U), \
      (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
      (__v16qi)(__m128i)(W)))
2768
/* Zero-masked _mm_alignr_epi8(A, B, N): bytes whose bit in U is set take the
 * aligned result; the remaining bytes are zero. */
#define _mm_maskz_alignr_epi8(U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128( \
      (__mmask16)(U), \
      (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
      (__v16qi)_mm_setzero_si128()))
2773
/* Merge-masked _mm256_alignr_epi8(A, B, N): bytes whose bit in U is set take
 * the aligned result; the remaining bytes are copied from W. */
#define _mm256_mask_alignr_epi8(W, U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256( \
      (__mmask32)(U), \
      (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
      (__v32qi)(__m256i)(W)))
2778
/* Zero-masked _mm256_alignr_epi8(A, B, N): bytes whose bit in U is set take
 * the aligned result; the remaining bytes are zero. */
#define _mm256_maskz_alignr_epi8(U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256( \
      (__mmask32)(U), \
      (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
      (__v32qi)_mm256_setzero_si256()))
2783
/* Wraps __builtin_ia32_dbpsadbw128: A and B are passed as sixteen-byte
 * vectors and imm as an int control value. */
#define _mm_dbsad_epu8(A, B, imm) \
  ((__m128i)__builtin_ia32_dbpsadbw128( \
      (__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(imm)))
2787
/* Merge-masked _mm_dbsad_epu8(A, B, imm): 16-bit lanes whose bit in U is set
 * take the computed value; the remaining lanes are copied from W. */
#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
      (__v8hi)(__m128i)(W)))
2792
/* Zero-masked _mm_dbsad_epu8(A, B, imm): 16-bit lanes whose bit in U is set
 * take the computed value; the remaining lanes are zero. */
#define _mm_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
      (__v8hi)_mm_setzero_si128()))
2797
/* Wraps __builtin_ia32_dbpsadbw256: A and B are passed as thirty-two-byte
 * vectors and imm as an int control value. */
#define _mm256_dbsad_epu8(A, B, imm) \
  ((__m256i)__builtin_ia32_dbpsadbw256( \
      (__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(imm)))
2801
/* Merge-masked _mm256_dbsad_epu8(A, B, imm): 16-bit lanes whose bit in U is
 * set take the computed value; the remaining lanes are copied from W. */
#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
      (__v16hi)(__m256i)(W)))
2806
/* Zero-masked _mm256_dbsad_epu8(A, B, imm): 16-bit lanes whose bit in U is
 * set take the computed value; the remaining lanes are zero. */
#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
2811
2812static __inline__ short __DEFAULT_FN_ATTRS128
2813_mm_reduce_add_epi16(__m128i __W) {
2814  return __builtin_reduce_add((__v8hi)__W);
2815}
2816
2817static __inline__ short __DEFAULT_FN_ATTRS128
2818_mm_reduce_mul_epi16(__m128i __W) {
2819  return __builtin_reduce_mul((__v8hi)__W);
2820}
2821
2822static __inline__ short __DEFAULT_FN_ATTRS128
2823_mm_reduce_and_epi16(__m128i __W) {
2824  return __builtin_reduce_and((__v8hi)__W);
2825}
2826
2827static __inline__ short __DEFAULT_FN_ATTRS128
2828_mm_reduce_or_epi16(__m128i __W) {
2829  return __builtin_reduce_or((__v8hi)__W);
2830}
2831
2832static __inline__ short __DEFAULT_FN_ATTRS128
2833_mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) {
2834  __W = _mm_maskz_mov_epi16(__M, __W);
2835  return __builtin_reduce_add((__v8hi)__W);
2836}
2837
2838static __inline__ short __DEFAULT_FN_ATTRS128
2839_mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) {
2840  __W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W);
2841  return __builtin_reduce_mul((__v8hi)__W);
2842}
2843
2844static __inline__ short __DEFAULT_FN_ATTRS128
2845_mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) {
2846  __W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W);
2847  return __builtin_reduce_and((__v8hi)__W);
2848}
2849
2850static __inline__ short __DEFAULT_FN_ATTRS128
2851_mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) {
2852  __W = _mm_maskz_mov_epi16(__M, __W);
2853  return __builtin_reduce_or((__v8hi)__W);
2854}
2855
2856static __inline__ short __DEFAULT_FN_ATTRS128
2857_mm_reduce_max_epi16(__m128i __V) {
2858  return __builtin_reduce_max((__v8hi)__V);
2859}
2860
2861static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2862_mm_reduce_max_epu16(__m128i __V) {
2863  return __builtin_reduce_max((__v8hu)__V);
2864}
2865
2866static __inline__ short __DEFAULT_FN_ATTRS128
2867_mm_reduce_min_epi16(__m128i __V) {
2868  return __builtin_reduce_min((__v8hi)__V);
2869}
2870
2871static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2872_mm_reduce_min_epu16(__m128i __V) {
2873  return __builtin_reduce_min((__v8hu)__V);
2874}
2875
2876static __inline__ short __DEFAULT_FN_ATTRS128
2877_mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) {
2878  __V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V);
2879  return __builtin_reduce_max((__v8hi)__V);
2880}
2881
2882static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2883_mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) {
2884  __V = _mm_maskz_mov_epi16(__M, __V);
2885  return __builtin_reduce_max((__v8hu)__V);
2886}
2887
2888static __inline__ short __DEFAULT_FN_ATTRS128
2889_mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) {
2890  __V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V);
2891  return __builtin_reduce_min((__v8hi)__V);
2892}
2893
2894static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2895_mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) {
2896  __V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V);
2897  return __builtin_reduce_min((__v8hu)__V);
2898}
2899
2900static __inline__ short __DEFAULT_FN_ATTRS256
2901_mm256_reduce_add_epi16(__m256i __W) {
2902  return __builtin_reduce_add((__v16hi)__W);
2903}
2904
2905static __inline__ short __DEFAULT_FN_ATTRS256
2906_mm256_reduce_mul_epi16(__m256i __W) {
2907  return __builtin_reduce_mul((__v16hi)__W);
2908}
2909
2910static __inline__ short __DEFAULT_FN_ATTRS256
2911_mm256_reduce_and_epi16(__m256i __W) {
2912  return __builtin_reduce_and((__v16hi)__W);
2913}
2914
2915static __inline__ short __DEFAULT_FN_ATTRS256
2916_mm256_reduce_or_epi16(__m256i __W) {
2917  return __builtin_reduce_or((__v16hi)__W);
2918}
2919
2920static __inline__ short __DEFAULT_FN_ATTRS256
2921_mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) {
2922  __W = _mm256_maskz_mov_epi16(__M, __W);
2923  return __builtin_reduce_add((__v16hi)__W);
2924}
2925
2926static __inline__ short __DEFAULT_FN_ATTRS256
2927_mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) {
2928  __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W);
2929  return __builtin_reduce_mul((__v16hi)__W);
2930}
2931
2932static __inline__ short __DEFAULT_FN_ATTRS256
2933_mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) {
2934  __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W);
2935  return __builtin_reduce_and((__v16hi)__W);
2936}
2937
2938static __inline__ short __DEFAULT_FN_ATTRS256
2939_mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) {
2940  __W = _mm256_maskz_mov_epi16(__M, __W);
2941  return __builtin_reduce_or((__v16hi)__W);
2942}
2943
2944static __inline__ short __DEFAULT_FN_ATTRS256
2945_mm256_reduce_max_epi16(__m256i __V) {
2946  return __builtin_reduce_max((__v16hi)__V);
2947}
2948
2949static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2950_mm256_reduce_max_epu16(__m256i __V) {
2951  return __builtin_reduce_max((__v16hu)__V);
2952}
2953
2954static __inline__ short __DEFAULT_FN_ATTRS256
2955_mm256_reduce_min_epi16(__m256i __V) {
2956  return __builtin_reduce_min((__v16hi)__V);
2957}
2958
2959static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2960_mm256_reduce_min_epu16(__m256i __V) {
2961  return __builtin_reduce_min((__v16hu)__V);
2962}
2963
2964static __inline__ short __DEFAULT_FN_ATTRS256
2965_mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) {
2966  __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V);
2967  return __builtin_reduce_max((__v16hi)__V);
2968}
2969
2970static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2971_mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) {
2972  __V = _mm256_maskz_mov_epi16(__M, __V);
2973  return __builtin_reduce_max((__v16hu)__V);
2974}
2975
2976static __inline__ short __DEFAULT_FN_ATTRS256
2977_mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) {
2978  __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V);
2979  return __builtin_reduce_min((__v16hi)__V);
2980}
2981
2982static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2983_mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) {
2984  __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V);
2985  return __builtin_reduce_min((__v16hu)__V);
2986}
2987
2988static __inline__ signed char __DEFAULT_FN_ATTRS128
2989_mm_reduce_add_epi8(__m128i __W) {
2990  return __builtin_reduce_add((__v16qs)__W);
2991}
2992
2993static __inline__ signed char __DEFAULT_FN_ATTRS128
2994_mm_reduce_mul_epi8(__m128i __W) {
2995  return __builtin_reduce_mul((__v16qs)__W);
2996}
2997
2998static __inline__ signed char __DEFAULT_FN_ATTRS128
2999_mm_reduce_and_epi8(__m128i __W) {
3000  return __builtin_reduce_and((__v16qs)__W);
3001}
3002
3003static __inline__ signed char __DEFAULT_FN_ATTRS128
3004_mm_reduce_or_epi8(__m128i __W) {
3005  return __builtin_reduce_or((__v16qs)__W);
3006}
3007
3008static __inline__ signed char __DEFAULT_FN_ATTRS128
3009_mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) {
3010  __W = _mm_maskz_mov_epi8(__M, __W);
3011  return __builtin_reduce_add((__v16qs)__W);
3012}
3013
3014static __inline__ signed char __DEFAULT_FN_ATTRS128
3015_mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) {
3016  __W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W);
3017  return __builtin_reduce_mul((__v16qs)__W);
3018}
3019
3020static __inline__ signed char __DEFAULT_FN_ATTRS128
3021_mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) {
3022  __W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W);
3023  return __builtin_reduce_and((__v16qs)__W);
3024}
3025
3026static __inline__ signed char __DEFAULT_FN_ATTRS128
3027_mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) {
3028  __W = _mm_maskz_mov_epi8(__M, __W);
3029  return __builtin_reduce_or((__v16qs)__W);
3030}
3031
3032static __inline__ signed char __DEFAULT_FN_ATTRS128
3033_mm_reduce_max_epi8(__m128i __V) {
3034  return __builtin_reduce_max((__v16qs)__V);
3035}
3036
3037static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3038_mm_reduce_max_epu8(__m128i __V) {
3039  return __builtin_reduce_max((__v16qu)__V);
3040}
3041
3042static __inline__ signed char __DEFAULT_FN_ATTRS128
3043_mm_reduce_min_epi8(__m128i __V) {
3044  return __builtin_reduce_min((__v16qs)__V);
3045}
3046
3047static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3048_mm_reduce_min_epu8(__m128i __V) {
3049  return __builtin_reduce_min((__v16qu)__V);
3050}
3051
3052static __inline__ signed char __DEFAULT_FN_ATTRS128
3053_mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) {
3054  __V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V);
3055  return __builtin_reduce_max((__v16qs)__V);
3056}
3057
3058static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3059_mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) {
3060  __V = _mm_maskz_mov_epi8(__M, __V);
3061  return __builtin_reduce_max((__v16qu)__V);
3062}
3063
3064static __inline__ signed char __DEFAULT_FN_ATTRS128
3065_mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) {
3066  __V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V);
3067  return __builtin_reduce_min((__v16qs)__V);
3068}
3069
3070static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3071_mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) {
3072  __V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V);
3073  return __builtin_reduce_min((__v16qu)__V);
3074}
3075
3076static __inline__ signed char __DEFAULT_FN_ATTRS256
3077_mm256_reduce_add_epi8(__m256i __W) {
3078  return __builtin_reduce_add((__v32qs)__W);
3079}
3080
3081static __inline__ signed char __DEFAULT_FN_ATTRS256
3082_mm256_reduce_mul_epi8(__m256i __W) {
3083  return __builtin_reduce_mul((__v32qs)__W);
3084}
3085
3086static __inline__ signed char __DEFAULT_FN_ATTRS256
3087_mm256_reduce_and_epi8(__m256i __W) {
3088  return __builtin_reduce_and((__v32qs)__W);
3089}
3090
3091static __inline__ signed char __DEFAULT_FN_ATTRS256
3092_mm256_reduce_or_epi8(__m256i __W) {
3093  return __builtin_reduce_or((__v32qs)__W);
3094}
3095
3096static __inline__ signed char __DEFAULT_FN_ATTRS256
3097_mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) {
3098  __W = _mm256_maskz_mov_epi8(__M, __W);
3099  return __builtin_reduce_add((__v32qs)__W);
3100}
3101
3102static __inline__ signed char __DEFAULT_FN_ATTRS256
3103_mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) {
3104  __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W);
3105  return __builtin_reduce_mul((__v32qs)__W);
3106}
3107
3108static __inline__ signed char __DEFAULT_FN_ATTRS256
3109_mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) {
3110  __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W);
3111  return __builtin_reduce_and((__v32qs)__W);
3112}
3113
3114static __inline__ signed char __DEFAULT_FN_ATTRS256
3115_mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) {
3116  __W = _mm256_maskz_mov_epi8(__M, __W);
3117  return __builtin_reduce_or((__v32qs)__W);
3118}
3119
3120static __inline__ signed char __DEFAULT_FN_ATTRS256
3121_mm256_reduce_max_epi8(__m256i __V) {
3122  return __builtin_reduce_max((__v32qs)__V);
3123}
3124
3125static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3126_mm256_reduce_max_epu8(__m256i __V) {
3127  return __builtin_reduce_max((__v32qu)__V);
3128}
3129
3130static __inline__ signed char __DEFAULT_FN_ATTRS256
3131_mm256_reduce_min_epi8(__m256i __V) {
3132  return __builtin_reduce_min((__v32qs)__V);
3133}
3134
3135static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3136_mm256_reduce_min_epu8(__m256i __V) {
3137  return __builtin_reduce_min((__v32qu)__V);
3138}
3139
3140static __inline__ signed char __DEFAULT_FN_ATTRS256
3141_mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) {
3142  __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V);
3143  return __builtin_reduce_max((__v32qs)__V);
3144}
3145
3146static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3147_mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) {
3148  __V = _mm256_maskz_mov_epi8(__M, __V);
3149  return __builtin_reduce_max((__v32qu)__V);
3150}
3151
3152static __inline__ signed char __DEFAULT_FN_ATTRS256
3153_mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) {
3154  __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V);
3155  return __builtin_reduce_min((__v32qs)__V);
3156}
3157
3158static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3159_mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) {
3160  __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V);
3161  return __builtin_reduce_min((__v32qu)__V);
3162}
3163
3164#undef __DEFAULT_FN_ATTRS128
3165#undef __DEFAULT_FN_ATTRS256
3166
3167#endif /* __AVX512VLBWINTRIN_H */