master
   1/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
   2 *
   3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 * See https://llvm.org/LICENSE.txt for license information.
   5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 *
   7 *===-----------------------------------------------------------------------===
   8 */
   9
  10#ifndef __IMMINTRIN_H
  11#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
  12#endif
  13
  14#ifndef __AVX512VLINTRIN_H
  15#define __AVX512VLINTRIN_H
  16
  17#define __DEFAULT_FN_ATTRS128                                                  \
  18  __attribute__((__always_inline__, __nodebug__,                               \
  19                 __target__("avx512vl,no-evex512"),                            \
  20                 __min_vector_width__(128)))
  21#define __DEFAULT_FN_ATTRS256                                                  \
  22  __attribute__((__always_inline__, __nodebug__,                               \
  23                 __target__("avx512vl,no-evex512"),                            \
  24                 __min_vector_width__(256)))
  25
  26typedef short __v2hi __attribute__((__vector_size__(4)));
  27typedef char __v4qi __attribute__((__vector_size__(4)));
  28typedef char __v2qi __attribute__((__vector_size__(2)));
  29
  30/* Integer compare */
  31
  32#define _mm_cmpeq_epi32_mask(A, B) \
  33    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
  34#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
  35    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
  36#define _mm_cmpge_epi32_mask(A, B) \
  37    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
  38#define _mm_mask_cmpge_epi32_mask(k, A, B) \
  39    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
  40#define _mm_cmpgt_epi32_mask(A, B) \
  41    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
  42#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
  43    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
  44#define _mm_cmple_epi32_mask(A, B) \
  45    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
  46#define _mm_mask_cmple_epi32_mask(k, A, B) \
  47    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
  48#define _mm_cmplt_epi32_mask(A, B) \
  49    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
  50#define _mm_mask_cmplt_epi32_mask(k, A, B) \
  51    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
  52#define _mm_cmpneq_epi32_mask(A, B) \
  53    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
  54#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
  55    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
  56
  57#define _mm256_cmpeq_epi32_mask(A, B) \
  58    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
  59#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
  60    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
  61#define _mm256_cmpge_epi32_mask(A, B) \
  62    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
  63#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
  64    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
  65#define _mm256_cmpgt_epi32_mask(A, B) \
  66    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
  67#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
  68    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
  69#define _mm256_cmple_epi32_mask(A, B) \
  70    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
  71#define _mm256_mask_cmple_epi32_mask(k, A, B) \
  72    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
  73#define _mm256_cmplt_epi32_mask(A, B) \
  74    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
  75#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
  76    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
  77#define _mm256_cmpneq_epi32_mask(A, B) \
  78    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
  79#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
  80    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
  81
  82#define _mm_cmpeq_epu32_mask(A, B) \
  83    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
  84#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
  85    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
  86#define _mm_cmpge_epu32_mask(A, B) \
  87    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
  88#define _mm_mask_cmpge_epu32_mask(k, A, B) \
  89    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
  90#define _mm_cmpgt_epu32_mask(A, B) \
  91    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
  92#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
  93    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
  94#define _mm_cmple_epu32_mask(A, B) \
  95    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
  96#define _mm_mask_cmple_epu32_mask(k, A, B) \
  97    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
  98#define _mm_cmplt_epu32_mask(A, B) \
  99    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
 100#define _mm_mask_cmplt_epu32_mask(k, A, B) \
 101    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
 102#define _mm_cmpneq_epu32_mask(A, B) \
 103    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
 104#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
 105    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
 106
 107#define _mm256_cmpeq_epu32_mask(A, B) \
 108    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
 109#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
 110    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
 111#define _mm256_cmpge_epu32_mask(A, B) \
 112    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
 113#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
 114    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
 115#define _mm256_cmpgt_epu32_mask(A, B) \
 116    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
 117#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
 118    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
 119#define _mm256_cmple_epu32_mask(A, B) \
 120    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
 121#define _mm256_mask_cmple_epu32_mask(k, A, B) \
 122    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
 123#define _mm256_cmplt_epu32_mask(A, B) \
 124    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
 125#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
 126    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
 127#define _mm256_cmpneq_epu32_mask(A, B) \
 128    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
 129#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
 130    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
 131
 132#define _mm_cmpeq_epi64_mask(A, B) \
 133    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
 134#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
 135    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
 136#define _mm_cmpge_epi64_mask(A, B) \
 137    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
 138#define _mm_mask_cmpge_epi64_mask(k, A, B) \
 139    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
 140#define _mm_cmpgt_epi64_mask(A, B) \
 141    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
 142#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
 143    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
 144#define _mm_cmple_epi64_mask(A, B) \
 145    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
 146#define _mm_mask_cmple_epi64_mask(k, A, B) \
 147    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
 148#define _mm_cmplt_epi64_mask(A, B) \
 149    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
 150#define _mm_mask_cmplt_epi64_mask(k, A, B) \
 151    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
 152#define _mm_cmpneq_epi64_mask(A, B) \
 153    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
 154#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
 155    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
 156
 157#define _mm256_cmpeq_epi64_mask(A, B) \
 158    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
 159#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
 160    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
 161#define _mm256_cmpge_epi64_mask(A, B) \
 162    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
 163#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
 164    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
 165#define _mm256_cmpgt_epi64_mask(A, B) \
 166    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
 167#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
 168    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
 169#define _mm256_cmple_epi64_mask(A, B) \
 170    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
 171#define _mm256_mask_cmple_epi64_mask(k, A, B) \
 172    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
 173#define _mm256_cmplt_epi64_mask(A, B) \
 174    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
 175#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
 176    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
 177#define _mm256_cmpneq_epi64_mask(A, B) \
 178    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
 179#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
 180    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
 181
 182#define _mm_cmpeq_epu64_mask(A, B) \
 183    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
 184#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
 185    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
 186#define _mm_cmpge_epu64_mask(A, B) \
 187    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
 188#define _mm_mask_cmpge_epu64_mask(k, A, B) \
 189    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
 190#define _mm_cmpgt_epu64_mask(A, B) \
 191    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
 192#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
 193    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
 194#define _mm_cmple_epu64_mask(A, B) \
 195    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
 196#define _mm_mask_cmple_epu64_mask(k, A, B) \
 197    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
 198#define _mm_cmplt_epu64_mask(A, B) \
 199    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
 200#define _mm_mask_cmplt_epu64_mask(k, A, B) \
 201    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
 202#define _mm_cmpneq_epu64_mask(A, B) \
 203    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
 204#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
 205    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
 206
 207#define _mm256_cmpeq_epu64_mask(A, B) \
 208    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
 209#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
 210    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
 211#define _mm256_cmpge_epu64_mask(A, B) \
 212    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
 213#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
 214    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
 215#define _mm256_cmpgt_epu64_mask(A, B) \
 216    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
 217#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
 218    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
 219#define _mm256_cmple_epu64_mask(A, B) \
 220    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
 221#define _mm256_mask_cmple_epu64_mask(k, A, B) \
 222    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
 223#define _mm256_cmplt_epu64_mask(A, B) \
 224    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
 225#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
 226    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
 227#define _mm256_cmpneq_epu64_mask(A, B) \
 228    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
 229#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
 230    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
 231
 232static __inline__ __m256i __DEFAULT_FN_ATTRS256
 233_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 234{
 235  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
 236                                             (__v8si)_mm256_add_epi32(__A, __B),
 237                                             (__v8si)__W);
 238}
 239
 240static __inline__ __m256i __DEFAULT_FN_ATTRS256
 241_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 242{
 243  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
 244                                             (__v8si)_mm256_add_epi32(__A, __B),
 245                                             (__v8si)_mm256_setzero_si256());
 246}
 247
 248static __inline__ __m256i __DEFAULT_FN_ATTRS256
 249_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 250{
 251  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
 252                                             (__v4di)_mm256_add_epi64(__A, __B),
 253                                             (__v4di)__W);
 254}
 255
 256static __inline__ __m256i __DEFAULT_FN_ATTRS256
 257_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 258{
 259  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
 260                                             (__v4di)_mm256_add_epi64(__A, __B),
 261                                             (__v4di)_mm256_setzero_si256());
 262}
 263
 264static __inline__ __m256i __DEFAULT_FN_ATTRS256
 265_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 266{
 267  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
 268                                             (__v8si)_mm256_sub_epi32(__A, __B),
 269                                             (__v8si)__W);
 270}
 271
 272static __inline__ __m256i __DEFAULT_FN_ATTRS256
 273_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 274{
 275  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
 276                                             (__v8si)_mm256_sub_epi32(__A, __B),
 277                                             (__v8si)_mm256_setzero_si256());
 278}
 279
 280static __inline__ __m256i __DEFAULT_FN_ATTRS256
 281_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 282{
 283  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
 284                                             (__v4di)_mm256_sub_epi64(__A, __B),
 285                                             (__v4di)__W);
 286}
 287
 288static __inline__ __m256i __DEFAULT_FN_ATTRS256
 289_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 290{
 291  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
 292                                             (__v4di)_mm256_sub_epi64(__A, __B),
 293                                             (__v4di)_mm256_setzero_si256());
 294}
 295
 296static __inline__ __m128i __DEFAULT_FN_ATTRS128
 297_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 298{
 299  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
 300                                             (__v4si)_mm_add_epi32(__A, __B),
 301                                             (__v4si)__W);
 302}
 303
 304static __inline__ __m128i __DEFAULT_FN_ATTRS128
 305_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 306{
 307  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
 308                                             (__v4si)_mm_add_epi32(__A, __B),
 309                                             (__v4si)_mm_setzero_si128());
 310}
 311
 312static __inline__ __m128i __DEFAULT_FN_ATTRS128
 313_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 314{
 315  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
 316                                             (__v2di)_mm_add_epi64(__A, __B),
 317                                             (__v2di)__W);
 318}
 319
 320static __inline__ __m128i __DEFAULT_FN_ATTRS128
 321_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 322{
 323  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
 324                                             (__v2di)_mm_add_epi64(__A, __B),
 325                                             (__v2di)_mm_setzero_si128());
 326}
 327
 328static __inline__ __m128i __DEFAULT_FN_ATTRS128
 329_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 330{
 331  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
 332                                             (__v4si)_mm_sub_epi32(__A, __B),
 333                                             (__v4si)__W);
 334}
 335
 336static __inline__ __m128i __DEFAULT_FN_ATTRS128
 337_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 338{
 339  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
 340                                             (__v4si)_mm_sub_epi32(__A, __B),
 341                                             (__v4si)_mm_setzero_si128());
 342}
 343
 344static __inline__ __m128i __DEFAULT_FN_ATTRS128
 345_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 346{
 347  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
 348                                             (__v2di)_mm_sub_epi64(__A, __B),
 349                                             (__v2di)__W);
 350}
 351
 352static __inline__ __m128i __DEFAULT_FN_ATTRS128
 353_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 354{
 355  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
 356                                             (__v2di)_mm_sub_epi64(__A, __B),
 357                                             (__v2di)_mm_setzero_si128());
 358}
 359
 360static __inline__ __m256i __DEFAULT_FN_ATTRS256
 361_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
 362{
 363  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
 364                                             (__v4di)_mm256_mul_epi32(__X, __Y),
 365                                             (__v4di)__W);
 366}
 367
 368static __inline__ __m256i __DEFAULT_FN_ATTRS256
 369_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
 370{
 371  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
 372                                             (__v4di)_mm256_mul_epi32(__X, __Y),
 373                                             (__v4di)_mm256_setzero_si256());
 374}
 375
 376static __inline__ __m128i __DEFAULT_FN_ATTRS128
 377_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
 378{
 379  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
 380                                             (__v2di)_mm_mul_epi32(__X, __Y),
 381                                             (__v2di)__W);
 382}
 383
 384static __inline__ __m128i __DEFAULT_FN_ATTRS128
 385_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
 386{
 387  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
 388                                             (__v2di)_mm_mul_epi32(__X, __Y),
 389                                             (__v2di)_mm_setzero_si128());
 390}
 391
 392static __inline__ __m256i __DEFAULT_FN_ATTRS256
 393_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
 394{
 395  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
 396                                             (__v4di)_mm256_mul_epu32(__X, __Y),
 397                                             (__v4di)__W);
 398}
 399
 400static __inline__ __m256i __DEFAULT_FN_ATTRS256
 401_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
 402{
 403  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
 404                                             (__v4di)_mm256_mul_epu32(__X, __Y),
 405                                             (__v4di)_mm256_setzero_si256());
 406}
 407
 408static __inline__ __m128i __DEFAULT_FN_ATTRS128
 409_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
 410{
 411  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
 412                                             (__v2di)_mm_mul_epu32(__X, __Y),
 413                                             (__v2di)__W);
 414}
 415
 416static __inline__ __m128i __DEFAULT_FN_ATTRS128
 417_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
 418{
 419  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
 420                                             (__v2di)_mm_mul_epu32(__X, __Y),
 421                                             (__v2di)_mm_setzero_si128());
 422}
 423
 424static __inline__ __m256i __DEFAULT_FN_ATTRS256
 425_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
 426{
 427  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
 428                                             (__v8si)_mm256_mullo_epi32(__A, __B),
 429                                             (__v8si)_mm256_setzero_si256());
 430}
 431
 432static __inline__ __m256i __DEFAULT_FN_ATTRS256
 433_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
 434{
 435  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
 436                                             (__v8si)_mm256_mullo_epi32(__A, __B),
 437                                             (__v8si)__W);
 438}
 439
 440static __inline__ __m128i __DEFAULT_FN_ATTRS128
 441_mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
 442{
 443  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
 444                                             (__v4si)_mm_mullo_epi32(__A, __B),
 445                                             (__v4si)_mm_setzero_si128());
 446}
 447
 448static __inline__ __m128i __DEFAULT_FN_ATTRS128
 449_mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 450{
 451  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
 452                                             (__v4si)_mm_mullo_epi32(__A, __B),
 453                                             (__v4si)__W);
 454}
 455
 456static __inline__ __m256i __DEFAULT_FN_ATTRS256
 457_mm256_and_epi32(__m256i __a, __m256i __b)
 458{
 459  return (__m256i)((__v8su)__a & (__v8su)__b);
 460}
 461
 462static __inline__ __m256i __DEFAULT_FN_ATTRS256
 463_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 464{
 465  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
 466                                             (__v8si)_mm256_and_epi32(__A, __B),
 467                                             (__v8si)__W);
 468}
 469
 470static __inline__ __m256i __DEFAULT_FN_ATTRS256
 471_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 472{
 473  return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
 474}
 475
 476static __inline__ __m128i __DEFAULT_FN_ATTRS128
 477_mm_and_epi32(__m128i __a, __m128i __b)
 478{
 479  return (__m128i)((__v4su)__a & (__v4su)__b);
 480}
 481
 482static __inline__ __m128i __DEFAULT_FN_ATTRS128
 483_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 484{
 485  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
 486                                             (__v4si)_mm_and_epi32(__A, __B),
 487                                             (__v4si)__W);
 488}
 489
 490static __inline__ __m128i __DEFAULT_FN_ATTRS128
 491_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 492{
 493  return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
 494}
 495
 496static __inline__ __m256i __DEFAULT_FN_ATTRS256
 497_mm256_andnot_epi32(__m256i __A, __m256i __B)
 498{
 499  return (__m256i)(~(__v8su)__A & (__v8su)__B);
 500}
 501
 502static __inline__ __m256i __DEFAULT_FN_ATTRS256
 503_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 504{
 505  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
 506                                          (__v8si)_mm256_andnot_epi32(__A, __B),
 507                                          (__v8si)__W);
 508}
 509
 510static __inline__ __m256i __DEFAULT_FN_ATTRS256
 511_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 512{
 513  return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
 514                                           __U, __A, __B);
 515}
 516
 517static __inline__ __m128i __DEFAULT_FN_ATTRS128
 518_mm_andnot_epi32(__m128i __A, __m128i __B)
 519{
 520  return (__m128i)(~(__v4su)__A & (__v4su)__B);
 521}
 522
 523static __inline__ __m128i __DEFAULT_FN_ATTRS128
 524_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 525{
 526  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
 527                                             (__v4si)_mm_andnot_epi32(__A, __B),
 528                                             (__v4si)__W);
 529}
 530
 531static __inline__ __m128i __DEFAULT_FN_ATTRS128
 532_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 533{
 534  return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
 535}
 536
 537static __inline__ __m256i __DEFAULT_FN_ATTRS256
 538_mm256_or_epi32(__m256i __a, __m256i __b)
 539{
 540  return (__m256i)((__v8su)__a | (__v8su)__b);
 541}
 542
 543static __inline__ __m256i __DEFAULT_FN_ATTRS256
 544_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 545{
 546  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
 547                                             (__v8si)_mm256_or_epi32(__A, __B),
 548                                             (__v8si)__W);
 549}
 550
 551static __inline__ __m256i __DEFAULT_FN_ATTRS256
 552_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 553{
 554  return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
 555}
 556
 557static __inline__ __m128i __DEFAULT_FN_ATTRS128
 558_mm_or_epi32(__m128i __a, __m128i __b)
 559{
 560  return (__m128i)((__v4su)__a | (__v4su)__b);
 561}
 562
 563static __inline__ __m128i __DEFAULT_FN_ATTRS128
 564_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 565{
 566  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
 567                                             (__v4si)_mm_or_epi32(__A, __B),
 568                                             (__v4si)__W);
 569}
 570
 571static __inline__ __m128i __DEFAULT_FN_ATTRS128
 572_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 573{
 574  return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
 575}
 576
 577static __inline__ __m256i __DEFAULT_FN_ATTRS256
 578_mm256_xor_epi32(__m256i __a, __m256i __b)
 579{
 580  return (__m256i)((__v8su)__a ^ (__v8su)__b);
 581}
 582
 583static __inline__ __m256i __DEFAULT_FN_ATTRS256
 584_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 585{
 586  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
 587                                             (__v8si)_mm256_xor_epi32(__A, __B),
 588                                             (__v8si)__W);
 589}
 590
 591static __inline__ __m256i __DEFAULT_FN_ATTRS256
 592_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 593{
 594  return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
 595}
 596
 597static __inline__ __m128i __DEFAULT_FN_ATTRS128
 598_mm_xor_epi32(__m128i __a, __m128i __b)
 599{
 600  return (__m128i)((__v4su)__a ^ (__v4su)__b);
 601}
 602
 603static __inline__ __m128i __DEFAULT_FN_ATTRS128
 604_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 605{
 606  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
 607                                             (__v4si)_mm_xor_epi32(__A, __B),
 608                                             (__v4si)__W);
 609}
 610
 611static __inline__ __m128i __DEFAULT_FN_ATTRS128
 612_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 613{
 614  return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
 615}
 616
 617static __inline__ __m256i __DEFAULT_FN_ATTRS256
 618_mm256_and_epi64(__m256i __a, __m256i __b)
 619{
 620  return (__m256i)((__v4du)__a & (__v4du)__b);
 621}
 622
 623static __inline__ __m256i __DEFAULT_FN_ATTRS256
 624_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 625{
 626  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
 627                                             (__v4di)_mm256_and_epi64(__A, __B),
 628                                             (__v4di)__W);
 629}
 630
 631static __inline__ __m256i __DEFAULT_FN_ATTRS256
 632_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 633{
 634  return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
 635}
 636
 637static __inline__ __m128i __DEFAULT_FN_ATTRS128
 638_mm_and_epi64(__m128i __a, __m128i __b)
 639{
 640  return (__m128i)((__v2du)__a & (__v2du)__b);
 641}
 642
 643static __inline__ __m128i __DEFAULT_FN_ATTRS128
 644_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 645{
 646  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
 647                                             (__v2di)_mm_and_epi64(__A, __B),
 648                                             (__v2di)__W);
 649}
 650
 651static __inline__ __m128i __DEFAULT_FN_ATTRS128
 652_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 653{
 654  return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
 655}
 656
 657static __inline__ __m256i __DEFAULT_FN_ATTRS256
 658_mm256_andnot_epi64(__m256i __A, __m256i __B)
 659{
 660  return (__m256i)(~(__v4du)__A & (__v4du)__B);
 661}
 662
 663static __inline__ __m256i __DEFAULT_FN_ATTRS256
 664_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 665{
 666  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
 667                                          (__v4di)_mm256_andnot_epi64(__A, __B),
 668                                          (__v4di)__W);
 669}
 670
 671static __inline__ __m256i __DEFAULT_FN_ATTRS256
 672_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 673{
 674  return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
 675                                           __U, __A, __B);
 676}
 677
 678static __inline__ __m128i __DEFAULT_FN_ATTRS128
 679_mm_andnot_epi64(__m128i __A, __m128i __B)
 680{
 681  return (__m128i)(~(__v2du)__A & (__v2du)__B);
 682}
 683
 684static __inline__ __m128i __DEFAULT_FN_ATTRS128
 685_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 686{
 687  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
 688                                             (__v2di)_mm_andnot_epi64(__A, __B),
 689                                             (__v2di)__W);
 690}
 691
 692static __inline__ __m128i __DEFAULT_FN_ATTRS128
 693_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 694{
 695  return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
 696}
 697
 698static __inline__ __m256i __DEFAULT_FN_ATTRS256
 699_mm256_or_epi64(__m256i __a, __m256i __b)
 700{
 701  return (__m256i)((__v4du)__a | (__v4du)__b);
 702}
 703
 704static __inline__ __m256i __DEFAULT_FN_ATTRS256
 705_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 706{
 707  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
 708                                             (__v4di)_mm256_or_epi64(__A, __B),
 709                                             (__v4di)__W);
 710}
 711
 712static __inline__ __m256i __DEFAULT_FN_ATTRS256
 713_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 714{
 715  return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
 716}
 717
 718static __inline__ __m128i __DEFAULT_FN_ATTRS128
 719_mm_or_epi64(__m128i __a, __m128i __b)
 720{
 721  return (__m128i)((__v2du)__a | (__v2du)__b);
 722}
 723
 724static __inline__ __m128i __DEFAULT_FN_ATTRS128
 725_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 726{
 727  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
 728                                             (__v2di)_mm_or_epi64(__A, __B),
 729                                             (__v2di)__W);
 730}
 731
 732static __inline__ __m128i __DEFAULT_FN_ATTRS128
 733_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 734{
 735  return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
 736}
 737
 738static __inline__ __m256i __DEFAULT_FN_ATTRS256
 739_mm256_xor_epi64(__m256i __a, __m256i __b)
 740{
 741  return (__m256i)((__v4du)__a ^ (__v4du)__b);
 742}
 743
 744static __inline__ __m256i __DEFAULT_FN_ATTRS256
 745_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 746{
 747  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
 748                                             (__v4di)_mm256_xor_epi64(__A, __B),
 749                                             (__v4di)__W);
 750}
 751
 752static __inline__ __m256i __DEFAULT_FN_ATTRS256
 753_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 754{
 755  return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
 756}
 757
 758static __inline__ __m128i __DEFAULT_FN_ATTRS128
 759_mm_xor_epi64(__m128i __a, __m128i __b)
 760{
 761  return (__m128i)((__v2du)__a ^ (__v2du)__b);
 762}
 763
 764static __inline__ __m128i __DEFAULT_FN_ATTRS128
 765_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
 766        __m128i __B)
 767{
 768  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
 769                                             (__v2di)_mm_xor_epi64(__A, __B),
 770                                             (__v2di)__W);
 771}
 772
 773static __inline__ __m128i __DEFAULT_FN_ATTRS128
 774_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 775{
 776  return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
 777}
 778
 779#define _mm_cmp_epi32_mask(a, b, p) \
 780  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
 781                                         (__v4si)(__m128i)(b), (int)(p), \
 782                                         (__mmask8)-1))
 783
 784#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
 785  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
 786                                         (__v4si)(__m128i)(b), (int)(p), \
 787                                         (__mmask8)(m)))
 788
 789#define _mm_cmp_epu32_mask(a, b, p) \
 790  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
 791                                          (__v4si)(__m128i)(b), (int)(p), \
 792                                          (__mmask8)-1))
 793
 794#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
 795  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
 796                                          (__v4si)(__m128i)(b), (int)(p), \
 797                                          (__mmask8)(m)))
 798
 799#define _mm256_cmp_epi32_mask(a, b, p) \
 800  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
 801                                         (__v8si)(__m256i)(b), (int)(p), \
 802                                         (__mmask8)-1))
 803
 804#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
 805  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
 806                                         (__v8si)(__m256i)(b), (int)(p), \
 807                                         (__mmask8)(m)))
 808
 809#define _mm256_cmp_epu32_mask(a, b, p) \
 810  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
 811                                          (__v8si)(__m256i)(b), (int)(p), \
 812                                          (__mmask8)-1))
 813
 814#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
 815  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
 816                                          (__v8si)(__m256i)(b), (int)(p), \
 817                                          (__mmask8)(m)))
 818
 819#define _mm_cmp_epi64_mask(a, b, p) \
 820  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
 821                                         (__v2di)(__m128i)(b), (int)(p), \
 822                                         (__mmask8)-1))
 823
 824#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
 825  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
 826                                         (__v2di)(__m128i)(b), (int)(p), \
 827                                         (__mmask8)(m)))
 828
 829#define _mm_cmp_epu64_mask(a, b, p) \
 830  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
 831                                          (__v2di)(__m128i)(b), (int)(p), \
 832                                          (__mmask8)-1))
 833
 834#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
 835  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
 836                                          (__v2di)(__m128i)(b), (int)(p), \
 837                                          (__mmask8)(m)))
 838
 839#define _mm256_cmp_epi64_mask(a, b, p) \
 840  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
 841                                         (__v4di)(__m256i)(b), (int)(p), \
 842                                         (__mmask8)-1))
 843
 844#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
 845  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
 846                                         (__v4di)(__m256i)(b), (int)(p), \
 847                                         (__mmask8)(m)))
 848
 849#define _mm256_cmp_epu64_mask(a, b, p) \
 850  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
 851                                          (__v4di)(__m256i)(b), (int)(p), \
 852                                          (__mmask8)-1))
 853
 854#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
 855  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
 856                                          (__v4di)(__m256i)(b), (int)(p), \
 857                                          (__mmask8)(m)))
 858
 859#define _mm256_cmp_ps_mask(a, b, p)  \
 860  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
 861                                          (__v8sf)(__m256)(b), (int)(p), \
 862                                          (__mmask8)-1))
 863
 864#define _mm256_mask_cmp_ps_mask(m, a, b, p)  \
 865  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
 866                                          (__v8sf)(__m256)(b), (int)(p), \
 867                                          (__mmask8)(m)))
 868
 869#define _mm256_cmp_pd_mask(a, b, p)  \
 870  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
 871                                          (__v4df)(__m256d)(b), (int)(p), \
 872                                          (__mmask8)-1))
 873
 874#define _mm256_mask_cmp_pd_mask(m, a, b, p)  \
 875  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
 876                                          (__v4df)(__m256d)(b), (int)(p), \
 877                                          (__mmask8)(m)))
 878
 879#define _mm_cmp_ps_mask(a, b, p)  \
 880  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
 881                                          (__v4sf)(__m128)(b), (int)(p), \
 882                                          (__mmask8)-1))
 883
 884#define _mm_mask_cmp_ps_mask(m, a, b, p)  \
 885  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
 886                                          (__v4sf)(__m128)(b), (int)(p), \
 887                                          (__mmask8)(m)))
 888
 889#define _mm_cmp_pd_mask(a, b, p)  \
 890  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
 891                                          (__v2df)(__m128d)(b), (int)(p), \
 892                                          (__mmask8)-1))
 893
 894#define _mm_mask_cmp_pd_mask(m, a, b, p)  \
 895  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
 896                                          (__v2df)(__m128d)(b), (int)(p), \
 897                                          (__mmask8)(m)))
 898
 899static __inline__ __m128d __DEFAULT_FN_ATTRS128
 900_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
 901{
 902  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
 903                    __builtin_ia32_vfmaddpd ((__v2df) __A,
 904                                             (__v2df) __B,
 905                                             (__v2df) __C),
 906                    (__v2df) __A);
 907}
 908
 909static __inline__ __m128d __DEFAULT_FN_ATTRS128
 910_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
 911{
 912  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
 913                    __builtin_ia32_vfmaddpd ((__v2df) __A,
 914                                             (__v2df) __B,
 915                                             (__v2df) __C),
 916                    (__v2df) __C);
 917}
 918
 919static __inline__ __m128d __DEFAULT_FN_ATTRS128
 920_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 921{
 922  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
 923                    __builtin_ia32_vfmaddpd ((__v2df) __A,
 924                                             (__v2df) __B,
 925                                             (__v2df) __C),
 926                    (__v2df)_mm_setzero_pd());
 927}
 928
 929static __inline__ __m128d __DEFAULT_FN_ATTRS128
 930_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
 931{
 932  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
 933                    __builtin_ia32_vfmaddpd ((__v2df) __A,
 934                                             (__v2df) __B,
 935                                             -(__v2df) __C),
 936                    (__v2df) __A);
 937}
 938
 939static __inline__ __m128d __DEFAULT_FN_ATTRS128
 940_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 941{
 942  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
 943                    __builtin_ia32_vfmaddpd ((__v2df) __A,
 944                                             (__v2df) __B,
 945                                             -(__v2df) __C),
 946                    (__v2df)_mm_setzero_pd());
 947}
 948
 949static __inline__ __m128d __DEFAULT_FN_ATTRS128
 950_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
 951{
 952  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
 953                    __builtin_ia32_vfmaddpd (-(__v2df) __A,
 954                                             (__v2df) __B,
 955                                             (__v2df) __C),
 956                    (__v2df) __C);
 957}
 958
 959static __inline__ __m128d __DEFAULT_FN_ATTRS128
 960_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 961{
 962  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
 963                    __builtin_ia32_vfmaddpd (-(__v2df) __A,
 964                                             (__v2df) __B,
 965                                             (__v2df) __C),
 966                    (__v2df)_mm_setzero_pd());
 967}
 968
 969static __inline__ __m128d __DEFAULT_FN_ATTRS128
 970_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 971{
 972  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
 973                    __builtin_ia32_vfmaddpd (-(__v2df) __A,
 974                                             (__v2df) __B,
 975                                             -(__v2df) __C),
 976                    (__v2df)_mm_setzero_pd());
 977}
 978
 979static __inline__ __m256d __DEFAULT_FN_ATTRS256
 980_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
 981{
 982  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
 983                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
 984                                                (__v4df) __B,
 985                                                (__v4df) __C),
 986                    (__v4df) __A);
 987}
 988
 989static __inline__ __m256d __DEFAULT_FN_ATTRS256
 990_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
 991{
 992  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
 993                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
 994                                                (__v4df) __B,
 995                                                (__v4df) __C),
 996                    (__v4df) __C);
 997}
 998
 999static __inline__ __m256d __DEFAULT_FN_ATTRS256
1000_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1001{
1002  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1003                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1004                                                (__v4df) __B,
1005                                                (__v4df) __C),
1006                    (__v4df)_mm256_setzero_pd());
1007}
1008
1009static __inline__ __m256d __DEFAULT_FN_ATTRS256
1010_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1011{
1012  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1013                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1014                                                (__v4df) __B,
1015                                                -(__v4df) __C),
1016                    (__v4df) __A);
1017}
1018
1019static __inline__ __m256d __DEFAULT_FN_ATTRS256
1020_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1021{
1022  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1023                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1024                                                (__v4df) __B,
1025                                                -(__v4df) __C),
1026                    (__v4df)_mm256_setzero_pd());
1027}
1028
1029static __inline__ __m256d __DEFAULT_FN_ATTRS256
1030_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1031{
1032  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1033                    __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1034                                                (__v4df) __B,
1035                                                (__v4df) __C),
1036                    (__v4df) __C);
1037}
1038
1039static __inline__ __m256d __DEFAULT_FN_ATTRS256
1040_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1041{
1042  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1043                    __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1044                                                (__v4df) __B,
1045                                                (__v4df) __C),
1046                    (__v4df)_mm256_setzero_pd());
1047}
1048
1049static __inline__ __m256d __DEFAULT_FN_ATTRS256
1050_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1051{
1052  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1053                    __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1054                                                (__v4df) __B,
1055                                                -(__v4df) __C),
1056                    (__v4df)_mm256_setzero_pd());
1057}
1058
1059static __inline__ __m128 __DEFAULT_FN_ATTRS128
1060_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1061{
1062  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1063                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1064                                             (__v4sf) __B,
1065                                             (__v4sf) __C),
1066                    (__v4sf) __A);
1067}
1068
1069static __inline__ __m128 __DEFAULT_FN_ATTRS128
1070_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1071{
1072  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1073                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1074                                             (__v4sf) __B,
1075                                             (__v4sf) __C),
1076                    (__v4sf) __C);
1077}
1078
1079static __inline__ __m128 __DEFAULT_FN_ATTRS128
1080_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1081{
1082  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1083                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1084                                             (__v4sf) __B,
1085                                             (__v4sf) __C),
1086                    (__v4sf)_mm_setzero_ps());
1087}
1088
1089static __inline__ __m128 __DEFAULT_FN_ATTRS128
1090_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1091{
1092  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1093                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1094                                             (__v4sf) __B,
1095                                             -(__v4sf) __C),
1096                    (__v4sf) __A);
1097}
1098
1099static __inline__ __m128 __DEFAULT_FN_ATTRS128
1100_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1101{
1102  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1103                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1104                                             (__v4sf) __B,
1105                                             -(__v4sf) __C),
1106                    (__v4sf)_mm_setzero_ps());
1107}
1108
1109static __inline__ __m128 __DEFAULT_FN_ATTRS128
1110_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1111{
1112  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1113                    __builtin_ia32_vfmaddps (-(__v4sf) __A,
1114                                             (__v4sf) __B,
1115                                             (__v4sf) __C),
1116                    (__v4sf) __C);
1117}
1118
1119static __inline__ __m128 __DEFAULT_FN_ATTRS128
1120_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1121{
1122  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1123                    __builtin_ia32_vfmaddps (-(__v4sf) __A,
1124                                             (__v4sf) __B,
1125                                             (__v4sf) __C),
1126                    (__v4sf)_mm_setzero_ps());
1127}
1128
1129static __inline__ __m128 __DEFAULT_FN_ATTRS128
1130_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1131{
1132  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1133                    __builtin_ia32_vfmaddps (-(__v4sf) __A,
1134                                             (__v4sf) __B,
1135                                             -(__v4sf) __C),
1136                    (__v4sf)_mm_setzero_ps());
1137}
1138
1139static __inline__ __m256 __DEFAULT_FN_ATTRS256
1140_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1141{
1142  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1143                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1144                                                (__v8sf) __B,
1145                                                (__v8sf) __C),
1146                    (__v8sf) __A);
1147}
1148
1149static __inline__ __m256 __DEFAULT_FN_ATTRS256
1150_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1151{
1152  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1153                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1154                                                (__v8sf) __B,
1155                                                (__v8sf) __C),
1156                    (__v8sf) __C);
1157}
1158
1159static __inline__ __m256 __DEFAULT_FN_ATTRS256
1160_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1161{
1162  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1163                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1164                                                (__v8sf) __B,
1165                                                (__v8sf) __C),
1166                    (__v8sf)_mm256_setzero_ps());
1167}
1168
1169static __inline__ __m256 __DEFAULT_FN_ATTRS256
1170_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1171{
1172  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1173                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1174                                                (__v8sf) __B,
1175                                                -(__v8sf) __C),
1176                    (__v8sf) __A);
1177}
1178
1179static __inline__ __m256 __DEFAULT_FN_ATTRS256
1180_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1181{
1182  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1183                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1184                                                (__v8sf) __B,
1185                                                -(__v8sf) __C),
1186                    (__v8sf)_mm256_setzero_ps());
1187}
1188
1189static __inline__ __m256 __DEFAULT_FN_ATTRS256
1190_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1191{
1192  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1193                    __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1194                                                (__v8sf) __B,
1195                                                (__v8sf) __C),
1196                    (__v8sf) __C);
1197}
1198
1199static __inline__ __m256 __DEFAULT_FN_ATTRS256
1200_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1201{
1202  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1203                    __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1204                                                (__v8sf) __B,
1205                                                (__v8sf) __C),
1206                    (__v8sf)_mm256_setzero_ps());
1207}
1208
1209static __inline__ __m256 __DEFAULT_FN_ATTRS256
1210_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1211{
1212  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1213                    __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1214                                                (__v8sf) __B,
1215                                                -(__v8sf) __C),
1216                    (__v8sf)_mm256_setzero_ps());
1217}
1218
1219static __inline__ __m128d __DEFAULT_FN_ATTRS128
1220_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1221{
1222  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1223                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1224                                                (__v2df) __B,
1225                                                (__v2df) __C),
1226                    (__v2df) __A);
1227}
1228
1229static __inline__ __m128d __DEFAULT_FN_ATTRS128
1230_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1231{
1232  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1233                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1234                                                (__v2df) __B,
1235                                                (__v2df) __C),
1236                    (__v2df) __C);
1237}
1238
1239static __inline__ __m128d __DEFAULT_FN_ATTRS128
1240_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1241{
1242  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1243                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1244                                                (__v2df) __B,
1245                                                (__v2df) __C),
1246                    (__v2df)_mm_setzero_pd());
1247}
1248
1249static __inline__ __m128d __DEFAULT_FN_ATTRS128
1250_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1251{
1252  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1253                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1254                                                (__v2df) __B,
1255                                                -(__v2df) __C),
1256                    (__v2df) __A);
1257}
1258
1259static __inline__ __m128d __DEFAULT_FN_ATTRS128
1260_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1261{
1262  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1263                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1264                                                (__v2df) __B,
1265                                                -(__v2df) __C),
1266                    (__v2df)_mm_setzero_pd());
1267}
1268
1269static __inline__ __m256d __DEFAULT_FN_ATTRS256
1270_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1271{
1272  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1273                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1274                                                   (__v4df) __B,
1275                                                   (__v4df) __C),
1276                    (__v4df) __A);
1277}
1278
1279static __inline__ __m256d __DEFAULT_FN_ATTRS256
1280_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1281{
1282  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1283                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1284                                                   (__v4df) __B,
1285                                                   (__v4df) __C),
1286                    (__v4df) __C);
1287}
1288
1289static __inline__ __m256d __DEFAULT_FN_ATTRS256
1290_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1291{
1292  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1293                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1294                                                   (__v4df) __B,
1295                                                   (__v4df) __C),
1296                    (__v4df)_mm256_setzero_pd());
1297}
1298
1299static __inline__ __m256d __DEFAULT_FN_ATTRS256
1300_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1301{
1302  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1303                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1304                                                   (__v4df) __B,
1305                                                   -(__v4df) __C),
1306                    (__v4df) __A);
1307}
1308
1309static __inline__ __m256d __DEFAULT_FN_ATTRS256
1310_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1311{
1312  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1313                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1314                                                   (__v4df) __B,
1315                                                   -(__v4df) __C),
1316                    (__v4df)_mm256_setzero_pd());
1317}
1318
1319static __inline__ __m128 __DEFAULT_FN_ATTRS128
1320_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1321{
1322  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1323                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1324                                                (__v4sf) __B,
1325                                                (__v4sf) __C),
1326                    (__v4sf) __A);
1327}
1328
1329static __inline__ __m128 __DEFAULT_FN_ATTRS128
1330_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1331{
1332  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1333                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1334                                                (__v4sf) __B,
1335                                                (__v4sf) __C),
1336                    (__v4sf) __C);
1337}
1338
1339static __inline__ __m128 __DEFAULT_FN_ATTRS128
1340_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1341{
1342  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1343                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1344                                                (__v4sf) __B,
1345                                                (__v4sf) __C),
1346                    (__v4sf)_mm_setzero_ps());
1347}
1348
1349static __inline__ __m128 __DEFAULT_FN_ATTRS128
1350_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1351{
1352  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1353                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1354                                                (__v4sf) __B,
1355                                                -(__v4sf) __C),
1356                    (__v4sf) __A);
1357}
1358
1359static __inline__ __m128 __DEFAULT_FN_ATTRS128
1360_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1361{
1362  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1363                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1364                                                (__v4sf) __B,
1365                                                -(__v4sf) __C),
1366                    (__v4sf)_mm_setzero_ps());
1367}
1368
1369static __inline__ __m256 __DEFAULT_FN_ATTRS256
1370_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1371                         __m256 __C)
1372{
1373  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1374                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1375                                                   (__v8sf) __B,
1376                                                   (__v8sf) __C),
1377                    (__v8sf) __A);
1378}
1379
1380static __inline__ __m256 __DEFAULT_FN_ATTRS256
1381_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1382{
1383  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1384                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1385                                                   (__v8sf) __B,
1386                                                   (__v8sf) __C),
1387                    (__v8sf) __C);
1388}
1389
1390static __inline__ __m256 __DEFAULT_FN_ATTRS256
1391_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1392{
1393  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1394                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1395                                                   (__v8sf) __B,
1396                                                   (__v8sf) __C),
1397                    (__v8sf)_mm256_setzero_ps());
1398}
1399
1400static __inline__ __m256 __DEFAULT_FN_ATTRS256
1401_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1402{
1403  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1404                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1405                                                   (__v8sf) __B,
1406                                                   -(__v8sf) __C),
1407                    (__v8sf) __A);
1408}
1409
1410static __inline__ __m256 __DEFAULT_FN_ATTRS256
1411_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1412{
1413  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1414                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1415                                                   (__v8sf) __B,
1416                                                   -(__v8sf) __C),
1417                    (__v8sf)_mm256_setzero_ps());
1418}
1419
1420static __inline__ __m128d __DEFAULT_FN_ATTRS128
1421_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1422{
1423  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1424                    __builtin_ia32_vfmaddpd ((__v2df) __A,
1425                                             (__v2df) __B,
1426                                             -(__v2df) __C),
1427                    (__v2df) __C);
1428}
1429
1430static __inline__ __m256d __DEFAULT_FN_ATTRS256
1431_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1432{
1433  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1434                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1435                                                (__v4df) __B,
1436                                                -(__v4df) __C),
1437                    (__v4df) __C);
1438}
1439
1440static __inline__ __m128 __DEFAULT_FN_ATTRS128
1441_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1442{
1443  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1444                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1445                                             (__v4sf) __B,
1446                                             -(__v4sf) __C),
1447                    (__v4sf) __C);
1448}
1449
1450static __inline__ __m256 __DEFAULT_FN_ATTRS256
1451_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1452{
1453  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1454                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1455                                                (__v8sf) __B,
1456                                                -(__v8sf) __C),
1457                    (__v8sf) __C);
1458}
1459
1460static __inline__ __m128d __DEFAULT_FN_ATTRS128
1461_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1462{
1463  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1464                    __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1465                                                (__v2df) __B,
1466                                                -(__v2df) __C),
1467                    (__v2df) __C);
1468}
1469
1470static __inline__ __m256d __DEFAULT_FN_ATTRS256
1471_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1472{
1473  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1474                    __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1475                                                   (__v4df) __B,
1476                                                   -(__v4df) __C),
1477                    (__v4df) __C);
1478}
1479
1480static __inline__ __m128 __DEFAULT_FN_ATTRS128
1481_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1482{
1483  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1484                    __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1485                                                (__v4sf) __B,
1486                                                -(__v4sf) __C),
1487                    (__v4sf) __C);
1488}
1489
1490static __inline__ __m256 __DEFAULT_FN_ATTRS256
1491_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1492{
1493  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1494                    __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1495                                                   (__v8sf) __B,
1496                                                   -(__v8sf) __C),
1497                    (__v8sf) __C);
1498}
1499
1500static __inline__ __m128d __DEFAULT_FN_ATTRS128
1501_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1502{
1503  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1504                    __builtin_ia32_vfmaddpd ((__v2df) __A,
1505                                             -(__v2df) __B,
1506                                             (__v2df) __C),
1507                    (__v2df) __A);
1508}
1509
1510static __inline__ __m256d __DEFAULT_FN_ATTRS256
1511_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1512{
1513  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1514                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1515                                                -(__v4df) __B,
1516                                                (__v4df) __C),
1517                    (__v4df) __A);
1518}
1519
1520static __inline__ __m128 __DEFAULT_FN_ATTRS128
1521_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1522{
1523  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1524                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1525                                             -(__v4sf) __B,
1526                                             (__v4sf) __C),
1527                    (__v4sf) __A);
1528}
1529
1530static __inline__ __m256 __DEFAULT_FN_ATTRS256
1531_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1532{
1533  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1534                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1535                                                -(__v8sf) __B,
1536                                                (__v8sf) __C),
1537                    (__v8sf) __A);
1538}
1539
1540static __inline__ __m128d __DEFAULT_FN_ATTRS128
1541_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1542{
1543  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1544                    __builtin_ia32_vfmaddpd ((__v2df) __A,
1545                                             -(__v2df) __B,
1546                                             -(__v2df) __C),
1547                    (__v2df) __A);
1548}
1549
1550static __inline__ __m128d __DEFAULT_FN_ATTRS128
1551_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1552{
1553  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1554                    __builtin_ia32_vfmaddpd ((__v2df) __A,
1555                                             -(__v2df) __B,
1556                                             -(__v2df) __C),
1557                    (__v2df) __C);
1558}
1559
1560static __inline__ __m256d __DEFAULT_FN_ATTRS256
1561_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1562{
1563  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1564                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1565                                                -(__v4df) __B,
1566                                                -(__v4df) __C),
1567                    (__v4df) __A);
1568}
1569
1570static __inline__ __m256d __DEFAULT_FN_ATTRS256
1571_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1572{
1573  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1574                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1575                                                -(__v4df) __B,
1576                                                -(__v4df) __C),
1577                    (__v4df) __C);
1578}
1579
1580static __inline__ __m128 __DEFAULT_FN_ATTRS128
1581_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1582{
1583  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1584                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1585                                             -(__v4sf) __B,
1586                                             -(__v4sf) __C),
1587                    (__v4sf) __A);
1588}
1589
1590static __inline__ __m128 __DEFAULT_FN_ATTRS128
1591_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1592{
1593  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1594                    __builtin_ia32_vfmaddps ((__v4sf) __A,
1595                                             -(__v4sf) __B,
1596                                             -(__v4sf) __C),
1597                    (__v4sf) __C);
1598}
1599
1600static __inline__ __m256 __DEFAULT_FN_ATTRS256
1601_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1602{
1603  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1604                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1605                                                -(__v8sf) __B,
1606                                                -(__v8sf) __C),
1607                    (__v8sf) __A);
1608}
1609
1610static __inline__ __m256 __DEFAULT_FN_ATTRS256
1611_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1612{
1613  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1614                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1615                                                -(__v8sf) __B,
1616                                                -(__v8sf) __C),
1617                    (__v8sf) __C);
1618}
1619
1620static __inline__ __m128d __DEFAULT_FN_ATTRS128
1621_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1622  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1623                                              (__v2df)_mm_add_pd(__A, __B),
1624                                              (__v2df)__W);
1625}
1626
1627static __inline__ __m128d __DEFAULT_FN_ATTRS128
1628_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1629  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1630                                              (__v2df)_mm_add_pd(__A, __B),
1631                                              (__v2df)_mm_setzero_pd());
1632}
1633
1634static __inline__ __m256d __DEFAULT_FN_ATTRS256
1635_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1636  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1637                                              (__v4df)_mm256_add_pd(__A, __B),
1638                                              (__v4df)__W);
1639}
1640
1641static __inline__ __m256d __DEFAULT_FN_ATTRS256
1642_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1643  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1644                                              (__v4df)_mm256_add_pd(__A, __B),
1645                                              (__v4df)_mm256_setzero_pd());
1646}
1647
1648static __inline__ __m128 __DEFAULT_FN_ATTRS128
1649_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1650  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1651                                             (__v4sf)_mm_add_ps(__A, __B),
1652                                             (__v4sf)__W);
1653}
1654
1655static __inline__ __m128 __DEFAULT_FN_ATTRS128
1656_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1657  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1658                                             (__v4sf)_mm_add_ps(__A, __B),
1659                                             (__v4sf)_mm_setzero_ps());
1660}
1661
1662static __inline__ __m256 __DEFAULT_FN_ATTRS256
1663_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1664  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1665                                             (__v8sf)_mm256_add_ps(__A, __B),
1666                                             (__v8sf)__W);
1667}
1668
1669static __inline__ __m256 __DEFAULT_FN_ATTRS256
1670_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1671  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1672                                             (__v8sf)_mm256_add_ps(__A, __B),
1673                                             (__v8sf)_mm256_setzero_ps());
1674}
1675
1676static __inline__ __m128i __DEFAULT_FN_ATTRS128
1677_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1678  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1679                (__v4si) __W,
1680                (__v4si) __A);
1681}
1682
1683static __inline__ __m256i __DEFAULT_FN_ATTRS256
1684_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1685  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1686                (__v8si) __W,
1687                (__v8si) __A);
1688}
1689
1690static __inline__ __m128d __DEFAULT_FN_ATTRS128
1691_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1692  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1693                 (__v2df) __W,
1694                 (__v2df) __A);
1695}
1696
1697static __inline__ __m256d __DEFAULT_FN_ATTRS256
1698_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1699  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1700                 (__v4df) __W,
1701                 (__v4df) __A);
1702}
1703
1704static __inline__ __m128 __DEFAULT_FN_ATTRS128
1705_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1706  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1707                (__v4sf) __W,
1708                (__v4sf) __A);
1709}
1710
1711static __inline__ __m256 __DEFAULT_FN_ATTRS256
1712_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1713  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1714                (__v8sf) __W,
1715                (__v8sf) __A);
1716}
1717
1718static __inline__ __m128i __DEFAULT_FN_ATTRS128
1719_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1720  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1721                (__v2di) __W,
1722                (__v2di) __A);
1723}
1724
1725static __inline__ __m256i __DEFAULT_FN_ATTRS256
1726_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1727  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
1728                (__v4di) __W,
1729                (__v4di) __A);
1730}
1731
1732static __inline__ __m128d __DEFAULT_FN_ATTRS128
1733_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1734  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1735                  (__v2df) __W,
1736                  (__mmask8) __U);
1737}
1738
1739static __inline__ __m128d __DEFAULT_FN_ATTRS128
1740_mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
1741  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1742                  (__v2df)
1743                  _mm_setzero_pd (),
1744                  (__mmask8) __U);
1745}
1746
1747static __inline__ __m256d __DEFAULT_FN_ATTRS256
1748_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1749  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1750                  (__v4df) __W,
1751                  (__mmask8) __U);
1752}
1753
1754static __inline__ __m256d __DEFAULT_FN_ATTRS256
1755_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
1756  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1757                  (__v4df)
1758                  _mm256_setzero_pd (),
1759                  (__mmask8) __U);
1760}
1761
1762static __inline__ __m128i __DEFAULT_FN_ATTRS128
1763_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1764  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1765                  (__v2di) __W,
1766                  (__mmask8) __U);
1767}
1768
1769static __inline__ __m128i __DEFAULT_FN_ATTRS128
1770_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
1771  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1772                  (__v2di)
1773                  _mm_setzero_si128 (),
1774                  (__mmask8) __U);
1775}
1776
1777static __inline__ __m256i __DEFAULT_FN_ATTRS256
1778_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1779  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1780                  (__v4di) __W,
1781                  (__mmask8) __U);
1782}
1783
1784static __inline__ __m256i __DEFAULT_FN_ATTRS256
1785_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
1786  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1787                  (__v4di)
1788                  _mm256_setzero_si256 (),
1789                  (__mmask8) __U);
1790}
1791
1792static __inline__ __m128 __DEFAULT_FN_ATTRS128
1793_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1794  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1795                 (__v4sf) __W,
1796                 (__mmask8) __U);
1797}
1798
1799static __inline__ __m128 __DEFAULT_FN_ATTRS128
1800_mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
1801  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1802                 (__v4sf)
1803                 _mm_setzero_ps (),
1804                 (__mmask8) __U);
1805}
1806
1807static __inline__ __m256 __DEFAULT_FN_ATTRS256
1808_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1809  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1810                 (__v8sf) __W,
1811                 (__mmask8) __U);
1812}
1813
1814static __inline__ __m256 __DEFAULT_FN_ATTRS256
1815_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
1816  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1817                 (__v8sf)
1818                 _mm256_setzero_ps (),
1819                 (__mmask8) __U);
1820}
1821
1822static __inline__ __m128i __DEFAULT_FN_ATTRS128
1823_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1824  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1825                  (__v4si) __W,
1826                  (__mmask8) __U);
1827}
1828
1829static __inline__ __m128i __DEFAULT_FN_ATTRS128
1830_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
1831  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1832                  (__v4si)
1833                  _mm_setzero_si128 (),
1834                  (__mmask8) __U);
1835}
1836
1837static __inline__ __m256i __DEFAULT_FN_ATTRS256
1838_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1839  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1840                  (__v8si) __W,
1841                  (__mmask8) __U);
1842}
1843
1844static __inline__ __m256i __DEFAULT_FN_ATTRS256
1845_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
1846  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1847                  (__v8si)
1848                  _mm256_setzero_si256 (),
1849                  (__mmask8) __U);
1850}
1851
1852static __inline__ void __DEFAULT_FN_ATTRS128
1853_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1854  __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1855            (__v2df) __A,
1856            (__mmask8) __U);
1857}
1858
1859static __inline__ void __DEFAULT_FN_ATTRS256
1860_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1861  __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1862            (__v4df) __A,
1863            (__mmask8) __U);
1864}
1865
1866static __inline__ void __DEFAULT_FN_ATTRS128
1867_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1868  __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1869            (__v2di) __A,
1870            (__mmask8) __U);
1871}
1872
1873static __inline__ void __DEFAULT_FN_ATTRS256
1874_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
1875  __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1876            (__v4di) __A,
1877            (__mmask8) __U);
1878}
1879
1880static __inline__ void __DEFAULT_FN_ATTRS128
1881_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1882  __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1883            (__v4sf) __A,
1884            (__mmask8) __U);
1885}
1886
1887static __inline__ void __DEFAULT_FN_ATTRS256
1888_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
1889  __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1890            (__v8sf) __A,
1891            (__mmask8) __U);
1892}
1893
1894static __inline__ void __DEFAULT_FN_ATTRS128
1895_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1896  __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1897            (__v4si) __A,
1898            (__mmask8) __U);
1899}
1900
1901static __inline__ void __DEFAULT_FN_ATTRS256
1902_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
1903  __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1904            (__v8si) __A,
1905            (__mmask8) __U);
1906}
1907
1908static __inline__ __m128d __DEFAULT_FN_ATTRS128
1909_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
1910  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1911                                              (__v2df)_mm_cvtepi32_pd(__A),
1912                                              (__v2df)__W);
1913}
1914
1915static __inline__ __m128d __DEFAULT_FN_ATTRS128
1916_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1917  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1918                                              (__v2df)_mm_cvtepi32_pd(__A),
1919                                              (__v2df)_mm_setzero_pd());
1920}
1921
1922static __inline__ __m256d __DEFAULT_FN_ATTRS256
1923_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
1924  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1925                                              (__v4df)_mm256_cvtepi32_pd(__A),
1926                                              (__v4df)__W);
1927}
1928
1929static __inline__ __m256d __DEFAULT_FN_ATTRS256
1930_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1931  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1932                                              (__v4df)_mm256_cvtepi32_pd(__A),
1933                                              (__v4df)_mm256_setzero_pd());
1934}
1935
1936static __inline__ __m128 __DEFAULT_FN_ATTRS128
1937_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
1938  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1939                                             (__v4sf)_mm_cvtepi32_ps(__A),
1940                                             (__v4sf)__W);
1941}
1942
1943static __inline__ __m128 __DEFAULT_FN_ATTRS128
1944_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
1945  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1946                                             (__v4sf)_mm_cvtepi32_ps(__A),
1947                                             (__v4sf)_mm_setzero_ps());
1948}
1949
1950static __inline__ __m256 __DEFAULT_FN_ATTRS256
1951_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
1952  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1953                                             (__v8sf)_mm256_cvtepi32_ps(__A),
1954                                             (__v8sf)__W);
1955}
1956
1957static __inline__ __m256 __DEFAULT_FN_ATTRS256
1958_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
1959  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1960                                             (__v8sf)_mm256_cvtepi32_ps(__A),
1961                                             (__v8sf)_mm256_setzero_ps());
1962}
1963
1964static __inline__ __m128i __DEFAULT_FN_ATTRS128
1965_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1966  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1967                (__v4si) __W,
1968                (__mmask8) __U);
1969}
1970
1971static __inline__ __m128i __DEFAULT_FN_ATTRS128
1972_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
1973  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1974                (__v4si)
1975                _mm_setzero_si128 (),
1976                (__mmask8) __U);
1977}
1978
1979static __inline__ __m128i __DEFAULT_FN_ATTRS256
1980_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1981  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1982                                             (__v4si)_mm256_cvtpd_epi32(__A),
1983                                             (__v4si)__W);
1984}
1985
1986static __inline__ __m128i __DEFAULT_FN_ATTRS256
1987_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
1988  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1989                                             (__v4si)_mm256_cvtpd_epi32(__A),
1990                                             (__v4si)_mm_setzero_si128());
1991}
1992
1993static __inline__ __m128 __DEFAULT_FN_ATTRS128
1994_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
1995  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1996            (__v4sf) __W,
1997            (__mmask8) __U);
1998}
1999
2000static __inline__ __m128 __DEFAULT_FN_ATTRS128
2001_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
2002  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2003            (__v4sf)
2004            _mm_setzero_ps (),
2005            (__mmask8) __U);
2006}
2007
2008static __inline__ __m128 __DEFAULT_FN_ATTRS256
2009_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
2010  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2011                                             (__v4sf)_mm256_cvtpd_ps(__A),
2012                                             (__v4sf)__W);
2013}
2014
2015static __inline__ __m128 __DEFAULT_FN_ATTRS256
2016_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
2017  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2018                                             (__v4sf)_mm256_cvtpd_ps(__A),
2019                                             (__v4sf)_mm_setzero_ps());
2020}
2021
2022static __inline__ __m128i __DEFAULT_FN_ATTRS128
2023_mm_cvtpd_epu32 (__m128d __A) {
2024  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2025                 (__v4si)
2026                 _mm_setzero_si128 (),
2027                 (__mmask8) -1);
2028}
2029
2030static __inline__ __m128i __DEFAULT_FN_ATTRS128
2031_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2032  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2033                 (__v4si) __W,
2034                 (__mmask8) __U);
2035}
2036
2037static __inline__ __m128i __DEFAULT_FN_ATTRS128
2038_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
2039  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2040                 (__v4si)
2041                 _mm_setzero_si128 (),
2042                 (__mmask8) __U);
2043}
2044
2045static __inline__ __m128i __DEFAULT_FN_ATTRS256
2046_mm256_cvtpd_epu32 (__m256d __A) {
2047  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2048                 (__v4si)
2049                 _mm_setzero_si128 (),
2050                 (__mmask8) -1);
2051}
2052
2053static __inline__ __m128i __DEFAULT_FN_ATTRS256
2054_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2055  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2056                 (__v4si) __W,
2057                 (__mmask8) __U);
2058}
2059
2060static __inline__ __m128i __DEFAULT_FN_ATTRS256
2061_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
2062  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2063                 (__v4si)
2064                 _mm_setzero_si128 (),
2065                 (__mmask8) __U);
2066}
2067
2068static __inline__ __m128i __DEFAULT_FN_ATTRS128
2069_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2070  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2071                                             (__v4si)_mm_cvtps_epi32(__A),
2072                                             (__v4si)__W);
2073}
2074
2075static __inline__ __m128i __DEFAULT_FN_ATTRS128
2076_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
2077  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2078                                             (__v4si)_mm_cvtps_epi32(__A),
2079                                             (__v4si)_mm_setzero_si128());
2080}
2081
2082static __inline__ __m256i __DEFAULT_FN_ATTRS256
2083_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2084  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2085                                             (__v8si)_mm256_cvtps_epi32(__A),
2086                                             (__v8si)__W);
2087}
2088
2089static __inline__ __m256i __DEFAULT_FN_ATTRS256
2090_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
2091  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2092                                             (__v8si)_mm256_cvtps_epi32(__A),
2093                                             (__v8si)_mm256_setzero_si256());
2094}
2095
2096static __inline__ __m128d __DEFAULT_FN_ATTRS128
2097_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2098  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2099                                              (__v2df)_mm_cvtps_pd(__A),
2100                                              (__v2df)__W);
2101}
2102
2103static __inline__ __m128d __DEFAULT_FN_ATTRS128
2104_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2105  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2106                                              (__v2df)_mm_cvtps_pd(__A),
2107                                              (__v2df)_mm_setzero_pd());
2108}
2109
2110static __inline__ __m256d __DEFAULT_FN_ATTRS256
2111_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2112  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2113                                              (__v4df)_mm256_cvtps_pd(__A),
2114                                              (__v4df)__W);
2115}
2116
2117static __inline__ __m256d __DEFAULT_FN_ATTRS256
2118_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2119  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2120                                              (__v4df)_mm256_cvtps_pd(__A),
2121                                              (__v4df)_mm256_setzero_pd());
2122}
2123
2124static __inline__ __m128i __DEFAULT_FN_ATTRS128
2125_mm_cvtps_epu32 (__m128 __A) {
2126  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2127                 (__v4si)
2128                 _mm_setzero_si128 (),
2129                 (__mmask8) -1);
2130}
2131
2132static __inline__ __m128i __DEFAULT_FN_ATTRS128
2133_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2134  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2135                 (__v4si) __W,
2136                 (__mmask8) __U);
2137}
2138
2139static __inline__ __m128i __DEFAULT_FN_ATTRS128
2140_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
2141  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2142                 (__v4si)
2143                 _mm_setzero_si128 (),
2144                 (__mmask8) __U);
2145}
2146
2147static __inline__ __m256i __DEFAULT_FN_ATTRS256
2148_mm256_cvtps_epu32 (__m256 __A) {
2149  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2150                 (__v8si)
2151                 _mm256_setzero_si256 (),
2152                 (__mmask8) -1);
2153}
2154
2155static __inline__ __m256i __DEFAULT_FN_ATTRS256
2156_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2157  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2158                 (__v8si) __W,
2159                 (__mmask8) __U);
2160}
2161
2162static __inline__ __m256i __DEFAULT_FN_ATTRS256
2163_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
2164  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2165                 (__v8si)
2166                 _mm256_setzero_si256 (),
2167                 (__mmask8) __U);
2168}
2169
2170static __inline__ __m128i __DEFAULT_FN_ATTRS128
2171_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2172  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2173                 (__v4si) __W,
2174                 (__mmask8) __U);
2175}
2176
2177static __inline__ __m128i __DEFAULT_FN_ATTRS128
2178_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
2179  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2180                 (__v4si)
2181                 _mm_setzero_si128 (),
2182                 (__mmask8) __U);
2183}
2184
2185static __inline__ __m128i __DEFAULT_FN_ATTRS256
2186_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2187  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2188                                             (__v4si)_mm256_cvttpd_epi32(__A),
2189                                             (__v4si)__W);
2190}
2191
2192static __inline__ __m128i __DEFAULT_FN_ATTRS256
2193_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
2194  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2195                                             (__v4si)_mm256_cvttpd_epi32(__A),
2196                                             (__v4si)_mm_setzero_si128());
2197}
2198
2199static __inline__ __m128i __DEFAULT_FN_ATTRS128
2200_mm_cvttpd_epu32 (__m128d __A) {
2201  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2202                  (__v4si)
2203                  _mm_setzero_si128 (),
2204                  (__mmask8) -1);
2205}
2206
2207static __inline__ __m128i __DEFAULT_FN_ATTRS128
2208_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2209  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2210                  (__v4si) __W,
2211                  (__mmask8) __U);
2212}
2213
2214static __inline__ __m128i __DEFAULT_FN_ATTRS128
2215_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
2216  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2217                  (__v4si)
2218                  _mm_setzero_si128 (),
2219                  (__mmask8) __U);
2220}
2221
2222static __inline__ __m128i __DEFAULT_FN_ATTRS256
2223_mm256_cvttpd_epu32 (__m256d __A) {
2224  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2225                  (__v4si)
2226                  _mm_setzero_si128 (),
2227                  (__mmask8) -1);
2228}
2229
2230static __inline__ __m128i __DEFAULT_FN_ATTRS256
2231_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2232  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2233                  (__v4si) __W,
2234                  (__mmask8) __U);
2235}
2236
2237static __inline__ __m128i __DEFAULT_FN_ATTRS256
2238_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
2239  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2240                  (__v4si)
2241                  _mm_setzero_si128 (),
2242                  (__mmask8) __U);
2243}
2244
2245static __inline__ __m128i __DEFAULT_FN_ATTRS128
2246_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2247  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2248                                             (__v4si)_mm_cvttps_epi32(__A),
2249                                             (__v4si)__W);
2250}
2251
2252static __inline__ __m128i __DEFAULT_FN_ATTRS128
2253_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
2254  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2255                                             (__v4si)_mm_cvttps_epi32(__A),
2256                                             (__v4si)_mm_setzero_si128());
2257}
2258
2259static __inline__ __m256i __DEFAULT_FN_ATTRS256
2260_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2261  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2262                                             (__v8si)_mm256_cvttps_epi32(__A),
2263                                             (__v8si)__W);
2264}
2265
2266static __inline__ __m256i __DEFAULT_FN_ATTRS256
2267_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
2268  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2269                                             (__v8si)_mm256_cvttps_epi32(__A),
2270                                             (__v8si)_mm256_setzero_si256());
2271}
2272
2273static __inline__ __m128i __DEFAULT_FN_ATTRS128
2274_mm_cvttps_epu32 (__m128 __A) {
2275  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2276                  (__v4si)
2277                  _mm_setzero_si128 (),
2278                  (__mmask8) -1);
2279}
2280
2281static __inline__ __m128i __DEFAULT_FN_ATTRS128
2282_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2283  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2284                  (__v4si) __W,
2285                  (__mmask8) __U);
2286}
2287
2288static __inline__ __m128i __DEFAULT_FN_ATTRS128
2289_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
2290  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2291                  (__v4si)
2292                  _mm_setzero_si128 (),
2293                  (__mmask8) __U);
2294}
2295
2296static __inline__ __m256i __DEFAULT_FN_ATTRS256
2297_mm256_cvttps_epu32 (__m256 __A) {
2298  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2299                  (__v8si)
2300                  _mm256_setzero_si256 (),
2301                  (__mmask8) -1);
2302}
2303
2304static __inline__ __m256i __DEFAULT_FN_ATTRS256
2305_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2306  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2307                  (__v8si) __W,
2308                  (__mmask8) __U);
2309}
2310
2311static __inline__ __m256i __DEFAULT_FN_ATTRS256
2312_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
2313  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2314                  (__v8si)
2315                  _mm256_setzero_si256 (),
2316                  (__mmask8) __U);
2317}
2318
2319static __inline__ __m128d __DEFAULT_FN_ATTRS128
2320_mm_cvtepu32_pd (__m128i __A) {
2321  return (__m128d) __builtin_convertvector(
2322      __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2323}
2324
2325static __inline__ __m128d __DEFAULT_FN_ATTRS128
2326_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2327  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2328                                              (__v2df)_mm_cvtepu32_pd(__A),
2329                                              (__v2df)__W);
2330}
2331
2332static __inline__ __m128d __DEFAULT_FN_ATTRS128
2333_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2334  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2335                                              (__v2df)_mm_cvtepu32_pd(__A),
2336                                              (__v2df)_mm_setzero_pd());
2337}
2338
2339static __inline__ __m256d __DEFAULT_FN_ATTRS256
2340_mm256_cvtepu32_pd (__m128i __A) {
2341  return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2342}
2343
2344static __inline__ __m256d __DEFAULT_FN_ATTRS256
2345_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2346  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2347                                              (__v4df)_mm256_cvtepu32_pd(__A),
2348                                              (__v4df)__W);
2349}
2350
2351static __inline__ __m256d __DEFAULT_FN_ATTRS256
2352_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2353  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2354                                              (__v4df)_mm256_cvtepu32_pd(__A),
2355                                              (__v4df)_mm256_setzero_pd());
2356}
2357
2358static __inline__ __m128 __DEFAULT_FN_ATTRS128
2359_mm_cvtepu32_ps (__m128i __A) {
2360  return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2361}
2362
2363static __inline__ __m128 __DEFAULT_FN_ATTRS128
2364_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2365  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2366                                             (__v4sf)_mm_cvtepu32_ps(__A),
2367                                             (__v4sf)__W);
2368}
2369
2370static __inline__ __m128 __DEFAULT_FN_ATTRS128
2371_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
2372  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2373                                             (__v4sf)_mm_cvtepu32_ps(__A),
2374                                             (__v4sf)_mm_setzero_ps());
2375}
2376
2377static __inline__ __m256 __DEFAULT_FN_ATTRS256
2378_mm256_cvtepu32_ps (__m256i __A) {
2379  return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2380}
2381
2382static __inline__ __m256 __DEFAULT_FN_ATTRS256
2383_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2384  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2385                                             (__v8sf)_mm256_cvtepu32_ps(__A),
2386                                             (__v8sf)__W);
2387}
2388
2389static __inline__ __m256 __DEFAULT_FN_ATTRS256
2390_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
2391  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2392                                             (__v8sf)_mm256_cvtepu32_ps(__A),
2393                                             (__v8sf)_mm256_setzero_ps());
2394}
2395
2396static __inline__ __m128d __DEFAULT_FN_ATTRS128
2397_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2398  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2399                                              (__v2df)_mm_div_pd(__A, __B),
2400                                              (__v2df)__W);
2401}
2402
2403static __inline__ __m128d __DEFAULT_FN_ATTRS128
2404_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2405  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2406                                              (__v2df)_mm_div_pd(__A, __B),
2407                                              (__v2df)_mm_setzero_pd());
2408}
2409
2410static __inline__ __m256d __DEFAULT_FN_ATTRS256
2411_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2412  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2413                                              (__v4df)_mm256_div_pd(__A, __B),
2414                                              (__v4df)__W);
2415}
2416
2417static __inline__ __m256d __DEFAULT_FN_ATTRS256
2418_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2419  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2420                                              (__v4df)_mm256_div_pd(__A, __B),
2421                                              (__v4df)_mm256_setzero_pd());
2422}
2423
2424static __inline__ __m128 __DEFAULT_FN_ATTRS128
2425_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2426  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2427                                             (__v4sf)_mm_div_ps(__A, __B),
2428                                             (__v4sf)__W);
2429}
2430
2431static __inline__ __m128 __DEFAULT_FN_ATTRS128
2432_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2433  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2434                                             (__v4sf)_mm_div_ps(__A, __B),
2435                                             (__v4sf)_mm_setzero_ps());
2436}
2437
2438static __inline__ __m256 __DEFAULT_FN_ATTRS256
2439_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2440  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2441                                             (__v8sf)_mm256_div_ps(__A, __B),
2442                                             (__v8sf)__W);
2443}
2444
2445static __inline__ __m256 __DEFAULT_FN_ATTRS256
2446_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2447  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2448                                             (__v8sf)_mm256_div_ps(__A, __B),
2449                                             (__v8sf)_mm256_setzero_ps());
2450}
2451
2452static __inline__ __m128d __DEFAULT_FN_ATTRS128
2453_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2454  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2455                (__v2df) __W,
2456                (__mmask8) __U);
2457}
2458
2459static __inline__ __m128d __DEFAULT_FN_ATTRS128
2460_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2461  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2462                 (__v2df)
2463                 _mm_setzero_pd (),
2464                 (__mmask8) __U);
2465}
2466
2467static __inline__ __m256d __DEFAULT_FN_ATTRS256
2468_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2469  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2470                (__v4df) __W,
2471                (__mmask8) __U);
2472}
2473
2474static __inline__ __m256d __DEFAULT_FN_ATTRS256
2475_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
2476  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2477                 (__v4df)
2478                 _mm256_setzero_pd (),
2479                 (__mmask8) __U);
2480}
2481
2482static __inline__ __m128i __DEFAULT_FN_ATTRS128
2483_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2484  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2485                (__v2di) __W,
2486                (__mmask8) __U);
2487}
2488
2489static __inline__ __m128i __DEFAULT_FN_ATTRS128
2490_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
2491  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2492                 (__v2di)
2493                 _mm_setzero_si128 (),
2494                 (__mmask8) __U);
2495}
2496
2497static __inline__ __m256i __DEFAULT_FN_ATTRS256
2498_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2499  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2500                (__v4di) __W,
2501                (__mmask8) __U);
2502}
2503
2504static __inline__ __m256i __DEFAULT_FN_ATTRS256
2505_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
2506  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2507                 (__v4di)
2508                 _mm256_setzero_si256 (),
2509                 (__mmask8) __U);
2510}
2511
2512static __inline__ __m128d __DEFAULT_FN_ATTRS128
2513_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2514  return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2515              (__v2df) __W,
2516              (__mmask8)
2517              __U);
2518}
2519
2520static __inline__ __m128d __DEFAULT_FN_ATTRS128
2521_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2522  return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2523               (__v2df)
2524               _mm_setzero_pd (),
2525               (__mmask8)
2526               __U);
2527}
2528
2529static __inline__ __m256d __DEFAULT_FN_ATTRS256
2530_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2531  return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2532              (__v4df) __W,
2533              (__mmask8)
2534              __U);
2535}
2536
2537static __inline__ __m256d __DEFAULT_FN_ATTRS256
2538_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2539  return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2540               (__v4df)
2541               _mm256_setzero_pd (),
2542               (__mmask8)
2543               __U);
2544}
2545
2546static __inline__ __m128i __DEFAULT_FN_ATTRS128
2547_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2548  return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2549              (__v2di) __W,
2550              (__mmask8)
2551              __U);
2552}
2553
2554static __inline__ __m128i __DEFAULT_FN_ATTRS128
2555_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2556  return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2557               (__v2di)
2558               _mm_setzero_si128 (),
2559               (__mmask8)
2560               __U);
2561}
2562
2563static __inline__ __m256i __DEFAULT_FN_ATTRS256
2564_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
2565             void const *__P) {
2566  return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2567              (__v4di) __W,
2568              (__mmask8)
2569              __U);
2570}
2571
2572static __inline__ __m256i __DEFAULT_FN_ATTRS256
2573_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2574  return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2575               (__v4di)
2576               _mm256_setzero_si256 (),
2577               (__mmask8)
2578               __U);
2579}
2580
2581static __inline__ __m128 __DEFAULT_FN_ATTRS128
2582_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2583  return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2584                   (__v4sf) __W,
2585                   (__mmask8) __U);
2586}
2587
2588static __inline__ __m128 __DEFAULT_FN_ATTRS128
2589_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2590  return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2591              (__v4sf)
2592              _mm_setzero_ps (),
2593              (__mmask8)
2594              __U);
2595}
2596
2597static __inline__ __m256 __DEFAULT_FN_ATTRS256
2598_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2599  return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2600                   (__v8sf) __W,
2601                   (__mmask8) __U);
2602}
2603
2604static __inline__ __m256 __DEFAULT_FN_ATTRS256
2605_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2606  return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2607              (__v8sf)
2608              _mm256_setzero_ps (),
2609              (__mmask8)
2610              __U);
2611}
2612
2613static __inline__ __m128i __DEFAULT_FN_ATTRS128
2614_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2615  return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2616              (__v4si) __W,
2617              (__mmask8)
2618              __U);
2619}
2620
2621static __inline__ __m128i __DEFAULT_FN_ATTRS128
2622_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2623  return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2624               (__v4si)
2625               _mm_setzero_si128 (),
2626               (__mmask8)     __U);
2627}
2628
2629static __inline__ __m256i __DEFAULT_FN_ATTRS256
2630_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
2631             void const *__P) {
2632  return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2633              (__v8si) __W,
2634              (__mmask8)
2635              __U);
2636}
2637
2638static __inline__ __m256i __DEFAULT_FN_ATTRS256
2639_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2640  return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2641               (__v8si)
2642               _mm256_setzero_si256 (),
2643               (__mmask8)
2644               __U);
2645}
2646
2647static __inline__ __m128 __DEFAULT_FN_ATTRS128
2648_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2649  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2650               (__v4sf) __W,
2651               (__mmask8) __U);
2652}
2653
2654static __inline__ __m128 __DEFAULT_FN_ATTRS128
2655_mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
2656  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2657                (__v4sf)
2658                _mm_setzero_ps (),
2659                (__mmask8) __U);
2660}
2661
2662static __inline__ __m256 __DEFAULT_FN_ATTRS256
2663_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2664  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2665               (__v8sf) __W,
2666               (__mmask8) __U);
2667}
2668
2669static __inline__ __m256 __DEFAULT_FN_ATTRS256
2670_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
2671  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2672                (__v8sf)
2673                _mm256_setzero_ps (),
2674                (__mmask8) __U);
2675}
2676
2677static __inline__ __m128i __DEFAULT_FN_ATTRS128
2678_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2679  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2680                (__v4si) __W,
2681                (__mmask8) __U);
2682}
2683
2684static __inline__ __m128i __DEFAULT_FN_ATTRS128
2685_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
2686  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2687                 (__v4si)
2688                 _mm_setzero_si128 (),
2689                 (__mmask8) __U);
2690}
2691
2692static __inline__ __m256i __DEFAULT_FN_ATTRS256
2693_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2694  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2695                (__v8si) __W,
2696                (__mmask8) __U);
2697}
2698
2699static __inline__ __m256i __DEFAULT_FN_ATTRS256
2700_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
2701  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2702                 (__v8si)
2703                 _mm256_setzero_si256 (),
2704                 (__mmask8) __U);
2705}
2706
2707static __inline__ __m128d __DEFAULT_FN_ATTRS128
2708_mm_getexp_pd (__m128d __A) {
2709  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2710                (__v2df)
2711                _mm_setzero_pd (),
2712                (__mmask8) -1);
2713}
2714
2715static __inline__ __m128d __DEFAULT_FN_ATTRS128
2716_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2717  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2718                (__v2df) __W,
2719                (__mmask8) __U);
2720}
2721
2722static __inline__ __m128d __DEFAULT_FN_ATTRS128
2723_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2724  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2725                (__v2df)
2726                _mm_setzero_pd (),
2727                (__mmask8) __U);
2728}
2729
2730static __inline__ __m256d __DEFAULT_FN_ATTRS256
2731_mm256_getexp_pd (__m256d __A) {
2732  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2733                (__v4df)
2734                _mm256_setzero_pd (),
2735                (__mmask8) -1);
2736}
2737
2738static __inline__ __m256d __DEFAULT_FN_ATTRS256
2739_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2740  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2741                (__v4df) __W,
2742                (__mmask8) __U);
2743}
2744
2745static __inline__ __m256d __DEFAULT_FN_ATTRS256
2746_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
2747  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2748                (__v4df)
2749                _mm256_setzero_pd (),
2750                (__mmask8) __U);
2751}
2752
2753static __inline__ __m128 __DEFAULT_FN_ATTRS128
2754_mm_getexp_ps (__m128 __A) {
2755  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2756               (__v4sf)
2757               _mm_setzero_ps (),
2758               (__mmask8) -1);
2759}
2760
2761static __inline__ __m128 __DEFAULT_FN_ATTRS128
2762_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2763  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2764               (__v4sf) __W,
2765               (__mmask8) __U);
2766}
2767
2768static __inline__ __m128 __DEFAULT_FN_ATTRS128
2769_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
2770  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2771               (__v4sf)
2772               _mm_setzero_ps (),
2773               (__mmask8) __U);
2774}
2775
2776static __inline__ __m256 __DEFAULT_FN_ATTRS256
2777_mm256_getexp_ps (__m256 __A) {
2778  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2779               (__v8sf)
2780               _mm256_setzero_ps (),
2781               (__mmask8) -1);
2782}
2783
2784static __inline__ __m256 __DEFAULT_FN_ATTRS256
2785_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2786  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2787               (__v8sf) __W,
2788               (__mmask8) __U);
2789}
2790
2791static __inline__ __m256 __DEFAULT_FN_ATTRS256
2792_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
2793  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2794               (__v8sf)
2795               _mm256_setzero_ps (),
2796               (__mmask8) __U);
2797}
2798
2799static __inline__ __m128d __DEFAULT_FN_ATTRS128
2800_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2801  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2802                                              (__v2df)_mm_max_pd(__A, __B),
2803                                              (__v2df)__W);
2804}
2805
2806static __inline__ __m128d __DEFAULT_FN_ATTRS128
2807_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2808  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2809                                              (__v2df)_mm_max_pd(__A, __B),
2810                                              (__v2df)_mm_setzero_pd());
2811}
2812
2813static __inline__ __m256d __DEFAULT_FN_ATTRS256
2814_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2815  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2816                                              (__v4df)_mm256_max_pd(__A, __B),
2817                                              (__v4df)__W);
2818}
2819
2820static __inline__ __m256d __DEFAULT_FN_ATTRS256
2821_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2822  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2823                                              (__v4df)_mm256_max_pd(__A, __B),
2824                                              (__v4df)_mm256_setzero_pd());
2825}
2826
2827static __inline__ __m128 __DEFAULT_FN_ATTRS128
2828_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2829  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2830                                             (__v4sf)_mm_max_ps(__A, __B),
2831                                             (__v4sf)__W);
2832}
2833
2834static __inline__ __m128 __DEFAULT_FN_ATTRS128
2835_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2836  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2837                                             (__v4sf)_mm_max_ps(__A, __B),
2838                                             (__v4sf)_mm_setzero_ps());
2839}
2840
2841static __inline__ __m256 __DEFAULT_FN_ATTRS256
2842_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2843  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2844                                             (__v8sf)_mm256_max_ps(__A, __B),
2845                                             (__v8sf)__W);
2846}
2847
2848static __inline__ __m256 __DEFAULT_FN_ATTRS256
2849_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2850  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2851                                             (__v8sf)_mm256_max_ps(__A, __B),
2852                                             (__v8sf)_mm256_setzero_ps());
2853}
2854
2855static __inline__ __m128d __DEFAULT_FN_ATTRS128
2856_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2857  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2858                                              (__v2df)_mm_min_pd(__A, __B),
2859                                              (__v2df)__W);
2860}
2861
2862static __inline__ __m128d __DEFAULT_FN_ATTRS128
2863_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2864  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2865                                              (__v2df)_mm_min_pd(__A, __B),
2866                                              (__v2df)_mm_setzero_pd());
2867}
2868
2869static __inline__ __m256d __DEFAULT_FN_ATTRS256
2870_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2871  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2872                                              (__v4df)_mm256_min_pd(__A, __B),
2873                                              (__v4df)__W);
2874}
2875
2876static __inline__ __m256d __DEFAULT_FN_ATTRS256
2877_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2878  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2879                                              (__v4df)_mm256_min_pd(__A, __B),
2880                                              (__v4df)_mm256_setzero_pd());
2881}
2882
2883static __inline__ __m128 __DEFAULT_FN_ATTRS128
2884_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2885  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2886                                             (__v4sf)_mm_min_ps(__A, __B),
2887                                             (__v4sf)__W);
2888}
2889
2890static __inline__ __m128 __DEFAULT_FN_ATTRS128
2891_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2892  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2893                                             (__v4sf)_mm_min_ps(__A, __B),
2894                                             (__v4sf)_mm_setzero_ps());
2895}
2896
2897static __inline__ __m256 __DEFAULT_FN_ATTRS256
2898_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2899  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2900                                             (__v8sf)_mm256_min_ps(__A, __B),
2901                                             (__v8sf)__W);
2902}
2903
2904static __inline__ __m256 __DEFAULT_FN_ATTRS256
2905_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2906  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2907                                             (__v8sf)_mm256_min_ps(__A, __B),
2908                                             (__v8sf)_mm256_setzero_ps());
2909}
2910
2911static __inline__ __m128d __DEFAULT_FN_ATTRS128
2912_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2913  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2914                                              (__v2df)_mm_mul_pd(__A, __B),
2915                                              (__v2df)__W);
2916}
2917
2918static __inline__ __m128d __DEFAULT_FN_ATTRS128
2919_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2920  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2921                                              (__v2df)_mm_mul_pd(__A, __B),
2922                                              (__v2df)_mm_setzero_pd());
2923}
2924
2925static __inline__ __m256d __DEFAULT_FN_ATTRS256
2926_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2927  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2928                                              (__v4df)_mm256_mul_pd(__A, __B),
2929                                              (__v4df)__W);
2930}
2931
2932static __inline__ __m256d __DEFAULT_FN_ATTRS256
2933_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2934  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2935                                              (__v4df)_mm256_mul_pd(__A, __B),
2936                                              (__v4df)_mm256_setzero_pd());
2937}
2938
2939static __inline__ __m128 __DEFAULT_FN_ATTRS128
2940_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2941  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2942                                             (__v4sf)_mm_mul_ps(__A, __B),
2943                                             (__v4sf)__W);
2944}
2945
2946static __inline__ __m128 __DEFAULT_FN_ATTRS128
2947_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2948  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2949                                             (__v4sf)_mm_mul_ps(__A, __B),
2950                                             (__v4sf)_mm_setzero_ps());
2951}
2952
2953static __inline__ __m256 __DEFAULT_FN_ATTRS256
2954_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2955  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2956                                             (__v8sf)_mm256_mul_ps(__A, __B),
2957                                             (__v8sf)__W);
2958}
2959
2960static __inline__ __m256 __DEFAULT_FN_ATTRS256
2961_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2962  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2963                                             (__v8sf)_mm256_mul_ps(__A, __B),
2964                                             (__v8sf)_mm256_setzero_ps());
2965}
2966
2967static __inline__ __m128i __DEFAULT_FN_ATTRS128
2968_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2969  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2970                                             (__v4si)_mm_abs_epi32(__A),
2971                                             (__v4si)__W);
2972}
2973
2974static __inline__ __m128i __DEFAULT_FN_ATTRS128
2975_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
2976  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2977                                             (__v4si)_mm_abs_epi32(__A),
2978                                             (__v4si)_mm_setzero_si128());
2979}
2980
2981static __inline__ __m256i __DEFAULT_FN_ATTRS256
2982_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2983  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2984                                             (__v8si)_mm256_abs_epi32(__A),
2985                                             (__v8si)__W);
2986}
2987
2988static __inline__ __m256i __DEFAULT_FN_ATTRS256
2989_mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
2990  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2991                                             (__v8si)_mm256_abs_epi32(__A),
2992                                             (__v8si)_mm256_setzero_si256());
2993}
2994
2995static __inline__ __m128i __DEFAULT_FN_ATTRS128
2996_mm_abs_epi64 (__m128i __A) {
2997  return (__m128i)__builtin_elementwise_abs((__v2di)__A);
2998}
2999
3000static __inline__ __m128i __DEFAULT_FN_ATTRS128
3001_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
3002  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3003                                             (__v2di)_mm_abs_epi64(__A),
3004                                             (__v2di)__W);
3005}
3006
3007static __inline__ __m128i __DEFAULT_FN_ATTRS128
3008_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
3009  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3010                                             (__v2di)_mm_abs_epi64(__A),
3011                                             (__v2di)_mm_setzero_si128());
3012}
3013
3014static __inline__ __m256i __DEFAULT_FN_ATTRS256
3015_mm256_abs_epi64 (__m256i __A) {
3016  return (__m256i)__builtin_elementwise_abs((__v4di)__A);
3017}
3018
3019static __inline__ __m256i __DEFAULT_FN_ATTRS256
3020_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
3021  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3022                                             (__v4di)_mm256_abs_epi64(__A),
3023                                             (__v4di)__W);
3024}
3025
3026static __inline__ __m256i __DEFAULT_FN_ATTRS256
3027_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
3028  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3029                                             (__v4di)_mm256_abs_epi64(__A),
3030                                             (__v4di)_mm256_setzero_si256());
3031}
3032
3033static __inline__ __m128i __DEFAULT_FN_ATTRS128
3034_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3035  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3036                                             (__v4si)_mm_max_epi32(__A, __B),
3037                                             (__v4si)_mm_setzero_si128());
3038}
3039
3040static __inline__ __m128i __DEFAULT_FN_ATTRS128
3041_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3042  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3043                                             (__v4si)_mm_max_epi32(__A, __B),
3044                                             (__v4si)__W);
3045}
3046
3047static __inline__ __m256i __DEFAULT_FN_ATTRS256
3048_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3049  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3050                                             (__v8si)_mm256_max_epi32(__A, __B),
3051                                             (__v8si)_mm256_setzero_si256());
3052}
3053
3054static __inline__ __m256i __DEFAULT_FN_ATTRS256
3055_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3056  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3057                                             (__v8si)_mm256_max_epi32(__A, __B),
3058                                             (__v8si)__W);
3059}
3060
3061static __inline__ __m128i __DEFAULT_FN_ATTRS128
3062_mm_max_epi64 (__m128i __A, __m128i __B) {
3063  return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B);
3064}
3065
3066static __inline__ __m128i __DEFAULT_FN_ATTRS128
3067_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3068  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3069                                             (__v2di)_mm_max_epi64(__A, __B),
3070                                             (__v2di)_mm_setzero_si128());
3071}
3072
3073static __inline__ __m128i __DEFAULT_FN_ATTRS128
3074_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3075  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3076                                             (__v2di)_mm_max_epi64(__A, __B),
3077                                             (__v2di)__W);
3078}
3079
3080static __inline__ __m256i __DEFAULT_FN_ATTRS256
3081_mm256_max_epi64 (__m256i __A, __m256i __B) {
3082  return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B);
3083}
3084
3085static __inline__ __m256i __DEFAULT_FN_ATTRS256
3086_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3087  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3088                                             (__v4di)_mm256_max_epi64(__A, __B),
3089                                             (__v4di)_mm256_setzero_si256());
3090}
3091
3092static __inline__ __m256i __DEFAULT_FN_ATTRS256
3093_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3094  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3095                                             (__v4di)_mm256_max_epi64(__A, __B),
3096                                             (__v4di)__W);
3097}
3098
3099static __inline__ __m128i __DEFAULT_FN_ATTRS128
3100_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3101  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3102                                             (__v4si)_mm_max_epu32(__A, __B),
3103                                             (__v4si)_mm_setzero_si128());
3104}
3105
3106static __inline__ __m128i __DEFAULT_FN_ATTRS128
3107_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3108  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3109                                             (__v4si)_mm_max_epu32(__A, __B),
3110                                             (__v4si)__W);
3111}
3112
3113static __inline__ __m256i __DEFAULT_FN_ATTRS256
3114_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3115  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3116                                             (__v8si)_mm256_max_epu32(__A, __B),
3117                                             (__v8si)_mm256_setzero_si256());
3118}
3119
3120static __inline__ __m256i __DEFAULT_FN_ATTRS256
3121_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3122  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3123                                             (__v8si)_mm256_max_epu32(__A, __B),
3124                                             (__v8si)__W);
3125}
3126
3127static __inline__ __m128i __DEFAULT_FN_ATTRS128
3128_mm_max_epu64 (__m128i __A, __m128i __B) {
3129  return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B);
3130}
3131
3132static __inline__ __m128i __DEFAULT_FN_ATTRS128
3133_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3134  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3135                                             (__v2di)_mm_max_epu64(__A, __B),
3136                                             (__v2di)_mm_setzero_si128());
3137}
3138
3139static __inline__ __m128i __DEFAULT_FN_ATTRS128
3140_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3141  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3142                                             (__v2di)_mm_max_epu64(__A, __B),
3143                                             (__v2di)__W);
3144}
3145
3146static __inline__ __m256i __DEFAULT_FN_ATTRS256
3147_mm256_max_epu64 (__m256i __A, __m256i __B) {
3148  return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B);
3149}
3150
3151static __inline__ __m256i __DEFAULT_FN_ATTRS256
3152_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3153  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3154                                             (__v4di)_mm256_max_epu64(__A, __B),
3155                                             (__v4di)_mm256_setzero_si256());
3156}
3157
3158static __inline__ __m256i __DEFAULT_FN_ATTRS256
3159_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3160  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3161                                             (__v4di)_mm256_max_epu64(__A, __B),
3162                                             (__v4di)__W);
3163}
3164
3165static __inline__ __m128i __DEFAULT_FN_ATTRS128
3166_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3167  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3168                                             (__v4si)_mm_min_epi32(__A, __B),
3169                                             (__v4si)_mm_setzero_si128());
3170}
3171
3172static __inline__ __m128i __DEFAULT_FN_ATTRS128
3173_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3174  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3175                                             (__v4si)_mm_min_epi32(__A, __B),
3176                                             (__v4si)__W);
3177}
3178
3179static __inline__ __m256i __DEFAULT_FN_ATTRS256
3180_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3181  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3182                                             (__v8si)_mm256_min_epi32(__A, __B),
3183                                             (__v8si)_mm256_setzero_si256());
3184}
3185
3186static __inline__ __m256i __DEFAULT_FN_ATTRS256
3187_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3188  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3189                                             (__v8si)_mm256_min_epi32(__A, __B),
3190                                             (__v8si)__W);
3191}
3192
3193static __inline__ __m128i __DEFAULT_FN_ATTRS128
3194_mm_min_epi64 (__m128i __A, __m128i __B) {
3195  return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B);
3196}
3197
3198static __inline__ __m128i __DEFAULT_FN_ATTRS128
3199_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3200  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3201                                             (__v2di)_mm_min_epi64(__A, __B),
3202                                             (__v2di)__W);
3203}
3204
3205static __inline__ __m128i __DEFAULT_FN_ATTRS128
3206_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3207  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3208                                             (__v2di)_mm_min_epi64(__A, __B),
3209                                             (__v2di)_mm_setzero_si128());
3210}
3211
3212static __inline__ __m256i __DEFAULT_FN_ATTRS256
3213_mm256_min_epi64 (__m256i __A, __m256i __B) {
3214  return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B);
3215}
3216
3217static __inline__ __m256i __DEFAULT_FN_ATTRS256
3218_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3219  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3220                                             (__v4di)_mm256_min_epi64(__A, __B),
3221                                             (__v4di)__W);
3222}
3223
3224static __inline__ __m256i __DEFAULT_FN_ATTRS256
3225_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3226  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3227                                             (__v4di)_mm256_min_epi64(__A, __B),
3228                                             (__v4di)_mm256_setzero_si256());
3229}
3230
3231static __inline__ __m128i __DEFAULT_FN_ATTRS128
3232_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3233  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3234                                             (__v4si)_mm_min_epu32(__A, __B),
3235                                             (__v4si)_mm_setzero_si128());
3236}
3237
3238static __inline__ __m128i __DEFAULT_FN_ATTRS128
3239_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3240  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3241                                             (__v4si)_mm_min_epu32(__A, __B),
3242                                             (__v4si)__W);
3243}
3244
3245static __inline__ __m256i __DEFAULT_FN_ATTRS256
3246_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3247  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3248                                             (__v8si)_mm256_min_epu32(__A, __B),
3249                                             (__v8si)_mm256_setzero_si256());
3250}
3251
3252static __inline__ __m256i __DEFAULT_FN_ATTRS256
3253_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3254  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3255                                             (__v8si)_mm256_min_epu32(__A, __B),
3256                                             (__v8si)__W);
3257}
3258
3259static __inline__ __m128i __DEFAULT_FN_ATTRS128
3260_mm_min_epu64 (__m128i __A, __m128i __B) {
3261  return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B);
3262}
3263
3264static __inline__ __m128i __DEFAULT_FN_ATTRS128
3265_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3266  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3267                                             (__v2di)_mm_min_epu64(__A, __B),
3268                                             (__v2di)__W);
3269}
3270
3271static __inline__ __m128i __DEFAULT_FN_ATTRS128
3272_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3273  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3274                                             (__v2di)_mm_min_epu64(__A, __B),
3275                                             (__v2di)_mm_setzero_si128());
3276}
3277
3278static __inline__ __m256i __DEFAULT_FN_ATTRS256
3279_mm256_min_epu64 (__m256i __A, __m256i __B) {
3280  return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B);
3281}
3282
3283static __inline__ __m256i __DEFAULT_FN_ATTRS256
3284_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3285  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3286                                             (__v4di)_mm256_min_epu64(__A, __B),
3287                                             (__v4di)__W);
3288}
3289
3290static __inline__ __m256i __DEFAULT_FN_ATTRS256
3291_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3292  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3293                                             (__v4di)_mm256_min_epu64(__A, __B),
3294                                             (__v4di)_mm256_setzero_si256());
3295}
3296
/* _mm_roundscale_pd(A, imm): forwards A (as __v2df) and imm to
   __builtin_ia32_rndscalepd_128_mask with _mm_setzero_pd() as the third
   operand and an all-ones mask.
   NOTE(review): imm is presumably required to be a compile-time constant,
   which would be why this is a macro -- confirm. */
#define _mm_roundscale_pd(A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1))
3302
3303
/* _mm_mask_roundscale_pd(W, U, A, imm): forwards A (as __v2df) and imm to
   __builtin_ia32_rndscalepd_128_mask with W as the third operand and U as
   the mask. */
#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)(__m128d)(W), \
      (__mmask8)(U)))
3309
3310
/* _mm_maskz_roundscale_pd(U, A, imm): forwards A (as __v2df) and imm to
   __builtin_ia32_rndscalepd_128_mask with _mm_setzero_pd() as the third
   operand and U as the mask. */
#define _mm_maskz_roundscale_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U)))
3316
3317
/* _mm256_roundscale_pd(A, imm): forwards A (as __v4df) and imm to
   __builtin_ia32_rndscalepd_256_mask with _mm256_setzero_pd() as the third
   operand and an all-ones mask. */
#define _mm256_roundscale_pd(A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_setzero_pd(), \
      (__mmask8)-1))
3323
3324
/* _mm256_mask_roundscale_pd(W, U, A, imm): forwards A (as __v4df) and imm to
   __builtin_ia32_rndscalepd_256_mask with W as the third operand and U as
   the mask. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)(__m256d)(W), \
      (__mmask8)(U)))
3330
3331
/* _mm256_maskz_roundscale_pd(U, A, imm): forwards A (as __v4df) and imm to
   __builtin_ia32_rndscalepd_256_mask with _mm256_setzero_pd() as the third
   operand and U as the mask. */
#define _mm256_maskz_roundscale_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_setzero_pd(), \
      (__mmask8)(U)))
3337
/* _mm_roundscale_ps(A, imm): forwards A (as __v4sf) and imm to
   __builtin_ia32_rndscaleps_128_mask with _mm_setzero_ps() as the third
   operand and an all-ones mask. */
#define _mm_roundscale_ps(A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1))
3342
3343
/* _mm_mask_roundscale_ps(W, U, A, imm): forwards A (as __v4sf) and imm to
   __builtin_ia32_rndscaleps_128_mask with W as the third operand and U as
   the mask. */
#define _mm_mask_roundscale_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__v4sf)(__m128)(W), \
      (__mmask8)(U)))
3348
3349
/* _mm_maskz_roundscale_ps(U, A, imm): forwards A (as __v4sf) and imm to
   __builtin_ia32_rndscaleps_128_mask with _mm_setzero_ps() as the third
   operand and U as the mask. */
#define _mm_maskz_roundscale_ps(U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)))
3354
/* _mm256_roundscale_ps(A, imm): forwards A (as __v8sf) and imm to
   __builtin_ia32_rndscaleps_256_mask with _mm256_setzero_ps() as the third
   operand and an all-ones mask. */
#define _mm256_roundscale_ps(A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1))
3359
/* _mm256_mask_roundscale_ps(W, U, A, imm): forwards A (as __v8sf) and imm to
   __builtin_ia32_rndscaleps_256_mask with W as the third operand and U as
   the mask. */
#define _mm256_mask_roundscale_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)(__m256)(W), \
      (__mmask8)(U)))
3364
3365
/* _mm256_maskz_roundscale_ps(U, A, imm): forwards A (as __v8sf) and imm to
   __builtin_ia32_rndscaleps_256_mask with _mm256_setzero_ps() as the third
   operand and U as the mask. */
#define _mm256_maskz_roundscale_ps(U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)))
3370
3371static __inline__ __m128d __DEFAULT_FN_ATTRS128
3372_mm_scalef_pd (__m128d __A, __m128d __B) {
3373  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3374                (__v2df) __B,
3375                (__v2df)
3376                _mm_setzero_pd (),
3377                (__mmask8) -1);
3378}
3379
3380static __inline__ __m128d __DEFAULT_FN_ATTRS128
3381_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3382        __m128d __B) {
3383  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3384                (__v2df) __B,
3385                (__v2df) __W,
3386                (__mmask8) __U);
3387}
3388
3389static __inline__ __m128d __DEFAULT_FN_ATTRS128
3390_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3391  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3392                (__v2df) __B,
3393                (__v2df)
3394                _mm_setzero_pd (),
3395                (__mmask8) __U);
3396}
3397
3398static __inline__ __m256d __DEFAULT_FN_ATTRS256
3399_mm256_scalef_pd (__m256d __A, __m256d __B) {
3400  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3401                (__v4df) __B,
3402                (__v4df)
3403                _mm256_setzero_pd (),
3404                (__mmask8) -1);
3405}
3406
3407static __inline__ __m256d __DEFAULT_FN_ATTRS256
3408_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3409           __m256d __B) {
3410  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3411                (__v4df) __B,
3412                (__v4df) __W,
3413                (__mmask8) __U);
3414}
3415
3416static __inline__ __m256d __DEFAULT_FN_ATTRS256
3417_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3418  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3419                (__v4df) __B,
3420                (__v4df)
3421                _mm256_setzero_pd (),
3422                (__mmask8) __U);
3423}
3424
3425static __inline__ __m128 __DEFAULT_FN_ATTRS128
3426_mm_scalef_ps (__m128 __A, __m128 __B) {
3427  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3428               (__v4sf) __B,
3429               (__v4sf)
3430               _mm_setzero_ps (),
3431               (__mmask8) -1);
3432}
3433
3434static __inline__ __m128 __DEFAULT_FN_ATTRS128
3435_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3436  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3437               (__v4sf) __B,
3438               (__v4sf) __W,
3439               (__mmask8) __U);
3440}
3441
3442static __inline__ __m128 __DEFAULT_FN_ATTRS128
3443_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3444  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3445               (__v4sf) __B,
3446               (__v4sf)
3447               _mm_setzero_ps (),
3448               (__mmask8) __U);
3449}
3450
3451static __inline__ __m256 __DEFAULT_FN_ATTRS256
3452_mm256_scalef_ps (__m256 __A, __m256 __B) {
3453  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3454               (__v8sf) __B,
3455               (__v8sf)
3456               _mm256_setzero_ps (),
3457               (__mmask8) -1);
3458}
3459
3460static __inline__ __m256 __DEFAULT_FN_ATTRS256
3461_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3462           __m256 __B) {
3463  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3464               (__v8sf) __B,
3465               (__v8sf) __W,
3466               (__mmask8) __U);
3467}
3468
3469static __inline__ __m256 __DEFAULT_FN_ATTRS256
3470_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3471  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3472               (__v8sf) __B,
3473               (__v8sf)
3474               _mm256_setzero_ps (),
3475               (__mmask8) __U);
3476}
3477
/* _mm_i64scatter_pd(addr, index, v1, scale): unmasked scatter; forwards to
   __builtin_ia32_scatterdiv2df with an all-ones mask, index viewed as
   __v2di and v1 as __v2df. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3482
/* _mm_mask_i64scatter_pd(addr, mask, index, v1, scale): masked scatter;
   forwards to __builtin_ia32_scatterdiv2df with the caller's mask, index
   viewed as __v2di and v1 as __v2df. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3487
/* _mm_i64scatter_epi64(addr, index, v1, scale): unmasked scatter; forwards
   to __builtin_ia32_scatterdiv2di with an all-ones mask, index and v1 both
   viewed as __v2di. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3492
/* _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale): masked scatter;
   forwards to __builtin_ia32_scatterdiv2di with the caller's mask, index
   and v1 both viewed as __v2di. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3497
/* _mm256_i64scatter_pd(addr, index, v1, scale): unmasked scatter; forwards
   to __builtin_ia32_scatterdiv4df with an all-ones mask, index viewed as
   __v4di and v1 as __v4df. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3502
/* _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale): masked scatter;
   forwards to __builtin_ia32_scatterdiv4df with the caller's mask, index
   viewed as __v4di and v1 as __v4df. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3507
/* _mm256_i64scatter_epi64(addr, index, v1, scale): unmasked scatter;
   forwards to __builtin_ia32_scatterdiv4di with an all-ones mask, index and
   v1 both viewed as __v4di. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3512
/* _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale): masked
   scatter; forwards to __builtin_ia32_scatterdiv4di with the caller's mask,
   index and v1 both viewed as __v4di. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3517
/* _mm_i64scatter_ps(addr, index, v1, scale): unmasked scatter; forwards to
   __builtin_ia32_scatterdiv4sf with an all-ones mask, index viewed as
   __v2di and v1 as __v4sf. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3522
/* _mm_mask_i64scatter_ps(addr, mask, index, v1, scale): masked scatter;
   forwards to __builtin_ia32_scatterdiv4sf with the caller's mask, index
   viewed as __v2di and v1 as __v4sf. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3527
/* _mm_i64scatter_epi32(addr, index, v1, scale): unmasked scatter; forwards
   to __builtin_ia32_scatterdiv4si with an all-ones mask, index viewed as
   __v2di and v1 as __v4si. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3532
/* _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale): masked scatter;
   forwards to __builtin_ia32_scatterdiv4si with the caller's mask, index
   viewed as __v2di and v1 as __v4si. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3537
/* _mm256_i64scatter_ps(addr, index, v1, scale): unmasked scatter; forwards
   to __builtin_ia32_scatterdiv8sf with an all-ones mask. The index is a
   256-bit vector viewed as __v4di while the data v1 is a 128-bit __m128
   viewed as __v4sf. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3542
/* _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale): masked scatter;
   forwards to __builtin_ia32_scatterdiv8sf with the caller's mask. The
   index is a 256-bit vector viewed as __v4di while the data v1 is a 128-bit
   __m128 viewed as __v4sf. */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3547
/* _mm256_i64scatter_epi32(addr, index, v1, scale): unmasked scatter;
   forwards to __builtin_ia32_scatterdiv8si with an all-ones mask. The index
   is a 256-bit vector viewed as __v4di while the data v1 is a 128-bit
   __m128i viewed as __v4si. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3552
/* _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale): masked
   scatter; forwards to __builtin_ia32_scatterdiv8si with the caller's mask.
   The index is a 256-bit vector viewed as __v4di while the data v1 is a
   128-bit __m128i viewed as __v4si. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3557
/* _mm_i32scatter_pd(addr, index, v1, scale): unmasked scatter; forwards to
   __builtin_ia32_scattersiv2df with an all-ones mask, index viewed as
   __v4si and v1 as __v2df. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3562
/* _mm_mask_i32scatter_pd(addr, mask, index, v1, scale): masked scatter;
   forwards to __builtin_ia32_scattersiv2df with the caller's mask, index
   viewed as __v4si and v1 as __v2df. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3567
/* _mm_i32scatter_epi64(addr, index, v1, scale): unmasked scatter; forwards
   to __builtin_ia32_scattersiv2di with an all-ones mask, index viewed as
   __v4si and v1 as __v2di. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3572
/* _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale): masked scatter;
   forwards to __builtin_ia32_scattersiv2di with the caller's mask, index
   viewed as __v4si and v1 as __v2di. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3577
/* _mm256_i32scatter_pd(addr, index, v1, scale): unmasked scatter; forwards
   to __builtin_ia32_scattersiv4df with an all-ones mask. The index is a
   128-bit __m128i viewed as __v4si while the data v1 is a 256-bit vector
   viewed as __v4df. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3582
/* _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale): masked scatter;
   forwards to __builtin_ia32_scattersiv4df with the caller's mask. The
   index is a 128-bit __m128i viewed as __v4si while the data v1 is a
   256-bit vector viewed as __v4df. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3587
/* _mm256_i32scatter_epi64(addr, index, v1, scale): unmasked scatter;
   forwards to __builtin_ia32_scattersiv4di with an all-ones mask. The index
   is a 128-bit __m128i viewed as __v4si while the data v1 is a 256-bit
   vector viewed as __v4di. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3592
/* _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale): masked
   scatter; forwards to __builtin_ia32_scattersiv4di with the caller's mask.
   The index is a 128-bit __m128i viewed as __v4si while the data v1 is a
   256-bit vector viewed as __v4di. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3597
/* _mm_i32scatter_ps(addr, index, v1, scale): unmasked scatter; forwards to
   __builtin_ia32_scattersiv4sf with an all-ones mask, index viewed as
   __v4si and v1 as __v4sf. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3602
/* _mm_mask_i32scatter_ps(addr, mask, index, v1, scale): masked scatter;
   forwards to __builtin_ia32_scattersiv4sf with the caller's mask, index
   viewed as __v4si and v1 as __v4sf. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3607
/* Expands to __builtin_ia32_scattersiv4si with the mask fixed to (__mmask8)-1,
 * index and v1 both viewed as __v4si. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3612
/* Expands to __builtin_ia32_scattersiv4si with the caller's mask cast to
 * __mmask8, index and v1 both viewed as __v4si. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3617
/* Expands to __builtin_ia32_scattersiv8sf with the mask fixed to (__mmask8)-1,
 * index viewed as __v8si and v1 viewed as __v8sf. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8sf((void *)(addr), \
                               (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8sf)(__m256)(v1), \
                               (int)(scale))
3622
/* Expands to __builtin_ia32_scattersiv8sf with the caller's mask cast to
 * __mmask8, index viewed as __v8si and v1 viewed as __v8sf. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8sf)(__m256)(v1), \
                               (int)(scale))
3627
/* Expands to __builtin_ia32_scattersiv8si with the mask fixed to (__mmask8)-1,
 * index and v1 both viewed as __v8si. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), \
                               (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), \
                               (int)(scale))
3632
/* Expands to __builtin_ia32_scattersiv8si with the caller's mask cast to
 * __mmask8, index and v1 both viewed as __v8si. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), \
                               (int)(scale))
3637
3638  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3639  _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3640    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3641                                                (__v2df)_mm_sqrt_pd(__A),
3642                                                (__v2df)__W);
3643  }
3644
3645  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3646  _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3647    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3648                                                (__v2df)_mm_sqrt_pd(__A),
3649                                                (__v2df)_mm_setzero_pd());
3650  }
3651
3652  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3653  _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3654    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3655                                                (__v4df)_mm256_sqrt_pd(__A),
3656                                                (__v4df)__W);
3657  }
3658
3659  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3660  _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3661    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3662                                                (__v4df)_mm256_sqrt_pd(__A),
3663                                                (__v4df)_mm256_setzero_pd());
3664  }
3665
3666  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3667  _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3668    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3669                                               (__v4sf)_mm_sqrt_ps(__A),
3670                                               (__v4sf)__W);
3671  }
3672
3673  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3674  _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3675    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3676                                               (__v4sf)_mm_sqrt_ps(__A),
3677                                               (__v4sf)_mm_setzero_ps());
3678  }
3679
3680  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3681  _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3682    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3683                                               (__v8sf)_mm256_sqrt_ps(__A),
3684                                               (__v8sf)__W);
3685  }
3686
3687  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3688  _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
3689    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3690                                               (__v8sf)_mm256_sqrt_ps(__A),
3691                                               (__v8sf)_mm256_setzero_ps());
3692  }
3693
3694  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3695  _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3696    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3697                                                (__v2df)_mm_sub_pd(__A, __B),
3698                                                (__v2df)__W);
3699  }
3700
3701  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3702  _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3703    return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3704                                                (__v2df)_mm_sub_pd(__A, __B),
3705                                                (__v2df)_mm_setzero_pd());
3706  }
3707
3708  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3709  _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3710    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3711                                                (__v4df)_mm256_sub_pd(__A, __B),
3712                                                (__v4df)__W);
3713  }
3714
3715  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3716  _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3717    return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3718                                                (__v4df)_mm256_sub_pd(__A, __B),
3719                                                (__v4df)_mm256_setzero_pd());
3720  }
3721
3722  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3723  _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3724    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3725                                               (__v4sf)_mm_sub_ps(__A, __B),
3726                                               (__v4sf)__W);
3727  }
3728
3729  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3730  _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3731    return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3732                                               (__v4sf)_mm_sub_ps(__A, __B),
3733                                               (__v4sf)_mm_setzero_ps());
3734  }
3735
3736  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3737  _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3738    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3739                                               (__v8sf)_mm256_sub_ps(__A, __B),
3740                                               (__v8sf)__W);
3741  }
3742
3743  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3744  _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3745    return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3746                                               (__v8sf)_mm256_sub_ps(__A, __B),
3747                                               (__v8sf)_mm256_setzero_ps());
3748  }
3749
3750  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3751  _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3752    return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3753                                                  (__v4si)__B);
3754  }
3755
3756  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3757  _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3758                              __m128i __B) {
3759    return (__m128i)__builtin_ia32_selectd_128(__U,
3760                                    (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3761                                    (__v4si)__A);
3762  }
3763
3764  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3765  _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3766                               __m128i __B) {
3767    return (__m128i)__builtin_ia32_selectd_128(__U,
3768                                    (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3769                                    (__v4si)__I);
3770  }
3771
3772  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3773  _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3774                               __m128i __B) {
3775    return (__m128i)__builtin_ia32_selectd_128(__U,
3776                                    (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3777                                    (__v4si)_mm_setzero_si128());
3778  }
3779
3780  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3781  _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3782    return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3783                                                  (__v8si) __B);
3784  }
3785
3786  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3787  _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3788                                 __m256i __B) {
3789    return (__m256i)__builtin_ia32_selectd_256(__U,
3790                                 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3791                                 (__v8si)__A);
3792  }
3793
3794  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3795  _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3796                                  __m256i __B) {
3797    return (__m256i)__builtin_ia32_selectd_256(__U,
3798                                 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3799                                 (__v8si)__I);
3800  }
3801
3802  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3803  _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3804                                  __m256i __B) {
3805    return (__m256i)__builtin_ia32_selectd_256(__U,
3806                                 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3807                                 (__v8si)_mm256_setzero_si256());
3808  }
3809
3810  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3811  _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3812    return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3813                                                   (__v2df)__B);
3814  }
3815
3816  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3817  _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
3818    return (__m128d)__builtin_ia32_selectpd_128(__U,
3819                                       (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3820                                       (__v2df)__A);
3821  }
3822
3823  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3824  _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
3825    return (__m128d)__builtin_ia32_selectpd_128(__U,
3826                                       (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3827                                       (__v2df)(__m128d)__I);
3828  }
3829
3830  static __inline__ __m128d __DEFAULT_FN_ATTRS128
3831  _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
3832    return (__m128d)__builtin_ia32_selectpd_128(__U,
3833                                       (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3834                                       (__v2df)_mm_setzero_pd());
3835  }
3836
3837  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3838  _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3839    return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3840                                                   (__v4df)__B);
3841  }
3842
3843  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3844  _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3845                              __m256d __B) {
3846    return (__m256d)__builtin_ia32_selectpd_256(__U,
3847                                    (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3848                                    (__v4df)__A);
3849  }
3850
3851  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3852  _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3853                               __m256d __B) {
3854    return (__m256d)__builtin_ia32_selectpd_256(__U,
3855                                    (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3856                                    (__v4df)(__m256d)__I);
3857  }
3858
3859  static __inline__ __m256d __DEFAULT_FN_ATTRS256
3860  _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3861                               __m256d __B) {
3862    return (__m256d)__builtin_ia32_selectpd_256(__U,
3863                                    (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3864                                    (__v4df)_mm256_setzero_pd());
3865  }
3866
3867  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3868  _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3869    return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3870                                                  (__v4sf)__B);
3871  }
3872
3873  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3874  _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3875    return (__m128)__builtin_ia32_selectps_128(__U,
3876                                       (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3877                                       (__v4sf)__A);
3878  }
3879
3880  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3881  _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3882    return (__m128)__builtin_ia32_selectps_128(__U,
3883                                       (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3884                                       (__v4sf)(__m128)__I);
3885  }
3886
3887  static __inline__ __m128 __DEFAULT_FN_ATTRS128
3888  _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3889    return (__m128)__builtin_ia32_selectps_128(__U,
3890                                       (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3891                                       (__v4sf)_mm_setzero_ps());
3892  }
3893
3894  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3895  _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3896    return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3897                                                  (__v8sf) __B);
3898  }
3899
3900  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3901  _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
3902    return (__m256)__builtin_ia32_selectps_256(__U,
3903                                    (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3904                                    (__v8sf)__A);
3905  }
3906
3907  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3908  _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3909                               __m256 __B) {
3910    return (__m256)__builtin_ia32_selectps_256(__U,
3911                                    (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3912                                    (__v8sf)(__m256)__I);
3913  }
3914
3915  static __inline__ __m256 __DEFAULT_FN_ATTRS256
3916  _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3917                               __m256 __B) {
3918    return (__m256)__builtin_ia32_selectps_256(__U,
3919                                    (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3920                                    (__v8sf)_mm256_setzero_ps());
3921  }
3922
3923  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3924  _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3925    return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3926                                                  (__v2di)__B);
3927  }
3928
3929  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3930  _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3931                              __m128i __B) {
3932    return (__m128i)__builtin_ia32_selectq_128(__U,
3933                                    (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3934                                    (__v2di)__A);
3935  }
3936
3937  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3938  _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3939                               __m128i __B) {
3940    return (__m128i)__builtin_ia32_selectq_128(__U,
3941                                    (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3942                                    (__v2di)__I);
3943  }
3944
3945  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3946  _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3947                               __m128i __B) {
3948    return (__m128i)__builtin_ia32_selectq_128(__U,
3949                                    (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3950                                    (__v2di)_mm_setzero_si128());
3951  }
3952
3953
3954  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3955  _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3956    return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3957                                                  (__v4di) __B);
3958  }
3959
3960  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3961  _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3962                                 __m256i __B) {
3963    return (__m256i)__builtin_ia32_selectq_256(__U,
3964                                 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3965                                 (__v4di)__A);
3966  }
3967
3968  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3969  _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3970                                  __m256i __B) {
3971    return (__m256i)__builtin_ia32_selectq_256(__U,
3972                                 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3973                                 (__v4di)__I);
3974  }
3975
3976  static __inline__ __m256i __DEFAULT_FN_ATTRS256
3977  _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3978                                  __m256i __B) {
3979    return (__m256i)__builtin_ia32_selectq_256(__U,
3980                                 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3981                                 (__v4di)_mm256_setzero_si256());
3982  }
3983
3984  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3985  _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3986  {
3987    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3988                                               (__v4si)_mm_cvtepi8_epi32(__A),
3989                                               (__v4si)__W);
3990  }
3991
3992  static __inline__ __m128i __DEFAULT_FN_ATTRS128
3993  _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
3994  {
3995    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3996                                               (__v4si)_mm_cvtepi8_epi32(__A),
3997                                               (__v4si)_mm_setzero_si128());
3998  }
3999
4000  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4001  _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
4002  {
4003    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4004                                               (__v8si)_mm256_cvtepi8_epi32(__A),
4005                                               (__v8si)__W);
4006  }
4007
4008  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4009  _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
4010  {
4011    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4012                                               (__v8si)_mm256_cvtepi8_epi32(__A),
4013                                               (__v8si)_mm256_setzero_si256());
4014  }
4015
4016  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4017  _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4018  {
4019    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4020                                               (__v2di)_mm_cvtepi8_epi64(__A),
4021                                               (__v2di)__W);
4022  }
4023
4024  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4025  _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4026  {
4027    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4028                                               (__v2di)_mm_cvtepi8_epi64(__A),
4029                                               (__v2di)_mm_setzero_si128());
4030  }
4031
4032  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4033  _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4034  {
4035    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4036                                               (__v4di)_mm256_cvtepi8_epi64(__A),
4037                                               (__v4di)__W);
4038  }
4039
4040  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4041  _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4042  {
4043    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4044                                               (__v4di)_mm256_cvtepi8_epi64(__A),
4045                                               (__v4di)_mm256_setzero_si256());
4046  }
4047
4048  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4049  _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4050  {
4051    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4052                                               (__v2di)_mm_cvtepi32_epi64(__X),
4053                                               (__v2di)__W);
4054  }
4055
4056  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4057  _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4058  {
4059    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4060                                               (__v2di)_mm_cvtepi32_epi64(__X),
4061                                               (__v2di)_mm_setzero_si128());
4062  }
4063
4064  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4065  _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4066  {
4067    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4068                                               (__v4di)_mm256_cvtepi32_epi64(__X),
4069                                               (__v4di)__W);
4070  }
4071
4072  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4073  _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4074  {
4075    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4076                                               (__v4di)_mm256_cvtepi32_epi64(__X),
4077                                               (__v4di)_mm256_setzero_si256());
4078  }
4079
4080  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4081  _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4082  {
4083    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4084                                               (__v4si)_mm_cvtepi16_epi32(__A),
4085                                               (__v4si)__W);
4086  }
4087
4088  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4089  _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
4090  {
4091    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4092                                               (__v4si)_mm_cvtepi16_epi32(__A),
4093                                               (__v4si)_mm_setzero_si128());
4094  }
4095
4096  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4097  _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4098  {
4099    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4100                                               (__v8si)_mm256_cvtepi16_epi32(__A),
4101                                               (__v8si)__W);
4102  }
4103
4104  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4105  _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
4106  {
4107    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4108                                               (__v8si)_mm256_cvtepi16_epi32(__A),
4109                                               (__v8si)_mm256_setzero_si256());
4110  }
4111
4112  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4113  _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4114  {
4115    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4116                                               (__v2di)_mm_cvtepi16_epi64(__A),
4117                                               (__v2di)__W);
4118  }
4119
4120  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4121  _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4122  {
4123    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4124                                               (__v2di)_mm_cvtepi16_epi64(__A),
4125                                               (__v2di)_mm_setzero_si128());
4126  }
4127
4128  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4129  _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4130  {
4131    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4132                                               (__v4di)_mm256_cvtepi16_epi64(__A),
4133                                               (__v4di)__W);
4134  }
4135
4136  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4137  _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4138  {
4139    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4140                                               (__v4di)_mm256_cvtepi16_epi64(__A),
4141                                               (__v4di)_mm256_setzero_si256());
4142  }
4143
4144
4145  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4146  _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4147  {
4148    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4149                                               (__v4si)_mm_cvtepu8_epi32(__A),
4150                                               (__v4si)__W);
4151  }
4152
4153  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4154  _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4155  {
4156    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4157                                               (__v4si)_mm_cvtepu8_epi32(__A),
4158                                               (__v4si)_mm_setzero_si128());
4159  }
4160
4161  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4162  _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4163  {
4164    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4165                                               (__v8si)_mm256_cvtepu8_epi32(__A),
4166                                               (__v8si)__W);
4167  }
4168
4169  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4170  _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4171  {
4172    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4173                                               (__v8si)_mm256_cvtepu8_epi32(__A),
4174                                               (__v8si)_mm256_setzero_si256());
4175  }
4176
4177  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4178  _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4179  {
4180    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4181                                               (__v2di)_mm_cvtepu8_epi64(__A),
4182                                               (__v2di)__W);
4183  }
4184
4185  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4186  _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4187  {
4188    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4189                                               (__v2di)_mm_cvtepu8_epi64(__A),
4190                                               (__v2di)_mm_setzero_si128());
4191  }
4192
4193  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4194  _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4195  {
4196    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4197                                               (__v4di)_mm256_cvtepu8_epi64(__A),
4198                                               (__v4di)__W);
4199  }
4200
4201  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4202  _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
4203  {
4204    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4205                                               (__v4di)_mm256_cvtepu8_epi64(__A),
4206                                               (__v4di)_mm256_setzero_si256());
4207  }
4208
4209  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4210  _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4211  {
4212    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4213                                               (__v2di)_mm_cvtepu32_epi64(__X),
4214                                               (__v2di)__W);
4215  }
4216
4217  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4218  _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4219  {
4220    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4221                                               (__v2di)_mm_cvtepu32_epi64(__X),
4222                                               (__v2di)_mm_setzero_si128());
4223  }
4224
4225  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4226  _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4227  {
4228    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4229                                               (__v4di)_mm256_cvtepu32_epi64(__X),
4230                                               (__v4di)__W);
4231  }
4232
4233  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4234  _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4235  {
4236    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4237                                               (__v4di)_mm256_cvtepu32_epi64(__X),
4238                                               (__v4di)_mm256_setzero_si256());
4239  }
4240
4241  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4242  _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4243  {
4244    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4245                                               (__v4si)_mm_cvtepu16_epi32(__A),
4246                                               (__v4si)__W);
4247  }
4248
4249  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4250  _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4251  {
4252    return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4253                                               (__v4si)_mm_cvtepu16_epi32(__A),
4254                                               (__v4si)_mm_setzero_si128());
4255  }
4256
4257  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4258  _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4259  {
4260    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4261                                               (__v8si)_mm256_cvtepu16_epi32(__A),
4262                                               (__v8si)__W);
4263  }
4264
4265  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4266  _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4267  {
4268    return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4269                                               (__v8si)_mm256_cvtepu16_epi32(__A),
4270                                               (__v8si)_mm256_setzero_si256());
4271  }
4272
4273  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4274  _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4275  {
4276    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4277                                               (__v2di)_mm_cvtepu16_epi64(__A),
4278                                               (__v2di)__W);
4279  }
4280
4281  static __inline__ __m128i __DEFAULT_FN_ATTRS128
4282  _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4283  {
4284    return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4285                                               (__v2di)_mm_cvtepu16_epi64(__A),
4286                                               (__v2di)_mm_setzero_si128());
4287  }
4288
4289  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4290  _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4291  {
4292    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4293                                               (__v4di)_mm256_cvtepu16_epi64(__A),
4294                                               (__v4di)__W);
4295  }
4296
4297  static __inline__ __m256i __DEFAULT_FN_ATTRS256
4298  _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4299  {
4300    return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4301                                               (__v4di)_mm256_cvtepu16_epi64(__A),
4302                                               (__v4di)_mm256_setzero_si256());
4303  }
4304
4305
/* Kept as a macro: casts (A) to __v4si and (B) to int, then calls the
 * prold128 builtin.
 * NOTE(review): B presumably must be a compile-time constant -- confirm. */
#define _mm_rol_epi32(A, B) \
  ((__m128i)__builtin_ia32_prold128( \
      (__v4si)(__m128i)(A), \
      (int)(B)))
4308
/* Merge-masking form of _mm_rol_epi32: the selectd_128 builtin receives the
 * mask (U), the _mm_rol_epi32 result and the fallback (W). */
#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_rol_epi32((A), (B)), \
      (__v4si)(__m128i)(W)))
4313
/* Zero-masking form of _mm_rol_epi32: the selectd_128 builtin receives the
 * mask (U), the _mm_rol_epi32 result and an all-zero vector. */
#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_rol_epi32((A), (B)), \
      (__v4si)_mm_setzero_si128()))
4318
/* Kept as a macro: casts (A) to __v8si and (B) to int, then calls the
 * prold256 builtin.
 * NOTE(review): B presumably must be a compile-time constant -- confirm. */
#define _mm256_rol_epi32(A, B) \
  ((__m256i)__builtin_ia32_prold256( \
      (__v8si)(__m256i)(A), \
      (int)(B)))
4321
/* Merge-masking form of _mm256_rol_epi32: the selectd_256 builtin receives
 * the mask (U), the _mm256_rol_epi32 result and the fallback (W). */
#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_rol_epi32((A), (B)), \
      (__v8si)(__m256i)(W)))
4326
/* Zero-masking form of _mm256_rol_epi32: the selectd_256 builtin receives
 * the mask (U), the _mm256_rol_epi32 result and an all-zero vector. */
#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_rol_epi32((A), (B)), \
      (__v8si)_mm256_setzero_si256()))
4331
/* Kept as a macro: casts (A) to __v2di and (B) to int, then calls the
 * prolq128 builtin.
 * NOTE(review): B presumably must be a compile-time constant -- confirm. */
#define _mm_rol_epi64(A, B) \
  ((__m128i)__builtin_ia32_prolq128( \
      (__v2di)(__m128i)(A), \
      (int)(B)))
4334
/* Merge-masking form of _mm_rol_epi64: the selectq_128 builtin receives the
 * mask (U), the _mm_rol_epi64 result and the fallback (W). */
#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_rol_epi64((A), (B)), \
      (__v2di)(__m128i)(W)))
4339
/* Zero-masking form of _mm_rol_epi64: the selectq_128 builtin receives the
 * mask (U), the _mm_rol_epi64 result and an all-zero vector. */
#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_rol_epi64((A), (B)), \
      (__v2di)_mm_setzero_si128()))
4344
/* Kept as a macro: casts (A) to __v4di and (B) to int, then calls the
 * prolq256 builtin.
 * NOTE(review): B presumably must be a compile-time constant -- confirm. */
#define _mm256_rol_epi64(A, B) \
  ((__m256i)__builtin_ia32_prolq256( \
      (__v4di)(__m256i)(A), \
      (int)(B)))
4347
/* Merge-masking form of _mm256_rol_epi64: the selectq_256 builtin receives
 * the mask (U), the _mm256_rol_epi64 result and the fallback (W). */
#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_rol_epi64((A), (B)), \
      (__v4di)(__m256i)(W)))
4352
/* Zero-masking form of _mm256_rol_epi64: the selectq_256 builtin receives
 * the mask (U), the _mm256_rol_epi64 result and an all-zero vector. */
#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_rol_epi64((A), (B)), \
      (__v4di)_mm256_setzero_si256()))
4357
4358static __inline__ __m128i __DEFAULT_FN_ATTRS128
4359_mm_rolv_epi32 (__m128i __A, __m128i __B)
4360{
4361  return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4362}
4363
4364static __inline__ __m128i __DEFAULT_FN_ATTRS128
4365_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4366{
4367  return (__m128i)__builtin_ia32_selectd_128(__U,
4368                                             (__v4si)_mm_rolv_epi32(__A, __B),
4369                                             (__v4si)__W);
4370}
4371
4372static __inline__ __m128i __DEFAULT_FN_ATTRS128
4373_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4374{
4375  return (__m128i)__builtin_ia32_selectd_128(__U,
4376                                             (__v4si)_mm_rolv_epi32(__A, __B),
4377                                             (__v4si)_mm_setzero_si128());
4378}
4379
4380static __inline__ __m256i __DEFAULT_FN_ATTRS256
4381_mm256_rolv_epi32 (__m256i __A, __m256i __B)
4382{
4383  return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4384}
4385
4386static __inline__ __m256i __DEFAULT_FN_ATTRS256
4387_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4388{
4389  return (__m256i)__builtin_ia32_selectd_256(__U,
4390                                            (__v8si)_mm256_rolv_epi32(__A, __B),
4391                                            (__v8si)__W);
4392}
4393
4394static __inline__ __m256i __DEFAULT_FN_ATTRS256
4395_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4396{
4397  return (__m256i)__builtin_ia32_selectd_256(__U,
4398                                            (__v8si)_mm256_rolv_epi32(__A, __B),
4399                                            (__v8si)_mm256_setzero_si256());
4400}
4401
4402static __inline__ __m128i __DEFAULT_FN_ATTRS128
4403_mm_rolv_epi64 (__m128i __A, __m128i __B)
4404{
4405  return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4406}
4407
4408static __inline__ __m128i __DEFAULT_FN_ATTRS128
4409_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4410{
4411  return (__m128i)__builtin_ia32_selectq_128(__U,
4412                                             (__v2di)_mm_rolv_epi64(__A, __B),
4413                                             (__v2di)__W);
4414}
4415
4416static __inline__ __m128i __DEFAULT_FN_ATTRS128
4417_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4418{
4419  return (__m128i)__builtin_ia32_selectq_128(__U,
4420                                             (__v2di)_mm_rolv_epi64(__A, __B),
4421                                             (__v2di)_mm_setzero_si128());
4422}
4423
4424static __inline__ __m256i __DEFAULT_FN_ATTRS256
4425_mm256_rolv_epi64 (__m256i __A, __m256i __B)
4426{
4427  return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4428}
4429
4430static __inline__ __m256i __DEFAULT_FN_ATTRS256
4431_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4432{
4433  return (__m256i)__builtin_ia32_selectq_256(__U,
4434                                            (__v4di)_mm256_rolv_epi64(__A, __B),
4435                                            (__v4di)__W);
4436}
4437
4438static __inline__ __m256i __DEFAULT_FN_ATTRS256
4439_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4440{
4441  return (__m256i)__builtin_ia32_selectq_256(__U,
4442                                            (__v4di)_mm256_rolv_epi64(__A, __B),
4443                                            (__v4di)_mm256_setzero_si256());
4444}
4445
/* Kept as a macro: casts (A) to __v4si and (B) to int, then calls the
 * prord128 builtin.
 * NOTE(review): B presumably must be a compile-time constant -- confirm. */
#define _mm_ror_epi32(A, B) \
  ((__m128i)__builtin_ia32_prord128( \
      (__v4si)(__m128i)(A), \
      (int)(B)))
4448
/* Merge-masking form of _mm_ror_epi32: the selectd_128 builtin receives the
 * mask (U), the _mm_ror_epi32 result and the fallback (W). */
#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_ror_epi32((A), (B)), \
      (__v4si)(__m128i)(W)))
4453
/* Zero-masking form of _mm_ror_epi32: the selectd_128 builtin receives the
 * mask (U), the _mm_ror_epi32 result and an all-zero vector. */
#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_ror_epi32((A), (B)), \
      (__v4si)_mm_setzero_si128()))
4458
/* Kept as a macro: casts (A) to __v8si and (B) to int, then calls the
 * prord256 builtin.
 * NOTE(review): B presumably must be a compile-time constant -- confirm. */
#define _mm256_ror_epi32(A, B) \
  ((__m256i)__builtin_ia32_prord256( \
      (__v8si)(__m256i)(A), \
      (int)(B)))
4461
/* Merge-masking form of _mm256_ror_epi32: the selectd_256 builtin receives
 * the mask (U), the _mm256_ror_epi32 result and the fallback (W). */
#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_ror_epi32((A), (B)), \
      (__v8si)(__m256i)(W)))
4466
/* Zero-masking form of _mm256_ror_epi32: the selectd_256 builtin receives
 * the mask (U), the _mm256_ror_epi32 result and an all-zero vector. */
#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_ror_epi32((A), (B)), \
      (__v8si)_mm256_setzero_si256()))
4471
/* Kept as a macro: casts (A) to __v2di and (B) to int, then calls the
 * prorq128 builtin.
 * NOTE(review): B presumably must be a compile-time constant -- confirm. */
#define _mm_ror_epi64(A, B) \
  ((__m128i)__builtin_ia32_prorq128( \
      (__v2di)(__m128i)(A), \
      (int)(B)))
4474
/* Merge-masking form of _mm_ror_epi64: the selectq_128 builtin receives the
 * mask (U), the _mm_ror_epi64 result and the fallback (W). */
#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_ror_epi64((A), (B)), \
      (__v2di)(__m128i)(W)))
4479
/* Zero-masking form of _mm_ror_epi64: the selectq_128 builtin receives the
 * mask (U), the _mm_ror_epi64 result and an all-zero vector. */
#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_ror_epi64((A), (B)), \
      (__v2di)_mm_setzero_si128()))
4484
/* Kept as a macro: casts (A) to __v4di and (B) to int, then calls the
 * prorq256 builtin.
 * NOTE(review): B presumably must be a compile-time constant -- confirm. */
#define _mm256_ror_epi64(A, B) \
  ((__m256i)__builtin_ia32_prorq256( \
      (__v4di)(__m256i)(A), \
      (int)(B)))
4487
/* Merge-masking form of _mm256_ror_epi64: the selectq_256 builtin receives
 * the mask (U), the _mm256_ror_epi64 result and the fallback (W). */
#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_ror_epi64((A), (B)), \
      (__v4di)(__m256i)(W)))
4492
/* Zero-masking form of _mm256_ror_epi64: the selectq_256 builtin receives
 * the mask (U), the _mm256_ror_epi64 result and an all-zero vector. */
#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_ror_epi64((A), (B)), \
      (__v4di)_mm256_setzero_si256()))
4497
4498static __inline__ __m128i __DEFAULT_FN_ATTRS128
4499_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4500{
4501  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4502                                             (__v4si)_mm_sll_epi32(__A, __B),
4503                                             (__v4si)__W);
4504}
4505
4506static __inline__ __m128i __DEFAULT_FN_ATTRS128
4507_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4508{
4509  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4510                                             (__v4si)_mm_sll_epi32(__A, __B),
4511                                             (__v4si)_mm_setzero_si128());
4512}
4513
4514static __inline__ __m256i __DEFAULT_FN_ATTRS256
4515_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4516{
4517  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4518                                             (__v8si)_mm256_sll_epi32(__A, __B),
4519                                             (__v8si)__W);
4520}
4521
4522static __inline__ __m256i __DEFAULT_FN_ATTRS256
4523_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4524{
4525  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4526                                             (__v8si)_mm256_sll_epi32(__A, __B),
4527                                             (__v8si)_mm256_setzero_si256());
4528}
4529
4530static __inline__ __m128i __DEFAULT_FN_ATTRS128
4531_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4532{
4533  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4534                                             (__v4si)_mm_slli_epi32(__A, (int)__B),
4535                                             (__v4si)__W);
4536}
4537
4538static __inline__ __m128i __DEFAULT_FN_ATTRS128
4539_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4540{
4541  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4542                                             (__v4si)_mm_slli_epi32(__A, (int)__B),
4543                                             (__v4si)_mm_setzero_si128());
4544}
4545
4546static __inline__ __m256i __DEFAULT_FN_ATTRS256
4547_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4548{
4549  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4550                                             (__v8si)_mm256_slli_epi32(__A, (int)__B),
4551                                             (__v8si)__W);
4552}
4553
4554static __inline__ __m256i __DEFAULT_FN_ATTRS256
4555_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
4556{
4557  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4558                                             (__v8si)_mm256_slli_epi32(__A, (int)__B),
4559                                             (__v8si)_mm256_setzero_si256());
4560}
4561
4562static __inline__ __m128i __DEFAULT_FN_ATTRS128
4563_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4564{
4565  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4566                                             (__v2di)_mm_sll_epi64(__A, __B),
4567                                             (__v2di)__W);
4568}
4569
4570static __inline__ __m128i __DEFAULT_FN_ATTRS128
4571_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4572{
4573  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4574                                             (__v2di)_mm_sll_epi64(__A, __B),
4575                                             (__v2di)_mm_setzero_si128());
4576}
4577
4578static __inline__ __m256i __DEFAULT_FN_ATTRS256
4579_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4580{
4581  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4582                                             (__v4di)_mm256_sll_epi64(__A, __B),
4583                                             (__v4di)__W);
4584}
4585
4586static __inline__ __m256i __DEFAULT_FN_ATTRS256
4587_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4588{
4589  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4590                                             (__v4di)_mm256_sll_epi64(__A, __B),
4591                                             (__v4di)_mm256_setzero_si256());
4592}
4593
4594static __inline__ __m128i __DEFAULT_FN_ATTRS128
4595_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4596{
4597  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4598                                             (__v2di)_mm_slli_epi64(__A, (int)__B),
4599                                             (__v2di)__W);
4600}
4601
4602static __inline__ __m128i __DEFAULT_FN_ATTRS128
4603_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4604{
4605  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4606                                             (__v2di)_mm_slli_epi64(__A, (int)__B),
4607                                             (__v2di)_mm_setzero_si128());
4608}
4609
4610static __inline__ __m256i __DEFAULT_FN_ATTRS256
4611_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4612{
4613  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4614                                             (__v4di)_mm256_slli_epi64(__A, (int)__B),
4615                                             (__v4di)__W);
4616}
4617
4618static __inline__ __m256i __DEFAULT_FN_ATTRS256
4619_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
4620{
4621  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4622                                             (__v4di)_mm256_slli_epi64(__A, (int)__B),
4623                                             (__v4di)_mm256_setzero_si256());
4624}
4625
4626static __inline__ __m128i __DEFAULT_FN_ATTRS128
4627_mm_rorv_epi32 (__m128i __A, __m128i __B)
4628{
4629  return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4630}
4631
4632static __inline__ __m128i __DEFAULT_FN_ATTRS128
4633_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4634{
4635  return (__m128i)__builtin_ia32_selectd_128(__U,
4636                                             (__v4si)_mm_rorv_epi32(__A, __B),
4637                                             (__v4si)__W);
4638}
4639
4640static __inline__ __m128i __DEFAULT_FN_ATTRS128
4641_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4642{
4643  return (__m128i)__builtin_ia32_selectd_128(__U,
4644                                             (__v4si)_mm_rorv_epi32(__A, __B),
4645                                             (__v4si)_mm_setzero_si128());
4646}
4647
4648static __inline__ __m256i __DEFAULT_FN_ATTRS256
4649_mm256_rorv_epi32 (__m256i __A, __m256i __B)
4650{
4651  return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4652}
4653
4654static __inline__ __m256i __DEFAULT_FN_ATTRS256
4655_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4656{
4657  return (__m256i)__builtin_ia32_selectd_256(__U,
4658                                            (__v8si)_mm256_rorv_epi32(__A, __B),
4659                                            (__v8si)__W);
4660}
4661
4662static __inline__ __m256i __DEFAULT_FN_ATTRS256
4663_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4664{
4665  return (__m256i)__builtin_ia32_selectd_256(__U,
4666                                            (__v8si)_mm256_rorv_epi32(__A, __B),
4667                                            (__v8si)_mm256_setzero_si256());
4668}
4669
4670static __inline__ __m128i __DEFAULT_FN_ATTRS128
4671_mm_rorv_epi64 (__m128i __A, __m128i __B)
4672{
4673  return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4674}
4675
4676static __inline__ __m128i __DEFAULT_FN_ATTRS128
4677_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4678{
4679  return (__m128i)__builtin_ia32_selectq_128(__U,
4680                                             (__v2di)_mm_rorv_epi64(__A, __B),
4681                                             (__v2di)__W);
4682}
4683
4684static __inline__ __m128i __DEFAULT_FN_ATTRS128
4685_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4686{
4687  return (__m128i)__builtin_ia32_selectq_128(__U,
4688                                             (__v2di)_mm_rorv_epi64(__A, __B),
4689                                             (__v2di)_mm_setzero_si128());
4690}
4691
4692static __inline__ __m256i __DEFAULT_FN_ATTRS256
4693_mm256_rorv_epi64 (__m256i __A, __m256i __B)
4694{
4695  return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4696}
4697
4698static __inline__ __m256i __DEFAULT_FN_ATTRS256
4699_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4700{
4701  return (__m256i)__builtin_ia32_selectq_256(__U,
4702                                            (__v4di)_mm256_rorv_epi64(__A, __B),
4703                                            (__v4di)__W);
4704}
4705
4706static __inline__ __m256i __DEFAULT_FN_ATTRS256
4707_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4708{
4709  return (__m256i)__builtin_ia32_selectq_256(__U,
4710                                            (__v4di)_mm256_rorv_epi64(__A, __B),
4711                                            (__v4di)_mm256_setzero_si256());
4712}
4713
4714static __inline__ __m128i __DEFAULT_FN_ATTRS128
4715_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4716{
4717  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4718                                             (__v2di)_mm_sllv_epi64(__X, __Y),
4719                                             (__v2di)__W);
4720}
4721
4722static __inline__ __m128i __DEFAULT_FN_ATTRS128
4723_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4724{
4725  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4726                                             (__v2di)_mm_sllv_epi64(__X, __Y),
4727                                             (__v2di)_mm_setzero_si128());
4728}
4729
4730static __inline__ __m256i __DEFAULT_FN_ATTRS256
4731_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4732{
4733  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4734                                            (__v4di)_mm256_sllv_epi64(__X, __Y),
4735                                            (__v4di)__W);
4736}
4737
4738static __inline__ __m256i __DEFAULT_FN_ATTRS256
4739_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4740{
4741  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4742                                            (__v4di)_mm256_sllv_epi64(__X, __Y),
4743                                            (__v4di)_mm256_setzero_si256());
4744}
4745
4746static __inline__ __m128i __DEFAULT_FN_ATTRS128
4747_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4748{
4749  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4750                                             (__v4si)_mm_sllv_epi32(__X, __Y),
4751                                             (__v4si)__W);
4752}
4753
4754static __inline__ __m128i __DEFAULT_FN_ATTRS128
4755_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4756{
4757  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4758                                             (__v4si)_mm_sllv_epi32(__X, __Y),
4759                                             (__v4si)_mm_setzero_si128());
4760}
4761
4762static __inline__ __m256i __DEFAULT_FN_ATTRS256
4763_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4764{
4765  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4766                                            (__v8si)_mm256_sllv_epi32(__X, __Y),
4767                                            (__v8si)__W);
4768}
4769
4770static __inline__ __m256i __DEFAULT_FN_ATTRS256
4771_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4772{
4773  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4774                                            (__v8si)_mm256_sllv_epi32(__X, __Y),
4775                                            (__v8si)_mm256_setzero_si256());
4776}
4777
4778static __inline__ __m128i __DEFAULT_FN_ATTRS128
4779_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4780{
4781  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4782                                             (__v2di)_mm_srlv_epi64(__X, __Y),
4783                                             (__v2di)__W);
4784}
4785
4786static __inline__ __m128i __DEFAULT_FN_ATTRS128
4787_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4788{
4789  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4790                                             (__v2di)_mm_srlv_epi64(__X, __Y),
4791                                             (__v2di)_mm_setzero_si128());
4792}
4793
4794static __inline__ __m256i __DEFAULT_FN_ATTRS256
4795_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4796{
4797  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4798                                            (__v4di)_mm256_srlv_epi64(__X, __Y),
4799                                            (__v4di)__W);
4800}
4801
4802static __inline__ __m256i __DEFAULT_FN_ATTRS256
4803_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4804{
4805  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4806                                            (__v4di)_mm256_srlv_epi64(__X, __Y),
4807                                            (__v4di)_mm256_setzero_si256());
4808}
4809
4810static __inline__ __m128i __DEFAULT_FN_ATTRS128
4811_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4812{
4813  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4814                                            (__v4si)_mm_srlv_epi32(__X, __Y),
4815                                            (__v4si)__W);
4816}
4817
4818static __inline__ __m128i __DEFAULT_FN_ATTRS128
4819_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4820{
4821  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4822                                            (__v4si)_mm_srlv_epi32(__X, __Y),
4823                                            (__v4si)_mm_setzero_si128());
4824}
4825
4826static __inline__ __m256i __DEFAULT_FN_ATTRS256
4827_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4828{
4829  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4830                                            (__v8si)_mm256_srlv_epi32(__X, __Y),
4831                                            (__v8si)__W);
4832}
4833
4834static __inline__ __m256i __DEFAULT_FN_ATTRS256
4835_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4836{
4837  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4838                                            (__v8si)_mm256_srlv_epi32(__X, __Y),
4839                                            (__v8si)_mm256_setzero_si256());
4840}
4841
4842static __inline__ __m128i __DEFAULT_FN_ATTRS128
4843_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4844{
4845  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4846                                             (__v4si)_mm_srl_epi32(__A, __B),
4847                                             (__v4si)__W);
4848}
4849
4850static __inline__ __m128i __DEFAULT_FN_ATTRS128
4851_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4852{
4853  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4854                                             (__v4si)_mm_srl_epi32(__A, __B),
4855                                             (__v4si)_mm_setzero_si128());
4856}
4857
4858static __inline__ __m256i __DEFAULT_FN_ATTRS256
4859_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4860{
4861  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4862                                             (__v8si)_mm256_srl_epi32(__A, __B),
4863                                             (__v8si)__W);
4864}
4865
4866static __inline__ __m256i __DEFAULT_FN_ATTRS256
4867_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4868{
4869  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4870                                             (__v8si)_mm256_srl_epi32(__A, __B),
4871                                             (__v8si)_mm256_setzero_si256());
4872}
4873
4874static __inline__ __m128i __DEFAULT_FN_ATTRS128
4875_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4876{
4877  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4878                                             (__v4si)_mm_srli_epi32(__A, (int)__B),
4879                                             (__v4si)__W);
4880}
4881
4882static __inline__ __m128i __DEFAULT_FN_ATTRS128
4883_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4884{
4885  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4886                                             (__v4si)_mm_srli_epi32(__A, (int)__B),
4887                                             (__v4si)_mm_setzero_si128());
4888}
4889
4890static __inline__ __m256i __DEFAULT_FN_ATTRS256
4891_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4892{
4893  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4894                                             (__v8si)_mm256_srli_epi32(__A, (int)__B),
4895                                             (__v8si)__W);
4896}
4897
4898static __inline__ __m256i __DEFAULT_FN_ATTRS256
4899_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
4900{
4901  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4902                                             (__v8si)_mm256_srli_epi32(__A, (int)__B),
4903                                             (__v8si)_mm256_setzero_si256());
4904}
4905
4906static __inline__ __m128i __DEFAULT_FN_ATTRS128
4907_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4908{
4909  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4910                                             (__v2di)_mm_srl_epi64(__A, __B),
4911                                             (__v2di)__W);
4912}
4913
4914static __inline__ __m128i __DEFAULT_FN_ATTRS128
4915_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4916{
4917  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4918                                             (__v2di)_mm_srl_epi64(__A, __B),
4919                                             (__v2di)_mm_setzero_si128());
4920}
4921
4922static __inline__ __m256i __DEFAULT_FN_ATTRS256
4923_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4924{
4925  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4926                                             (__v4di)_mm256_srl_epi64(__A, __B),
4927                                             (__v4di)__W);
4928}
4929
4930static __inline__ __m256i __DEFAULT_FN_ATTRS256
4931_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4932{
4933  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4934                                             (__v4di)_mm256_srl_epi64(__A, __B),
4935                                             (__v4di)_mm256_setzero_si256());
4936}
4937
4938static __inline__ __m128i __DEFAULT_FN_ATTRS128
4939_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4940{
4941  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4942                                             (__v2di)_mm_srli_epi64(__A, (int)__B),
4943                                             (__v2di)__W);
4944}
4945
4946static __inline__ __m128i __DEFAULT_FN_ATTRS128
4947_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4948{
4949  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4950                                             (__v2di)_mm_srli_epi64(__A, (int)__B),
4951                                             (__v2di)_mm_setzero_si128());
4952}
4953
4954static __inline__ __m256i __DEFAULT_FN_ATTRS256
4955_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4956{
4957  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4958                                             (__v4di)_mm256_srli_epi64(__A, (int)__B),
4959                                             (__v4di)__W);
4960}
4961
4962static __inline__ __m256i __DEFAULT_FN_ATTRS256
4963_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
4964{
4965  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4966                                             (__v4di)_mm256_srli_epi64(__A, (int)__B),
4967                                             (__v4di)_mm256_setzero_si256());
4968}
4969
4970static __inline__ __m128i __DEFAULT_FN_ATTRS128
4971_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4972{
4973  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4974                                            (__v4si)_mm_srav_epi32(__X, __Y),
4975                                            (__v4si)__W);
4976}
4977
4978static __inline__ __m128i __DEFAULT_FN_ATTRS128
4979_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4980{
4981  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4982                                            (__v4si)_mm_srav_epi32(__X, __Y),
4983                                            (__v4si)_mm_setzero_si128());
4984}
4985
4986static __inline__ __m256i __DEFAULT_FN_ATTRS256
4987_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4988{
4989  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4990                                            (__v8si)_mm256_srav_epi32(__X, __Y),
4991                                            (__v8si)__W);
4992}
4993
4994static __inline__ __m256i __DEFAULT_FN_ATTRS256
4995_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4996{
4997  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4998                                            (__v8si)_mm256_srav_epi32(__X, __Y),
4999                                            (__v8si)_mm256_setzero_si256());
5000}
5001
5002static __inline__ __m128i __DEFAULT_FN_ATTRS128
5003_mm_srav_epi64(__m128i __X, __m128i __Y)
5004{
5005  return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5006}
5007
5008static __inline__ __m128i __DEFAULT_FN_ATTRS128
5009_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5010{
5011  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5012                                             (__v2di)_mm_srav_epi64(__X, __Y),
5013                                             (__v2di)__W);
5014}
5015
5016static __inline__ __m128i __DEFAULT_FN_ATTRS128
5017_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5018{
5019  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5020                                             (__v2di)_mm_srav_epi64(__X, __Y),
5021                                             (__v2di)_mm_setzero_si128());
5022}
5023
5024static __inline__ __m256i __DEFAULT_FN_ATTRS256
5025_mm256_srav_epi64(__m256i __X, __m256i __Y)
5026{
5027  return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
5028}
5029
5030static __inline__ __m256i __DEFAULT_FN_ATTRS256
5031_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5032{
5033  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5034                                             (__v4di)_mm256_srav_epi64(__X, __Y),
5035                                             (__v4di)__W);
5036}
5037
5038static __inline__ __m256i __DEFAULT_FN_ATTRS256
5039_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5040{
5041  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5042                                             (__v4di)_mm256_srav_epi64(__X, __Y),
5043                                             (__v4di)_mm256_setzero_si256());
5044}
5045
5046static __inline__ __m128i __DEFAULT_FN_ATTRS128
5047_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5048{
5049  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5050                 (__v4si) __A,
5051                 (__v4si) __W);
5052}
5053
5054static __inline__ __m128i __DEFAULT_FN_ATTRS128
5055_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
5056{
5057  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5058                 (__v4si) __A,
5059                 (__v4si) _mm_setzero_si128 ());
5060}
5061
5062
5063static __inline__ __m256i __DEFAULT_FN_ATTRS256
5064_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5065{
5066  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5067                 (__v8si) __A,
5068                 (__v8si) __W);
5069}
5070
5071static __inline__ __m256i __DEFAULT_FN_ATTRS256
5072_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
5073{
5074  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5075                 (__v8si) __A,
5076                 (__v8si) _mm256_setzero_si256 ());
5077}
5078
5079static __inline __m128i __DEFAULT_FN_ATTRS128
5080_mm_load_epi32 (void const *__P)
5081{
5082  return *(const __m128i *) __P;
5083}
5084
5085static __inline__ __m128i __DEFAULT_FN_ATTRS128
5086_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5087{
5088  return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
5089              (__v4si) __W,
5090              (__mmask8)
5091              __U);
5092}
5093
5094static __inline__ __m128i __DEFAULT_FN_ATTRS128
5095_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
5096{
5097  return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
5098              (__v4si)
5099              _mm_setzero_si128 (),
5100              (__mmask8)
5101              __U);
5102}
5103
5104static __inline __m256i __DEFAULT_FN_ATTRS256
5105_mm256_load_epi32 (void const *__P)
5106{
5107  return *(const __m256i *) __P;
5108}
5109
5110static __inline__ __m256i __DEFAULT_FN_ATTRS256
5111_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5112{
5113  return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
5114              (__v8si) __W,
5115              (__mmask8)
5116              __U);
5117}
5118
5119static __inline__ __m256i __DEFAULT_FN_ATTRS256
5120_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
5121{
5122  return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
5123              (__v8si)
5124              _mm256_setzero_si256 (),
5125              (__mmask8)
5126              __U);
5127}
5128
5129static __inline void __DEFAULT_FN_ATTRS128
5130_mm_store_epi32 (void *__P, __m128i __A)
5131{
5132  *(__m128i *) __P = __A;
5133}
5134
5135static __inline__ void __DEFAULT_FN_ATTRS128
5136_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5137{
5138  __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5139          (__v4si) __A,
5140          (__mmask8) __U);
5141}
5142
5143static __inline void __DEFAULT_FN_ATTRS256
5144_mm256_store_epi32 (void *__P, __m256i __A)
5145{
5146  *(__m256i *) __P = __A;
5147}
5148
5149static __inline__ void __DEFAULT_FN_ATTRS256
5150_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5151{
5152  __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5153          (__v8si) __A,
5154          (__mmask8) __U);
5155}
5156
5157static __inline__ __m128i __DEFAULT_FN_ATTRS128
5158_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5159{
5160  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5161                 (__v2di) __A,
5162                 (__v2di) __W);
5163}
5164
5165static __inline__ __m128i __DEFAULT_FN_ATTRS128
5166_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
5167{
5168  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5169                 (__v2di) __A,
5170                 (__v2di) _mm_setzero_si128 ());
5171}
5172
5173static __inline__ __m256i __DEFAULT_FN_ATTRS256
5174_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5175{
5176  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5177                 (__v4di) __A,
5178                 (__v4di) __W);
5179}
5180
5181static __inline__ __m256i __DEFAULT_FN_ATTRS256
5182_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
5183{
5184  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5185                 (__v4di) __A,
5186                 (__v4di) _mm256_setzero_si256 ());
5187}
5188
5189static __inline __m128i __DEFAULT_FN_ATTRS128
5190_mm_load_epi64 (void const *__P)
5191{
5192  return *(const __m128i *) __P;
5193}
5194
5195static __inline__ __m128i __DEFAULT_FN_ATTRS128
5196_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5197{
5198  return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
5199              (__v2di) __W,
5200              (__mmask8)
5201              __U);
5202}
5203
5204static __inline__ __m128i __DEFAULT_FN_ATTRS128
5205_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
5206{
5207  return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
5208              (__v2di)
5209              _mm_setzero_si128 (),
5210              (__mmask8)
5211              __U);
5212}
5213
5214static __inline __m256i __DEFAULT_FN_ATTRS256
5215_mm256_load_epi64 (void const *__P)
5216{
5217  return *(const __m256i *) __P;
5218}
5219
5220static __inline__ __m256i __DEFAULT_FN_ATTRS256
5221_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5222{
5223  return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5224              (__v4di) __W,
5225              (__mmask8)
5226              __U);
5227}
5228
5229static __inline__ __m256i __DEFAULT_FN_ATTRS256
5230_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
5231{
5232  return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5233              (__v4di)
5234              _mm256_setzero_si256 (),
5235              (__mmask8)
5236              __U);
5237}
5238
5239static __inline void __DEFAULT_FN_ATTRS128
5240_mm_store_epi64 (void *__P, __m128i __A)
5241{
5242  *(__m128i *) __P = __A;
5243}
5244
5245static __inline__ void __DEFAULT_FN_ATTRS128
5246_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5247{
5248  __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5249          (__v2di) __A,
5250          (__mmask8) __U);
5251}
5252
5253static __inline void __DEFAULT_FN_ATTRS256
5254_mm256_store_epi64 (void *__P, __m256i __A)
5255{
5256  *(__m256i *) __P = __A;
5257}
5258
5259static __inline__ void __DEFAULT_FN_ATTRS256
5260_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5261{
5262  __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5263          (__v4di) __A,
5264          (__mmask8) __U);
5265}
5266
5267static __inline__ __m128d __DEFAULT_FN_ATTRS128
5268_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5269{
5270  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5271                                              (__v2df)_mm_movedup_pd(__A),
5272                                              (__v2df)__W);
5273}
5274
5275static __inline__ __m128d __DEFAULT_FN_ATTRS128
5276_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5277{
5278  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5279                                              (__v2df)_mm_movedup_pd(__A),
5280                                              (__v2df)_mm_setzero_pd());
5281}
5282
5283static __inline__ __m256d __DEFAULT_FN_ATTRS256
5284_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5285{
5286  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5287                                              (__v4df)_mm256_movedup_pd(__A),
5288                                              (__v4df)__W);
5289}
5290
5291static __inline__ __m256d __DEFAULT_FN_ATTRS256
5292_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5293{
5294  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5295                                              (__v4df)_mm256_movedup_pd(__A),
5296                                              (__v4df)_mm256_setzero_pd());
5297}
5298
5299static __inline__ __m128i __DEFAULT_FN_ATTRS128
5300_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
5301{
5302   return (__m128i)__builtin_ia32_selectd_128(__M,
5303                                              (__v4si) _mm_set1_epi32(__A),
5304                                              (__v4si)__O);
5305}
5306
5307static __inline__ __m128i __DEFAULT_FN_ATTRS128
5308_mm_maskz_set1_epi32( __mmask8 __M, int __A)
5309{
5310   return (__m128i)__builtin_ia32_selectd_128(__M,
5311                                              (__v4si) _mm_set1_epi32(__A),
5312                                              (__v4si)_mm_setzero_si128());
5313}
5314
5315static __inline__ __m256i __DEFAULT_FN_ATTRS256
5316_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
5317{
5318   return (__m256i)__builtin_ia32_selectd_256(__M,
5319                                              (__v8si) _mm256_set1_epi32(__A),
5320                                              (__v8si)__O);
5321}
5322
5323static __inline__ __m256i __DEFAULT_FN_ATTRS256
5324_mm256_maskz_set1_epi32( __mmask8 __M, int __A)
5325{
5326   return (__m256i)__builtin_ia32_selectd_256(__M,
5327                                              (__v8si) _mm256_set1_epi32(__A),
5328                                              (__v8si)_mm256_setzero_si256());
5329}
5330
5331
5332static __inline__ __m128i __DEFAULT_FN_ATTRS128
5333_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5334{
5335  return (__m128i) __builtin_ia32_selectq_128(__M,
5336                                              (__v2di) _mm_set1_epi64x(__A),
5337                                              (__v2di) __O);
5338}
5339
5340static __inline__ __m128i __DEFAULT_FN_ATTRS128
5341_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5342{
5343  return (__m128i) __builtin_ia32_selectq_128(__M,
5344                                              (__v2di) _mm_set1_epi64x(__A),
5345                                              (__v2di) _mm_setzero_si128());
5346}
5347
5348static __inline__ __m256i __DEFAULT_FN_ATTRS256
5349_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5350{
5351  return (__m256i) __builtin_ia32_selectq_256(__M,
5352                                              (__v4di) _mm256_set1_epi64x(__A),
5353                                              (__v4di) __O) ;
5354}
5355
5356static __inline__ __m256i __DEFAULT_FN_ATTRS256
5357_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
5358{
5359   return (__m256i) __builtin_ia32_selectq_256(__M,
5360                                               (__v4di) _mm256_set1_epi64x(__A),
5361                                               (__v4di) _mm256_setzero_si256());
5362}
5363
/* Unmasked 128-bit double fix-up: forwards A, B (as __v2df), C (as __v2di)
 * and imm to __builtin_ia32_fixupimmpd128_mask with an all-ones mask.
 * Kept as a macro so imm reaches the builtin as written. */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1))
5369
/* Masked 128-bit double fix-up: forwards A, B (as __v2df), C (as __v2di),
 * imm and mask U to __builtin_ia32_fixupimmpd128_mask.
 * Kept as a macro so imm reaches the builtin as written. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5375
/* Zero-masked 128-bit double fix-up: forwards A, B (as __v2df), C (as
 * __v2di), imm and mask U to __builtin_ia32_fixupimmpd128_maskz.
 * Kept as a macro so imm reaches the builtin as written. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5381
/* Unmasked 256-bit double fix-up: forwards A, B (as __v4df), C (as __v4di)
 * and imm to __builtin_ia32_fixupimmpd256_mask with an all-ones mask.
 * Kept as a macro so imm reaches the builtin as written. */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (__v4di)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)-1))
5387
/* Masked 256-bit double fix-up: forwards A, B (as __v4df), C (as __v4di),
 * imm and mask U to __builtin_ia32_fixupimmpd256_mask.
 * Kept as a macro so imm reaches the builtin as written. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (__v4di)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5393
/* Zero-masked 256-bit double fix-up: forwards A, B (as __v4df), C (as
 * __v4di), imm and mask U to __builtin_ia32_fixupimmpd256_maskz.
 * Kept as a macro so imm reaches the builtin as written. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (__v4di)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5399
/* Unmasked 128-bit float fix-up: forwards A, B (as __v4sf), C (as __v4si)
 * and imm to __builtin_ia32_fixupimmps128_mask with an all-ones mask.
 * Kept as a macro so imm reaches the builtin as written. */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1))
5405
/* Masked 128-bit float fix-up: forwards A, B (as __v4sf), C (as __v4si),
 * imm and mask U to __builtin_ia32_fixupimmps128_mask.
 * Kept as a macro so imm reaches the builtin as written. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5411
/* Zero-masked 128-bit float fix-up: forwards A, B (as __v4sf), C (as
 * __v4si), imm and mask U to __builtin_ia32_fixupimmps128_maskz.
 * Kept as a macro so imm reaches the builtin as written. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5417
/* Unmasked 256-bit float fix-up: forwards A, B (as __v8sf), C (as __v8si)
 * and imm to __builtin_ia32_fixupimmps256_mask with an all-ones mask.
 * Kept as a macro so imm reaches the builtin as written. */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (__v8si)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)-1))
5423
/* Masked 256-bit float fix-up: forwards A, B (as __v8sf), C (as __v8si),
 * imm and mask U to __builtin_ia32_fixupimmps256_mask.
 * Kept as a macro so imm reaches the builtin as written. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (__v8si)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5429
/* Zero-masked 256-bit float fix-up: forwards A, B (as __v8sf), C (as
 * __v8si), imm and mask U to __builtin_ia32_fixupimmps256_maskz.
 * Kept as a macro so imm reaches the builtin as written. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (__v8si)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5435
5436static __inline__ __m128d __DEFAULT_FN_ATTRS128
5437_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5438{
5439  return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5440               (__v2df) __W,
5441               (__mmask8) __U);
5442}
5443
5444static __inline__ __m128d __DEFAULT_FN_ATTRS128
5445_mm_maskz_load_pd (__mmask8 __U, void const *__P)
5446{
5447  return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5448               (__v2df)
5449               _mm_setzero_pd (),
5450               (__mmask8) __U);
5451}
5452
5453static __inline__ __m256d __DEFAULT_FN_ATTRS256
5454_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5455{
5456  return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5457               (__v4df) __W,
5458               (__mmask8) __U);
5459}
5460
5461static __inline__ __m256d __DEFAULT_FN_ATTRS256
5462_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
5463{
5464  return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5465               (__v4df)
5466               _mm256_setzero_pd (),
5467               (__mmask8) __U);
5468}
5469
5470static __inline__ __m128 __DEFAULT_FN_ATTRS128
5471_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5472{
5473  return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5474              (__v4sf) __W,
5475              (__mmask8) __U);
5476}
5477
5478static __inline__ __m128 __DEFAULT_FN_ATTRS128
5479_mm_maskz_load_ps (__mmask8 __U, void const *__P)
5480{
5481  return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5482              (__v4sf)
5483              _mm_setzero_ps (),
5484              (__mmask8) __U);
5485}
5486
5487static __inline__ __m256 __DEFAULT_FN_ATTRS256
5488_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5489{
5490  return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5491              (__v8sf) __W,
5492              (__mmask8) __U);
5493}
5494
5495static __inline__ __m256 __DEFAULT_FN_ATTRS256
5496_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
5497{
5498  return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5499              (__v8sf)
5500              _mm256_setzero_ps (),
5501              (__mmask8) __U);
5502}
5503
5504static __inline __m128i __DEFAULT_FN_ATTRS128
5505_mm_loadu_epi64 (void const *__P)
5506{
5507  struct __loadu_epi64 {
5508    __m128i_u __v;
5509  } __attribute__((__packed__, __may_alias__));
5510  return ((const struct __loadu_epi64*)__P)->__v;
5511}
5512
5513static __inline__ __m128i __DEFAULT_FN_ATTRS128
5514_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5515{
5516  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5517                 (__v2di) __W,
5518                 (__mmask8) __U);
5519}
5520
5521static __inline__ __m128i __DEFAULT_FN_ATTRS128
5522_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5523{
5524  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5525                 (__v2di)
5526                 _mm_setzero_si128 (),
5527                 (__mmask8) __U);
5528}
5529
5530static __inline __m256i __DEFAULT_FN_ATTRS256
5531_mm256_loadu_epi64 (void const *__P)
5532{
5533  struct __loadu_epi64 {
5534    __m256i_u __v;
5535  } __attribute__((__packed__, __may_alias__));
5536  return ((const struct __loadu_epi64*)__P)->__v;
5537}
5538
5539static __inline__ __m256i __DEFAULT_FN_ATTRS256
5540_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5541{
5542  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5543                 (__v4di) __W,
5544                 (__mmask8) __U);
5545}
5546
5547static __inline__ __m256i __DEFAULT_FN_ATTRS256
5548_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5549{
5550  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5551                 (__v4di)
5552                 _mm256_setzero_si256 (),
5553                 (__mmask8) __U);
5554}
5555
5556static __inline __m128i __DEFAULT_FN_ATTRS128
5557_mm_loadu_epi32 (void const *__P)
5558{
5559  struct __loadu_epi32 {
5560    __m128i_u __v;
5561  } __attribute__((__packed__, __may_alias__));
5562  return ((const struct __loadu_epi32*)__P)->__v;
5563}
5564
5565static __inline__ __m128i __DEFAULT_FN_ATTRS128
5566_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5567{
5568  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5569                 (__v4si) __W,
5570                 (__mmask8) __U);
5571}
5572
5573static __inline__ __m128i __DEFAULT_FN_ATTRS128
5574_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5575{
5576  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5577                 (__v4si)
5578                 _mm_setzero_si128 (),
5579                 (__mmask8) __U);
5580}
5581
5582static __inline __m256i __DEFAULT_FN_ATTRS256
5583_mm256_loadu_epi32 (void const *__P)
5584{
5585  struct __loadu_epi32 {
5586    __m256i_u __v;
5587  } __attribute__((__packed__, __may_alias__));
5588  return ((const struct __loadu_epi32*)__P)->__v;
5589}
5590
5591static __inline__ __m256i __DEFAULT_FN_ATTRS256
5592_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5593{
5594  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5595                 (__v8si) __W,
5596                 (__mmask8) __U);
5597}
5598
5599static __inline__ __m256i __DEFAULT_FN_ATTRS256
5600_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5601{
5602  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5603                 (__v8si)
5604                 _mm256_setzero_si256 (),
5605                 (__mmask8) __U);
5606}
5607
5608static __inline__ __m128d __DEFAULT_FN_ATTRS128
5609_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5610{
5611  return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5612               (__v2df) __W,
5613               (__mmask8) __U);
5614}
5615
5616static __inline__ __m128d __DEFAULT_FN_ATTRS128
5617_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
5618{
5619  return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5620               (__v2df)
5621               _mm_setzero_pd (),
5622               (__mmask8) __U);
5623}
5624
5625static __inline__ __m256d __DEFAULT_FN_ATTRS256
5626_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5627{
5628  return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5629               (__v4df) __W,
5630               (__mmask8) __U);
5631}
5632
5633static __inline__ __m256d __DEFAULT_FN_ATTRS256
5634_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
5635{
5636  return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5637               (__v4df)
5638               _mm256_setzero_pd (),
5639               (__mmask8) __U);
5640}
5641
5642static __inline__ __m128 __DEFAULT_FN_ATTRS128
5643_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5644{
5645  return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5646              (__v4sf) __W,
5647              (__mmask8) __U);
5648}
5649
5650static __inline__ __m128 __DEFAULT_FN_ATTRS128
5651_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
5652{
5653  return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5654              (__v4sf)
5655              _mm_setzero_ps (),
5656              (__mmask8) __U);
5657}
5658
5659static __inline__ __m256 __DEFAULT_FN_ATTRS256
5660_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5661{
5662  return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5663              (__v8sf) __W,
5664              (__mmask8) __U);
5665}
5666
5667static __inline__ __m256 __DEFAULT_FN_ATTRS256
5668_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
5669{
5670  return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5671              (__v8sf)
5672              _mm256_setzero_ps (),
5673              (__mmask8) __U);
5674}
5675
5676static __inline__ void __DEFAULT_FN_ATTRS128
5677_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5678{
5679  __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5680           (__v2df) __A,
5681           (__mmask8) __U);
5682}
5683
5684static __inline__ void __DEFAULT_FN_ATTRS256
5685_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5686{
5687  __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5688           (__v4df) __A,
5689           (__mmask8) __U);
5690}
5691
5692static __inline__ void __DEFAULT_FN_ATTRS128
5693_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5694{
5695  __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5696           (__v4sf) __A,
5697           (__mmask8) __U);
5698}
5699
5700static __inline__ void __DEFAULT_FN_ATTRS256
5701_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5702{
5703  __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5704           (__v8sf) __A,
5705           (__mmask8) __U);
5706}
5707
5708static __inline void __DEFAULT_FN_ATTRS128
5709_mm_storeu_epi64 (void *__P, __m128i __A)
5710{
5711  struct __storeu_epi64 {
5712    __m128i_u __v;
5713  } __attribute__((__packed__, __may_alias__));
5714  ((struct __storeu_epi64*)__P)->__v = __A;
5715}
5716
5717static __inline__ void __DEFAULT_FN_ATTRS128
5718_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5719{
5720  __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5721             (__v2di) __A,
5722             (__mmask8) __U);
5723}
5724
5725static __inline void __DEFAULT_FN_ATTRS256
5726_mm256_storeu_epi64 (void *__P, __m256i __A)
5727{
5728  struct __storeu_epi64 {
5729    __m256i_u __v;
5730  } __attribute__((__packed__, __may_alias__));
5731  ((struct __storeu_epi64*)__P)->__v = __A;
5732}
5733
5734static __inline__ void __DEFAULT_FN_ATTRS256
5735_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5736{
5737  __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5738             (__v4di) __A,
5739             (__mmask8) __U);
5740}
5741
5742static __inline void __DEFAULT_FN_ATTRS128
5743_mm_storeu_epi32 (void *__P, __m128i __A)
5744{
5745  struct __storeu_epi32 {
5746    __m128i_u __v;
5747  } __attribute__((__packed__, __may_alias__));
5748  ((struct __storeu_epi32*)__P)->__v = __A;
5749}
5750
5751static __inline__ void __DEFAULT_FN_ATTRS128
5752_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5753{
5754  __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5755             (__v4si) __A,
5756             (__mmask8) __U);
5757}
5758
5759static __inline void __DEFAULT_FN_ATTRS256
5760_mm256_storeu_epi32 (void *__P, __m256i __A)
5761{
5762  struct __storeu_epi32 {
5763    __m256i_u __v;
5764  } __attribute__((__packed__, __may_alias__));
5765  ((struct __storeu_epi32*)__P)->__v = __A;
5766}
5767
5768static __inline__ void __DEFAULT_FN_ATTRS256
5769_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5770{
5771  __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5772             (__v8si) __A,
5773             (__mmask8) __U);
5774}
5775
5776static __inline__ void __DEFAULT_FN_ATTRS128
5777_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5778{
5779  __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5780           (__v2df) __A,
5781           (__mmask8) __U);
5782}
5783
5784static __inline__ void __DEFAULT_FN_ATTRS256
5785_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5786{
5787  __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5788           (__v4df) __A,
5789           (__mmask8) __U);
5790}
5791
5792static __inline__ void __DEFAULT_FN_ATTRS128
5793_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5794{
5795  __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5796           (__v4sf) __A,
5797           (__mmask8) __U);
5798}
5799
5800static __inline__ void __DEFAULT_FN_ATTRS256
5801_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5802{
5803  __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5804           (__v8sf) __A,
5805           (__mmask8) __U);
5806}
5807
5808
5809static __inline__ __m128d __DEFAULT_FN_ATTRS128
5810_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5811{
5812  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5813                                              (__v2df)_mm_unpackhi_pd(__A, __B),
5814                                              (__v2df)__W);
5815}
5816
5817static __inline__ __m128d __DEFAULT_FN_ATTRS128
5818_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
5819{
5820  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5821                                              (__v2df)_mm_unpackhi_pd(__A, __B),
5822                                              (__v2df)_mm_setzero_pd());
5823}
5824
5825static __inline__ __m256d __DEFAULT_FN_ATTRS256
5826_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5827{
5828  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5829                                           (__v4df)_mm256_unpackhi_pd(__A, __B),
5830                                           (__v4df)__W);
5831}
5832
5833static __inline__ __m256d __DEFAULT_FN_ATTRS256
5834_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
5835{
5836  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5837                                           (__v4df)_mm256_unpackhi_pd(__A, __B),
5838                                           (__v4df)_mm256_setzero_pd());
5839}
5840
5841static __inline__ __m128 __DEFAULT_FN_ATTRS128
5842_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5843{
5844  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5845                                             (__v4sf)_mm_unpackhi_ps(__A, __B),
5846                                             (__v4sf)__W);
5847}
5848
5849static __inline__ __m128 __DEFAULT_FN_ATTRS128
5850_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
5851{
5852  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5853                                             (__v4sf)_mm_unpackhi_ps(__A, __B),
5854                                             (__v4sf)_mm_setzero_ps());
5855}
5856
5857static __inline__ __m256 __DEFAULT_FN_ATTRS256
5858_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5859{
5860  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5861                                           (__v8sf)_mm256_unpackhi_ps(__A, __B),
5862                                           (__v8sf)__W);
5863}
5864
5865static __inline__ __m256 __DEFAULT_FN_ATTRS256
5866_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
5867{
5868  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5869                                           (__v8sf)_mm256_unpackhi_ps(__A, __B),
5870                                           (__v8sf)_mm256_setzero_ps());
5871}
5872
5873static __inline__ __m128d __DEFAULT_FN_ATTRS128
5874_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5875{
5876  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5877                                              (__v2df)_mm_unpacklo_pd(__A, __B),
5878                                              (__v2df)__W);
5879}
5880
5881static __inline__ __m128d __DEFAULT_FN_ATTRS128
5882_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
5883{
5884  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5885                                              (__v2df)_mm_unpacklo_pd(__A, __B),
5886                                              (__v2df)_mm_setzero_pd());
5887}
5888
5889static __inline__ __m256d __DEFAULT_FN_ATTRS256
5890_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5891{
5892  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5893                                           (__v4df)_mm256_unpacklo_pd(__A, __B),
5894                                           (__v4df)__W);
5895}
5896
5897static __inline__ __m256d __DEFAULT_FN_ATTRS256
5898_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
5899{
5900  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5901                                           (__v4df)_mm256_unpacklo_pd(__A, __B),
5902                                           (__v4df)_mm256_setzero_pd());
5903}
5904
5905static __inline__ __m128 __DEFAULT_FN_ATTRS128
5906_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5907{
5908  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5909                                             (__v4sf)_mm_unpacklo_ps(__A, __B),
5910                                             (__v4sf)__W);
5911}
5912
5913static __inline__ __m128 __DEFAULT_FN_ATTRS128
5914_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
5915{
5916  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5917                                             (__v4sf)_mm_unpacklo_ps(__A, __B),
5918                                             (__v4sf)_mm_setzero_ps());
5919}
5920
5921static __inline__ __m256 __DEFAULT_FN_ATTRS256
5922_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5923{
5924  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5925                                           (__v8sf)_mm256_unpacklo_ps(__A, __B),
5926                                           (__v8sf)__W);
5927}
5928
5929static __inline__ __m256 __DEFAULT_FN_ATTRS256
5930_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
5931{
5932  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5933                                           (__v8sf)_mm256_unpacklo_ps(__A, __B),
5934                                           (__v8sf)_mm256_setzero_ps());
5935}
5936
5937static __inline__ __m128d __DEFAULT_FN_ATTRS128
5938_mm_rcp14_pd (__m128d __A)
5939{
5940  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5941                (__v2df)
5942                _mm_setzero_pd (),
5943                (__mmask8) -1);
5944}
5945
5946static __inline__ __m128d __DEFAULT_FN_ATTRS128
5947_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5948{
5949  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5950                (__v2df) __W,
5951                (__mmask8) __U);
5952}
5953
5954static __inline__ __m128d __DEFAULT_FN_ATTRS128
5955_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
5956{
5957  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5958                (__v2df)
5959                _mm_setzero_pd (),
5960                (__mmask8) __U);
5961}
5962
5963static __inline__ __m256d __DEFAULT_FN_ATTRS256
5964_mm256_rcp14_pd (__m256d __A)
5965{
5966  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5967                (__v4df)
5968                _mm256_setzero_pd (),
5969                (__mmask8) -1);
5970}
5971
5972static __inline__ __m256d __DEFAULT_FN_ATTRS256
5973_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5974{
5975  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5976                (__v4df) __W,
5977                (__mmask8) __U);
5978}
5979
5980static __inline__ __m256d __DEFAULT_FN_ATTRS256
5981_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
5982{
5983  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5984                (__v4df)
5985                _mm256_setzero_pd (),
5986                (__mmask8) __U);
5987}
5988
5989static __inline__ __m128 __DEFAULT_FN_ATTRS128
5990_mm_rcp14_ps (__m128 __A)
5991{
5992  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5993               (__v4sf)
5994               _mm_setzero_ps (),
5995               (__mmask8) -1);
5996}
5997
5998static __inline__ __m128 __DEFAULT_FN_ATTRS128
5999_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6000{
6001  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6002               (__v4sf) __W,
6003               (__mmask8) __U);
6004}
6005
6006static __inline__ __m128 __DEFAULT_FN_ATTRS128
6007_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
6008{
6009  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6010               (__v4sf)
6011               _mm_setzero_ps (),
6012               (__mmask8) __U);
6013}
6014
6015static __inline__ __m256 __DEFAULT_FN_ATTRS256
6016_mm256_rcp14_ps (__m256 __A)
6017{
6018  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6019               (__v8sf)
6020               _mm256_setzero_ps (),
6021               (__mmask8) -1);
6022}
6023
6024static __inline__ __m256 __DEFAULT_FN_ATTRS256
6025_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6026{
6027  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6028               (__v8sf) __W,
6029               (__mmask8) __U);
6030}
6031
6032static __inline__ __m256 __DEFAULT_FN_ATTRS256
6033_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
6034{
6035  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6036               (__v8sf)
6037               _mm256_setzero_ps (),
6038               (__mmask8) __U);
6039}
6040
/* Masked _mm_permute_pd: the select builtin takes U, the permute of X by C,
 * and W as the alternative operand. */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)(__m128d)(W)))
6045
/* Zero-masked _mm_permute_pd: the select builtin takes U, the permute of X
 * by C, and a zero vector as the alternative operand. */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)_mm_setzero_pd()))
6050
/* Masked _mm256_permute_pd: the select builtin takes U, the permute of X by
 * C, and W as the alternative operand. */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)(__m256d)(W)))
6055
/* Zero-masked _mm256_permute_pd: the select builtin takes U, the permute of
 * X by C, and a zero vector as the alternative operand. */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()))
6060
/* Masked _mm_permute_ps: the select builtin takes U, the permute of X by C,
 * and W as the alternative operand. */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)(__m128)(W)))
6065
/* Zero-masked _mm_permute_ps: the select builtin takes U, the permute of X
 * by C, and a zero vector as the alternative operand. */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)_mm_setzero_ps()))
6070
/* Masked _mm256_permute_ps: the select builtin takes U, the permute of X by
 * C, and W as the alternative operand. */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)(__m256)(W)))
6075
/* Zero-masked _mm256_permute_ps: the select builtin takes U, the permute of
 * X by C, and a zero vector as the alternative operand. */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)_mm256_setzero_ps()))
6080
6081static __inline__ __m128d __DEFAULT_FN_ATTRS128
6082_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
6083{
6084  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6085                                            (__v2df)_mm_permutevar_pd(__A, __C),
6086                                            (__v2df)__W);
6087}
6088
6089static __inline__ __m128d __DEFAULT_FN_ATTRS128
6090_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
6091{
6092  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6093                                            (__v2df)_mm_permutevar_pd(__A, __C),
6094                                            (__v2df)_mm_setzero_pd());
6095}
6096
6097static __inline__ __m256d __DEFAULT_FN_ATTRS256
6098_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
6099{
6100  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6101                                         (__v4df)_mm256_permutevar_pd(__A, __C),
6102                                         (__v4df)__W);
6103}
6104
6105static __inline__ __m256d __DEFAULT_FN_ATTRS256
6106_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
6107{
6108  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6109                                         (__v4df)_mm256_permutevar_pd(__A, __C),
6110                                         (__v4df)_mm256_setzero_pd());
6111}
6112
6113static __inline__ __m128 __DEFAULT_FN_ATTRS128
6114_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
6115{
6116  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6117                                            (__v4sf)_mm_permutevar_ps(__A, __C),
6118                                            (__v4sf)__W);
6119}
6120
6121static __inline__ __m128 __DEFAULT_FN_ATTRS128
6122_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
6123{
6124  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6125                                            (__v4sf)_mm_permutevar_ps(__A, __C),
6126                                            (__v4sf)_mm_setzero_ps());
6127}
6128
6129static __inline__ __m256 __DEFAULT_FN_ATTRS256
6130_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
6131{
6132  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6133                                          (__v8sf)_mm256_permutevar_ps(__A, __C),
6134                                          (__v8sf)__W);
6135}
6136
6137static __inline__ __m256 __DEFAULT_FN_ATTRS256
6138_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
6139{
6140  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6141                                          (__v8sf)_mm256_permutevar_ps(__A, __C),
6142                                          (__v8sf)_mm256_setzero_ps());
6143}
6144
6145static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6146_mm_test_epi32_mask (__m128i __A, __m128i __B)
6147{
6148  return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6149}
6150
6151static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6152_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6153{
6154  return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6155                                     _mm_setzero_si128());
6156}
6157
6158static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6159_mm256_test_epi32_mask (__m256i __A, __m256i __B)
6160{
6161  return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
6162                                   _mm256_setzero_si256());
6163}
6164
6165static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6166_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6167{
6168  return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6169                                        _mm256_setzero_si256());
6170}
6171
6172static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6173_mm_test_epi64_mask (__m128i __A, __m128i __B)
6174{
6175  return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6176}
6177
6178static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6179_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6180{
6181  return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6182                                     _mm_setzero_si128());
6183}
6184
6185static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6186_mm256_test_epi64_mask (__m256i __A, __m256i __B)
6187{
6188  return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
6189                                   _mm256_setzero_si256());
6190}
6191
6192static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6193_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6194{
6195  return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6196                                        _mm256_setzero_si256());
6197}
6198
6199static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6200_mm_testn_epi32_mask (__m128i __A, __m128i __B)
6201{
6202  return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6203}
6204
6205static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6206_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6207{
6208  return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6209                                    _mm_setzero_si128());
6210}
6211
6212static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6213_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6214{
6215  return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
6216                                  _mm256_setzero_si256());
6217}
6218
6219static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6220_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6221{
6222  return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6223                                       _mm256_setzero_si256());
6224}
6225
6226static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6227_mm_testn_epi64_mask (__m128i __A, __m128i __B)
6228{
6229  return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6230}
6231
6232static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6233_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6234{
6235  return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6236                                    _mm_setzero_si128());
6237}
6238
6239static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6240_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6241{
6242  return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6243                                  _mm256_setzero_si256());
6244}
6245
6246static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6247_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6248{
6249  return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6250                                       _mm256_setzero_si256());
6251}
6252
6253static __inline__ __m128i __DEFAULT_FN_ATTRS128
6254_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6255{
6256  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6257                                           (__v4si)_mm_unpackhi_epi32(__A, __B),
6258                                           (__v4si)__W);
6259}
6260
6261static __inline__ __m128i __DEFAULT_FN_ATTRS128
6262_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6263{
6264  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6265                                           (__v4si)_mm_unpackhi_epi32(__A, __B),
6266                                           (__v4si)_mm_setzero_si128());
6267}
6268
6269static __inline__ __m256i __DEFAULT_FN_ATTRS256
6270_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6271{
6272  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6273                                        (__v8si)_mm256_unpackhi_epi32(__A, __B),
6274                                        (__v8si)__W);
6275}
6276
6277static __inline__ __m256i __DEFAULT_FN_ATTRS256
6278_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6279{
6280  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6281                                        (__v8si)_mm256_unpackhi_epi32(__A, __B),
6282                                        (__v8si)_mm256_setzero_si256());
6283}
6284
6285static __inline__ __m128i __DEFAULT_FN_ATTRS128
6286_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6287{
6288  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6289                                           (__v2di)_mm_unpackhi_epi64(__A, __B),
6290                                           (__v2di)__W);
6291}
6292
6293static __inline__ __m128i __DEFAULT_FN_ATTRS128
6294_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6295{
6296  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6297                                           (__v2di)_mm_unpackhi_epi64(__A, __B),
6298                                           (__v2di)_mm_setzero_si128());
6299}
6300
6301static __inline__ __m256i __DEFAULT_FN_ATTRS256
6302_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6303{
6304  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6305                                        (__v4di)_mm256_unpackhi_epi64(__A, __B),
6306                                        (__v4di)__W);
6307}
6308
6309static __inline__ __m256i __DEFAULT_FN_ATTRS256
6310_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6311{
6312  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6313                                        (__v4di)_mm256_unpackhi_epi64(__A, __B),
6314                                        (__v4di)_mm256_setzero_si256());
6315}
6316
6317static __inline__ __m128i __DEFAULT_FN_ATTRS128
6318_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6319{
6320  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6321                                           (__v4si)_mm_unpacklo_epi32(__A, __B),
6322                                           (__v4si)__W);
6323}
6324
6325static __inline__ __m128i __DEFAULT_FN_ATTRS128
6326_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6327{
6328  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6329                                           (__v4si)_mm_unpacklo_epi32(__A, __B),
6330                                           (__v4si)_mm_setzero_si128());
6331}
6332
6333static __inline__ __m256i __DEFAULT_FN_ATTRS256
6334_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6335{
6336  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6337                                        (__v8si)_mm256_unpacklo_epi32(__A, __B),
6338                                        (__v8si)__W);
6339}
6340
6341static __inline__ __m256i __DEFAULT_FN_ATTRS256
6342_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6343{
6344  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6345                                        (__v8si)_mm256_unpacklo_epi32(__A, __B),
6346                                        (__v8si)_mm256_setzero_si256());
6347}
6348
6349static __inline__ __m128i __DEFAULT_FN_ATTRS128
6350_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6351{
6352  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6353                                           (__v2di)_mm_unpacklo_epi64(__A, __B),
6354                                           (__v2di)__W);
6355}
6356
6357static __inline__ __m128i __DEFAULT_FN_ATTRS128
6358_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6359{
6360  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6361                                           (__v2di)_mm_unpacklo_epi64(__A, __B),
6362                                           (__v2di)_mm_setzero_si128());
6363}
6364
6365static __inline__ __m256i __DEFAULT_FN_ATTRS256
6366_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6367{
6368  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6369                                        (__v4di)_mm256_unpacklo_epi64(__A, __B),
6370                                        (__v4di)__W);
6371}
6372
6373static __inline__ __m256i __DEFAULT_FN_ATTRS256
6374_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6375{
6376  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6377                                        (__v4di)_mm256_unpacklo_epi64(__A, __B),
6378                                        (__v4di)_mm256_setzero_si256());
6379}
6380
6381static __inline__ __m128i __DEFAULT_FN_ATTRS128
6382_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6383{
6384  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6385                                             (__v4si)_mm_sra_epi32(__A, __B),
6386                                             (__v4si)__W);
6387}
6388
6389static __inline__ __m128i __DEFAULT_FN_ATTRS128
6390_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6391{
6392  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6393                                             (__v4si)_mm_sra_epi32(__A, __B),
6394                                             (__v4si)_mm_setzero_si128());
6395}
6396
6397static __inline__ __m256i __DEFAULT_FN_ATTRS256
6398_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6399{
6400  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6401                                             (__v8si)_mm256_sra_epi32(__A, __B),
6402                                             (__v8si)__W);
6403}
6404
6405static __inline__ __m256i __DEFAULT_FN_ATTRS256
6406_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6407{
6408  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6409                                             (__v8si)_mm256_sra_epi32(__A, __B),
6410                                             (__v8si)_mm256_setzero_si256());
6411}
6412
6413static __inline__ __m128i __DEFAULT_FN_ATTRS128
6414_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
6415{
6416  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6417                                             (__v4si)_mm_srai_epi32(__A, (int)__B),
6418                                             (__v4si)__W);
6419}
6420
6421static __inline__ __m128i __DEFAULT_FN_ATTRS128
6422_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
6423{
6424  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6425                                             (__v4si)_mm_srai_epi32(__A, (int)__B),
6426                                             (__v4si)_mm_setzero_si128());
6427}
6428
6429static __inline__ __m256i __DEFAULT_FN_ATTRS256
6430_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
6431{
6432  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6433                                             (__v8si)_mm256_srai_epi32(__A, (int)__B),
6434                                             (__v8si)__W);
6435}
6436
6437static __inline__ __m256i __DEFAULT_FN_ATTRS256
6438_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
6439{
6440  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6441                                             (__v8si)_mm256_srai_epi32(__A, (int)__B),
6442                                             (__v8si)_mm256_setzero_si256());
6443}
6444
6445static __inline__ __m128i __DEFAULT_FN_ATTRS128
6446_mm_sra_epi64(__m128i __A, __m128i __B)
6447{
6448  return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6449}
6450
6451static __inline__ __m128i __DEFAULT_FN_ATTRS128
6452_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6453{
6454  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6455                                             (__v2di)_mm_sra_epi64(__A, __B), \
6456                                             (__v2di)__W);
6457}
6458
6459static __inline__ __m128i __DEFAULT_FN_ATTRS128
6460_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6461{
6462  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6463                                             (__v2di)_mm_sra_epi64(__A, __B), \
6464                                             (__v2di)_mm_setzero_si128());
6465}
6466
6467static __inline__ __m256i __DEFAULT_FN_ATTRS256
6468_mm256_sra_epi64(__m256i __A, __m128i __B)
6469{
6470  return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6471}
6472
6473static __inline__ __m256i __DEFAULT_FN_ATTRS256
6474_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6475{
6476  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6477                                           (__v4di)_mm256_sra_epi64(__A, __B), \
6478                                           (__v4di)__W);
6479}
6480
6481static __inline__ __m256i __DEFAULT_FN_ATTRS256
6482_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6483{
6484  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6485                                           (__v4di)_mm256_sra_epi64(__A, __B), \
6486                                           (__v4di)_mm256_setzero_si256());
6487}
6488
6489static __inline__ __m128i __DEFAULT_FN_ATTRS128
6490_mm_srai_epi64(__m128i __A, unsigned int __imm)
6491{
6492  return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm);
6493}
6494
6495static __inline__ __m128i __DEFAULT_FN_ATTRS128
6496_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
6497{
6498  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6499                                           (__v2di)_mm_srai_epi64(__A, __imm), \
6500                                           (__v2di)__W);
6501}
6502
6503static __inline__ __m128i __DEFAULT_FN_ATTRS128
6504_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
6505{
6506  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6507                                           (__v2di)_mm_srai_epi64(__A, __imm), \
6508                                           (__v2di)_mm_setzero_si128());
6509}
6510
6511static __inline__ __m256i __DEFAULT_FN_ATTRS256
6512_mm256_srai_epi64(__m256i __A, unsigned int __imm)
6513{
6514  return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm);
6515}
6516
6517static __inline__ __m256i __DEFAULT_FN_ATTRS256
6518_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A,
6519                       unsigned int __imm)
6520{
6521  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6522                                        (__v4di)_mm256_srai_epi64(__A, __imm), \
6523                                        (__v4di)__W);
6524}
6525
6526static __inline__ __m256i __DEFAULT_FN_ATTRS256
6527_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
6528{
6529  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6530                                        (__v4di)_mm256_srai_epi64(__A, __imm), \
6531                                        (__v4di)_mm256_setzero_si256());
6532}
6533
/* Unmasked form: passes A, B, C (as __v4si) and imm (as unsigned char) to the
 * pternlogd128 mask builtin with an all-ones mask. */
#define _mm_ternarylogic_epi32(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))
6538
/* Masked form: passes A, B, C (as __v4si) and imm (as unsigned char) to the
 * pternlogd128 mask builtin with U as the mask. */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))
6543
/* Zero-masked form: passes A, B, C (as __v4si) and imm (as unsigned char) to
 * the pternlogd128 maskz builtin with U as the mask. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                              (__v4si)(__m128i)(B), \
                                              (__v4si)(__m128i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
6548
/* Unmasked form: passes A, B, C (as __v8si) and imm (as unsigned char) to the
 * pternlogd256 mask builtin with an all-ones mask. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))
6553
/* Masked form: passes A, B, C (as __v8si) and imm (as unsigned char) to the
 * pternlogd256 mask builtin with U as the mask. */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))
6558
/* Zero-masked form: passes A, B, C (as __v8si) and imm (as unsigned char) to
 * the pternlogd256 maskz builtin with U as the mask. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (__v8si)(__m256i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
6563
/* Unmasked form: passes A, B, C (as __v2di) and imm (as unsigned char) to the
 * pternlogq128 mask builtin with an all-ones mask. */
#define _mm_ternarylogic_epi64(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))
6568
/* Masked form: passes A, B, C (as __v2di) and imm (as unsigned char) to the
 * pternlogq128 mask builtin with U as the mask. */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))
6573
/* Zero-masked form: passes A, B, C (as __v2di) and imm (as unsigned char) to
 * the pternlogq128 maskz builtin with U as the mask. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                              (__v2di)(__m128i)(B), \
                                              (__v2di)(__m128i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
6578
/* Unmasked form: passes A, B, C (as __v4di) and imm (as unsigned char) to the
 * pternlogq256 mask builtin with an all-ones mask. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))
6583
/* Masked form: passes A, B, C (as __v4di) and imm (as unsigned char) to the
 * pternlogq256 mask builtin with U as the mask. */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))
6588
/* Zero-masked form: passes A, B, C (as __v4di) and imm (as unsigned char) to
 * the pternlogq256 maskz builtin with U as the mask. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (__v4di)(__m256i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
6593
/* Shuffle of A and B as __v8sf vectors through
   __builtin_ia32_shuf_f32x4_256, controlled by the integer immediate imm. */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm)))
6597
/* Merge-masked variant: selects under mask U between
   _mm256_shuffle_f32x4(A, B, imm) and W via __builtin_ia32_selectps_256. */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)(__m256)(W)))
6602
/* Zero-masked variant: selects under mask U between
   _mm256_shuffle_f32x4(A, B, imm) and an all-zero vector. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()))
6607
/* Shuffle of A and B as __v4df vectors through
   __builtin_ia32_shuf_f64x2_256, controlled by the integer immediate imm. */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (int)(imm)))
6611
/* Merge-masked variant: selects under mask U between
   _mm256_shuffle_f64x2(A, B, imm) and W via __builtin_ia32_selectpd_256. */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)(__m256d)(W)))
6616
/* Zero-masked variant: selects under mask U between
   _mm256_shuffle_f64x2(A, B, imm) and an all-zero vector. */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)_mm256_setzero_pd()))
6621
/* Shuffle of A and B as __v8si vectors through
   __builtin_ia32_shuf_i32x4_256, controlled by the integer immediate imm. */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm)))
6625
/* Merge-masked variant: selects under mask U between
   _mm256_shuffle_i32x4(A, B, imm) and W via __builtin_ia32_selectd_256. */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
6630
/* Zero-masked variant: selects under mask U between
   _mm256_shuffle_i32x4(A, B, imm) and an all-zero vector. */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
6635
/* Shuffle of A and B as __v4di vectors through
   __builtin_ia32_shuf_i64x2_256, controlled by the integer immediate imm. */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (int)(imm)))
6639
/* Merge-masked variant: selects under mask U between
   _mm256_shuffle_i64x2(A, B, imm) and W via __builtin_ia32_selectq_256. */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))
6644
6645
/* Zero-masked variant: selects under mask U between
   _mm256_shuffle_i64x2(A, B, imm) and an all-zero vector. */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()))
6650
/* Merge-masked _mm_shuffle_pd: selects under mask U between the shuffle
   result and W via __builtin_ia32_selectpd_128. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)(__m128d)(W)))
6655
/* Zero-masked _mm_shuffle_pd: selects under mask U between the shuffle
   result and an all-zero vector. */
#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)_mm_setzero_pd()))
6660
/* Merge-masked _mm256_shuffle_pd: selects under mask U between the shuffle
   result and W via __builtin_ia32_selectpd_256. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)(__m256d)(W)))
6665
/* Zero-masked _mm256_shuffle_pd: selects under mask U between the shuffle
   result and an all-zero vector. */
#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)_mm256_setzero_pd()))
6670
/* Merge-masked _mm_shuffle_ps: selects under mask U between the shuffle
   result and W via __builtin_ia32_selectps_128. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)(__m128)(W)))
6675
/* Zero-masked _mm_shuffle_ps: selects under mask U between the shuffle
   result and an all-zero vector. */
#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)_mm_setzero_ps()))
6680
/* Merge-masked _mm256_shuffle_ps: selects under mask U between the shuffle
   result and W via __builtin_ia32_selectps_256. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)(__m256)(W)))
6685
/* Zero-masked _mm256_shuffle_ps: selects under mask U between the shuffle
   result and an all-zero vector. */
#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)_mm256_setzero_ps()))
6690
6691static __inline__ __m128d __DEFAULT_FN_ATTRS128
6692_mm_rsqrt14_pd (__m128d __A)
6693{
6694  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6695                 (__v2df)
6696                 _mm_setzero_pd (),
6697                 (__mmask8) -1);
6698}
6699
6700static __inline__ __m128d __DEFAULT_FN_ATTRS128
6701_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6702{
6703  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6704                 (__v2df) __W,
6705                 (__mmask8) __U);
6706}
6707
6708static __inline__ __m128d __DEFAULT_FN_ATTRS128
6709_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
6710{
6711  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6712                 (__v2df)
6713                 _mm_setzero_pd (),
6714                 (__mmask8) __U);
6715}
6716
6717static __inline__ __m256d __DEFAULT_FN_ATTRS256
6718_mm256_rsqrt14_pd (__m256d __A)
6719{
6720  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6721                 (__v4df)
6722                 _mm256_setzero_pd (),
6723                 (__mmask8) -1);
6724}
6725
6726static __inline__ __m256d __DEFAULT_FN_ATTRS256
6727_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6728{
6729  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6730                 (__v4df) __W,
6731                 (__mmask8) __U);
6732}
6733
6734static __inline__ __m256d __DEFAULT_FN_ATTRS256
6735_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
6736{
6737  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6738                 (__v4df)
6739                 _mm256_setzero_pd (),
6740                 (__mmask8) __U);
6741}
6742
6743static __inline__ __m128 __DEFAULT_FN_ATTRS128
6744_mm_rsqrt14_ps (__m128 __A)
6745{
6746  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6747                (__v4sf)
6748                _mm_setzero_ps (),
6749                (__mmask8) -1);
6750}
6751
6752static __inline__ __m128 __DEFAULT_FN_ATTRS128
6753_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6754{
6755  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6756                (__v4sf) __W,
6757                (__mmask8) __U);
6758}
6759
6760static __inline__ __m128 __DEFAULT_FN_ATTRS128
6761_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
6762{
6763  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6764                (__v4sf)
6765                _mm_setzero_ps (),
6766                (__mmask8) __U);
6767}
6768
6769static __inline__ __m256 __DEFAULT_FN_ATTRS256
6770_mm256_rsqrt14_ps (__m256 __A)
6771{
6772  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6773                (__v8sf)
6774                _mm256_setzero_ps (),
6775                (__mmask8) -1);
6776}
6777
6778static __inline__ __m256 __DEFAULT_FN_ATTRS256
6779_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6780{
6781  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6782                (__v8sf) __W,
6783                (__mmask8) __U);
6784}
6785
6786static __inline__ __m256 __DEFAULT_FN_ATTRS256
6787_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
6788{
6789  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6790                (__v8sf)
6791                _mm256_setzero_ps (),
6792                (__mmask8) __U);
6793}
6794
6795static __inline__ __m256 __DEFAULT_FN_ATTRS256
6796_mm256_broadcast_f32x4(__m128 __A)
6797{
6798  return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6799                                         0, 1, 2, 3, 0, 1, 2, 3);
6800}
6801
6802static __inline__ __m256 __DEFAULT_FN_ATTRS256
6803_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
6804{
6805  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6806                                            (__v8sf)_mm256_broadcast_f32x4(__A),
6807                                            (__v8sf)__O);
6808}
6809
6810static __inline__ __m256 __DEFAULT_FN_ATTRS256
6811_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
6812{
6813  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6814                                            (__v8sf)_mm256_broadcast_f32x4(__A),
6815                                            (__v8sf)_mm256_setzero_ps());
6816}
6817
6818static __inline__ __m256i __DEFAULT_FN_ATTRS256
6819_mm256_broadcast_i32x4(__m128i __A)
6820{
6821  return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6822                                          0, 1, 2, 3, 0, 1, 2, 3);
6823}
6824
6825static __inline__ __m256i __DEFAULT_FN_ATTRS256
6826_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
6827{
6828  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6829                                            (__v8si)_mm256_broadcast_i32x4(__A),
6830                                            (__v8si)__O);
6831}
6832
6833static __inline__ __m256i __DEFAULT_FN_ATTRS256
6834_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
6835{
6836  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6837                                            (__v8si)_mm256_broadcast_i32x4(__A),
6838                                            (__v8si)_mm256_setzero_si256());
6839}
6840
6841static __inline__ __m256d __DEFAULT_FN_ATTRS256
6842_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
6843{
6844  return (__m256d)__builtin_ia32_selectpd_256(__M,
6845                                              (__v4df) _mm256_broadcastsd_pd(__A),
6846                                              (__v4df) __O);
6847}
6848
6849static __inline__ __m256d __DEFAULT_FN_ATTRS256
6850_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6851{
6852  return (__m256d)__builtin_ia32_selectpd_256(__M,
6853                                              (__v4df) _mm256_broadcastsd_pd(__A),
6854                                              (__v4df) _mm256_setzero_pd());
6855}
6856
6857static __inline__ __m128 __DEFAULT_FN_ATTRS128
6858_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
6859{
6860  return (__m128)__builtin_ia32_selectps_128(__M,
6861                                             (__v4sf) _mm_broadcastss_ps(__A),
6862                                             (__v4sf) __O);
6863}
6864
6865static __inline__ __m128 __DEFAULT_FN_ATTRS128
6866_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6867{
6868  return (__m128)__builtin_ia32_selectps_128(__M,
6869                                             (__v4sf) _mm_broadcastss_ps(__A),
6870                                             (__v4sf) _mm_setzero_ps());
6871}
6872
6873static __inline__ __m256 __DEFAULT_FN_ATTRS256
6874_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
6875{
6876  return (__m256)__builtin_ia32_selectps_256(__M,
6877                                             (__v8sf) _mm256_broadcastss_ps(__A),
6878                                             (__v8sf) __O);
6879}
6880
6881static __inline__ __m256 __DEFAULT_FN_ATTRS256
6882_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6883{
6884  return (__m256)__builtin_ia32_selectps_256(__M,
6885                                             (__v8sf) _mm256_broadcastss_ps(__A),
6886                                             (__v8sf) _mm256_setzero_ps());
6887}
6888
6889static __inline__ __m128i __DEFAULT_FN_ATTRS128
6890_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6891{
6892  return (__m128i)__builtin_ia32_selectd_128(__M,
6893                                             (__v4si) _mm_broadcastd_epi32(__A),
6894                                             (__v4si) __O);
6895}
6896
6897static __inline__ __m128i __DEFAULT_FN_ATTRS128
6898_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6899{
6900  return (__m128i)__builtin_ia32_selectd_128(__M,
6901                                             (__v4si) _mm_broadcastd_epi32(__A),
6902                                             (__v4si) _mm_setzero_si128());
6903}
6904
6905static __inline__ __m256i __DEFAULT_FN_ATTRS256
6906_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
6907{
6908  return (__m256i)__builtin_ia32_selectd_256(__M,
6909                                             (__v8si) _mm256_broadcastd_epi32(__A),
6910                                             (__v8si) __O);
6911}
6912
6913static __inline__ __m256i __DEFAULT_FN_ATTRS256
6914_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6915{
6916  return (__m256i)__builtin_ia32_selectd_256(__M,
6917                                             (__v8si) _mm256_broadcastd_epi32(__A),
6918                                             (__v8si) _mm256_setzero_si256());
6919}
6920
6921static __inline__ __m128i __DEFAULT_FN_ATTRS128
6922_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
6923{
6924  return (__m128i)__builtin_ia32_selectq_128(__M,
6925                                             (__v2di) _mm_broadcastq_epi64(__A),
6926                                             (__v2di) __O);
6927}
6928
6929static __inline__ __m128i __DEFAULT_FN_ATTRS128
6930_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6931{
6932  return (__m128i)__builtin_ia32_selectq_128(__M,
6933                                             (__v2di) _mm_broadcastq_epi64(__A),
6934                                             (__v2di) _mm_setzero_si128());
6935}
6936
6937static __inline__ __m256i __DEFAULT_FN_ATTRS256
6938_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
6939{
6940  return (__m256i)__builtin_ia32_selectq_256(__M,
6941                                             (__v4di) _mm256_broadcastq_epi64(__A),
6942                                             (__v4di) __O);
6943}
6944
6945static __inline__ __m256i __DEFAULT_FN_ATTRS256
6946_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6947{
6948  return (__m256i)__builtin_ia32_selectq_256(__M,
6949                                             (__v4di) _mm256_broadcastq_epi64(__A),
6950                                             (__v4di) _mm256_setzero_si256());
6951}
6952
6953static __inline__ __m128i __DEFAULT_FN_ATTRS128
6954_mm_cvtsepi32_epi8 (__m128i __A)
6955{
6956  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6957               (__v16qi)_mm_undefined_si128(),
6958               (__mmask8) -1);
6959}
6960
6961static __inline__ __m128i __DEFAULT_FN_ATTRS128
6962_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6963{
6964  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6965               (__v16qi) __O, __M);
6966}
6967
6968static __inline__ __m128i __DEFAULT_FN_ATTRS128
6969_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
6970{
6971  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6972               (__v16qi) _mm_setzero_si128 (),
6973               __M);
6974}
6975
6976static __inline__ void __DEFAULT_FN_ATTRS128
6977_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
6978{
6979  __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6980}
6981
6982static __inline__ __m128i __DEFAULT_FN_ATTRS256
6983_mm256_cvtsepi32_epi8 (__m256i __A)
6984{
6985  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6986               (__v16qi)_mm_undefined_si128(),
6987               (__mmask8) -1);
6988}
6989
6990static __inline__ __m128i __DEFAULT_FN_ATTRS256
6991_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6992{
6993  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6994               (__v16qi) __O, __M);
6995}
6996
6997static __inline__ __m128i __DEFAULT_FN_ATTRS256
6998_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
6999{
7000  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7001               (__v16qi) _mm_setzero_si128 (),
7002               __M);
7003}
7004
7005static __inline__ void __DEFAULT_FN_ATTRS256
7006_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7007{
7008  __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7009}
7010
7011static __inline__ __m128i __DEFAULT_FN_ATTRS128
7012_mm_cvtsepi32_epi16 (__m128i __A)
7013{
7014  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7015               (__v8hi)_mm_setzero_si128 (),
7016               (__mmask8) -1);
7017}
7018
7019static __inline__ __m128i __DEFAULT_FN_ATTRS128
7020_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7021{
7022  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7023               (__v8hi)__O,
7024               __M);
7025}
7026
7027static __inline__ __m128i __DEFAULT_FN_ATTRS128
7028_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
7029{
7030  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7031               (__v8hi) _mm_setzero_si128 (),
7032               __M);
7033}
7034
7035static __inline__ void __DEFAULT_FN_ATTRS128
7036_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7037{
7038  __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7039}
7040
7041static __inline__ __m128i __DEFAULT_FN_ATTRS256
7042_mm256_cvtsepi32_epi16 (__m256i __A)
7043{
7044  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7045               (__v8hi)_mm_undefined_si128(),
7046               (__mmask8) -1);
7047}
7048
7049static __inline__ __m128i __DEFAULT_FN_ATTRS256
7050_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7051{
7052  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7053               (__v8hi) __O, __M);
7054}
7055
7056static __inline__ __m128i __DEFAULT_FN_ATTRS256
7057_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
7058{
7059  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7060               (__v8hi) _mm_setzero_si128 (),
7061               __M);
7062}
7063
7064static __inline__ void __DEFAULT_FN_ATTRS256
7065_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7066{
7067  __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7068}
7069
7070static __inline__ __m128i __DEFAULT_FN_ATTRS128
7071_mm_cvtsepi64_epi8 (__m128i __A)
7072{
7073  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7074               (__v16qi)_mm_undefined_si128(),
7075               (__mmask8) -1);
7076}
7077
7078static __inline__ __m128i __DEFAULT_FN_ATTRS128
7079_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7080{
7081  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7082               (__v16qi) __O, __M);
7083}
7084
7085static __inline__ __m128i __DEFAULT_FN_ATTRS128
7086_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
7087{
7088  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7089               (__v16qi) _mm_setzero_si128 (),
7090               __M);
7091}
7092
7093static __inline__ void __DEFAULT_FN_ATTRS128
7094_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7095{
7096  __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7097}
7098
7099static __inline__ __m128i __DEFAULT_FN_ATTRS256
7100_mm256_cvtsepi64_epi8 (__m256i __A)
7101{
7102  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7103               (__v16qi)_mm_undefined_si128(),
7104               (__mmask8) -1);
7105}
7106
7107static __inline__ __m128i __DEFAULT_FN_ATTRS256
7108_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7109{
7110  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7111               (__v16qi) __O, __M);
7112}
7113
7114static __inline__ __m128i __DEFAULT_FN_ATTRS256
7115_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
7116{
7117  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7118               (__v16qi) _mm_setzero_si128 (),
7119               __M);
7120}
7121
7122static __inline__ void __DEFAULT_FN_ATTRS256
7123_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7124{
7125  __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7126}
7127
7128static __inline__ __m128i __DEFAULT_FN_ATTRS128
7129_mm_cvtsepi64_epi32 (__m128i __A)
7130{
7131  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7132               (__v4si)_mm_undefined_si128(),
7133               (__mmask8) -1);
7134}
7135
7136static __inline__ __m128i __DEFAULT_FN_ATTRS128
7137_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7138{
7139  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7140               (__v4si) __O, __M);
7141}
7142
7143static __inline__ __m128i __DEFAULT_FN_ATTRS128
7144_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
7145{
7146  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7147               (__v4si) _mm_setzero_si128 (),
7148               __M);
7149}
7150
7151static __inline__ void __DEFAULT_FN_ATTRS128
7152_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7153{
7154  __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7155}
7156
7157static __inline__ __m128i __DEFAULT_FN_ATTRS256
7158_mm256_cvtsepi64_epi32 (__m256i __A)
7159{
7160  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7161               (__v4si)_mm_undefined_si128(),
7162               (__mmask8) -1);
7163}
7164
7165static __inline__ __m128i __DEFAULT_FN_ATTRS256
7166_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7167{
7168  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7169               (__v4si)__O,
7170               __M);
7171}
7172
7173static __inline__ __m128i __DEFAULT_FN_ATTRS256
7174_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
7175{
7176  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7177               (__v4si) _mm_setzero_si128 (),
7178               __M);
7179}
7180
7181static __inline__ void __DEFAULT_FN_ATTRS256
7182_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7183{
7184  __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7185}
7186
7187static __inline__ __m128i __DEFAULT_FN_ATTRS128
7188_mm_cvtsepi64_epi16 (__m128i __A)
7189{
7190  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7191               (__v8hi)_mm_undefined_si128(),
7192               (__mmask8) -1);
7193}
7194
7195static __inline__ __m128i __DEFAULT_FN_ATTRS128
7196_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7197{
7198  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7199               (__v8hi) __O, __M);
7200}
7201
7202static __inline__ __m128i __DEFAULT_FN_ATTRS128
7203_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
7204{
7205  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7206               (__v8hi) _mm_setzero_si128 (),
7207               __M);
7208}
7209
7210static __inline__ void __DEFAULT_FN_ATTRS128
7211_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7212{
7213  __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7214}
7215
7216static __inline__ __m128i __DEFAULT_FN_ATTRS256
7217_mm256_cvtsepi64_epi16 (__m256i __A)
7218{
7219  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7220               (__v8hi)_mm_undefined_si128(),
7221               (__mmask8) -1);
7222}
7223
7224static __inline__ __m128i __DEFAULT_FN_ATTRS256
7225_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7226{
7227  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7228               (__v8hi) __O, __M);
7229}
7230
7231static __inline__ __m128i __DEFAULT_FN_ATTRS256
7232_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
7233{
7234  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7235               (__v8hi) _mm_setzero_si128 (),
7236               __M);
7237}
7238
7239static __inline__ void __DEFAULT_FN_ATTRS256
7240_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7241{
7242  __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7243}
7244
7245static __inline__ __m128i __DEFAULT_FN_ATTRS128
7246_mm_cvtusepi32_epi8 (__m128i __A)
7247{
7248  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7249                (__v16qi)_mm_undefined_si128(),
7250                (__mmask8) -1);
7251}
7252
7253static __inline__ __m128i __DEFAULT_FN_ATTRS128
7254_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7255{
7256  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7257                (__v16qi) __O,
7258                __M);
7259}
7260
7261static __inline__ __m128i __DEFAULT_FN_ATTRS128
7262_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
7263{
7264  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7265                (__v16qi) _mm_setzero_si128 (),
7266                __M);
7267}
7268
7269static __inline__ void __DEFAULT_FN_ATTRS128
7270_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7271{
7272  __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7273}
7274
7275static __inline__ __m128i __DEFAULT_FN_ATTRS256
7276_mm256_cvtusepi32_epi8 (__m256i __A)
7277{
7278  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7279                (__v16qi)_mm_undefined_si128(),
7280                (__mmask8) -1);
7281}
7282
7283static __inline__ __m128i __DEFAULT_FN_ATTRS256
7284_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7285{
7286  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7287                (__v16qi) __O,
7288                __M);
7289}
7290
7291static __inline__ __m128i __DEFAULT_FN_ATTRS256
7292_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
7293{
7294  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7295                (__v16qi) _mm_setzero_si128 (),
7296                __M);
7297}
7298
7299static __inline__ void __DEFAULT_FN_ATTRS256
7300_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7301{
7302  __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7303}
7304
7305static __inline__ __m128i __DEFAULT_FN_ATTRS128
7306_mm_cvtusepi32_epi16 (__m128i __A)
7307{
7308  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7309                (__v8hi)_mm_undefined_si128(),
7310                (__mmask8) -1);
7311}
7312
7313static __inline__ __m128i __DEFAULT_FN_ATTRS128
7314_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7315{
7316  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7317                (__v8hi) __O, __M);
7318}
7319
7320static __inline__ __m128i __DEFAULT_FN_ATTRS128
7321_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
7322{
7323  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7324                (__v8hi) _mm_setzero_si128 (),
7325                __M);
7326}
7327
7328static __inline__ void __DEFAULT_FN_ATTRS128
7329_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7330{
7331  __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7332}
7333
7334static __inline__ __m128i __DEFAULT_FN_ATTRS256
7335_mm256_cvtusepi32_epi16 (__m256i __A)
7336{
7337  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7338                (__v8hi) _mm_undefined_si128(),
7339                (__mmask8) -1);
7340}
7341
7342static __inline__ __m128i __DEFAULT_FN_ATTRS256
7343_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7344{
7345  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7346                (__v8hi) __O, __M);
7347}
7348
7349static __inline__ __m128i __DEFAULT_FN_ATTRS256
7350_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
7351{
7352  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7353                (__v8hi) _mm_setzero_si128 (),
7354                __M);
7355}
7356
7357static __inline__ void __DEFAULT_FN_ATTRS256
7358_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7359{
7360  __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7361}
7362
7363static __inline__ __m128i __DEFAULT_FN_ATTRS128
7364_mm_cvtusepi64_epi8 (__m128i __A)
7365{
7366  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7367                (__v16qi)_mm_undefined_si128(),
7368                (__mmask8) -1);
7369}
7370
7371static __inline__ __m128i __DEFAULT_FN_ATTRS128
7372_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7373{
7374  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7375                (__v16qi) __O,
7376                __M);
7377}
7378
7379static __inline__ __m128i __DEFAULT_FN_ATTRS128
7380_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
7381{
7382  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7383                (__v16qi) _mm_setzero_si128 (),
7384                __M);
7385}
7386
7387static __inline__ void __DEFAULT_FN_ATTRS128
7388_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7389{
7390  __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7391}
7392
7393static __inline__ __m128i __DEFAULT_FN_ATTRS256
7394_mm256_cvtusepi64_epi8 (__m256i __A)
7395{
7396  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7397                (__v16qi)_mm_undefined_si128(),
7398                (__mmask8) -1);
7399}
7400
7401static __inline__ __m128i __DEFAULT_FN_ATTRS256
7402_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7403{
7404  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7405                (__v16qi) __O,
7406                __M);
7407}
7408
7409static __inline__ __m128i __DEFAULT_FN_ATTRS256
7410_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
7411{
7412  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7413                (__v16qi) _mm_setzero_si128 (),
7414                __M);
7415}
7416
7417static __inline__ void __DEFAULT_FN_ATTRS256
7418_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7419{
7420  __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7421}
7422
7423static __inline__ __m128i __DEFAULT_FN_ATTRS128
7424_mm_cvtusepi64_epi32 (__m128i __A)
7425{
7426  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7427                (__v4si)_mm_undefined_si128(),
7428                (__mmask8) -1);
7429}
7430
7431static __inline__ __m128i __DEFAULT_FN_ATTRS128
7432_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7433{
7434  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7435                (__v4si) __O, __M);
7436}
7437
7438static __inline__ __m128i __DEFAULT_FN_ATTRS128
7439_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
7440{
7441  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7442                (__v4si) _mm_setzero_si128 (),
7443                __M);
7444}
7445
7446static __inline__ void __DEFAULT_FN_ATTRS128
7447_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7448{
7449  __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7450}
7451
7452static __inline__ __m128i __DEFAULT_FN_ATTRS256
7453_mm256_cvtusepi64_epi32 (__m256i __A)
7454{
7455  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7456                (__v4si)_mm_undefined_si128(),
7457                (__mmask8) -1);
7458}
7459
7460static __inline__ __m128i __DEFAULT_FN_ATTRS256
7461_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7462{
7463  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7464                (__v4si) __O, __M);
7465}
7466
7467static __inline__ __m128i __DEFAULT_FN_ATTRS256
7468_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
7469{
7470  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7471                (__v4si) _mm_setzero_si128 (),
7472                __M);
7473}
7474
7475static __inline__ void __DEFAULT_FN_ATTRS256
7476_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7477{
7478  __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7479}
7480
7481static __inline__ __m128i __DEFAULT_FN_ATTRS128
7482_mm_cvtusepi64_epi16 (__m128i __A)
7483{
7484  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7485                (__v8hi)_mm_undefined_si128(),
7486                (__mmask8) -1);
7487}
7488
7489static __inline__ __m128i __DEFAULT_FN_ATTRS128
7490_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7491{
7492  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7493                (__v8hi) __O, __M);
7494}
7495
7496static __inline__ __m128i __DEFAULT_FN_ATTRS128
7497_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
7498{
7499  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7500                (__v8hi) _mm_setzero_si128 (),
7501                __M);
7502}
7503
7504static __inline__ void __DEFAULT_FN_ATTRS128
7505_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7506{
7507  __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7508}
7509
7510static __inline__ __m128i __DEFAULT_FN_ATTRS256
7511_mm256_cvtusepi64_epi16 (__m256i __A)
7512{
7513  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7514                (__v8hi)_mm_undefined_si128(),
7515                (__mmask8) -1);
7516}
7517
7518static __inline__ __m128i __DEFAULT_FN_ATTRS256
7519_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7520{
7521  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7522                (__v8hi) __O, __M);
7523}
7524
7525static __inline__ __m128i __DEFAULT_FN_ATTRS256
7526_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
7527{
7528  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7529                (__v8hi) _mm_setzero_si128 (),
7530                __M);
7531}
7532
7533static __inline__ void __DEFAULT_FN_ATTRS256
7534_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7535{
7536  __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7537}
7538
7539static __inline__ __m128i __DEFAULT_FN_ATTRS128
7540_mm_cvtepi32_epi8 (__m128i __A)
7541{
7542  return (__m128i)__builtin_shufflevector(
7543      __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7544      2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7545}
7546
7547static __inline__ __m128i __DEFAULT_FN_ATTRS128
7548_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7549{
7550  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7551              (__v16qi) __O, __M);
7552}
7553
7554static __inline__ __m128i __DEFAULT_FN_ATTRS128
7555_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
7556{
7557  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7558              (__v16qi)
7559              _mm_setzero_si128 (),
7560              __M);
7561}
7562
7563static __inline__ void __DEFAULT_FN_ATTRS128
7564_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7565{
7566  __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7567}
7568
7569static __inline__ __m128i __DEFAULT_FN_ATTRS256
7570_mm256_cvtepi32_epi8 (__m256i __A)
7571{
7572  return (__m128i)__builtin_shufflevector(
7573      __builtin_convertvector((__v8si)__A, __v8qi),
7574      (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7575      12, 13, 14, 15);
7576}
7577
7578static __inline__ __m128i __DEFAULT_FN_ATTRS256
7579_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7580{
7581  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7582              (__v16qi) __O, __M);
7583}
7584
7585static __inline__ __m128i __DEFAULT_FN_ATTRS256
7586_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
7587{
7588  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7589              (__v16qi) _mm_setzero_si128 (),
7590              __M);
7591}
7592
7593static __inline__ void __DEFAULT_FN_ATTRS256
7594_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7595{
7596  __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7597}
7598
7599static __inline__ __m128i __DEFAULT_FN_ATTRS128
7600_mm_cvtepi32_epi16 (__m128i __A)
7601{
7602  return (__m128i)__builtin_shufflevector(
7603      __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7604      2, 3, 4, 5, 6, 7);
7605}
7606
7607static __inline__ __m128i __DEFAULT_FN_ATTRS128
7608_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7609{
7610  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7611              (__v8hi) __O, __M);
7612}
7613
7614static __inline__ __m128i __DEFAULT_FN_ATTRS128
7615_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
7616{
7617  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7618              (__v8hi) _mm_setzero_si128 (),
7619              __M);
7620}
7621
7622static __inline__ void __DEFAULT_FN_ATTRS128
7623_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7624{
7625  __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7626}
7627
7628static __inline__ __m128i __DEFAULT_FN_ATTRS256
7629_mm256_cvtepi32_epi16 (__m256i __A)
7630{
7631  return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7632}
7633
7634static __inline__ __m128i __DEFAULT_FN_ATTRS256
7635_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7636{
7637  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7638              (__v8hi) __O, __M);
7639}
7640
7641static __inline__ __m128i __DEFAULT_FN_ATTRS256
7642_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
7643{
7644  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7645              (__v8hi) _mm_setzero_si128 (),
7646              __M);
7647}
7648
7649static __inline__ void __DEFAULT_FN_ATTRS256
7650_mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
7651{
7652  __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7653}
7654
7655static __inline__ __m128i __DEFAULT_FN_ATTRS128
7656_mm_cvtepi64_epi8 (__m128i __A)
7657{
7658  return (__m128i)__builtin_shufflevector(
7659      __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7660      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7661}
7662
7663static __inline__ __m128i __DEFAULT_FN_ATTRS128
7664_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7665{
7666  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7667              (__v16qi) __O, __M);
7668}
7669
7670static __inline__ __m128i __DEFAULT_FN_ATTRS128
7671_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
7672{
7673  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7674              (__v16qi) _mm_setzero_si128 (),
7675              __M);
7676}
7677
7678static __inline__ void __DEFAULT_FN_ATTRS128
7679_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7680{
7681  __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7682}
7683
7684static __inline__ __m128i __DEFAULT_FN_ATTRS256
7685_mm256_cvtepi64_epi8 (__m256i __A)
7686{
7687  return (__m128i)__builtin_shufflevector(
7688      __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7689      2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7690}
7691
7692static __inline__ __m128i __DEFAULT_FN_ATTRS256
7693_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7694{
7695  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7696              (__v16qi) __O, __M);
7697}
7698
7699static __inline__ __m128i __DEFAULT_FN_ATTRS256
7700_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
7701{
7702  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7703              (__v16qi) _mm_setzero_si128 (),
7704              __M);
7705}
7706
7707static __inline__ void __DEFAULT_FN_ATTRS256
7708_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7709{
7710  __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7711}
7712
7713static __inline__ __m128i __DEFAULT_FN_ATTRS128
7714_mm_cvtepi64_epi32 (__m128i __A)
7715{
7716  return (__m128i)__builtin_shufflevector(
7717      __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7718}
7719
7720static __inline__ __m128i __DEFAULT_FN_ATTRS128
7721_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7722{
7723  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7724              (__v4si) __O, __M);
7725}
7726
7727static __inline__ __m128i __DEFAULT_FN_ATTRS128
7728_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
7729{
7730  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7731              (__v4si) _mm_setzero_si128 (),
7732              __M);
7733}
7734
7735static __inline__ void __DEFAULT_FN_ATTRS128
7736_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7737{
7738  __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7739}
7740
7741static __inline__ __m128i __DEFAULT_FN_ATTRS256
7742_mm256_cvtepi64_epi32 (__m256i __A)
7743{
7744  return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7745}
7746
7747static __inline__ __m128i __DEFAULT_FN_ATTRS256
7748_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7749{
7750  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7751                                             (__v4si)_mm256_cvtepi64_epi32(__A),
7752                                             (__v4si)__O);
7753}
7754
7755static __inline__ __m128i __DEFAULT_FN_ATTRS256
7756_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
7757{
7758  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7759                                             (__v4si)_mm256_cvtepi64_epi32(__A),
7760                                             (__v4si)_mm_setzero_si128());
7761}
7762
7763static __inline__ void __DEFAULT_FN_ATTRS256
7764_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7765{
7766  __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7767}
7768
7769static __inline__ __m128i __DEFAULT_FN_ATTRS128
7770_mm_cvtepi64_epi16 (__m128i __A)
7771{
7772  return (__m128i)__builtin_shufflevector(
7773      __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7774      3, 3, 3, 3);
7775}
7776
7777static __inline__ __m128i __DEFAULT_FN_ATTRS128
7778_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7779{
7780  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7781              (__v8hi)__O,
7782              __M);
7783}
7784
7785static __inline__ __m128i __DEFAULT_FN_ATTRS128
7786_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
7787{
7788  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7789              (__v8hi) _mm_setzero_si128 (),
7790              __M);
7791}
7792
7793static __inline__ void __DEFAULT_FN_ATTRS128
7794_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7795{
7796  __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7797}
7798
7799static __inline__ __m128i __DEFAULT_FN_ATTRS256
7800_mm256_cvtepi64_epi16 (__m256i __A)
7801{
7802  return (__m128i)__builtin_shufflevector(
7803      __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7804      2, 3, 4, 5, 6, 7);
7805}
7806
7807static __inline__ __m128i __DEFAULT_FN_ATTRS256
7808_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7809{
7810  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7811              (__v8hi) __O, __M);
7812}
7813
7814static __inline__ __m128i __DEFAULT_FN_ATTRS256
7815_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
7816{
7817  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7818              (__v8hi) _mm_setzero_si128 (),
7819              __M);
7820}
7821
7822static __inline__ void __DEFAULT_FN_ATTRS256
7823_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7824{
7825  __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7826}
7827
/* Expands to __builtin_ia32_extractf32x4_256_mask on A (as __v8sf) with the
 * immediate imm (as int), _mm_undefined_ps() as the third operand and an
 * all-ones mask; the result is cast to __m128. */
#define _mm256_extractf32x4_ps(A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v4sf)_mm_undefined_ps(), \
      (__mmask8)-1))
7833
/* Expands to __builtin_ia32_extractf32x4_256_mask on A (as __v8sf) with the
 * immediate imm (as int), W (as __v4sf) as the third operand and U as the
 * mask; the result is cast to __m128. */
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v4sf)(__m128)(W), \
      (__mmask8)(U)))
7839
/* Expands to __builtin_ia32_extractf32x4_256_mask on A (as __v8sf) with the
 * immediate imm (as int), _mm_setzero_ps() as the third operand and U as the
 * mask; the result is cast to __m128. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)))
7845
/* Expands to __builtin_ia32_extracti32x4_256_mask on A (as __v8si) with the
 * immediate imm (as int), _mm_undefined_si128() as the third operand and an
 * all-ones mask; the result is cast to __m128i. */
#define _mm256_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), (__v4si)_mm_undefined_si128(), \
      (__mmask8)-1))
7851
/* Expands to __builtin_ia32_extracti32x4_256_mask on A (as __v8si) with the
 * immediate imm (as int), W (as __v4si) as the third operand and U as the
 * mask; the result is cast to __m128i. */
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), (__v4si)(__m128i)(W), \
      (__mmask8)(U)))
7857
/* Expands to __builtin_ia32_extracti32x4_256_mask on A (as __v8si) with the
 * immediate imm (as int), _mm_setzero_si128() as the third operand and U as
 * the mask; the result is cast to __m128i. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
      (__mmask8)(U)))
7863
/* Expands to __builtin_ia32_insertf32x4_256 on A (as __v8sf), B (as __v4sf)
 * and the immediate imm (as int); the result is cast to __m256. */
#define _mm256_insertf32x4(A, B, imm) \
  ((__m256)__builtin_ia32_insertf32x4_256( \
      (__v8sf)(__m256)(A), (__v4sf)(__m128)(B), (int)(imm)))
7867
/* Passes the mask U, the result of _mm256_insertf32x4(A, B, imm) and W (as
 * __v8sf) to __builtin_ia32_selectps_256; the result is cast to __m256. */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
      (__v8sf)(__m256)(W)))
7872
/* Passes the mask U, the result of _mm256_insertf32x4(A, B, imm) and
 * _mm256_setzero_ps() to __builtin_ia32_selectps_256; the result is cast to
 * __m256. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()))
7877
/* Expands to __builtin_ia32_inserti32x4_256 on A (as __v8si), B (as __v4si)
 * and the immediate imm (as int); the result is cast to __m256i. */
#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256( \
      (__v8si)(__m256i)(A), (__v4si)(__m128i)(B), (int)(imm)))
7881
/* Passes the mask U, the result of _mm256_inserti32x4(A, B, imm) and W (as
 * __v8si) to __builtin_ia32_selectd_256; the result is cast to __m256i. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
7886
/* Passes the mask U, the result of _mm256_inserti32x4(A, B, imm) and
 * _mm256_setzero_si256() to __builtin_ia32_selectd_256; the result is cast to
 * __m256i. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
7891
/* Expands to __builtin_ia32_getmantpd128_mask on A (as __v2df). The immediate
 * is assembled as ((C) << 2) | (B); the third operand is _mm_setzero_pd() and
 * the mask is all ones. */
#define _mm_getmant_pd(A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1))
7897
/* Expands to __builtin_ia32_getmantpd128_mask on A (as __v2df). The immediate
 * is assembled as ((C) << 2) | (B); the third operand is W (as __v2df) and
 * the mask is U. */
#define _mm_mask_getmant_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))
7903
/* Expands to __builtin_ia32_getmantpd128_mask on A (as __v2df). The immediate
 * is assembled as ((C) << 2) | (B); the third operand is _mm_setzero_pd() and
 * the mask is U. */
#define _mm_maskz_getmant_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
7909
/* Expands to __builtin_ia32_getmantpd256_mask on A (as __v4df). The immediate
 * is assembled as ((C) << 2) | (B); the third operand is _mm256_setzero_pd()
 * and the mask is all ones. */
#define _mm256_getmant_pd(A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
7915
/* Expands to __builtin_ia32_getmantpd256_mask on A (as __v4df). The immediate
 * is assembled as ((C) << 2) | (B); the third operand is W (as __v4df) and
 * the mask is U. */
#define _mm256_mask_getmant_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d)(W), (__mmask8)(U)))
7921
/* Expands to __builtin_ia32_getmantpd256_mask on A (as __v4df). The immediate
 * is assembled as ((C) << 2) | (B); the third operand is _mm256_setzero_pd()
 * and the mask is U. */
#define _mm256_maskz_getmant_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
7927
/* Expands to __builtin_ia32_getmantps128_mask on A (as __v4sf). The immediate
 * is assembled as ((C) << 2) | (B); the third operand is _mm_setzero_ps() and
 * the mask is all ones. */
#define _mm_getmant_ps(A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
7933
/* Expands to __builtin_ia32_getmantps128_mask on A (as __v4sf). The immediate
 * is assembled as ((C) << 2) | (B); the third operand is W (as __v4sf) and
 * the mask is U. */
#define _mm_mask_getmant_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128)(W), (__mmask8)(U)))
7939
/* Expands to __builtin_ia32_getmantps128_mask on A (as __v4sf). The immediate
 * is assembled as ((C) << 2) | (B); the third operand is _mm_setzero_ps() and
 * the mask is U. */
#define _mm_maskz_getmant_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
7945
/* Expands to __builtin_ia32_getmantps256_mask on A (as __v8sf). The immediate
 * is assembled as ((C) << 2) | (B); the third operand is _mm256_setzero_ps()
 * and the mask is all ones. */
#define _mm256_getmant_ps(A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
7951
/* Expands to __builtin_ia32_getmantps256_mask on A (as __v8sf). The immediate
 * is assembled as ((C) << 2) | (B); the third operand is W (as __v8sf) and
 * the mask is U. */
#define _mm256_mask_getmant_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256)(W), (__mmask8)(U)))
7957
/* Expands to __builtin_ia32_getmantps256_mask on A (as __v8sf). The immediate
 * is assembled as ((C) << 2) | (B); the third operand is _mm256_setzero_ps()
 * and the mask is U. */
#define _mm256_maskz_getmant_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
7963
/* Expands to __builtin_ia32_gather3div2df with v1_old (as __v2df), addr (as
 * void const *), index (as __v2di), mask (as __mmask8) and scale (as int);
 * the result is cast to __m128d. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df( \
      (__v2df)(__m128d)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7969
/* Expands to __builtin_ia32_gather3div2di with v1_old (as __v2di), addr (as
 * void const *), index (as __v2di), mask (as __mmask8) and scale (as int);
 * the result is cast to __m128i. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di( \
      (__v2di)(__m128i)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7975
/* Expands to __builtin_ia32_gather3div4df with v1_old (as __v4df), addr (as
 * void const *), index (as __v4di), mask (as __mmask8) and scale (as int);
 * the result is cast to __m256d. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df( \
      (__v4df)(__m256d)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
7981
/* Expands to __builtin_ia32_gather3div4di with v1_old (as __v4di), addr (as
 * void const *), index (as __v4di), mask (as __mmask8) and scale (as int);
 * the result is cast to __m256i. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di( \
      (__v4di)(__m256i)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
7987
/* Expands to __builtin_ia32_gather3div4sf with v1_old (as __v4sf), addr (as
 * void const *), index (as __v2di), mask (as __mmask8) and scale (as int);
 * the result is cast to __m128. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7993
/* Expands to __builtin_ia32_gather3div4si with v1_old (as __v4si), addr (as
 * void const *), index (as __v2di), mask (as __mmask8) and scale (as int);
 * the result is cast to __m128i. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7999
/* Expands to __builtin_ia32_gather3div8sf with v1_old (as __v4sf), addr (as
 * void const *), index (as __v4di), mask (as __mmask8) and scale (as int);
 * the result is cast to __m128 (a 128-bit vector, despite the _mm256 name). */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
8005
/* Expands to __builtin_ia32_gather3div8si with v1_old (as __v4si), addr (as
 * void const *), index (as __v4di), mask (as __mmask8) and scale (as int);
 * the result is cast to __m128i (a 128-bit vector, despite the _mm256 name). */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
8011
/* Expands to __builtin_ia32_gather3siv2df with v1_old (as __v2df), addr (as
 * void const *), index (as __v4si), mask (as __mmask8) and scale (as int);
 * the result is cast to __m128d. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df( \
      (__v2df)(__m128d)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
8017
/* Expands to __builtin_ia32_gather3siv2di with v1_old (as __v2di), addr (as
 * void const *), index (as __v4si), mask (as __mmask8) and scale (as int);
 * the result is cast to __m128i. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di( \
      (__v2di)(__m128i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
8023
/* Expands to __builtin_ia32_gather3siv4df with v1_old (as __v4df), addr (as
 * void const *), index (as __v4si), mask (as __mmask8) and scale (as int);
 * the result is cast to __m256d. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df( \
      (__v4df)(__m256d)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
8029
/* Expands to __builtin_ia32_gather3siv4di with v1_old (as __v4di), addr (as
 * void const *), index (as __v4si), mask (as __mmask8) and scale (as int);
 * the result is cast to __m256i. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di( \
      (__v4di)(__m256i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
8035
/* Expands to __builtin_ia32_gather3siv4sf with v1_old (as __v4sf), addr (as
 * void const *), index (as __v4si), mask (as __mmask8) and scale (as int);
 * the result is cast to __m128. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
8041
/* Expands to __builtin_ia32_gather3siv4si with v1_old (as __v4si), addr (as
 * void const *), index (as __v4si), mask (as __mmask8) and scale (as int);
 * the result is cast to __m128i. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
8047
/* Expands to __builtin_ia32_gather3siv8sf with v1_old (as __v8sf), addr (as
 * void const *), index (as __v8si), mask (as __mmask8) and scale (as int);
 * the result is cast to __m256. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf( \
      (__v8sf)(__m256)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
8053
/* Expands to __builtin_ia32_gather3siv8si with v1_old (as __v8si), addr (as
 * void const *), index (as __v8si), mask (as __mmask8) and scale (as int);
 * the result is cast to __m256i. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si( \
      (__v8si)(__m256i)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
8059
/* Expands to __builtin_ia32_permdf256 on X (as __v4df) with the immediate C
 * (as int); the result is cast to __m256d. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256( \
      (__v4df)(__m256d)(X), (int)(C)))
8062
/* Passes the mask U, the result of _mm256_permutex_pd(X, C) and W (as
 * __v4df) to __builtin_ia32_selectpd_256; the result is cast to __m256d. */
#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)(__m256d)(W)))
8067
/* Passes the mask U, the result of _mm256_permutex_pd(X, C) and
 * _mm256_setzero_pd() to __builtin_ia32_selectpd_256; the result is cast to
 * __m256d. */
#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()))
8072
/* Expands to __builtin_ia32_permdi256 on X (as __v4di) with the immediate C
 * (as int); the result is cast to __m256i. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256( \
      (__v4di)(__m256i)(X), (int)(C)))
8075
/* Passes the mask U, the result of _mm256_permutex_epi64(X, C) and W (as
 * __v4di) to __builtin_ia32_selectq_256; the result is cast to __m256i. */
#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)(__m256i)(W)))
8080
/* Passes the mask U, the result of _mm256_permutex_epi64(X, C) and
 * _mm256_setzero_si256() to __builtin_ia32_selectq_256; the result is cast to
 * __m256i. */
#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)_mm256_setzero_si256()))
8085
8086static __inline__ __m256d __DEFAULT_FN_ATTRS256
8087_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8088{
8089  return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
8090}
8091
8092static __inline__ __m256d __DEFAULT_FN_ATTRS256
8093_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8094          __m256d __Y)
8095{
8096  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8097                                        (__v4df)_mm256_permutexvar_pd(__X, __Y),
8098                                        (__v4df)__W);
8099}
8100
8101static __inline__ __m256d __DEFAULT_FN_ATTRS256
8102_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8103{
8104  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8105                                        (__v4df)_mm256_permutexvar_pd(__X, __Y),
8106                                        (__v4df)_mm256_setzero_pd());
8107}
8108
8109static __inline__ __m256i __DEFAULT_FN_ATTRS256
8110_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
8111{
8112  return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
8113}
8114
8115static __inline__ __m256i __DEFAULT_FN_ATTRS256
8116_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
8117{
8118  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8119                                     (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8120                                     (__v4di)_mm256_setzero_si256());
8121}
8122
8123static __inline__ __m256i __DEFAULT_FN_ATTRS256
8124_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8125             __m256i __Y)
8126{
8127  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8128                                     (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8129                                     (__v4di)__W);
8130}
8131
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped. */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps((B), (A))
8133
8134static __inline__ __m256 __DEFAULT_FN_ATTRS256
8135_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
8136{
8137  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8138                                        (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8139                                        (__v8sf)__W);
8140}
8141
8142static __inline__ __m256 __DEFAULT_FN_ATTRS256
8143_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
8144{
8145  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8146                                        (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8147                                        (__v8sf)_mm256_setzero_ps());
8148}
8149
/* Alias for _mm256_permutevar8x32_epi32 with the two arguments swapped. */
#define _mm256_permutexvar_epi32(A, B) \
  _mm256_permutevar8x32_epi32((B), (A))
8151
8152static __inline__ __m256i __DEFAULT_FN_ATTRS256
8153_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
8154                              __m256i __Y)
8155{
8156  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8157                                     (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8158                                     (__v8si)__W);
8159}
8160
8161static __inline__ __m256i __DEFAULT_FN_ATTRS256
8162_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
8163{
8164  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8165                                     (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8166                                     (__v8si)_mm256_setzero_si256());
8167}
8168
/* 128-bit alignr on 32-bit elements: A and B are viewed as __v4si and passed,
 * with imm cast to int, to __builtin_ia32_alignd128. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128( \
      (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (int)(imm)))
8172
/* Merge-masked _mm_alignr_epi32: under mask U, selects between the alignr
 * result and the pass-through vector W. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)(__m128i)(W)))
8177
/* Zero-masked _mm_alignr_epi32: under mask U, selects between the alignr
 * result and an all-zero vector. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)_mm_setzero_si128()))
8182
/* 256-bit alignr on 32-bit elements: A and B are viewed as __v8si and passed,
 * with imm cast to int, to __builtin_ia32_alignd256. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256( \
      (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (int)(imm)))
8186
/* Merge-masked _mm256_alignr_epi32: under mask U, selects between the alignr
 * result and the pass-through vector W. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
8191
/* Zero-masked _mm256_alignr_epi32: under mask U, selects between the alignr
 * result and an all-zero vector. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
8196
/* 128-bit alignr on 64-bit elements: A and B are viewed as __v2di and passed,
 * with imm cast to int, to __builtin_ia32_alignq128. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128( \
      (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (int)(imm)))
8200
/* Merge-masked _mm_alignr_epi64: under mask U, selects between the alignr
 * result and the pass-through vector W. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)(__m128i)(W)))
8205
/* Zero-masked _mm_alignr_epi64: under mask U, selects between the alignr
 * result and an all-zero vector. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)_mm_setzero_si128()))
8210
/* 256-bit alignr on 64-bit elements: A and B are viewed as __v4di and passed,
 * with imm cast to int, to __builtin_ia32_alignq256. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256( \
      (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (int)(imm)))
8214
/* Merge-masked _mm256_alignr_epi64: under mask U, selects between the alignr
 * result and the pass-through vector W. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))
8219
/* Zero-masked _mm256_alignr_epi64: under mask U, selects between the alignr
 * result and an all-zero vector. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()))
8224
8225static __inline__ __m128 __DEFAULT_FN_ATTRS128
8226_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8227{
8228  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8229                                             (__v4sf)_mm_movehdup_ps(__A),
8230                                             (__v4sf)__W);
8231}
8232
8233static __inline__ __m128 __DEFAULT_FN_ATTRS128
8234_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
8235{
8236  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8237                                             (__v4sf)_mm_movehdup_ps(__A),
8238                                             (__v4sf)_mm_setzero_ps());
8239}
8240
8241static __inline__ __m256 __DEFAULT_FN_ATTRS256
8242_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8243{
8244  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8245                                             (__v8sf)_mm256_movehdup_ps(__A),
8246                                             (__v8sf)__W);
8247}
8248
8249static __inline__ __m256 __DEFAULT_FN_ATTRS256
8250_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
8251{
8252  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8253                                             (__v8sf)_mm256_movehdup_ps(__A),
8254                                             (__v8sf)_mm256_setzero_ps());
8255}
8256
8257static __inline__ __m128 __DEFAULT_FN_ATTRS128
8258_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8259{
8260  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8261                                             (__v4sf)_mm_moveldup_ps(__A),
8262                                             (__v4sf)__W);
8263}
8264
8265static __inline__ __m128 __DEFAULT_FN_ATTRS128
8266_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
8267{
8268  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8269                                             (__v4sf)_mm_moveldup_ps(__A),
8270                                             (__v4sf)_mm_setzero_ps());
8271}
8272
8273static __inline__ __m256 __DEFAULT_FN_ATTRS256
8274_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8275{
8276  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8277                                             (__v8sf)_mm256_moveldup_ps(__A),
8278                                             (__v8sf)__W);
8279}
8280
8281static __inline__ __m256 __DEFAULT_FN_ATTRS256
8282_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
8283{
8284  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8285                                             (__v8sf)_mm256_moveldup_ps(__A),
8286                                             (__v8sf)_mm256_setzero_ps());
8287}
8288
/* Merge-masked _mm256_shuffle_epi32: under mask U, selects between
 * _mm256_shuffle_epi32(A, I) and the pass-through vector W. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)(__m256i)(W)))
8293
/* Zero-masked _mm256_shuffle_epi32: under mask U, selects between
 * _mm256_shuffle_epi32(A, I) and an all-zero vector. */
#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)_mm256_setzero_si256()))
8298
/* Merge-masked _mm_shuffle_epi32: under mask U, selects between
 * _mm_shuffle_epi32(A, I) and the pass-through vector W. */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)(__m128i)(W)))
8303
/* Zero-masked _mm_shuffle_epi32: under mask U, selects between
 * _mm_shuffle_epi32(A, I) and an all-zero vector. */
#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)_mm_setzero_si128()))
8308
8309static __inline__ __m128d __DEFAULT_FN_ATTRS128
8310_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
8311{
8312  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8313              (__v2df) __A,
8314              (__v2df) __W);
8315}
8316
8317static __inline__ __m128d __DEFAULT_FN_ATTRS128
8318_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
8319{
8320  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8321              (__v2df) __A,
8322              (__v2df) _mm_setzero_pd ());
8323}
8324
8325static __inline__ __m256d __DEFAULT_FN_ATTRS256
8326_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
8327{
8328  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8329              (__v4df) __A,
8330              (__v4df) __W);
8331}
8332
8333static __inline__ __m256d __DEFAULT_FN_ATTRS256
8334_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
8335{
8336  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8337              (__v4df) __A,
8338              (__v4df) _mm256_setzero_pd ());
8339}
8340
8341static __inline__ __m128 __DEFAULT_FN_ATTRS128
8342_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
8343{
8344  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8345             (__v4sf) __A,
8346             (__v4sf) __W);
8347}
8348
8349static __inline__ __m128 __DEFAULT_FN_ATTRS128
8350_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
8351{
8352  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8353             (__v4sf) __A,
8354             (__v4sf) _mm_setzero_ps ());
8355}
8356
8357static __inline__ __m256 __DEFAULT_FN_ATTRS256
8358_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
8359{
8360  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8361             (__v8sf) __A,
8362             (__v8sf) __W);
8363}
8364
8365static __inline__ __m256 __DEFAULT_FN_ATTRS256
8366_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
8367{
8368  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8369             (__v8sf) __A,
8370             (__v8sf) _mm256_setzero_ps ());
8371}
8372
8373static __inline__ __m128 __DEFAULT_FN_ATTRS128
8374_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8375{
8376  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8377             (__v4sf) __W,
8378             (__mmask8) __U);
8379}
8380
8381static __inline__ __m128 __DEFAULT_FN_ATTRS128
8382_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8383{
8384  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8385             (__v4sf)
8386             _mm_setzero_ps (),
8387             (__mmask8) __U);
8388}
8389
8390static __inline__ __m256 __DEFAULT_FN_ATTRS256
8391_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8392{
8393  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8394                (__v8sf) __W,
8395                (__mmask8) __U);
8396}
8397
8398static __inline__ __m256 __DEFAULT_FN_ATTRS256
8399_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8400{
8401  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8402                (__v8sf)
8403                _mm256_setzero_ps (),
8404                (__mmask8) __U);
8405}
8406
/* Merge-masked single-to-half conversion of a 128-bit source: A (as __v4sf),
 * I (cast to int), the pass-through vector W (as __v8hi) and mask U are
 * forwarded to __builtin_ia32_vcvtps2ph_mask. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask( \
      (__v4sf)(__m128)(A), (int)(I), \
      (__v8hi)(__m128i)(W), \
      (__mmask8)(U)))
8411
/* Zero-masked single-to-half conversion of a 128-bit source: same builtin as
 * the merge-masked form, with an all-zero vector as the pass-through operand. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask( \
      (__v4sf)(__m128)(A), (int)(I), \
      (__v8hi)_mm_setzero_si128(), \
      (__mmask8)(U)))
8416
/* The cvtps_ph spellings are object-like aliases of the cvt_roundps_ph macros,
 * so they take the same argument lists: (W, U, A, I) and (U, A, I). */
#define _mm_mask_cvtps_ph  _mm_mask_cvt_roundps_ph
#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
8419
/* Merge-masked single-to-half conversion of a 256-bit source: A (as __v8sf),
 * I (cast to int), the pass-through vector W (as __v8hi) and mask U are
 * forwarded to __builtin_ia32_vcvtps2ph256_mask. The result is a __m128i. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask( \
      (__v8sf)(__m256)(A), (int)(I), \
      (__v8hi)(__m128i)(W), \
      (__mmask8)(U)))
8424
/* Zero-masked single-to-half conversion of a 256-bit source: same builtin as
 * the merge-masked form, with an all-zero __m128i as the pass-through
 * operand. The result is a __m128i. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask( \
      (__v8sf)(__m256)(A), (int)(I), \
      (__v8hi)_mm_setzero_si128(), \
      (__mmask8)(U)))
8429
/* The cvtps_ph spellings are object-like aliases of the 256-bit
 * cvt_roundps_ph macros, so they take the same argument lists:
 * (W, U, A, I) and (U, A, I). */
#define _mm256_mask_cvtps_ph  _mm256_mask_cvt_roundps_ph
#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
8432
8433
/* Drop the attribute helper macros defined at the top of this header so they
 * do not remain defined past the end of this header. */
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
8436
8437#endif /* __AVX512VLINTRIN_H */