master
   1/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
   2 *
   3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 * See https://llvm.org/LICENSE.txt for license information.
   5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 *
   7 *===-----------------------------------------------------------------------===
   8 */
   9
  10#ifndef __IMMINTRIN_H
  11#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
  12#endif
  13
  14#ifndef __AVX512VLDQINTRIN_H
  15#define __AVX512VLDQINTRIN_H
  16
  17/* Define the default attributes for the functions in this file. */
  18#define __DEFAULT_FN_ATTRS128                                                  \
  19  __attribute__((__always_inline__, __nodebug__,                               \
  20                 __target__("avx512vl,avx512dq,no-evex512"),                   \
  21                 __min_vector_width__(128)))
  22#define __DEFAULT_FN_ATTRS256                                                  \
  23  __attribute__((__always_inline__, __nodebug__,                               \
  24                 __target__("avx512vl,avx512dq,no-evex512"),                   \
  25                 __min_vector_width__(256)))
  26
  27static __inline__ __m256i __DEFAULT_FN_ATTRS256
  28_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
  29  return (__m256i) ((__v4du) __A * (__v4du) __B);
  30}
  31
  32static __inline__ __m256i __DEFAULT_FN_ATTRS256
  33_mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  34  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  35                                             (__v4di)_mm256_mullo_epi64(__A, __B),
  36                                             (__v4di)__W);
  37}
  38
  39static __inline__ __m256i __DEFAULT_FN_ATTRS256
  40_mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
  41  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  42                                             (__v4di)_mm256_mullo_epi64(__A, __B),
  43                                             (__v4di)_mm256_setzero_si256());
  44}
  45
  46static __inline__ __m128i __DEFAULT_FN_ATTRS128
  47_mm_mullo_epi64 (__m128i __A, __m128i __B) {
  48  return (__m128i) ((__v2du) __A * (__v2du) __B);
  49}
  50
  51static __inline__ __m128i __DEFAULT_FN_ATTRS128
  52_mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  53  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  54                                             (__v2di)_mm_mullo_epi64(__A, __B),
  55                                             (__v2di)__W);
  56}
  57
  58static __inline__ __m128i __DEFAULT_FN_ATTRS128
  59_mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
  60  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  61                                             (__v2di)_mm_mullo_epi64(__A, __B),
  62                                             (__v2di)_mm_setzero_si128());
  63}
  64
  65static __inline__ __m256d __DEFAULT_FN_ATTRS256
  66_mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  67  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  68                                              (__v4df)_mm256_andnot_pd(__A, __B),
  69                                              (__v4df)__W);
  70}
  71
  72static __inline__ __m256d __DEFAULT_FN_ATTRS256
  73_mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  74  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  75                                              (__v4df)_mm256_andnot_pd(__A, __B),
  76                                              (__v4df)_mm256_setzero_pd());
  77}
  78
  79static __inline__ __m128d __DEFAULT_FN_ATTRS128
  80_mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  81  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  82                                              (__v2df)_mm_andnot_pd(__A, __B),
  83                                              (__v2df)__W);
  84}
  85
  86static __inline__ __m128d __DEFAULT_FN_ATTRS128
  87_mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  88  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  89                                              (__v2df)_mm_andnot_pd(__A, __B),
  90                                              (__v2df)_mm_setzero_pd());
  91}
  92
  93static __inline__ __m256 __DEFAULT_FN_ATTRS256
  94_mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  95  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  96                                             (__v8sf)_mm256_andnot_ps(__A, __B),
  97                                             (__v8sf)__W);
  98}
  99
 100static __inline__ __m256 __DEFAULT_FN_ATTRS256
 101_mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
 102  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
 103                                             (__v8sf)_mm256_andnot_ps(__A, __B),
 104                                             (__v8sf)_mm256_setzero_ps());
 105}
 106
 107static __inline__ __m128 __DEFAULT_FN_ATTRS128
 108_mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
 109  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
 110                                             (__v4sf)_mm_andnot_ps(__A, __B),
 111                                             (__v4sf)__W);
 112}
 113
 114static __inline__ __m128 __DEFAULT_FN_ATTRS128
 115_mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
 116  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
 117                                             (__v4sf)_mm_andnot_ps(__A, __B),
 118                                             (__v4sf)_mm_setzero_ps());
 119}
 120
 121static __inline__ __m256d __DEFAULT_FN_ATTRS256
 122_mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
 123  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
 124                                              (__v4df)_mm256_and_pd(__A, __B),
 125                                              (__v4df)__W);
 126}
 127
 128static __inline__ __m256d __DEFAULT_FN_ATTRS256
 129_mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
 130  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
 131                                              (__v4df)_mm256_and_pd(__A, __B),
 132                                              (__v4df)_mm256_setzero_pd());
 133}
 134
 135static __inline__ __m128d __DEFAULT_FN_ATTRS128
 136_mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
 137  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
 138                                              (__v2df)_mm_and_pd(__A, __B),
 139                                              (__v2df)__W);
 140}
 141
 142static __inline__ __m128d __DEFAULT_FN_ATTRS128
 143_mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
 144  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
 145                                              (__v2df)_mm_and_pd(__A, __B),
 146                                              (__v2df)_mm_setzero_pd());
 147}
 148
 149static __inline__ __m256 __DEFAULT_FN_ATTRS256
 150_mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
 151  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
 152                                             (__v8sf)_mm256_and_ps(__A, __B),
 153                                             (__v8sf)__W);
 154}
 155
 156static __inline__ __m256 __DEFAULT_FN_ATTRS256
 157_mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
 158  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
 159                                             (__v8sf)_mm256_and_ps(__A, __B),
 160                                             (__v8sf)_mm256_setzero_ps());
 161}
 162
 163static __inline__ __m128 __DEFAULT_FN_ATTRS128
 164_mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
 165  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
 166                                             (__v4sf)_mm_and_ps(__A, __B),
 167                                             (__v4sf)__W);
 168}
 169
 170static __inline__ __m128 __DEFAULT_FN_ATTRS128
 171_mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
 172  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
 173                                             (__v4sf)_mm_and_ps(__A, __B),
 174                                             (__v4sf)_mm_setzero_ps());
 175}
 176
 177static __inline__ __m256d __DEFAULT_FN_ATTRS256
 178_mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
 179  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
 180                                              (__v4df)_mm256_xor_pd(__A, __B),
 181                                              (__v4df)__W);
 182}
 183
 184static __inline__ __m256d __DEFAULT_FN_ATTRS256
 185_mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
 186  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
 187                                              (__v4df)_mm256_xor_pd(__A, __B),
 188                                              (__v4df)_mm256_setzero_pd());
 189}
 190
 191static __inline__ __m128d __DEFAULT_FN_ATTRS128
 192_mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
 193  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
 194                                              (__v2df)_mm_xor_pd(__A, __B),
 195                                              (__v2df)__W);
 196}
 197
 198static __inline__ __m128d __DEFAULT_FN_ATTRS128
 199_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
 200  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
 201                                              (__v2df)_mm_xor_pd(__A, __B),
 202                                              (__v2df)_mm_setzero_pd());
 203}
 204
 205static __inline__ __m256 __DEFAULT_FN_ATTRS256
 206_mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
 207  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
 208                                             (__v8sf)_mm256_xor_ps(__A, __B),
 209                                             (__v8sf)__W);
 210}
 211
 212static __inline__ __m256 __DEFAULT_FN_ATTRS256
 213_mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
 214  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
 215                                             (__v8sf)_mm256_xor_ps(__A, __B),
 216                                             (__v8sf)_mm256_setzero_ps());
 217}
 218
 219static __inline__ __m128 __DEFAULT_FN_ATTRS128
 220_mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
 221  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
 222                                             (__v4sf)_mm_xor_ps(__A, __B),
 223                                             (__v4sf)__W);
 224}
 225
 226static __inline__ __m128 __DEFAULT_FN_ATTRS128
 227_mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
 228  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
 229                                             (__v4sf)_mm_xor_ps(__A, __B),
 230                                             (__v4sf)_mm_setzero_ps());
 231}
 232
 233static __inline__ __m256d __DEFAULT_FN_ATTRS256
 234_mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
 235  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
 236                                              (__v4df)_mm256_or_pd(__A, __B),
 237                                              (__v4df)__W);
 238}
 239
 240static __inline__ __m256d __DEFAULT_FN_ATTRS256
 241_mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
 242  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
 243                                              (__v4df)_mm256_or_pd(__A, __B),
 244                                              (__v4df)_mm256_setzero_pd());
 245}
 246
 247static __inline__ __m128d __DEFAULT_FN_ATTRS128
 248_mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
 249  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
 250                                              (__v2df)_mm_or_pd(__A, __B),
 251                                              (__v2df)__W);
 252}
 253
 254static __inline__ __m128d __DEFAULT_FN_ATTRS128
 255_mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
 256  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
 257                                              (__v2df)_mm_or_pd(__A, __B),
 258                                              (__v2df)_mm_setzero_pd());
 259}
 260
 261static __inline__ __m256 __DEFAULT_FN_ATTRS256
 262_mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
 263  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
 264                                             (__v8sf)_mm256_or_ps(__A, __B),
 265                                             (__v8sf)__W);
 266}
 267
 268static __inline__ __m256 __DEFAULT_FN_ATTRS256
 269_mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
 270  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
 271                                             (__v8sf)_mm256_or_ps(__A, __B),
 272                                             (__v8sf)_mm256_setzero_ps());
 273}
 274
 275static __inline__ __m128 __DEFAULT_FN_ATTRS128
 276_mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
 277  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
 278                                             (__v4sf)_mm_or_ps(__A, __B),
 279                                             (__v4sf)__W);
 280}
 281
 282static __inline__ __m128 __DEFAULT_FN_ATTRS128
 283_mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
 284  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
 285                                             (__v4sf)_mm_or_ps(__A, __B),
 286                                             (__v4sf)_mm_setzero_ps());
 287}
 288
 289static __inline__ __m128i __DEFAULT_FN_ATTRS128
 290_mm_cvtpd_epi64 (__m128d __A) {
 291  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
 292                (__v2di) _mm_setzero_si128(),
 293                (__mmask8) -1);
 294}
 295
 296static __inline__ __m128i __DEFAULT_FN_ATTRS128
 297_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
 298  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
 299                (__v2di) __W,
 300                (__mmask8) __U);
 301}
 302
 303static __inline__ __m128i __DEFAULT_FN_ATTRS128
 304_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
 305  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
 306                (__v2di) _mm_setzero_si128(),
 307                (__mmask8) __U);
 308}
 309
 310static __inline__ __m256i __DEFAULT_FN_ATTRS256
 311_mm256_cvtpd_epi64 (__m256d __A) {
 312  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
 313                (__v4di) _mm256_setzero_si256(),
 314                (__mmask8) -1);
 315}
 316
 317static __inline__ __m256i __DEFAULT_FN_ATTRS256
 318_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
 319  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
 320                (__v4di) __W,
 321                (__mmask8) __U);
 322}
 323
 324static __inline__ __m256i __DEFAULT_FN_ATTRS256
 325_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
 326  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
 327                (__v4di) _mm256_setzero_si256(),
 328                (__mmask8) __U);
 329}
 330
 331static __inline__ __m128i __DEFAULT_FN_ATTRS128
 332_mm_cvtpd_epu64 (__m128d __A) {
 333  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
 334                (__v2di) _mm_setzero_si128(),
 335                (__mmask8) -1);
 336}
 337
 338static __inline__ __m128i __DEFAULT_FN_ATTRS128
 339_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
 340  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
 341                (__v2di) __W,
 342                (__mmask8) __U);
 343}
 344
 345static __inline__ __m128i __DEFAULT_FN_ATTRS128
 346_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
 347  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
 348                (__v2di) _mm_setzero_si128(),
 349                (__mmask8) __U);
 350}
 351
 352static __inline__ __m256i __DEFAULT_FN_ATTRS256
 353_mm256_cvtpd_epu64 (__m256d __A) {
 354  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
 355                (__v4di) _mm256_setzero_si256(),
 356                (__mmask8) -1);
 357}
 358
 359static __inline__ __m256i __DEFAULT_FN_ATTRS256
 360_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
 361  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
 362                (__v4di) __W,
 363                (__mmask8) __U);
 364}
 365
 366static __inline__ __m256i __DEFAULT_FN_ATTRS256
 367_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
 368  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
 369                (__v4di) _mm256_setzero_si256(),
 370                (__mmask8) __U);
 371}
 372
 373static __inline__ __m128i __DEFAULT_FN_ATTRS128
 374_mm_cvtps_epi64 (__m128 __A) {
 375  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
 376                (__v2di) _mm_setzero_si128(),
 377                (__mmask8) -1);
 378}
 379
 380static __inline__ __m128i __DEFAULT_FN_ATTRS128
 381_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
 382  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
 383                (__v2di) __W,
 384                (__mmask8) __U);
 385}
 386
 387static __inline__ __m128i __DEFAULT_FN_ATTRS128
 388_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
 389  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
 390                (__v2di) _mm_setzero_si128(),
 391                (__mmask8) __U);
 392}
 393
 394static __inline__ __m256i __DEFAULT_FN_ATTRS256
 395_mm256_cvtps_epi64 (__m128 __A) {
 396  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
 397                (__v4di) _mm256_setzero_si256(),
 398                (__mmask8) -1);
 399}
 400
 401static __inline__ __m256i __DEFAULT_FN_ATTRS256
 402_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
 403  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
 404                (__v4di) __W,
 405                (__mmask8) __U);
 406}
 407
 408static __inline__ __m256i __DEFAULT_FN_ATTRS256
 409_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
 410  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
 411                (__v4di) _mm256_setzero_si256(),
 412                (__mmask8) __U);
 413}
 414
 415static __inline__ __m128i __DEFAULT_FN_ATTRS128
 416_mm_cvtps_epu64 (__m128 __A) {
 417  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
 418                (__v2di) _mm_setzero_si128(),
 419                (__mmask8) -1);
 420}
 421
 422static __inline__ __m128i __DEFAULT_FN_ATTRS128
 423_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
 424  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
 425                (__v2di) __W,
 426                (__mmask8) __U);
 427}
 428
 429static __inline__ __m128i __DEFAULT_FN_ATTRS128
 430_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
 431  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
 432                (__v2di) _mm_setzero_si128(),
 433                (__mmask8) __U);
 434}
 435
 436static __inline__ __m256i __DEFAULT_FN_ATTRS256
 437_mm256_cvtps_epu64 (__m128 __A) {
 438  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
 439                (__v4di) _mm256_setzero_si256(),
 440                (__mmask8) -1);
 441}
 442
 443static __inline__ __m256i __DEFAULT_FN_ATTRS256
 444_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
 445  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
 446                (__v4di) __W,
 447                (__mmask8) __U);
 448}
 449
 450static __inline__ __m256i __DEFAULT_FN_ATTRS256
 451_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
 452  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
 453                (__v4di) _mm256_setzero_si256(),
 454                (__mmask8) __U);
 455}
 456
 457static __inline__ __m128d __DEFAULT_FN_ATTRS128
 458_mm_cvtepi64_pd (__m128i __A) {
 459  return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
 460}
 461
 462static __inline__ __m128d __DEFAULT_FN_ATTRS128
 463_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
 464  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
 465                                              (__v2df)_mm_cvtepi64_pd(__A),
 466                                              (__v2df)__W);
 467}
 468
 469static __inline__ __m128d __DEFAULT_FN_ATTRS128
 470_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
 471  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
 472                                              (__v2df)_mm_cvtepi64_pd(__A),
 473                                              (__v2df)_mm_setzero_pd());
 474}
 475
 476static __inline__ __m256d __DEFAULT_FN_ATTRS256
 477_mm256_cvtepi64_pd (__m256i __A) {
 478  return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
 479}
 480
 481static __inline__ __m256d __DEFAULT_FN_ATTRS256
 482_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
 483  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
 484                                              (__v4df)_mm256_cvtepi64_pd(__A),
 485                                              (__v4df)__W);
 486}
 487
 488static __inline__ __m256d __DEFAULT_FN_ATTRS256
 489_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
 490  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
 491                                              (__v4df)_mm256_cvtepi64_pd(__A),
 492                                              (__v4df)_mm256_setzero_pd());
 493}
 494
 495static __inline__ __m128 __DEFAULT_FN_ATTRS128
 496_mm_cvtepi64_ps (__m128i __A) {
 497  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
 498                (__v4sf) _mm_setzero_ps(),
 499                (__mmask8) -1);
 500}
 501
 502static __inline__ __m128 __DEFAULT_FN_ATTRS128
 503_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
 504  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
 505                (__v4sf) __W,
 506                (__mmask8) __U);
 507}
 508
 509static __inline__ __m128 __DEFAULT_FN_ATTRS128
 510_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
 511  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
 512                (__v4sf) _mm_setzero_ps(),
 513                (__mmask8) __U);
 514}
 515
 516static __inline__ __m128 __DEFAULT_FN_ATTRS256
 517_mm256_cvtepi64_ps (__m256i __A) {
 518  return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);
 519}
 520
 521static __inline__ __m128 __DEFAULT_FN_ATTRS256
 522_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
 523  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
 524                                             (__v4sf)_mm256_cvtepi64_ps(__A),
 525                                             (__v4sf)__W);
 526}
 527
 528static __inline__ __m128 __DEFAULT_FN_ATTRS256
 529_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
 530  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
 531                                             (__v4sf)_mm256_cvtepi64_ps(__A),
 532                                             (__v4sf)_mm_setzero_ps());
 533}
 534
 535static __inline__ __m128i __DEFAULT_FN_ATTRS128
 536_mm_cvttpd_epi64 (__m128d __A) {
 537  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
 538                (__v2di) _mm_setzero_si128(),
 539                (__mmask8) -1);
 540}
 541
 542static __inline__ __m128i __DEFAULT_FN_ATTRS128
 543_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
 544  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
 545                (__v2di) __W,
 546                (__mmask8) __U);
 547}
 548
 549static __inline__ __m128i __DEFAULT_FN_ATTRS128
 550_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
 551  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
 552                (__v2di) _mm_setzero_si128(),
 553                (__mmask8) __U);
 554}
 555
 556static __inline__ __m256i __DEFAULT_FN_ATTRS256
 557_mm256_cvttpd_epi64 (__m256d __A) {
 558  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
 559                (__v4di) _mm256_setzero_si256(),
 560                (__mmask8) -1);
 561}
 562
 563static __inline__ __m256i __DEFAULT_FN_ATTRS256
 564_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
 565  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
 566                (__v4di) __W,
 567                (__mmask8) __U);
 568}
 569
 570static __inline__ __m256i __DEFAULT_FN_ATTRS256
 571_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
 572  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
 573                (__v4di) _mm256_setzero_si256(),
 574                (__mmask8) __U);
 575}
 576
 577static __inline__ __m128i __DEFAULT_FN_ATTRS128
 578_mm_cvttpd_epu64 (__m128d __A) {
 579  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
 580                (__v2di) _mm_setzero_si128(),
 581                (__mmask8) -1);
 582}
 583
 584static __inline__ __m128i __DEFAULT_FN_ATTRS128
 585_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
 586  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
 587                (__v2di) __W,
 588                (__mmask8) __U);
 589}
 590
 591static __inline__ __m128i __DEFAULT_FN_ATTRS128
 592_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
 593  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
 594                (__v2di) _mm_setzero_si128(),
 595                (__mmask8) __U);
 596}
 597
 598static __inline__ __m256i __DEFAULT_FN_ATTRS256
 599_mm256_cvttpd_epu64 (__m256d __A) {
 600  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
 601                (__v4di) _mm256_setzero_si256(),
 602                (__mmask8) -1);
 603}
 604
 605static __inline__ __m256i __DEFAULT_FN_ATTRS256
 606_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
 607  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
 608                (__v4di) __W,
 609                (__mmask8) __U);
 610}
 611
 612static __inline__ __m256i __DEFAULT_FN_ATTRS256
 613_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
 614  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
 615                (__v4di) _mm256_setzero_si256(),
 616                (__mmask8) __U);
 617}
 618
 619static __inline__ __m128i __DEFAULT_FN_ATTRS128
 620_mm_cvttps_epi64 (__m128 __A) {
 621  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
 622                (__v2di) _mm_setzero_si128(),
 623                (__mmask8) -1);
 624}
 625
 626static __inline__ __m128i __DEFAULT_FN_ATTRS128
 627_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
 628  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
 629                (__v2di) __W,
 630                (__mmask8) __U);
 631}
 632
 633static __inline__ __m128i __DEFAULT_FN_ATTRS128
 634_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
 635  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
 636                (__v2di) _mm_setzero_si128(),
 637                (__mmask8) __U);
 638}
 639
 640static __inline__ __m256i __DEFAULT_FN_ATTRS256
 641_mm256_cvttps_epi64 (__m128 __A) {
 642  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
 643                (__v4di) _mm256_setzero_si256(),
 644                (__mmask8) -1);
 645}
 646
 647static __inline__ __m256i __DEFAULT_FN_ATTRS256
 648_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
 649  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
 650                (__v4di) __W,
 651                (__mmask8) __U);
 652}
 653
 654static __inline__ __m256i __DEFAULT_FN_ATTRS256
 655_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
 656  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
 657                (__v4di) _mm256_setzero_si256(),
 658                (__mmask8) __U);
 659}
 660
 661static __inline__ __m128i __DEFAULT_FN_ATTRS128
 662_mm_cvttps_epu64 (__m128 __A) {
 663  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
 664                (__v2di) _mm_setzero_si128(),
 665                (__mmask8) -1);
 666}
 667
 668static __inline__ __m128i __DEFAULT_FN_ATTRS128
 669_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
 670  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
 671                (__v2di) __W,
 672                (__mmask8) __U);
 673}
 674
 675static __inline__ __m128i __DEFAULT_FN_ATTRS128
 676_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
 677  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
 678                (__v2di) _mm_setzero_si128(),
 679                (__mmask8) __U);
 680}
 681
 682static __inline__ __m256i __DEFAULT_FN_ATTRS256
 683_mm256_cvttps_epu64 (__m128 __A) {
 684  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
 685                (__v4di) _mm256_setzero_si256(),
 686                (__mmask8) -1);
 687}
 688
 689static __inline__ __m256i __DEFAULT_FN_ATTRS256
 690_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
 691  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
 692                (__v4di) __W,
 693                (__mmask8) __U);
 694}
 695
 696static __inline__ __m256i __DEFAULT_FN_ATTRS256
 697_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
 698  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
 699                (__v4di) _mm256_setzero_si256(),
 700                (__mmask8) __U);
 701}
 702
 703static __inline__ __m128d __DEFAULT_FN_ATTRS128
 704_mm_cvtepu64_pd (__m128i __A) {
 705  return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
 706}
 707
 708static __inline__ __m128d __DEFAULT_FN_ATTRS128
 709_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
 710  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
 711                                              (__v2df)_mm_cvtepu64_pd(__A),
 712                                              (__v2df)__W);
 713}
 714
 715static __inline__ __m128d __DEFAULT_FN_ATTRS128
 716_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
 717  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
 718                                              (__v2df)_mm_cvtepu64_pd(__A),
 719                                              (__v2df)_mm_setzero_pd());
 720}
 721
 722static __inline__ __m256d __DEFAULT_FN_ATTRS256
 723_mm256_cvtepu64_pd (__m256i __A) {
 724  return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
 725}
 726
 727static __inline__ __m256d __DEFAULT_FN_ATTRS256
 728_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
 729  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
 730                                              (__v4df)_mm256_cvtepu64_pd(__A),
 731                                              (__v4df)__W);
 732}
 733
 734static __inline__ __m256d __DEFAULT_FN_ATTRS256
 735_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
 736  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
 737                                              (__v4df)_mm256_cvtepu64_pd(__A),
 738                                              (__v4df)_mm256_setzero_pd());
 739}
 740
 741static __inline__ __m128 __DEFAULT_FN_ATTRS128
 742_mm_cvtepu64_ps (__m128i __A) {
 743  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
 744                (__v4sf) _mm_setzero_ps(),
 745                (__mmask8) -1);
 746}
 747
 748static __inline__ __m128 __DEFAULT_FN_ATTRS128
 749_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
 750  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
 751                (__v4sf) __W,
 752                (__mmask8) __U);
 753}
 754
 755static __inline__ __m128 __DEFAULT_FN_ATTRS128
 756_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
 757  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
 758                (__v4sf) _mm_setzero_ps(),
 759                (__mmask8) __U);
 760}
 761
 762static __inline__ __m128 __DEFAULT_FN_ATTRS256
 763_mm256_cvtepu64_ps (__m256i __A) {
 764  return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);
 765}
 766
 767static __inline__ __m128 __DEFAULT_FN_ATTRS256
 768_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
 769  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
 770                                             (__v4sf)_mm256_cvtepu64_ps(__A),
 771                                             (__v4sf)__W);
 772}
 773
 774static __inline__ __m128 __DEFAULT_FN_ATTRS256
 775_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
 776  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
 777                                             (__v4sf)_mm256_cvtepu64_ps(__A),
 778                                             (__v4sf)_mm_setzero_ps());
 779}
 780
 781#define _mm_range_pd(A, B, C) \
 782  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
 783                                           (__v2df)(__m128d)(B), (int)(C), \
 784                                           (__v2df)_mm_setzero_pd(), \
 785                                           (__mmask8)-1))
 786
 787#define _mm_mask_range_pd(W, U, A, B, C) \
 788  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
 789                                           (__v2df)(__m128d)(B), (int)(C), \
 790                                           (__v2df)(__m128d)(W), \
 791                                           (__mmask8)(U)))
 792
 793#define _mm_maskz_range_pd(U, A, B, C) \
 794  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
 795                                           (__v2df)(__m128d)(B), (int)(C), \
 796                                           (__v2df)_mm_setzero_pd(), \
 797                                           (__mmask8)(U)))
 798
 799#define _mm256_range_pd(A, B, C) \
 800  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
 801                                           (__v4df)(__m256d)(B), (int)(C), \
 802                                           (__v4df)_mm256_setzero_pd(), \
 803                                           (__mmask8)-1))
 804
 805#define _mm256_mask_range_pd(W, U, A, B, C) \
 806  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
 807                                           (__v4df)(__m256d)(B), (int)(C), \
 808                                           (__v4df)(__m256d)(W), \
 809                                           (__mmask8)(U)))
 810
 811#define _mm256_maskz_range_pd(U, A, B, C) \
 812  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
 813                                           (__v4df)(__m256d)(B), (int)(C), \
 814                                           (__v4df)_mm256_setzero_pd(), \
 815                                           (__mmask8)(U)))
 816
 817#define _mm_range_ps(A, B, C) \
 818  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
 819                                          (__v4sf)(__m128)(B), (int)(C), \
 820                                          (__v4sf)_mm_setzero_ps(), \
 821                                          (__mmask8)-1))
 822
 823#define _mm_mask_range_ps(W, U, A, B, C) \
 824  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
 825                                          (__v4sf)(__m128)(B), (int)(C), \
 826                                          (__v4sf)(__m128)(W), (__mmask8)(U)))
 827
 828#define _mm_maskz_range_ps(U, A, B, C) \
 829  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
 830                                          (__v4sf)(__m128)(B), (int)(C), \
 831                                          (__v4sf)_mm_setzero_ps(), \
 832                                          (__mmask8)(U)))
 833
 834#define _mm256_range_ps(A, B, C) \
 835  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
 836                                          (__v8sf)(__m256)(B), (int)(C), \
 837                                          (__v8sf)_mm256_setzero_ps(), \
 838                                          (__mmask8)-1))
 839
 840#define _mm256_mask_range_ps(W, U, A, B, C) \
 841  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
 842                                          (__v8sf)(__m256)(B), (int)(C), \
 843                                          (__v8sf)(__m256)(W), (__mmask8)(U)))
 844
 845#define _mm256_maskz_range_ps(U, A, B, C) \
 846  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
 847                                          (__v8sf)(__m256)(B), (int)(C), \
 848                                          (__v8sf)_mm256_setzero_ps(), \
 849                                          (__mmask8)(U)))
 850
 851#define _mm_reduce_pd(A, B) \
 852  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
 853                                            (__v2df)_mm_setzero_pd(), \
 854                                            (__mmask8)-1))
 855
 856#define _mm_mask_reduce_pd(W, U, A, B) \
 857  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
 858                                            (__v2df)(__m128d)(W), \
 859                                            (__mmask8)(U)))
 860
 861#define _mm_maskz_reduce_pd(U, A, B) \
 862  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
 863                                            (__v2df)_mm_setzero_pd(), \
 864                                            (__mmask8)(U)))
 865
 866#define _mm256_reduce_pd(A, B) \
 867  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
 868                                            (__v4df)_mm256_setzero_pd(), \
 869                                            (__mmask8)-1))
 870
 871#define _mm256_mask_reduce_pd(W, U, A, B) \
 872  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
 873                                            (__v4df)(__m256d)(W), \
 874                                            (__mmask8)(U)))
 875
 876#define _mm256_maskz_reduce_pd(U, A, B) \
 877  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
 878                                            (__v4df)_mm256_setzero_pd(), \
 879                                            (__mmask8)(U)))
 880
 881#define _mm_reduce_ps(A, B) \
 882  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
 883                                           (__v4sf)_mm_setzero_ps(), \
 884                                           (__mmask8)-1))
 885
 886#define _mm_mask_reduce_ps(W, U, A, B) \
 887  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
 888                                           (__v4sf)(__m128)(W), \
 889                                           (__mmask8)(U)))
 890
 891#define _mm_maskz_reduce_ps(U, A, B) \
 892  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
 893                                           (__v4sf)_mm_setzero_ps(), \
 894                                           (__mmask8)(U)))
 895
 896#define _mm256_reduce_ps(A, B) \
 897  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
 898                                           (__v8sf)_mm256_setzero_ps(), \
 899                                           (__mmask8)-1))
 900
 901#define _mm256_mask_reduce_ps(W, U, A, B) \
 902  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
 903                                           (__v8sf)(__m256)(W), \
 904                                           (__mmask8)(U)))
 905
 906#define _mm256_maskz_reduce_ps(U, A, B) \
 907  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
 908                                           (__v8sf)_mm256_setzero_ps(), \
 909                                           (__mmask8)(U)))
 910
 911static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 912_mm_movepi32_mask (__m128i __A)
 913{
 914  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
 915}
 916
 917static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
 918_mm256_movepi32_mask (__m256i __A)
 919{
 920  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
 921}
 922
 923static __inline__ __m128i __DEFAULT_FN_ATTRS128
 924_mm_movm_epi32 (__mmask8 __A)
 925{
 926  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
 927}
 928
 929static __inline__ __m256i __DEFAULT_FN_ATTRS256
 930_mm256_movm_epi32 (__mmask8 __A)
 931{
 932  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
 933}
 934
 935static __inline__ __m128i __DEFAULT_FN_ATTRS128
 936_mm_movm_epi64 (__mmask8 __A)
 937{
 938  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
 939}
 940
 941static __inline__ __m256i __DEFAULT_FN_ATTRS256
 942_mm256_movm_epi64 (__mmask8 __A)
 943{
 944  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
 945}
 946
 947static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 948_mm_movepi64_mask (__m128i __A)
 949{
 950  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
 951}
 952
 953static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
 954_mm256_movepi64_mask (__m256i __A)
 955{
 956  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
 957}
 958
 959static __inline__ __m256 __DEFAULT_FN_ATTRS256
 960_mm256_broadcast_f32x2 (__m128 __A)
 961{
 962  return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
 963                                         0, 1, 0, 1, 0, 1, 0, 1);
 964}
 965
 966static __inline__ __m256 __DEFAULT_FN_ATTRS256
 967_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
 968{
 969  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
 970                                             (__v8sf)_mm256_broadcast_f32x2(__A),
 971                                             (__v8sf)__O);
 972}
 973
 974static __inline__ __m256 __DEFAULT_FN_ATTRS256
 975_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
 976{
 977  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
 978                                             (__v8sf)_mm256_broadcast_f32x2(__A),
 979                                             (__v8sf)_mm256_setzero_ps());
 980}
 981
 982static __inline__ __m256d __DEFAULT_FN_ATTRS256
 983_mm256_broadcast_f64x2(__m128d __A)
 984{
 985  return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
 986                                          0, 1, 0, 1);
 987}
 988
 989static __inline__ __m256d __DEFAULT_FN_ATTRS256
 990_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
 991{
 992  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
 993                                            (__v4df)_mm256_broadcast_f64x2(__A),
 994                                            (__v4df)__O);
 995}
 996
 997static __inline__ __m256d __DEFAULT_FN_ATTRS256
 998_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
 999{
1000  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
1001                                            (__v4df)_mm256_broadcast_f64x2(__A),
1002                                            (__v4df)_mm256_setzero_pd());
1003}
1004
1005static __inline__ __m128i __DEFAULT_FN_ATTRS128
1006_mm_broadcast_i32x2 (__m128i __A)
1007{
1008  return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1009                                          0, 1, 0, 1);
1010}
1011
1012static __inline__ __m128i __DEFAULT_FN_ATTRS128
1013_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
1014{
1015  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1016                                             (__v4si)_mm_broadcast_i32x2(__A),
1017                                             (__v4si)__O);
1018}
1019
1020static __inline__ __m128i __DEFAULT_FN_ATTRS128
1021_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1022{
1023  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1024                                             (__v4si)_mm_broadcast_i32x2(__A),
1025                                             (__v4si)_mm_setzero_si128());
1026}
1027
1028static __inline__ __m256i __DEFAULT_FN_ATTRS256
1029_mm256_broadcast_i32x2 (__m128i __A)
1030{
1031  return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1032                                          0, 1, 0, 1, 0, 1, 0, 1);
1033}
1034
1035static __inline__ __m256i __DEFAULT_FN_ATTRS256
1036_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
1037{
1038  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1039                                             (__v8si)_mm256_broadcast_i32x2(__A),
1040                                             (__v8si)__O);
1041}
1042
1043static __inline__ __m256i __DEFAULT_FN_ATTRS256
1044_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1045{
1046  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1047                                             (__v8si)_mm256_broadcast_i32x2(__A),
1048                                             (__v8si)_mm256_setzero_si256());
1049}
1050
1051static __inline__ __m256i __DEFAULT_FN_ATTRS256
1052_mm256_broadcast_i64x2(__m128i __A)
1053{
1054  return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1055                                          0, 1, 0, 1);
1056}
1057
1058static __inline__ __m256i __DEFAULT_FN_ATTRS256
1059_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
1060{
1061  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1062                                            (__v4di)_mm256_broadcast_i64x2(__A),
1063                                            (__v4di)__O);
1064}
1065
1066static __inline__ __m256i __DEFAULT_FN_ATTRS256
1067_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1068{
1069  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1070                                            (__v4di)_mm256_broadcast_i64x2(__A),
1071                                            (__v4di)_mm256_setzero_si256());
1072}
1073
/* Unmasked extract wrapper: forwards A (as __v4df) and imm (as int) to
 * __builtin_ia32_extractf64x2_256_mask, with _mm_undefined_pd() and the mask
 * (__mmask8)-1 as the last two operands; yields an __m128d. */
#define _mm256_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), \
      (__v2df)_mm_undefined_pd(), (__mmask8)-1))
1079
/* Masked extract wrapper: forwards A (as __v4df) and imm (as int) to
 * __builtin_ia32_extractf64x2_256_mask, with the caller's W (as __v2df) and U
 * (as __mmask8) as the last two operands. */
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))
1085
/* Masked extract wrapper that supplies _mm_setzero_pd() where the _mask_ form
 * takes W; U is forwarded as the __mmask8 mask of
 * __builtin_ia32_extractf64x2_256_mask. */
#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
1091
/* Unmasked extract wrapper: forwards A (as __v4di) and imm (as int) to
 * __builtin_ia32_extracti64x2_256_mask, with _mm_undefined_si128() and the
 * mask (__mmask8)-1 as the last two operands; yields an __m128i. */
#define _mm256_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask( \
      (__v4di)(__m256i)(A), (int)(imm), \
      (__v2di)_mm_undefined_si128(), (__mmask8)-1))
1097
/* Masked extract wrapper: forwards A (as __v4di) and imm (as int) to
 * __builtin_ia32_extracti64x2_256_mask, with the caller's W (as __v2di) and U
 * (as __mmask8) as the last two operands. */
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask( \
      (__v4di)(__m256i)(A), (int)(imm), \
      (__v2di)(__m128i)(W), (__mmask8)(U)))
1103
/* Masked extract wrapper that supplies _mm_setzero_si128() where the _mask_
 * form takes W; U is forwarded as the __mmask8 mask of
 * __builtin_ia32_extracti64x2_256_mask. */
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask( \
      (__v4di)(__m256i)(A), (int)(imm), \
      (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
1109
/* Insert wrapper: forwards A (as __v4df), B (as __v2df) and imm (as int) to
 * __builtin_ia32_insertf64x2_256 and yields an __m256d. */
#define _mm256_insertf64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_insertf64x2_256( \
      (__v4df)(__m256d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(imm)))
1113
/* Masked form of _mm256_insertf64x2: the inserted vector and W (as __v4df) are
 * handed to __builtin_ia32_selectpd_256 together with the mask U. */
#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
      (__v4df)(__m256d)(W)))
1118
/* Variant of the masked insert whose second select operand is the vector
 * returned by _mm256_setzero_pd(); U is the mask handed to
 * __builtin_ia32_selectpd_256. */
#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
      (__v4df)_mm256_setzero_pd()))
1123
/* Insert wrapper: forwards A (as __v4di), B (as __v2di) and imm (as int) to
 * __builtin_ia32_inserti64x2_256 and yields an __m256i. */
#define _mm256_inserti64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti64x2_256( \
      (__v4di)(__m256i)(A), \
      (__v2di)(__m128i)(B), \
      (int)(imm)))
1127
/* Masked form of _mm256_inserti64x2: the inserted vector and W (as __v4di) are
 * handed to __builtin_ia32_selectq_256 together with the mask U. */
#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))
1132
/* Variant of the masked insert whose second select operand is the vector
 * returned by _mm256_setzero_si256(); U is the mask handed to
 * __builtin_ia32_selectq_256. */
#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()))
1137
/* Masked classification wrapper: forwards A (as __v2df), imm (as int) and the
 * mask U to __builtin_ia32_fpclasspd128_mask; the result is an __mmask8. */
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)(U)))
1141
/* Unmasked classification wrapper: forwards A (as __v2df) and imm (as int) to
 * __builtin_ia32_fpclasspd128_mask with the mask (__mmask8)-1; the result is
 * an __mmask8. */
#define _mm_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)-1))
1145
/* Masked classification wrapper: forwards A (as __v4df), imm (as int) and the
 * mask U to __builtin_ia32_fpclasspd256_mask; the result is an __mmask8. */
#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)(imm), \
      (__mmask8)(U)))
1149
/* Unmasked classification wrapper: forwards A (as __v4df) and imm (as int) to
 * __builtin_ia32_fpclasspd256_mask with the mask (__mmask8)-1; the result is
 * an __mmask8. */
#define _mm256_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)(imm), \
      (__mmask8)-1))
1153
/* Masked classification wrapper: forwards A (as __v4sf), imm (as int) and the
 * mask U to __builtin_ia32_fpclassps128_mask; the result is an __mmask8. */
#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)(U)))
1157
/* Unmasked classification wrapper: forwards A (as __v4sf) and imm (as int) to
 * __builtin_ia32_fpclassps128_mask with the mask (__mmask8)-1; the result is
 * an __mmask8. */
#define _mm_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)-1))
1161
/* Masked classification wrapper: forwards A (as __v8sf), imm (as int) and the
 * mask U to __builtin_ia32_fpclassps256_mask; the result is an __mmask8. */
#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(imm), \
      (__mmask8)(U)))
1165
/* Unmasked classification wrapper: forwards A (as __v8sf) and imm (as int) to
 * __builtin_ia32_fpclassps256_mask with the mask (__mmask8)-1; the result is
 * an __mmask8. */
#define _mm256_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(imm), \
      (__mmask8)-1))
1169
1170#undef __DEFAULT_FN_ATTRS128
1171#undef __DEFAULT_FN_ATTRS256
1172
1173#endif