master
   1/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
   2 *
   3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 * See https://llvm.org/LICENSE.txt for license information.
   5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 *
   7 *===-----------------------------------------------------------------------===
   8 */
   9
  10#ifndef __IMMINTRIN_H
  11#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
  12#endif
  13
  14#ifndef __AVX512DQINTRIN_H
  15#define __AVX512DQINTRIN_H
  16
  17/* Define the default attributes for the functions in this file. */
  18#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq,evex512"), __min_vector_width__(512)))
  19#define __DEFAULT_FN_ATTRS                                                     \
  20  __attribute__((__always_inline__, __nodebug__,                               \
  21                 __target__("avx512dq,no-evex512")))
  22
  23static __inline __mmask8 __DEFAULT_FN_ATTRS
  24_knot_mask8(__mmask8 __M)
  25{
  26  return __builtin_ia32_knotqi(__M);
  27}
  28
  29static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  30_kand_mask8(__mmask8 __A, __mmask8 __B)
  31{
  32  return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
  33}
  34
  35static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  36_kandn_mask8(__mmask8 __A, __mmask8 __B)
  37{
  38  return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
  39}
  40
  41static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  42_kor_mask8(__mmask8 __A, __mmask8 __B)
  43{
  44  return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
  45}
  46
  47static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  48_kxnor_mask8(__mmask8 __A, __mmask8 __B)
  49{
  50  return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
  51}
  52
  53static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  54_kxor_mask8(__mmask8 __A, __mmask8 __B)
  55{
  56  return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
  57}
  58
  59static __inline__ unsigned char __DEFAULT_FN_ATTRS
  60_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
  61{
  62  return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
  63}
  64
  65static __inline__ unsigned char __DEFAULT_FN_ATTRS
  66_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
  67{
  68  return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
  69}
  70
  71static __inline__ unsigned char __DEFAULT_FN_ATTRS
  72_kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
  73  *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
  74  return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
  75}
  76
  77static __inline__ unsigned char __DEFAULT_FN_ATTRS
  78_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
  79{
  80  return (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
  81}
  82
  83static __inline__ unsigned char __DEFAULT_FN_ATTRS
  84_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
  85{
  86  return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
  87}
  88
  89static __inline__ unsigned char __DEFAULT_FN_ATTRS
  90_ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
  91  *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
  92  return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
  93}
  94
  95static __inline__ unsigned char __DEFAULT_FN_ATTRS
  96_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
  97{
  98  return (unsigned char)__builtin_ia32_ktestchi(__A, __B);
  99}
 100
 101static __inline__ unsigned char __DEFAULT_FN_ATTRS
 102_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
 103{
 104  return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
 105}
 106
 107static __inline__ unsigned char __DEFAULT_FN_ATTRS
 108_ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
 109  *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B);
 110  return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
 111}
 112
 113static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 114_kadd_mask8(__mmask8 __A, __mmask8 __B)
 115{
 116  return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B);
 117}
 118
 119static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 120_kadd_mask16(__mmask16 __A, __mmask16 __B)
 121{
 122  return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B);
 123}
 124
 125#define _kshiftli_mask8(A, I) \
 126  ((__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I)))
 127
 128#define _kshiftri_mask8(A, I) \
 129  ((__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I)))
 130
 131static __inline__ unsigned int __DEFAULT_FN_ATTRS
 132_cvtmask8_u32(__mmask8 __A) {
 133  return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A);
 134}
 135
 136static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 137_cvtu32_mask8(unsigned int __A) {
 138  return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A);
 139}
 140
 141static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 142_load_mask8(__mmask8 *__A) {
 143  return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A);
 144}
 145
 146static __inline__ void __DEFAULT_FN_ATTRS
 147_store_mask8(__mmask8 *__A, __mmask8 __B) {
 148  *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B);
 149}
 150
 151static __inline__ __m512i __DEFAULT_FN_ATTRS512
 152_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
 153  return (__m512i) ((__v8du) __A * (__v8du) __B);
 154}
 155
 156static __inline__ __m512i __DEFAULT_FN_ATTRS512
 157_mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
 158  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
 159                                             (__v8di)_mm512_mullo_epi64(__A, __B),
 160                                             (__v8di)__W);
 161}
 162
 163static __inline__ __m512i __DEFAULT_FN_ATTRS512
 164_mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
 165  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
 166                                             (__v8di)_mm512_mullo_epi64(__A, __B),
 167                                             (__v8di)_mm512_setzero_si512());
 168}
 169
 170static __inline__ __m512d __DEFAULT_FN_ATTRS512
 171_mm512_xor_pd(__m512d __A, __m512d __B) {
 172  return (__m512d)((__v8du)__A ^ (__v8du)__B);
 173}
 174
 175static __inline__ __m512d __DEFAULT_FN_ATTRS512
 176_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
 177  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
 178                                              (__v8df)_mm512_xor_pd(__A, __B),
 179                                              (__v8df)__W);
 180}
 181
 182static __inline__ __m512d __DEFAULT_FN_ATTRS512
 183_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
 184  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
 185                                              (__v8df)_mm512_xor_pd(__A, __B),
 186                                              (__v8df)_mm512_setzero_pd());
 187}
 188
 189static __inline__ __m512 __DEFAULT_FN_ATTRS512
 190_mm512_xor_ps (__m512 __A, __m512 __B) {
 191  return (__m512)((__v16su)__A ^ (__v16su)__B);
 192}
 193
 194static __inline__ __m512 __DEFAULT_FN_ATTRS512
 195_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
 196  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
 197                                             (__v16sf)_mm512_xor_ps(__A, __B),
 198                                             (__v16sf)__W);
 199}
 200
 201static __inline__ __m512 __DEFAULT_FN_ATTRS512
 202_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
 203  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
 204                                             (__v16sf)_mm512_xor_ps(__A, __B),
 205                                             (__v16sf)_mm512_setzero_ps());
 206}
 207
 208static __inline__ __m512d __DEFAULT_FN_ATTRS512
 209_mm512_or_pd(__m512d __A, __m512d __B) {
 210  return (__m512d)((__v8du)__A | (__v8du)__B);
 211}
 212
 213static __inline__ __m512d __DEFAULT_FN_ATTRS512
 214_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
 215  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
 216                                              (__v8df)_mm512_or_pd(__A, __B),
 217                                              (__v8df)__W);
 218}
 219
 220static __inline__ __m512d __DEFAULT_FN_ATTRS512
 221_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
 222  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
 223                                              (__v8df)_mm512_or_pd(__A, __B),
 224                                              (__v8df)_mm512_setzero_pd());
 225}
 226
 227static __inline__ __m512 __DEFAULT_FN_ATTRS512
 228_mm512_or_ps(__m512 __A, __m512 __B) {
 229  return (__m512)((__v16su)__A | (__v16su)__B);
 230}
 231
 232static __inline__ __m512 __DEFAULT_FN_ATTRS512
 233_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
 234  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
 235                                             (__v16sf)_mm512_or_ps(__A, __B),
 236                                             (__v16sf)__W);
 237}
 238
 239static __inline__ __m512 __DEFAULT_FN_ATTRS512
 240_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
 241  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
 242                                             (__v16sf)_mm512_or_ps(__A, __B),
 243                                             (__v16sf)_mm512_setzero_ps());
 244}
 245
 246static __inline__ __m512d __DEFAULT_FN_ATTRS512
 247_mm512_and_pd(__m512d __A, __m512d __B) {
 248  return (__m512d)((__v8du)__A & (__v8du)__B);
 249}
 250
 251static __inline__ __m512d __DEFAULT_FN_ATTRS512
 252_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
 253  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
 254                                              (__v8df)_mm512_and_pd(__A, __B),
 255                                              (__v8df)__W);
 256}
 257
 258static __inline__ __m512d __DEFAULT_FN_ATTRS512
 259_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
 260  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
 261                                              (__v8df)_mm512_and_pd(__A, __B),
 262                                              (__v8df)_mm512_setzero_pd());
 263}
 264
 265static __inline__ __m512 __DEFAULT_FN_ATTRS512
 266_mm512_and_ps(__m512 __A, __m512 __B) {
 267  return (__m512)((__v16su)__A & (__v16su)__B);
 268}
 269
 270static __inline__ __m512 __DEFAULT_FN_ATTRS512
 271_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
 272  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
 273                                             (__v16sf)_mm512_and_ps(__A, __B),
 274                                             (__v16sf)__W);
 275}
 276
 277static __inline__ __m512 __DEFAULT_FN_ATTRS512
 278_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
 279  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
 280                                             (__v16sf)_mm512_and_ps(__A, __B),
 281                                             (__v16sf)_mm512_setzero_ps());
 282}
 283
 284static __inline__ __m512d __DEFAULT_FN_ATTRS512
 285_mm512_andnot_pd(__m512d __A, __m512d __B) {
 286  return (__m512d)(~(__v8du)__A & (__v8du)__B);
 287}
 288
 289static __inline__ __m512d __DEFAULT_FN_ATTRS512
 290_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
 291  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
 292                                              (__v8df)_mm512_andnot_pd(__A, __B),
 293                                              (__v8df)__W);
 294}
 295
 296static __inline__ __m512d __DEFAULT_FN_ATTRS512
 297_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
 298  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
 299                                              (__v8df)_mm512_andnot_pd(__A, __B),
 300                                              (__v8df)_mm512_setzero_pd());
 301}
 302
 303static __inline__ __m512 __DEFAULT_FN_ATTRS512
 304_mm512_andnot_ps(__m512 __A, __m512 __B) {
 305  return (__m512)(~(__v16su)__A & (__v16su)__B);
 306}
 307
 308static __inline__ __m512 __DEFAULT_FN_ATTRS512
 309_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
 310  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
 311                                             (__v16sf)_mm512_andnot_ps(__A, __B),
 312                                             (__v16sf)__W);
 313}
 314
 315static __inline__ __m512 __DEFAULT_FN_ATTRS512
 316_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
 317  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
 318                                             (__v16sf)_mm512_andnot_ps(__A, __B),
 319                                             (__v16sf)_mm512_setzero_ps());
 320}
 321
 322static __inline__ __m512i __DEFAULT_FN_ATTRS512
 323_mm512_cvtpd_epi64 (__m512d __A) {
 324  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
 325                (__v8di) _mm512_setzero_si512(),
 326                (__mmask8) -1,
 327                _MM_FROUND_CUR_DIRECTION);
 328}
 329
 330static __inline__ __m512i __DEFAULT_FN_ATTRS512
 331_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
 332  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
 333                (__v8di) __W,
 334                (__mmask8) __U,
 335                _MM_FROUND_CUR_DIRECTION);
 336}
 337
 338static __inline__ __m512i __DEFAULT_FN_ATTRS512
 339_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
 340  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
 341                (__v8di) _mm512_setzero_si512(),
 342                (__mmask8) __U,
 343                _MM_FROUND_CUR_DIRECTION);
 344}
 345
 346#define _mm512_cvt_roundpd_epi64(A, R) \
 347  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
 348                                            (__v8di)_mm512_setzero_si512(), \
 349                                            (__mmask8)-1, (int)(R)))
 350
 351#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
 352  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
 353                                            (__v8di)(__m512i)(W), \
 354                                            (__mmask8)(U), (int)(R)))
 355
 356#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
 357  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
 358                                            (__v8di)_mm512_setzero_si512(), \
 359                                            (__mmask8)(U), (int)(R)))
 360
 361static __inline__ __m512i __DEFAULT_FN_ATTRS512
 362_mm512_cvtpd_epu64 (__m512d __A) {
 363  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
 364                 (__v8di) _mm512_setzero_si512(),
 365                 (__mmask8) -1,
 366                 _MM_FROUND_CUR_DIRECTION);
 367}
 368
 369static __inline__ __m512i __DEFAULT_FN_ATTRS512
 370_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
 371  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
 372                 (__v8di) __W,
 373                 (__mmask8) __U,
 374                 _MM_FROUND_CUR_DIRECTION);
 375}
 376
 377static __inline__ __m512i __DEFAULT_FN_ATTRS512
 378_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
 379  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
 380                 (__v8di) _mm512_setzero_si512(),
 381                 (__mmask8) __U,
 382                 _MM_FROUND_CUR_DIRECTION);
 383}
 384
 385#define _mm512_cvt_roundpd_epu64(A, R) \
 386  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
 387                                             (__v8di)_mm512_setzero_si512(), \
 388                                             (__mmask8)-1, (int)(R)))
 389
 390#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
 391  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
 392                                             (__v8di)(__m512i)(W), \
 393                                             (__mmask8)(U), (int)(R)))
 394
 395#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
 396  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
 397                                             (__v8di)_mm512_setzero_si512(), \
 398                                             (__mmask8)(U), (int)(R)))
 399
 400static __inline__ __m512i __DEFAULT_FN_ATTRS512
 401_mm512_cvtps_epi64 (__m256 __A) {
 402  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
 403                (__v8di) _mm512_setzero_si512(),
 404                (__mmask8) -1,
 405                _MM_FROUND_CUR_DIRECTION);
 406}
 407
 408static __inline__ __m512i __DEFAULT_FN_ATTRS512
 409_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
 410  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
 411                (__v8di) __W,
 412                (__mmask8) __U,
 413                _MM_FROUND_CUR_DIRECTION);
 414}
 415
 416static __inline__ __m512i __DEFAULT_FN_ATTRS512
 417_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
 418  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
 419                (__v8di) _mm512_setzero_si512(),
 420                (__mmask8) __U,
 421                _MM_FROUND_CUR_DIRECTION);
 422}
 423
 424#define _mm512_cvt_roundps_epi64(A, R) \
 425  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
 426                                            (__v8di)_mm512_setzero_si512(), \
 427                                            (__mmask8)-1, (int)(R)))
 428
 429#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
 430  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
 431                                            (__v8di)(__m512i)(W), \
 432                                            (__mmask8)(U), (int)(R)))
 433
 434#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
 435  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
 436                                            (__v8di)_mm512_setzero_si512(), \
 437                                            (__mmask8)(U), (int)(R)))
 438
 439static __inline__ __m512i __DEFAULT_FN_ATTRS512
 440_mm512_cvtps_epu64 (__m256 __A) {
 441  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
 442                 (__v8di) _mm512_setzero_si512(),
 443                 (__mmask8) -1,
 444                 _MM_FROUND_CUR_DIRECTION);
 445}
 446
 447static __inline__ __m512i __DEFAULT_FN_ATTRS512
 448_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
 449  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
 450                 (__v8di) __W,
 451                 (__mmask8) __U,
 452                 _MM_FROUND_CUR_DIRECTION);
 453}
 454
 455static __inline__ __m512i __DEFAULT_FN_ATTRS512
 456_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
 457  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
 458                 (__v8di) _mm512_setzero_si512(),
 459                 (__mmask8) __U,
 460                 _MM_FROUND_CUR_DIRECTION);
 461}
 462
 463#define _mm512_cvt_roundps_epu64(A, R) \
 464  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
 465                                             (__v8di)_mm512_setzero_si512(), \
 466                                             (__mmask8)-1, (int)(R)))
 467
 468#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
 469  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
 470                                             (__v8di)(__m512i)(W), \
 471                                             (__mmask8)(U), (int)(R)))
 472
 473#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
 474  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
 475                                             (__v8di)_mm512_setzero_si512(), \
 476                                             (__mmask8)(U), (int)(R)))
 477
 478
 479static __inline__ __m512d __DEFAULT_FN_ATTRS512
 480_mm512_cvtepi64_pd (__m512i __A) {
 481  return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
 482}
 483
 484static __inline__ __m512d __DEFAULT_FN_ATTRS512
 485_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
 486  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
 487                                              (__v8df)_mm512_cvtepi64_pd(__A),
 488                                              (__v8df)__W);
 489}
 490
 491static __inline__ __m512d __DEFAULT_FN_ATTRS512
 492_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
 493  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
 494                                              (__v8df)_mm512_cvtepi64_pd(__A),
 495                                              (__v8df)_mm512_setzero_pd());
 496}
 497
 498#define _mm512_cvt_roundepi64_pd(A, R) \
 499  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
 500                                            (__v8df)_mm512_setzero_pd(), \
 501                                            (__mmask8)-1, (int)(R)))
 502
 503#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
 504  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
 505                                            (__v8df)(__m512d)(W), \
 506                                            (__mmask8)(U), (int)(R)))
 507
 508#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
 509  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
 510                                            (__v8df)_mm512_setzero_pd(), \
 511                                            (__mmask8)(U), (int)(R)))
 512
 513static __inline__ __m256 __DEFAULT_FN_ATTRS512
 514_mm512_cvtepi64_ps (__m512i __A) {
 515  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
 516               (__v8sf) _mm256_setzero_ps(),
 517               (__mmask8) -1,
 518               _MM_FROUND_CUR_DIRECTION);
 519}
 520
 521static __inline__ __m256 __DEFAULT_FN_ATTRS512
 522_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
 523  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
 524               (__v8sf) __W,
 525               (__mmask8) __U,
 526               _MM_FROUND_CUR_DIRECTION);
 527}
 528
 529static __inline__ __m256 __DEFAULT_FN_ATTRS512
 530_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
 531  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
 532               (__v8sf) _mm256_setzero_ps(),
 533               (__mmask8) __U,
 534               _MM_FROUND_CUR_DIRECTION);
 535}
 536
 537#define _mm512_cvt_roundepi64_ps(A, R) \
 538  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
 539                                           (__v8sf)_mm256_setzero_ps(), \
 540                                           (__mmask8)-1, (int)(R)))
 541
 542#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
 543  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
 544                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
 545                                           (int)(R)))
 546
 547#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
 548  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
 549                                           (__v8sf)_mm256_setzero_ps(), \
 550                                           (__mmask8)(U), (int)(R)))
 551
 552
 553static __inline__ __m512i __DEFAULT_FN_ATTRS512
 554_mm512_cvttpd_epi64 (__m512d __A) {
 555  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
 556                 (__v8di) _mm512_setzero_si512(),
 557                 (__mmask8) -1,
 558                 _MM_FROUND_CUR_DIRECTION);
 559}
 560
 561static __inline__ __m512i __DEFAULT_FN_ATTRS512
 562_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
 563  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
 564                 (__v8di) __W,
 565                 (__mmask8) __U,
 566                 _MM_FROUND_CUR_DIRECTION);
 567}
 568
 569static __inline__ __m512i __DEFAULT_FN_ATTRS512
 570_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
 571  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
 572                 (__v8di) _mm512_setzero_si512(),
 573                 (__mmask8) __U,
 574                 _MM_FROUND_CUR_DIRECTION);
 575}
 576
 577#define _mm512_cvtt_roundpd_epi64(A, R) \
 578  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
 579                                             (__v8di)_mm512_setzero_si512(), \
 580                                             (__mmask8)-1, (int)(R)))
 581
 582#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \
 583  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
 584                                             (__v8di)(__m512i)(W), \
 585                                             (__mmask8)(U), (int)(R)))
 586
 587#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \
 588  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
 589                                             (__v8di)_mm512_setzero_si512(), \
 590                                             (__mmask8)(U), (int)(R)))
 591
 592static __inline__ __m512i __DEFAULT_FN_ATTRS512
 593_mm512_cvttpd_epu64 (__m512d __A) {
 594  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
 595                  (__v8di) _mm512_setzero_si512(),
 596                  (__mmask8) -1,
 597                  _MM_FROUND_CUR_DIRECTION);
 598}
 599
 600static __inline__ __m512i __DEFAULT_FN_ATTRS512
 601_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
 602  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
 603                  (__v8di) __W,
 604                  (__mmask8) __U,
 605                  _MM_FROUND_CUR_DIRECTION);
 606}
 607
 608static __inline__ __m512i __DEFAULT_FN_ATTRS512
 609_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
 610  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
 611                  (__v8di) _mm512_setzero_si512(),
 612                  (__mmask8) __U,
 613                  _MM_FROUND_CUR_DIRECTION);
 614}
 615
 616#define _mm512_cvtt_roundpd_epu64(A, R) \
 617  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
 618                                              (__v8di)_mm512_setzero_si512(), \
 619                                              (__mmask8)-1, (int)(R)))
 620
 621#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \
 622  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
 623                                              (__v8di)(__m512i)(W), \
 624                                              (__mmask8)(U), (int)(R)))
 625
 626#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \
 627  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
 628                                              (__v8di)_mm512_setzero_si512(), \
 629                                              (__mmask8)(U), (int)(R)))
 630
 631static __inline__ __m512i __DEFAULT_FN_ATTRS512
 632_mm512_cvttps_epi64 (__m256 __A) {
 633  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
 634                 (__v8di) _mm512_setzero_si512(),
 635                 (__mmask8) -1,
 636                 _MM_FROUND_CUR_DIRECTION);
 637}
 638
 639static __inline__ __m512i __DEFAULT_FN_ATTRS512
 640_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
 641  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
 642                 (__v8di) __W,
 643                 (__mmask8) __U,
 644                 _MM_FROUND_CUR_DIRECTION);
 645}
 646
 647static __inline__ __m512i __DEFAULT_FN_ATTRS512
 648_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
 649  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
 650                 (__v8di) _mm512_setzero_si512(),
 651                 (__mmask8) __U,
 652                 _MM_FROUND_CUR_DIRECTION);
 653}
 654
 655#define _mm512_cvtt_roundps_epi64(A, R) \
 656  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
 657                                             (__v8di)_mm512_setzero_si512(), \
 658                                             (__mmask8)-1, (int)(R)))
 659
 660#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \
 661  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
 662                                             (__v8di)(__m512i)(W), \
 663                                             (__mmask8)(U), (int)(R)))
 664
 665#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \
 666  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
 667                                             (__v8di)_mm512_setzero_si512(), \
 668                                             (__mmask8)(U), (int)(R)))
 669
 670static __inline__ __m512i __DEFAULT_FN_ATTRS512
 671_mm512_cvttps_epu64 (__m256 __A) {
 672  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
 673                  (__v8di) _mm512_setzero_si512(),
 674                  (__mmask8) -1,
 675                  _MM_FROUND_CUR_DIRECTION);
 676}
 677
 678static __inline__ __m512i __DEFAULT_FN_ATTRS512
 679_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
 680  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
 681                  (__v8di) __W,
 682                  (__mmask8) __U,
 683                  _MM_FROUND_CUR_DIRECTION);
 684}
 685
 686static __inline__ __m512i __DEFAULT_FN_ATTRS512
 687_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
 688  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
 689                  (__v8di) _mm512_setzero_si512(),
 690                  (__mmask8) __U,
 691                  _MM_FROUND_CUR_DIRECTION);
 692}
 693
 694#define _mm512_cvtt_roundps_epu64(A, R) \
 695  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
 696                                              (__v8di)_mm512_setzero_si512(), \
 697                                              (__mmask8)-1, (int)(R)))
 698
 699#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \
 700  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
 701                                              (__v8di)(__m512i)(W), \
 702                                              (__mmask8)(U), (int)(R)))
 703
 704#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \
 705  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
 706                                              (__v8di)_mm512_setzero_si512(), \
 707                                              (__mmask8)(U), (int)(R)))
 708
 709static __inline__ __m512d __DEFAULT_FN_ATTRS512
 710_mm512_cvtepu64_pd (__m512i __A) {
 711  return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
 712}
 713
 714static __inline__ __m512d __DEFAULT_FN_ATTRS512
 715_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
 716  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
 717                                              (__v8df)_mm512_cvtepu64_pd(__A),
 718                                              (__v8df)__W);
 719}
 720
 721static __inline__ __m512d __DEFAULT_FN_ATTRS512
 722_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
 723  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
 724                                              (__v8df)_mm512_cvtepu64_pd(__A),
 725                                              (__v8df)_mm512_setzero_pd());
 726}
 727
 728#define _mm512_cvt_roundepu64_pd(A, R) \
 729  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
 730                                             (__v8df)_mm512_setzero_pd(), \
 731                                             (__mmask8)-1, (int)(R)))
 732
 733#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \
 734  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
 735                                             (__v8df)(__m512d)(W), \
 736                                             (__mmask8)(U), (int)(R)))
 737
 738
 739#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \
 740  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
 741                                             (__v8df)_mm512_setzero_pd(), \
 742                                             (__mmask8)(U), (int)(R)))
 743
 744
 745static __inline__ __m256 __DEFAULT_FN_ATTRS512
 746_mm512_cvtepu64_ps (__m512i __A) {
 747  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
 748                (__v8sf) _mm256_setzero_ps(),
 749                (__mmask8) -1,
 750                _MM_FROUND_CUR_DIRECTION);
 751}
 752
 753static __inline__ __m256 __DEFAULT_FN_ATTRS512
 754_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
 755  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
 756                (__v8sf) __W,
 757                (__mmask8) __U,
 758                _MM_FROUND_CUR_DIRECTION);
 759}
 760
 761static __inline__ __m256 __DEFAULT_FN_ATTRS512
 762_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
 763  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
 764                (__v8sf) _mm256_setzero_ps(),
 765                (__mmask8) __U,
 766                _MM_FROUND_CUR_DIRECTION);
 767}
 768
 769#define _mm512_cvt_roundepu64_ps(A, R) \
 770  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
 771                                            (__v8sf)_mm256_setzero_ps(), \
 772                                            (__mmask8)-1, (int)(R)))
 773
 774#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \
 775  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
 776                                            (__v8sf)(__m256)(W), (__mmask8)(U), \
 777                                            (int)(R)))
 778
 779#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \
 780  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
 781                                            (__v8sf)_mm256_setzero_ps(), \
 782                                            (__mmask8)(U), (int)(R)))
 783
 784#define _mm512_range_pd(A, B, C) \
 785  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
 786                                           (__v8df)(__m512d)(B), (int)(C), \
 787                                           (__v8df)_mm512_setzero_pd(), \
 788                                           (__mmask8)-1, \
 789                                           _MM_FROUND_CUR_DIRECTION))
 790
 791#define _mm512_mask_range_pd(W, U, A, B, C) \
 792  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
 793                                           (__v8df)(__m512d)(B), (int)(C), \
 794                                           (__v8df)(__m512d)(W), (__mmask8)(U), \
 795                                           _MM_FROUND_CUR_DIRECTION))
 796
 797#define _mm512_maskz_range_pd(U, A, B, C) \
 798  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
 799                                           (__v8df)(__m512d)(B), (int)(C), \
 800                                           (__v8df)_mm512_setzero_pd(), \
 801                                           (__mmask8)(U), \
 802                                           _MM_FROUND_CUR_DIRECTION))
 803
 804#define _mm512_range_round_pd(A, B, C, R) \
 805  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
 806                                           (__v8df)(__m512d)(B), (int)(C), \
 807                                           (__v8df)_mm512_setzero_pd(), \
 808                                           (__mmask8)-1, (int)(R)))
 809
 810#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
 811  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
 812                                           (__v8df)(__m512d)(B), (int)(C), \
 813                                           (__v8df)(__m512d)(W), (__mmask8)(U), \
 814                                           (int)(R)))
 815
 816#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
 817  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
 818                                           (__v8df)(__m512d)(B), (int)(C), \
 819                                           (__v8df)_mm512_setzero_pd(), \
 820                                           (__mmask8)(U), (int)(R)))
 821
 822#define _mm512_range_ps(A, B, C) \
 823  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
 824                                          (__v16sf)(__m512)(B), (int)(C), \
 825                                          (__v16sf)_mm512_setzero_ps(), \
 826                                          (__mmask16)-1, \
 827                                          _MM_FROUND_CUR_DIRECTION))
 828
 829#define _mm512_mask_range_ps(W, U, A, B, C) \
 830  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
 831                                          (__v16sf)(__m512)(B), (int)(C), \
 832                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
 833                                          _MM_FROUND_CUR_DIRECTION))
 834
 835#define _mm512_maskz_range_ps(U, A, B, C) \
 836  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
 837                                          (__v16sf)(__m512)(B), (int)(C), \
 838                                          (__v16sf)_mm512_setzero_ps(), \
 839                                          (__mmask16)(U), \
 840                                          _MM_FROUND_CUR_DIRECTION))
 841
 842#define _mm512_range_round_ps(A, B, C, R) \
 843  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
 844                                          (__v16sf)(__m512)(B), (int)(C), \
 845                                          (__v16sf)_mm512_setzero_ps(), \
 846                                          (__mmask16)-1, (int)(R)))
 847
 848#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
 849  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
 850                                          (__v16sf)(__m512)(B), (int)(C), \
 851                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
 852                                          (int)(R)))
 853
 854#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
 855  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
 856                                          (__v16sf)(__m512)(B), (int)(C), \
 857                                          (__v16sf)_mm512_setzero_ps(), \
 858                                          (__mmask16)(U), (int)(R)))
 859
 860#define _mm_range_round_ss(A, B, C, R) \
 861  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
 862                                                (__v4sf)(__m128)(B), \
 863                                                (__v4sf)_mm_setzero_ps(), \
 864                                                (__mmask8) -1, (int)(C),\
 865                                                (int)(R)))
 866
 867#define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION)
 868
 869#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
 870  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
 871                                                (__v4sf)(__m128)(B), \
 872                                                (__v4sf)(__m128)(W),\
 873                                                (__mmask8)(U), (int)(C),\
 874                                                (int)(R)))
 875
 876#define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION)
 877
 878#define _mm_maskz_range_round_ss(U, A, B, C, R) \
 879  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
 880                                                (__v4sf)(__m128)(B), \
 881                                                (__v4sf)_mm_setzero_ps(), \
 882                                                (__mmask8)(U), (int)(C),\
 883                                                (int)(R)))
 884
 885#define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
 886
 887#define _mm_range_round_sd(A, B, C, R) \
 888  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
 889                                                 (__v2df)(__m128d)(B), \
 890                                                 (__v2df)_mm_setzero_pd(), \
 891                                                 (__mmask8) -1, (int)(C),\
 892                                                 (int)(R)))
 893
 894#define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION)
 895
 896#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
 897  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
 898                                                 (__v2df)(__m128d)(B), \
 899                                                 (__v2df)(__m128d)(W),\
 900                                                 (__mmask8)(U), (int)(C),\
 901                                                 (int)(R)))
 902
 903#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
 904
 905#define _mm_maskz_range_round_sd(U, A, B, C, R) \
 906  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
 907                                                 (__v2df)(__m128d)(B), \
 908                                                 (__v2df)_mm_setzero_pd(), \
 909                                                 (__mmask8)(U), (int)(C),\
 910                                                 (int)(R)))
 911
 912#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
 913
 914#define _mm512_reduce_pd(A, B) \
 915  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
 916                                            (__v8df)_mm512_setzero_pd(), \
 917                                            (__mmask8)-1, \
 918                                            _MM_FROUND_CUR_DIRECTION))
 919
 920#define _mm512_mask_reduce_pd(W, U, A, B) \
 921  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
 922                                            (__v8df)(__m512d)(W), \
 923                                            (__mmask8)(U), \
 924                                            _MM_FROUND_CUR_DIRECTION))
 925
 926#define _mm512_maskz_reduce_pd(U, A, B) \
 927  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
 928                                            (__v8df)_mm512_setzero_pd(), \
 929                                            (__mmask8)(U), \
 930                                            _MM_FROUND_CUR_DIRECTION))
 931
 932#define _mm512_reduce_ps(A, B) \
 933  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
 934                                           (__v16sf)_mm512_setzero_ps(), \
 935                                           (__mmask16)-1, \
 936                                           _MM_FROUND_CUR_DIRECTION))
 937
 938#define _mm512_mask_reduce_ps(W, U, A, B) \
 939  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
 940                                           (__v16sf)(__m512)(W), \
 941                                           (__mmask16)(U), \
 942                                           _MM_FROUND_CUR_DIRECTION))
 943
 944#define _mm512_maskz_reduce_ps(U, A, B) \
 945  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
 946                                           (__v16sf)_mm512_setzero_ps(), \
 947                                           (__mmask16)(U), \
 948                                           _MM_FROUND_CUR_DIRECTION))
 949
 950#define _mm512_reduce_round_pd(A, B, R) \
 951  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
 952                                            (__v8df)_mm512_setzero_pd(), \
 953                                            (__mmask8)-1, (int)(R)))
 954
 955#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
 956  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
 957                                            (__v8df)(__m512d)(W), \
 958                                            (__mmask8)(U), (int)(R)))
 959
 960#define _mm512_maskz_reduce_round_pd(U, A, B, R) \
 961  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
 962                                            (__v8df)_mm512_setzero_pd(), \
 963                                            (__mmask8)(U), (int)(R)))
 964
 965#define _mm512_reduce_round_ps(A, B, R) \
 966  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
 967                                           (__v16sf)_mm512_setzero_ps(), \
 968                                           (__mmask16)-1, (int)(R)))
 969
 970#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
 971  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
 972                                           (__v16sf)(__m512)(W), \
 973                                           (__mmask16)(U), (int)(R)))
 974
 975#define _mm512_maskz_reduce_round_ps(U, A, B, R) \
 976  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
 977                                           (__v16sf)_mm512_setzero_ps(), \
 978                                           (__mmask16)(U), (int)(R)))
 979
 980#define _mm_reduce_ss(A, B, C) \
 981  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
 982                                        (__v4sf)(__m128)(B), \
 983                                        (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
 984                                        (int)(C), _MM_FROUND_CUR_DIRECTION))
 985
 986#define _mm_mask_reduce_ss(W, U, A, B, C) \
 987  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
 988                                        (__v4sf)(__m128)(B), \
 989                                        (__v4sf)(__m128)(W), (__mmask8)(U), \
 990                                        (int)(C), _MM_FROUND_CUR_DIRECTION))
 991
 992#define _mm_maskz_reduce_ss(U, A, B, C) \
 993  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
 994                                        (__v4sf)(__m128)(B), \
 995                                        (__v4sf)_mm_setzero_ps(), \
 996                                        (__mmask8)(U), (int)(C), \
 997                                        _MM_FROUND_CUR_DIRECTION))
 998
 999#define _mm_reduce_round_ss(A, B, C, R) \
1000  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
1001                                        (__v4sf)(__m128)(B), \
1002                                        (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
1003                                        (int)(C), (int)(R)))
1004
/* Masked 128-bit form with caller-supplied R: passes A and B (as __v4sf), W,
 * the mask U, the int C and R to __builtin_ia32_reducess_mask. */
#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)))
1010
/* Zero-operand masked 128-bit form with caller-supplied R: passes A and B (as
 * __v4sf), a zero vector, the mask U, the int C and R to
 * __builtin_ia32_reducess_mask. */
#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)))
1016
/* Unmasked 128-bit form: passes A and B (as __v2df), a zero vector, an
 * all-ones mask, the int C and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_reducesd_mask. */
#define _mm_reduce_sd(A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION))
1023
/* Masked 128-bit form: passes A and B (as __v2df), W, the mask U, the int C
 * and _MM_FROUND_CUR_DIRECTION to __builtin_ia32_reducesd_mask. */
#define _mm_mask_reduce_sd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION))
1029
/* Zero-operand masked 128-bit form: passes A and B (as __v2df), a zero
 * vector, the mask U, the int C and _MM_FROUND_CUR_DIRECTION to
 * __builtin_ia32_reducesd_mask. */
#define _mm_maskz_reduce_sd(U, A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION))
1036
/* Unmasked 128-bit form with caller-supplied R: passes A and B (as __v2df), a
 * zero vector, an all-ones mask, the int C and R to
 * __builtin_ia32_reducesd_mask. */
#define _mm_reduce_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)))
1042
/* Masked 128-bit form with caller-supplied R: passes A and B (as __v2df), W,
 * the mask U, the int C and R to __builtin_ia32_reducesd_mask. */
#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)))
1048
/* Zero-operand masked 128-bit form with caller-supplied R: passes A and B (as
 * __v2df), a zero vector, the mask U, the int C and R to
 * __builtin_ia32_reducesd_mask. */
#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)))
1054
1055static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
1056_mm512_movepi32_mask (__m512i __A)
1057{
1058  return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
1059}
1060
1061static __inline__ __m512i __DEFAULT_FN_ATTRS512
1062_mm512_movm_epi32 (__mmask16 __A)
1063{
1064  return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
1065}
1066
1067static __inline__ __m512i __DEFAULT_FN_ATTRS512
1068_mm512_movm_epi64 (__mmask8 __A)
1069{
1070  return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
1071}
1072
1073static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
1074_mm512_movepi64_mask (__m512i __A)
1075{
1076  return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
1077}
1078
1079
1080static __inline__ __m512 __DEFAULT_FN_ATTRS512
1081_mm512_broadcast_f32x2 (__m128 __A)
1082{
1083  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
1084                                         0, 1, 0, 1, 0, 1, 0, 1,
1085                                         0, 1, 0, 1, 0, 1, 0, 1);
1086}
1087
1088static __inline__ __m512 __DEFAULT_FN_ATTRS512
1089_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
1090{
1091  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
1092                                             (__v16sf)_mm512_broadcast_f32x2(__A),
1093                                             (__v16sf)__O);
1094}
1095
1096static __inline__ __m512 __DEFAULT_FN_ATTRS512
1097_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
1098{
1099  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
1100                                             (__v16sf)_mm512_broadcast_f32x2(__A),
1101                                             (__v16sf)_mm512_setzero_ps());
1102}
1103
1104static __inline__ __m512 __DEFAULT_FN_ATTRS512
1105_mm512_broadcast_f32x8(__m256 __A)
1106{
1107  return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
1108                                         0, 1, 2, 3, 4, 5, 6, 7,
1109                                         0, 1, 2, 3, 4, 5, 6, 7);
1110}
1111
1112static __inline__ __m512 __DEFAULT_FN_ATTRS512
1113_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
1114{
1115  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
1116                                           (__v16sf)_mm512_broadcast_f32x8(__A),
1117                                           (__v16sf)__O);
1118}
1119
1120static __inline__ __m512 __DEFAULT_FN_ATTRS512
1121_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
1122{
1123  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
1124                                           (__v16sf)_mm512_broadcast_f32x8(__A),
1125                                           (__v16sf)_mm512_setzero_ps());
1126}
1127
1128static __inline__ __m512d __DEFAULT_FN_ATTRS512
1129_mm512_broadcast_f64x2(__m128d __A)
1130{
1131  return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
1132                                          0, 1, 0, 1, 0, 1, 0, 1);
1133}
1134
1135static __inline__ __m512d __DEFAULT_FN_ATTRS512
1136_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
1137{
1138  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
1139                                            (__v8df)_mm512_broadcast_f64x2(__A),
1140                                            (__v8df)__O);
1141}
1142
1143static __inline__ __m512d __DEFAULT_FN_ATTRS512
1144_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
1145{
1146  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
1147                                            (__v8df)_mm512_broadcast_f64x2(__A),
1148                                            (__v8df)_mm512_setzero_pd());
1149}
1150
1151static __inline__ __m512i __DEFAULT_FN_ATTRS512
1152_mm512_broadcast_i32x2 (__m128i __A)
1153{
1154  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1155                                          0, 1, 0, 1, 0, 1, 0, 1,
1156                                          0, 1, 0, 1, 0, 1, 0, 1);
1157}
1158
1159static __inline__ __m512i __DEFAULT_FN_ATTRS512
1160_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
1161{
1162  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1163                                             (__v16si)_mm512_broadcast_i32x2(__A),
1164                                             (__v16si)__O);
1165}
1166
1167static __inline__ __m512i __DEFAULT_FN_ATTRS512
1168_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
1169{
1170  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1171                                             (__v16si)_mm512_broadcast_i32x2(__A),
1172                                             (__v16si)_mm512_setzero_si512());
1173}
1174
1175static __inline__ __m512i __DEFAULT_FN_ATTRS512
1176_mm512_broadcast_i32x8(__m256i __A)
1177{
1178  return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
1179                                          0, 1, 2, 3, 4, 5, 6, 7,
1180                                          0, 1, 2, 3, 4, 5, 6, 7);
1181}
1182
1183static __inline__ __m512i __DEFAULT_FN_ATTRS512
1184_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
1185{
1186  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1187                                           (__v16si)_mm512_broadcast_i32x8(__A),
1188                                           (__v16si)__O);
1189}
1190
1191static __inline__ __m512i __DEFAULT_FN_ATTRS512
1192_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
1193{
1194  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1195                                           (__v16si)_mm512_broadcast_i32x8(__A),
1196                                           (__v16si)_mm512_setzero_si512());
1197}
1198
1199static __inline__ __m512i __DEFAULT_FN_ATTRS512
1200_mm512_broadcast_i64x2(__m128i __A)
1201{
1202  return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1203                                          0, 1, 0, 1, 0, 1, 0, 1);
1204}
1205
1206static __inline__ __m512i __DEFAULT_FN_ATTRS512
1207_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
1208{
1209  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1210                                            (__v8di)_mm512_broadcast_i64x2(__A),
1211                                            (__v8di)__O);
1212}
1213
1214static __inline__ __m512i __DEFAULT_FN_ATTRS512
1215_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
1216{
1217  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1218                                            (__v8di)_mm512_broadcast_i64x2(__A),
1219                                            (__v8di)_mm512_setzero_si512());
1220}
1221
/* Unmasked form: passes A (as __v16sf), the int imm, _mm256_undefined_ps() as
 * the third operand and an all-ones mask to
 * __builtin_ia32_extractf32x8_mask; the result is cast to __m256. */
#define _mm512_extractf32x8_ps(A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v8sf)_mm256_undefined_ps(), \
      (__mmask8)-1))
1226
/* Masked form: passes A (as __v16sf), the int imm, W (as __v8sf) and the mask
 * U to __builtin_ia32_extractf32x8_mask. */
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))
1231
/* Zero-operand masked form: passes A (as __v16sf), the int imm, a zero __v8sf
 * vector and the mask U to __builtin_ia32_extractf32x8_mask. */
#define _mm512_maskz_extractf32x8_ps(U, A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)))
1236
/* Unmasked form: passes A (as __v8df), the int imm, _mm_undefined_pd() as the
 * third operand and an all-ones mask to
 * __builtin_ia32_extractf64x2_512_mask; the result is cast to __m128d. */
#define _mm512_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v2df)_mm_undefined_pd(), \
      (__mmask8)-1))
1242
/* Masked form: passes A (as __v8df), the int imm, W (as __v2df) and the mask
 * U to __builtin_ia32_extractf64x2_512_mask. */
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)))
1248
/* Zero-operand masked form: passes A (as __v8df), the int imm, a zero __v2df
 * vector and the mask U to __builtin_ia32_extractf64x2_512_mask. */
#define _mm512_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U)))
1254
/* Unmasked form: passes A (as __v16si), the int imm,
 * _mm256_undefined_si256() as the third operand and an all-ones mask to
 * __builtin_ia32_extracti32x8_mask; the result is cast to __m256i. */
#define _mm512_extracti32x8_epi32(A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (int)(imm), \
      (__v8si)_mm256_undefined_si256(), \
      (__mmask8)-1))
1259
/* Masked form: passes A (as __v16si), the int imm, W (as __v8si) and the mask
 * U to __builtin_ia32_extracti32x8_mask. */
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (int)(imm), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))
1264
/* Zero-operand masked form: passes A (as __v16si), the int imm, a zero __v8si
 * vector and the mask U to __builtin_ia32_extracti32x8_mask. */
#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U)))
1269
/* Unmasked form: passes A (as __v8di), the int imm, _mm_undefined_si128() as
 * the third operand and an all-ones mask to
 * __builtin_ia32_extracti64x2_512_mask; the result is cast to __m128i. */
#define _mm512_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(imm), \
      (__v2di)_mm_undefined_si128(), \
      (__mmask8)-1))
1275
/* Masked form: passes A (as __v8di), the int imm, W (as __v2di) and the mask
 * U to __builtin_ia32_extracti64x2_512_mask. */
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(imm), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))
1281
/* Zero-operand masked form: passes A (as __v8di), the int imm, a zero __v2di
 * vector and the mask U to __builtin_ia32_extracti64x2_512_mask. */
#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(imm), \
      (__v2di)_mm_setzero_si128(), \
      (__mmask8)(U)))
1287
/* Passes A (as __v16sf), B (as __v8sf) and the int imm to
 * __builtin_ia32_insertf32x8; the result is cast to __m512. */
#define _mm512_insertf32x8(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x8( \
      (__v16sf)(__m512)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm)))
1291
/* Masked form: hands _mm512_insertf32x8(A, B, imm), W and the mask U to
 * __builtin_ia32_selectps_512. */
#define _mm512_mask_insertf32x8(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
      (__v16sf)(__m512)(W)))
1296
/* Zero-operand masked form: hands _mm512_insertf32x8(A, B, imm), a zero
 * vector and the mask U to __builtin_ia32_selectps_512. */
#define _mm512_maskz_insertf32x8(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()))
1301
/* Passes A (as __v8df), B (as __v2df) and the int imm to
 * __builtin_ia32_insertf64x2_512; the result is cast to __m512d. */
#define _mm512_insertf64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x2_512( \
      (__v8df)(__m512d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(imm)))
1305
/* Masked form: hands _mm512_insertf64x2(A, B, imm), W and the mask U to
 * __builtin_ia32_selectpd_512. */
#define _mm512_mask_insertf64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
      (__v8df)(__m512d)(W)))
1310
/* Zero-operand masked form: hands _mm512_insertf64x2(A, B, imm), a zero
 * vector and the mask U to __builtin_ia32_selectpd_512. */
#define _mm512_maskz_insertf64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()))
1315
/* Passes A (as __v16si), B (as __v8si) and the int imm to
 * __builtin_ia32_inserti32x8; the result is cast to __m512i. */
#define _mm512_inserti32x8(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x8( \
      (__v16si)(__m512i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm)))
1319
/* Masked form: hands _mm512_inserti32x8(A, B, imm), W and the mask U to
 * __builtin_ia32_selectd_512. */
#define _mm512_mask_inserti32x8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
      (__v16si)(__m512i)(W)))
1324
/* Zero-operand masked form: hands _mm512_inserti32x8(A, B, imm), a zero
 * vector and the mask U to __builtin_ia32_selectd_512. */
#define _mm512_maskz_inserti32x8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
1329
/* Passes A (as __v8di), B (as __v2di) and the int imm to
 * __builtin_ia32_inserti64x2_512; the result is cast to __m512i. */
#define _mm512_inserti64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x2_512( \
      (__v8di)(__m512i)(A), \
      (__v2di)(__m128i)(B), \
      (int)(imm)))
1333
/* Masked form: hands _mm512_inserti64x2(A, B, imm), W and the mask U to
 * __builtin_ia32_selectq_512. */
#define _mm512_mask_inserti64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
      (__v8di)(__m512i)(W)))
1338
/* Zero-operand masked form: hands _mm512_inserti64x2(A, B, imm), a zero
 * vector and the mask U to __builtin_ia32_selectq_512. */
#define _mm512_maskz_inserti64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))
1343
/* Masked form: passes A (as __v16sf), the int imm and the mask U to
 * __builtin_ia32_fpclassps512_mask; the result is cast to __mmask16. */
#define _mm512_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask16)__builtin_ia32_fpclassps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__mmask16)(U)))
1347
/* Unmasked form: passes A (as __v16sf), the int imm and an all-ones __mmask16
 * to __builtin_ia32_fpclassps512_mask; the result is cast to __mmask16. */
#define _mm512_fpclass_ps_mask(A, imm) \
  ((__mmask16)__builtin_ia32_fpclassps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__mmask16)-1))
1351
/* Masked form: passes A (as __v8df), the int imm and the mask U to
 * __builtin_ia32_fpclasspd512_mask; the result is cast to __mmask8. */
#define _mm512_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__mmask8)(U)))
1355
/* Unmasked form: passes A (as __v8df), the int imm and an all-ones __mmask8
 * to __builtin_ia32_fpclasspd512_mask; the result is cast to __mmask8. */
#define _mm512_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__mmask8)-1))
1359
/* Unmasked 128-bit form: passes A (as __v2df), the int imm and an all-ones
 * __mmask8 to __builtin_ia32_fpclasssd_mask; the result is cast to
 * __mmask8. */
#define _mm_fpclass_sd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasssd_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)-1))
1363
/* Masked 128-bit form: passes A (as __v2df), the int imm and the mask U to
 * __builtin_ia32_fpclasssd_mask; the result is cast to __mmask8. */
#define _mm_mask_fpclass_sd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasssd_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)(U)))
1367
/* Unmasked 128-bit form: passes A (as __v4sf), the int imm and an all-ones
 * __mmask8 to __builtin_ia32_fpclassss_mask; the result is cast to
 * __mmask8. */
#define _mm_fpclass_ss_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassss_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)-1))
1371
/* Masked 128-bit form: passes A (as __v4sf), the int imm and the mask U to
 * __builtin_ia32_fpclassss_mask; the result is cast to __mmask8. */
#define _mm_mask_fpclass_ss_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassss_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)(U)))
1375
/* Remove the attribute helper macros defined at the top of this file so they
 * are no longer defined after this header. */
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS512
1378
1379#endif