master
  1/*===------ avx10_2_512satcvtintrin.h - AVX10_2_512SATCVT intrinsics -------===
  2 *
  3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4 * See https://llvm.org/LICENSE.txt for license information.
  5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6 *
  7 *===-----------------------------------------------------------------------===
  8 */
  9#ifndef __IMMINTRIN_H
 10#error                                                                         \
 11    "Never use <avx10_2_512satcvtintrin.h> directly; include <immintrin.h> instead."
 12#endif // __IMMINTRIN_H
 13
 14#ifndef __AVX10_2_512SATCVTINTRIN_H
 15#define __AVX10_2_512SATCVTINTRIN_H
 16
 17#define _mm512_ipcvts_bf16_epi8(A)                                             \
 18  ((__m512i)__builtin_ia32_vcvtbf162ibs512((__v32bf)(__m512bh)(A)))
 19
 20#define _mm512_mask_ipcvts_bf16_epi8(W, U, A)                                  \
 21  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
 22                                       (__v32hi)_mm512_ipcvts_bf16_epi8(A),    \
 23                                       (__v32hi)(__m512i)(W)))
 24
 25#define _mm512_maskz_ipcvts_bf16_epi8(U, A)                                    \
 26  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
 27                                       (__v32hi)_mm512_ipcvts_bf16_epi8(A),    \
 28                                       (__v32hi)_mm512_setzero_si512()))
 29
 30#define _mm512_ipcvts_bf16_epu8(A)                                             \
 31  ((__m512i)__builtin_ia32_vcvtbf162iubs512((__v32bf)(__m512bh)(A)))
 32
 33#define _mm512_mask_ipcvts_bf16_epu8(W, U, A)                                  \
 34  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
 35                                       (__v32hi)_mm512_ipcvts_bf16_epu8(A),    \
 36                                       (__v32hi)(__m512i)(W)))
 37
 38#define _mm512_maskz_ipcvts_bf16_epu8(U, A)                                    \
 39  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
 40                                       (__v32hi)_mm512_ipcvts_bf16_epu8(A),    \
 41                                       (__v32hi)_mm512_setzero_si512()))
 42
 43#define _mm512_ipcvtts_bf16_epi8(A)                                            \
 44  ((__m512i)__builtin_ia32_vcvttbf162ibs512((__v32bf)(__m512bh)(A)))
 45
 46#define _mm512_mask_ipcvtts_bf16_epi8(W, U, A)                                 \
 47  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
 48                                       (__v32hi)_mm512_ipcvtts_bf16_epi8(A),   \
 49                                       (__v32hi)(__m512i)(W)))
 50
 51#define _mm512_maskz_ipcvtts_bf16_epi8(U, A)                                   \
 52  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
 53                                       (__v32hi)_mm512_ipcvtts_bf16_epi8(A),   \
 54                                       (__v32hi)_mm512_setzero_si512()))
 55
 56#define _mm512_ipcvtts_bf16_epu8(A)                                            \
 57  ((__m512i)__builtin_ia32_vcvttbf162iubs512((__v32bf)(__m512bh)(A)))
 58
 59#define _mm512_mask_ipcvtts_bf16_epu8(W, U, A)                                 \
 60  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
 61                                       (__v32hi)_mm512_ipcvtts_bf16_epu8(A),   \
 62                                       (__v32hi)(__m512i)(W)))
 63
 64#define _mm512_maskz_ipcvtts_bf16_epu8(U, A)                                   \
 65  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
 66                                       (__v32hi)_mm512_ipcvtts_bf16_epu8(A),   \
 67                                       (__v32hi)_mm512_setzero_si512()))
 68
 69#define _mm512_ipcvts_ph_epi8(A)                                               \
 70  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
 71      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
 72      _MM_FROUND_CUR_DIRECTION))
 73
 74#define _mm512_mask_ipcvts_ph_epi8(W, U, A)                                    \
 75  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
 76                                              (__v32hu)(W), (__mmask32)(U),    \
 77                                              _MM_FROUND_CUR_DIRECTION))
 78
 79#define _mm512_maskz_ipcvts_ph_epi8(U, A)                                      \
 80  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
 81      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
 82      _MM_FROUND_CUR_DIRECTION))
 83
 84#define _mm512_ipcvts_roundph_epi8(A, R)                                       \
 85  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
 86                                              (__v32hu)_mm512_setzero_si512(), \
 87                                              (__mmask32) - 1, (const int)R))
 88
 89#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R)                            \
 90  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
 91      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R))
 92
 93#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R)                              \
 94  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
 95                                              (__v32hu)_mm512_setzero_si512(), \
 96                                              (__mmask32)(U), (const int)R))
 97
 98#define _mm512_ipcvts_ph_epu8(A)                                               \
 99  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
100      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
101      _MM_FROUND_CUR_DIRECTION))
102
103#define _mm512_mask_ipcvts_ph_epu8(W, U, A)                                    \
104  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask((__v32hf)(__m512h)(A),          \
105                                               (__v32hu)(W), (__mmask32)(U),   \
106                                               _MM_FROUND_CUR_DIRECTION))
107
108#define _mm512_maskz_ipcvts_ph_epu8(U, A)                                      \
109  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
110      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
111      _MM_FROUND_CUR_DIRECTION))
112
113#define _mm512_ipcvts_roundph_epu8(A, R)                                       \
114  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
115      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
116      (const int)R))
117
118#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R)                            \
119  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
120      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R))
121
122#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R)                              \
123  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
124      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
125      (const int)R))
126
127#define _mm512_ipcvts_ps_epi8(A)                                               \
128  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask(                                 \
129      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1,  \
130      _MM_FROUND_CUR_DIRECTION))
131
132#define _mm512_mask_ipcvts_ps_epi8(W, U, A)                                    \
133  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A),            \
134                                              (__v16su)(W), (__mmask16)(U),    \
135                                              _MM_FROUND_CUR_DIRECTION))
136
137#define _mm512_maskz_ipcvts_ps_epi8(U, A)                                      \
138  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask(                                 \
139      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U),   \
140      _MM_FROUND_CUR_DIRECTION))
141
142#define _mm512_ipcvts_roundps_epi8(A, R)                                       \
143  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A),            \
144                                              (__v16su)_mm512_setzero_si512(), \
145                                              (__mmask16) - 1, (const int)R))
146
147#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R)                            \
148  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask(                                 \
149      (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R))
150
151#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R)                              \
152  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A),            \
153                                              (__v16su)_mm512_setzero_si512(), \
154                                              (__mmask16)(U), (const int)R))
155
156#define _mm512_ipcvts_ps_epu8(A)                                               \
157  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
158      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1,  \
159      _MM_FROUND_CUR_DIRECTION))
160
161#define _mm512_mask_ipcvts_ps_epu8(W, U, A)                                    \
162  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask((__v16sf)(__m512)(A),           \
163                                               (__v16su)(W), (__mmask16)(U),   \
164                                               _MM_FROUND_CUR_DIRECTION))
165
166#define _mm512_maskz_ipcvts_ps_epu8(U, A)                                      \
167  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
168      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U),   \
169      _MM_FROUND_CUR_DIRECTION))
170
171#define _mm512_ipcvts_roundps_epu8(A, R)                                       \
172  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
173      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1,  \
174      (const int)R))
175
176#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R)                            \
177  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
178      (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R))
179
180#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R)                              \
181  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
182      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U),   \
183      (const int)R))
184
185#define _mm512_ipcvtts_ph_epi8(A)                                              \
186  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
187      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
188      _MM_FROUND_CUR_DIRECTION))
189
190#define _mm512_mask_ipcvtts_ph_epi8(W, U, A)                                   \
191  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask((__v32hf)(__m512h)(A),          \
192                                               (__v32hu)(W), (__mmask32)(U),   \
193                                               _MM_FROUND_CUR_DIRECTION))
194
195#define _mm512_maskz_ipcvtts_ph_epi8(U, A)                                     \
196  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
197      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
198      _MM_FROUND_CUR_DIRECTION))
199
200#define _mm512_ipcvtts_roundph_epi8(A, S)                                      \
201  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
202      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
203      S))
204
205#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, S)                           \
206  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
207      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S))
208
209#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, S)                             \
210  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
211      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
212      S))
213
214#define _mm512_ipcvtts_ph_epu8(A)                                              \
215  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
216      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
217      _MM_FROUND_CUR_DIRECTION))
218
219#define _mm512_mask_ipcvtts_ph_epu8(W, U, A)                                   \
220  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask((__v32hf)(__m512h)(A),         \
221                                                (__v32hu)(W), (__mmask32)(U),  \
222                                                _MM_FROUND_CUR_DIRECTION))
223
224#define _mm512_maskz_ipcvtts_ph_epu8(U, A)                                     \
225  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
226      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
227      _MM_FROUND_CUR_DIRECTION))
228
229#define _mm512_ipcvtts_roundph_epu8(A, S)                                      \
230  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
231      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
232      S))
233
234#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, S)                           \
235  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
236      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S))
237
238#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, S)                             \
239  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
240      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
241      S))
242
243#define _mm512_ipcvtts_ps_epi8(A)                                              \
244  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask(                                \
245      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
246      _MM_FROUND_CUR_DIRECTION))
247
248#define _mm512_mask_ipcvtts_ps_epi8(W, U, A)                                   \
249  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask((__v16sf)(__m512h)(A),          \
250                                               (__v16su)(W), (__mmask16)(U),   \
251                                               _MM_FROUND_CUR_DIRECTION))
252
253#define _mm512_maskz_ipcvtts_ps_epi8(U, A)                                     \
254  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask(                                \
255      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U),  \
256      _MM_FROUND_CUR_DIRECTION))
257
258#define _mm512_ipcvtts_roundps_epi8(A, S)                                      \
259  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask(                                \
260      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
261      S))
262
263#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, S)                           \
264  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask(                                \
265      (__v16sf)(__m512h)(A), (__v16su)(W), (__mmask16)(U), S))
266
267#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, S)                             \
268  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask(                                \
269      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U),  \
270      S))
271
272#define _mm512_ipcvtts_ps_epu8(A)                                              \
273  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask(                               \
274      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
275      _MM_FROUND_CUR_DIRECTION))
276
277#define _mm512_mask_ipcvtts_ps_epu8(W, U, A)                                   \
278  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask((__v16sf)(__m512h)(A),         \
279                                                (__v16su)(W), (__mmask16)(U),  \
280                                                _MM_FROUND_CUR_DIRECTION))
281
282#define _mm512_maskz_ipcvtts_ps_epu8(U, A)                                     \
283  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask(                               \
284      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U),  \
285      _MM_FROUND_CUR_DIRECTION))
286
287#define _mm512_ipcvtts_roundps_epu8(A, S)                                      \
288  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask(                               \
289      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
290      S))
291
292#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, S)                           \
293  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask(                               \
294      (__v16sf)(__m512h)(A), (__v16su)(W), (__mmask16)(U), S))
295
296#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, S)                             \
297  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask(                               \
298      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U),  \
299      S))
300
301#endif // __AVX10_2_512SATCVTINTRIN_H