/*===----------- avx10_2satcvtintrin.h - AVX10_2SATCVT intrinsics ----------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
  9#ifndef __IMMINTRIN_H
 10#error                                                                         \
 11    "Never use <avx10_2satcvtintrin.h> directly; include <immintrin.h> instead."
 12#endif // __IMMINTRIN_H
 13
 14#ifndef __AVX10_2SATCVTINTRIN_H
 15#define __AVX10_2SATCVTINTRIN_H
 16
 17#define _mm_ipcvts_bf16_epi8(A)                                                \
 18  ((__m128i)__builtin_ia32_vcvtbf162ibs128((__v8bf)(__m128bh)(A)))
 19
 20#define _mm_mask_ipcvts_bf16_epi8(W, U, A)                                     \
 21  ((__m128i)__builtin_ia32_selectw_128(                                        \
 22      (__mmask8)(U), (__v8hi)_mm_ipcvts_bf16_epi8(A), (__v8hi)(__m128i)(W)))
 23
 24#define _mm_maskz_ipcvts_bf16_epi8(U, A)                                       \
 25  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
 26                                       (__v8hi)_mm_ipcvts_bf16_epi8(A),        \
 27                                       (__v8hi)_mm_setzero_si128()))
 28
 29#define _mm256_ipcvts_bf16_epi8(A)                                             \
 30  ((__m256i)__builtin_ia32_vcvtbf162ibs256((__v16bf)(__m256bh)(A)))
 31
 32#define _mm256_mask_ipcvts_bf16_epi8(W, U, A)                                  \
 33  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
 34                                       (__v16hi)_mm256_ipcvts_bf16_epi8(A),    \
 35                                       (__v16hi)(__m256i)(W)))
 36
 37#define _mm256_maskz_ipcvts_bf16_epi8(U, A)                                    \
 38  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
 39                                       (__v16hi)_mm256_ipcvts_bf16_epi8(A),    \
 40                                       (__v16hi)_mm256_setzero_si256()))
 41
 42#define _mm_ipcvts_bf16_epu8(A)                                                \
 43  ((__m128i)__builtin_ia32_vcvtbf162iubs128((__v8bf)(__m128bh)(A)))
 44
 45#define _mm_mask_ipcvts_bf16_epu8(W, U, A)                                     \
 46  ((__m128i)__builtin_ia32_selectw_128(                                        \
 47      (__mmask8)(U), (__v8hi)_mm_ipcvts_bf16_epu8(A), (__v8hi)(__m128i)(W)))
 48
 49#define _mm_maskz_ipcvts_bf16_epu8(U, A)                                       \
 50  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
 51                                       (__v8hi)_mm_ipcvts_bf16_epu8(A),        \
 52                                       (__v8hi)_mm_setzero_si128()))
 53
 54#define _mm256_ipcvts_bf16_epu8(A)                                             \
 55  ((__m256i)__builtin_ia32_vcvtbf162iubs256((__v16bf)(__m256bh)(A)))
 56
 57#define _mm256_mask_ipcvts_bf16_epu8(W, U, A)                                  \
 58  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
 59                                       (__v16hi)_mm256_ipcvts_bf16_epu8(A),    \
 60                                       (__v16hi)(__m256i)(W)))
 61
 62#define _mm256_maskz_ipcvts_bf16_epu8(U, A)                                    \
 63  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
 64                                       (__v16hi)_mm256_ipcvts_bf16_epu8(A),    \
 65                                       (__v16hi)_mm256_setzero_si256()))
 66
 67#define _mm_ipcvts_ph_epi8(A)                                                  \
 68  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask(                                 \
 69      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
 70
 71#define _mm_mask_ipcvts_ph_epi8(W, U, A)                                       \
 72  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A),            \
 73                                              (__v8hu)(W), (__mmask8)(U)))
 74
 75#define _mm_maskz_ipcvts_ph_epi8(U, A)                                         \
 76  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask(                                 \
 77      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
 78
 79#define _mm256_ipcvts_ph_epi8(A)                                               \
 80  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
 81      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1))
 82
 83#define _mm256_mask_ipcvts_ph_epi8(W, U, A)                                    \
 84  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A),           \
 85                                              (__v16hu)(W), (__mmask16)(U)))
 86
 87#define _mm256_maskz_ipcvts_ph_epi8(U, A)                                      \
 88  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
 89      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()),                \
 90      (__mmask16)(U)))
 91
 92#define _mm_ipcvts_ph_epu8(A)                                                  \
 93  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask(                                \
 94      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
 95
 96#define _mm_mask_ipcvts_ph_epu8(W, U, A)                                       \
 97  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A),           \
 98                                               (__v8hu)(W), (__mmask8)(U)))
 99
100#define _mm_maskz_ipcvts_ph_epu8(U, A)                                         \
101  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask(                                \
102      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
103
// 256-bit FP16 source, unsigned-byte (epu8) saturating-convert family.
// Every form ends in __builtin_ia32_vcvtph2iubs256_mask, called with A as
// __v16hf, W as __v16hu and U as __mmask16. The mask form is the only one
// that names the builtin; the unmasked form supplies a zero vector with a
// mask of (__mmask16)-1, and the maskz form a zero vector with the caller's U.
#define _mm256_mask_ipcvts_ph_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U)))

#define _mm256_ipcvts_ph_epu8(A) \
  _mm256_mask_ipcvts_ph_epu8(_mm256_setzero_si256(), (__mmask16)-1, (A))

#define _mm256_maskz_ipcvts_ph_epu8(U, A) \
  _mm256_mask_ipcvts_ph_epu8(_mm256_setzero_si256(), (U), (A))
116
// 128-bit single-precision source, signed-byte (epi8) saturating-convert
// family. Every form ends in __builtin_ia32_vcvtps2ibs128_mask, called with A
// as __v4sf, W as __v4su and U as __mmask8. The mask form is the only one that
// names the builtin; the unmasked form supplies a zero vector with a mask of
// (__mmask8)-1, and the maskz form supplies a zero vector with the caller's U.
#define _mm_mask_ipcvts_ps_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(W), (__mmask8)(U)))

#define _mm_ipcvts_ps_epi8(A) \
  _mm_mask_ipcvts_ps_epi8(_mm_setzero_si128(), (__mmask8)-1, (A))

#define _mm_maskz_ipcvts_ps_epi8(U, A) \
  _mm_mask_ipcvts_ps_epi8(_mm_setzero_si128(), (U), (A))
128
// 256-bit single-precision source, signed-byte (epi8) saturating-convert
// family. Every form ends in __builtin_ia32_vcvtps2ibs256_mask, called with A
// as __v8sf, W as __v8su and U as __mmask8. The mask form is the only one that
// names the builtin; the unmasked form supplies a zero vector with a mask of
// (__mmask8)-1, and the maskz form supplies a zero vector with the caller's U.
#define _mm256_mask_ipcvts_ps_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U)))

#define _mm256_ipcvts_ps_epi8(A) \
  _mm256_mask_ipcvts_ps_epi8(_mm256_setzero_si256(), (__mmask8)-1, (A))

#define _mm256_maskz_ipcvts_ps_epi8(U, A) \
  _mm256_mask_ipcvts_ps_epi8(_mm256_setzero_si256(), (U), (A))
140
// 128-bit single-precision source, unsigned-byte (epu8) saturating-convert
// family. Every form ends in __builtin_ia32_vcvtps2iubs128_mask, called with
// A as __v4sf, W as __v4su and U as __mmask8. The mask form is the only one
// that names the builtin; the unmasked form supplies a zero vector with a
// mask of (__mmask8)-1, and the maskz form a zero vector with the caller's U.
#define _mm_mask_ipcvts_ps_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(W), (__mmask8)(U)))

#define _mm_ipcvts_ps_epu8(A) \
  _mm_mask_ipcvts_ps_epu8(_mm_setzero_si128(), (__mmask8)-1, (A))

#define _mm_maskz_ipcvts_ps_epu8(U, A) \
  _mm_mask_ipcvts_ps_epu8(_mm_setzero_si128(), (U), (A))
152
// 256-bit single-precision source, unsigned-byte (epu8) saturating-convert
// family. Every form ends in __builtin_ia32_vcvtps2iubs256_mask, called with
// A as __v8sf, W as __v8su and U as __mmask8. The mask form is the only one
// that names the builtin; the unmasked form supplies a zero vector with a
// mask of (__mmask8)-1, and the maskz form a zero vector with the caller's U.
#define _mm256_mask_ipcvts_ps_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U)))

#define _mm256_ipcvts_ps_epu8(A) \
  _mm256_mask_ipcvts_ps_epu8(_mm256_setzero_si256(), (__mmask8)-1, (A))

#define _mm256_maskz_ipcvts_ps_epu8(U, A) \
  _mm256_mask_ipcvts_ps_epu8(_mm256_setzero_si256(), (U), (A))
164
// 128-bit BF16 source, signed-byte (epi8) family built on the "vcvtt" builtin
// (presumably the truncating counterpart of _mm_ipcvts_bf16_epi8).
//   unmasked: A is cast through __m128bh to __v8bf and handed to
//             __builtin_ia32_vcvttbf162ibs128.
//   mask:     that result, W and the __mmask8 U go to
//             __builtin_ia32_selectw_128 (presumably a per-word blend on U).
//   maskz:    the mask form with _mm_setzero_si128() standing in for W.
#define _mm_ipcvtts_bf16_epi8(A) \
  ((__m128i)__builtin_ia32_vcvttbf162ibs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtts_bf16_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvtts_bf16_epi8(A), \
                                       (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtts_bf16_epi8(U, A) \
  _mm_mask_ipcvtts_bf16_epi8(_mm_setzero_si128(), (U), (A))
176
// 256-bit BF16 source, signed-byte (epi8) family built on the "vcvtt" builtin
// (presumably the truncating counterpart of _mm256_ipcvts_bf16_epi8).
//   unmasked: A is cast through __m256bh to __v16bf and handed to
//             __builtin_ia32_vcvttbf162ibs256.
//   mask:     that result, W and the __mmask16 U go to
//             __builtin_ia32_selectw_256 (presumably a per-word blend on U).
//   maskz:    the mask form with _mm256_setzero_si256() standing in for W.
#define _mm256_ipcvtts_bf16_epi8(A) \
  ((__m256i)__builtin_ia32_vcvttbf162ibs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtts_bf16_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), (__v16hi)_mm256_ipcvtts_bf16_epi8(A), \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtts_bf16_epi8(U, A) \
  _mm256_mask_ipcvtts_bf16_epi8(_mm256_setzero_si256(), (U), (A))
189
// 128-bit BF16 source, unsigned-byte (epu8) family built on the "vcvtt"
// builtin (presumably the truncating counterpart of _mm_ipcvts_bf16_epu8).
//   unmasked: A is cast through __m128bh to __v8bf and handed to
//             __builtin_ia32_vcvttbf162iubs128.
//   mask:     that result, W and the __mmask8 U go to
//             __builtin_ia32_selectw_128 (presumably a per-word blend on U).
//   maskz:    the mask form with _mm_setzero_si128() standing in for W.
#define _mm_ipcvtts_bf16_epu8(A) \
  ((__m128i)__builtin_ia32_vcvttbf162iubs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtts_bf16_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvtts_bf16_epu8(A), \
                                       (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtts_bf16_epu8(U, A) \
  _mm_mask_ipcvtts_bf16_epu8(_mm_setzero_si128(), (U), (A))
201
// 256-bit BF16 source, unsigned-byte (epu8) family built on the "vcvtt"
// builtin (presumably the truncating counterpart of _mm256_ipcvts_bf16_epu8).
//   unmasked: A is cast through __m256bh to __v16bf and handed to
//             __builtin_ia32_vcvttbf162iubs256.
//   mask:     that result, W and the __mmask16 U go to
//             __builtin_ia32_selectw_256 (presumably a per-word blend on U).
//   maskz:    the mask form with _mm256_setzero_si256() standing in for W.
#define _mm256_ipcvtts_bf16_epu8(A) \
  ((__m256i)__builtin_ia32_vcvttbf162iubs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtts_bf16_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), (__v16hi)_mm256_ipcvtts_bf16_epu8(A), \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtts_bf16_epu8(U, A) \
  _mm256_mask_ipcvtts_bf16_epu8(_mm256_setzero_si256(), (U), (A))
214
// 128-bit FP16 source, signed-byte (epi8) family built on the "vcvtt" builtin
// (presumably the truncating counterpart of _mm_ipcvts_ph_epi8).
// Every form ends in __builtin_ia32_vcvttph2ibs128_mask, called with A as
// __v8hf, W as __v8hu and U as __mmask8. The mask form is the only one that
// names the builtin; the unmasked form supplies a zero vector with a mask of
// (__mmask8)-1, and the maskz form supplies a zero vector with the caller's U.
#define _mm_mask_ipcvtts_ph_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(W), (__mmask8)(U)))

#define _mm_ipcvtts_ph_epi8(A) \
  _mm_mask_ipcvtts_ph_epi8(_mm_setzero_si128(), (__mmask8)-1, (A))

#define _mm_maskz_ipcvtts_ph_epi8(U, A) \
  _mm_mask_ipcvtts_ph_epi8(_mm_setzero_si128(), (U), (A))
226
// 256-bit FP16 source, signed-byte (epi8) family built on the "vcvtt" builtin
// (presumably the truncating counterpart of _mm256_ipcvts_ph_epi8).
// Every form ends in __builtin_ia32_vcvttph2ibs256_mask, called with A as
// __v16hf, W as __v16hu and U as __mmask16. The mask form is the only one
// that names the builtin; the unmasked form supplies a zero vector with a
// mask of (__mmask16)-1, and the maskz form a zero vector with the caller's U.
#define _mm256_mask_ipcvtts_ph_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U)))

#define _mm256_ipcvtts_ph_epi8(A) \
  _mm256_mask_ipcvtts_ph_epi8(_mm256_setzero_si256(), (__mmask16)-1, (A))

#define _mm256_maskz_ipcvtts_ph_epi8(U, A) \
  _mm256_mask_ipcvtts_ph_epi8(_mm256_setzero_si256(), (U), (A))
239
// 128-bit FP16 source, unsigned-byte (epu8) family built on the "vcvtt"
// builtin (presumably the truncating counterpart of _mm_ipcvts_ph_epu8).
// Every form ends in __builtin_ia32_vcvttph2iubs128_mask, called with A as
// __v8hf, W as __v8hu and U as __mmask8. The mask form is the only one that
// names the builtin; the unmasked form supplies a zero vector with a mask of
// (__mmask8)-1, and the maskz form supplies a zero vector with the caller's U.
#define _mm_mask_ipcvtts_ph_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(W), (__mmask8)(U)))

#define _mm_ipcvtts_ph_epu8(A) \
  _mm_mask_ipcvtts_ph_epu8(_mm_setzero_si128(), (__mmask8)-1, (A))

#define _mm_maskz_ipcvtts_ph_epu8(U, A) \
  _mm_mask_ipcvtts_ph_epu8(_mm_setzero_si128(), (U), (A))
251
// 256-bit FP16 source, unsigned-byte (epu8) family built on the "vcvtt"
// builtin (presumably the truncating counterpart of _mm256_ipcvts_ph_epu8).
// Every form ends in __builtin_ia32_vcvttph2iubs256_mask, called with A as
// __v16hf, W as __v16hu and U as __mmask16. The mask form is the only one
// that names the builtin; the unmasked form supplies a zero vector with a
// mask of (__mmask16)-1, and the maskz form a zero vector with the caller's U.
#define _mm256_mask_ipcvtts_ph_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U)))

#define _mm256_ipcvtts_ph_epu8(A) \
  _mm256_mask_ipcvtts_ph_epu8(_mm256_setzero_si256(), (__mmask16)-1, (A))

#define _mm256_maskz_ipcvtts_ph_epu8(U, A) \
  _mm256_mask_ipcvtts_ph_epu8(_mm256_setzero_si256(), (U), (A))
264
// 128-bit single-precision source, signed-byte (epi8) family built on the
// "vcvtt" builtin (presumably the truncating counterpart of
// _mm_ipcvts_ps_epi8). Every form ends in
// __builtin_ia32_vcvttps2ibs128_mask, called with A as __v4sf, W as __v4su
// and U as __mmask8. The mask form is the only one that names the builtin;
// the unmasked form supplies a zero vector with a mask of (__mmask8)-1, and
// the maskz form supplies a zero vector with the caller's U.
#define _mm_mask_ipcvtts_ps_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(W), (__mmask8)(U)))

#define _mm_ipcvtts_ps_epi8(A) \
  _mm_mask_ipcvtts_ps_epi8(_mm_setzero_si128(), (__mmask8)-1, (A))

#define _mm_maskz_ipcvtts_ps_epi8(U, A) \
  _mm_mask_ipcvtts_ps_epi8(_mm_setzero_si128(), (U), (A))
276
// 256-bit single-precision source, signed-byte (epi8) family built on the
// "vcvtt" builtin (presumably the truncating counterpart of
// _mm256_ipcvts_ps_epi8). Every form ends in
// __builtin_ia32_vcvttps2ibs256_mask, called with A as __v8sf, W as __v8su
// and U as __mmask8. The mask form is the only one that names the builtin;
// the unmasked form supplies a zero vector with a mask of (__mmask8)-1, and
// the maskz form supplies a zero vector with the caller's U.
#define _mm256_mask_ipcvtts_ps_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U)))

#define _mm256_ipcvtts_ps_epi8(A) \
  _mm256_mask_ipcvtts_ps_epi8(_mm256_setzero_si256(), (__mmask8)-1, (A))

#define _mm256_maskz_ipcvtts_ps_epi8(U, A) \
  _mm256_mask_ipcvtts_ps_epi8(_mm256_setzero_si256(), (U), (A))
288
// 128-bit single-precision source, unsigned-byte (epu8) family built on the
// "vcvtt" builtin (presumably the truncating counterpart of
// _mm_ipcvts_ps_epu8). Every form ends in
// __builtin_ia32_vcvttps2iubs128_mask, called with A as __v4sf, W as __v4su
// and U as __mmask8. The mask form is the only one that names the builtin;
// the unmasked form supplies a zero vector with a mask of (__mmask8)-1, and
// the maskz form supplies a zero vector with the caller's U.
#define _mm_mask_ipcvtts_ps_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(W), (__mmask8)(U)))

#define _mm_ipcvtts_ps_epu8(A) \
  _mm_mask_ipcvtts_ps_epu8(_mm_setzero_si128(), (__mmask8)-1, (A))

#define _mm_maskz_ipcvtts_ps_epu8(U, A) \
  _mm_mask_ipcvtts_ps_epu8(_mm_setzero_si128(), (U), (A))
300
// 256-bit single-precision source, unsigned-byte (epu8) family built on the
// "vcvtt" builtin (presumably the truncating counterpart of
// _mm256_ipcvts_ps_epu8). Every form ends in
// __builtin_ia32_vcvttps2iubs256_mask, called with A as __v8sf, W as __v8su
// and U as __mmask8. The mask form is the only one that names the builtin;
// the unmasked form supplies a zero vector with a mask of (__mmask8)-1, and
// the maskz form supplies a zero vector with the caller's U.
#define _mm256_mask_ipcvtts_ps_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U)))

#define _mm256_ipcvtts_ps_epu8(A) \
  _mm256_mask_ipcvtts_ps_epu8(_mm256_setzero_si256(), (__mmask8)-1, (A))

#define _mm256_maskz_ipcvtts_ps_epu8(U, A) \
  _mm256_mask_ipcvtts_ps_epu8(_mm256_setzero_si256(), (U), (A))
312#endif // __AVX10_2SATCVTINTRIN_H