/* zig/lib/include/f16cintrin.h at master */

/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

 10#if !defined __IMMINTRIN_H
 11#error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
 12#endif
 13
 14#ifndef __F16CINTRIN_H
 15#define __F16CINTRIN_H
 16
 17/* Define the default attributes for the functions in this file. */
 18#define __DEFAULT_FN_ATTRS128 \
 19  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128)))
 20#define __DEFAULT_FN_ATTRS256 \
 21  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))
 22
/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
 * but that's because icc can emulate these without f16c using a library call.
 * Since we don't do that, let's leave these in f16cintrin.h.
 */

 28/// Converts a 16-bit half-precision float value into a 32-bit float
 29///    value.
 30///
 31/// \headerfile <x86intrin.h>
 32///
 33/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
 34///
 35/// \param __a
 36///    A 16-bit half-precision float value.
 37/// \returns The converted 32-bit float value.
 38static __inline float __DEFAULT_FN_ATTRS128
 39_cvtsh_ss(unsigned short __a)
 40{
 41  __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
 42  __v4sf __r = __builtin_ia32_vcvtph2ps(__v);
 43  return __r[0];
 44}
 45
 46/// Converts a 32-bit single-precision float value to a 16-bit
 47///    half-precision float value.
 48///
 49/// \headerfile <x86intrin.h>
 50///
 51/// \code
 52/// unsigned short _cvtss_sh(float a, const int imm);
 53/// \endcode
 54///
 55/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
 56///
 57/// \param a
 58///    A 32-bit single-precision float value to be converted to a 16-bit
 59///    half-precision float value.
 60/// \param imm
 61///    An immediate value controlling rounding using bits [2:0]: \n
 62///    000: Nearest \n
 63///    001: Down \n
 64///    010: Up \n
 65///    011: Truncate \n
 66///    1XX: Use MXCSR.RC for rounding
 67/// \returns The converted 16-bit half-precision float value.
 68#define _cvtss_sh(a, imm) __extension__ ({ \
 69  (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \
 70                                                     (imm)))[0]); })
 71
 72/// Converts a 128-bit vector containing 32-bit float values into a
 73///    128-bit vector containing 16-bit half-precision float values.
 74///
 75/// \headerfile <x86intrin.h>
 76///
 77/// \code
 78/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
 79/// \endcode
 80///
 81/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
 82///
 83/// \param a
 84///    A 128-bit vector containing 32-bit float values.
 85/// \param imm
 86///    An immediate value controlling rounding using bits [2:0]: \n
 87///    000: Nearest \n
 88///    001: Down \n
 89///    010: Up \n
 90///    011: Truncate \n
 91///    1XX: Use MXCSR.RC for rounding
 92/// \returns A 128-bit vector containing converted 16-bit half-precision float
 93///    values. The lower 64 bits are used to store the converted 16-bit
 94///    half-precision floating-point values.
 95#define _mm_cvtps_ph(a, imm) \
 96  ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)))
 97
 98/// Converts a 128-bit vector containing 16-bit half-precision float
 99///    values into a 128-bit vector containing 32-bit float values.
100///
101/// \headerfile <x86intrin.h>
102///
103/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
104///
105/// \param __a
106///    A 128-bit vector containing 16-bit half-precision float values. The lower
107///    64 bits are used in the conversion.
108/// \returns A 128-bit vector of [4 x float] containing converted float values.
109static __inline __m128 __DEFAULT_FN_ATTRS128
110_mm_cvtph_ps(__m128i __a)
111{
112  return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
113}
114
/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
///    containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 256-bit vector containing 32-bit single-precision float values to be
///    converted to 16-bit half-precision float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values.
#define _mm256_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))

141/// Converts a 128-bit vector containing 16-bit half-precision float
142///    values into a 256-bit vector of [8 x float].
143///
144/// \headerfile <x86intrin.h>
145///
146/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
147///
148/// \param __a
149///    A 128-bit vector containing 16-bit half-precision float values to be
150///    converted to 32-bit single-precision float values.
151/// \returns A vector of [8 x float] containing the converted 32-bit
152///    single-precision float values.
153static __inline __m256 __DEFAULT_FN_ATTRS256
154_mm256_cvtph_ps(__m128i __a)
155{
156  return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
157}
158
159#undef __DEFAULT_FN_ATTRS128
160#undef __DEFAULT_FN_ATTRS256
161
162#endif /* __F16CINTRIN_H */