/*! @header
 * The interfaces declared in this header provide "common" elementwise
 * operations that are neither math nor logic functions. These are available
 * only for floating-point vectors and scalars, except for min, max, abs,
 * clamp, and the reduce operations, which also support integer vectors.
 *
 * simd_abs(x)              Absolute value of x. Also available as fabs
 *                          for floating-point vectors. If x is the
 *                          smallest signed integer, x is returned.
 *
 * simd_max(x,y)            Returns the maximum of x and y. Also available
 *                          as fmax for floating-point vectors.
 *
 * simd_min(x,y)            Returns the minimum of x and y. Also available
 *                          as fmin for floating-point vectors.
 *
 * simd_clamp(x,min,max)    x clamped to the range [min, max].
 *
 * simd_sign(x)             -1 if x is less than zero, 0 if x is zero or
 *                          NaN, and +1 if x is greater than zero.
 *
 * simd_mix(x,y,t)          If t is not in the range [0,1], the result is
 * simd_lerp(x,y,t)         undefined. Otherwise the result is x+(y-x)*t,
 *                          which linearly interpolates between x and y.
 *
 * simd_recip(x)            An approximation to 1/x. If x is very near the
 *                          limits of representable values, or is infinity
 *                          or NaN, the result is undefined. There are
 *                          two variants of this function:
 *
 *                          simd_precise_recip(x)
 *
 *                          and
 *
 *                          simd_fast_recip(x).
 *
 *                          The "precise" variant is accurate to a few ULPs,
 *                          whereas the "fast" variant may have as little
 *                          as 11 bits of accuracy in float and about 22
 *                          bits in double.
 *
 *                          The function simd_recip(x) resolves to
 *                          simd_precise_recip(x) ordinarily, but to
 *                          simd_fast_recip(x) when used in a translation
 *                          unit compiled with -ffast-math (when
 *                          -ffast-math is in effect, you may still use the
 *                          precise version of this function by calling it
 *                          explicitly by name).
 *
 * simd_rsqrt(x)            An approximation to 1/sqrt(x). If x is
 *                          infinity or NaN, the result is undefined.
 *                          There are two variants of this function:
 *
 *                          simd_precise_rsqrt(x)
 *
 *                          and
 *
 *                          simd_fast_rsqrt(x).
 *
 *                          The "precise" variant is accurate to a few ULPs,
 *                          whereas the "fast" variant may have as little
 *                          as 11 bits of accuracy in float and about 22
 *                          bits in double.
 *
 *                          The function simd_rsqrt(x) resolves to
 *                          simd_precise_rsqrt(x) ordinarily, but to
 *                          simd_fast_rsqrt(x) when used in a translation
 *                          unit compiled with -ffast-math (when
 *                          -ffast-math is in effect, you may still use the
 *                          precise version of this function by calling it
 *                          explicitly by name).
 *
 * simd_fract(x)            The "fractional part" of x, which lies strictly
 *                          in the range [0, 0x1.fffffep-1].
 *
 * simd_step(edge,x)        0 if x < edge, and 1 otherwise.
 *
 * simd_smoothstep(edge0,edge1,x)  0 if x <= edge0, 1 if x >= edge1, and
 *                          a Hermite interpolation between 0 and 1 if
 *                          edge0 < x < edge1.
 *
 * simd_reduce_add(x)       Sum of the elements of x.
 *
 * simd_reduce_min(x)       Minimum of the elements of x.
 *
 * simd_reduce_max(x)       Maximum of the elements of x.
 *
 * simd_equal(x,y)          True if and only if every lane of x is equal
 *                          to the corresponding lane of y.
 *
 * The following common functions are available in the simd:: namespace:
 *
 *      C++ Function                    Equivalent C Function
 *      --------------------------------------------------------------------
 *      simd::abs(x)                    simd_abs(x)
 *      simd::max(x,y)                  simd_max(x,y)
 *      simd::min(x,y)                  simd_min(x,y)
 *      simd::clamp(x,min,max)          simd_clamp(x,min,max)
 *      simd::sign(x)                   simd_sign(x)
 *      simd::mix(x,y,t)                simd_mix(x,y,t)
 *      simd::lerp(x,y,t)               simd_lerp(x,y,t)
 *      simd::recip(x)                  simd_recip(x)
 *      simd::rsqrt(x)                  simd_rsqrt(x)
 *      simd::fract(x)                  simd_fract(x)
 *      simd::step(edge,x)              simd_step(edge,x)
 *      simd::smoothstep(e0,e1,x)       simd_smoothstep(e0,e1,x)
 *      simd::reduce_add(x)             simd_reduce_add(x)
 *      simd::reduce_max(x)             simd_reduce_max(x)
 *      simd::reduce_min(x)             simd_reduce_min(x)
 *      simd::equal(x,y)                simd_equal(x,y)
 *
 *      simd::precise::recip(x)         simd_precise_recip(x)
 *      simd::precise::rsqrt(x)         simd_precise_rsqrt(x)
 *
 *      simd::fast::recip(x)            simd_fast_recip(x)
 *      simd::fast::rsqrt(x)            simd_fast_rsqrt(x)
 *
 * @copyright 2014-2017 Apple, Inc. All rights reserved.
 * @unsorted */
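
/* Usage sketch (illustrative, not part of the original header): the C
 * spellings below and the simd:: spellings in the table above name the same
 * operations. Values and variable names are examples only.
 *
 *     simd_float4 v = simd_make_float4(-2.0f, -0.5f, 0.0f, 3.0f);
 *     simd_float4 a = simd_abs(v);          // { 2, 0.5, 0, 3 }
 *     float       m = simd_reduce_max(a);   // 3
 *     // C++: auto a = simd::abs(v);  auto m = simd::reduce_max(a);
 */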

#ifndef SIMD_COMMON_HEADER
#define SIMD_COMMON_HEADER

#include <simd/base.h>
#if SIMD_COMPILER_HAS_REQUIRED_FEATURES
#include <simd/vector_make.h>
#include <simd/logic.h>
#include <simd/math.h>

#ifdef __cplusplus
extern "C" {
#endif

/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char2 simd_abs(simd_char2 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char3 simd_abs(simd_char3 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char4 simd_abs(simd_char4 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char8 simd_abs(simd_char8 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char16 simd_abs(simd_char16 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char32 simd_abs(simd_char32 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char64 simd_abs(simd_char64 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_short2 simd_abs(simd_short2 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_short3 simd_abs(simd_short3 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_short4 simd_abs(simd_short4 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_short8 simd_abs(simd_short8 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_short16 simd_abs(simd_short16 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_short32 simd_abs(simd_short32 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_half2 simd_abs(simd_half2 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_half3 simd_abs(simd_half3 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_half4 simd_abs(simd_half4 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_half8 simd_abs(simd_half8 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_half16 simd_abs(simd_half16 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_half32 simd_abs(simd_half32 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_int2 simd_abs(simd_int2 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_int3 simd_abs(simd_int3 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_int4 simd_abs(simd_int4 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_int8 simd_abs(simd_int8 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_int16 simd_abs(simd_int16 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_float2 simd_abs(simd_float2 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_float3 simd_abs(simd_float3 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_float4 simd_abs(simd_float4 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_float8 simd_abs(simd_float8 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_float16 simd_abs(simd_float16 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_long2 simd_abs(simd_long2 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_long3 simd_abs(simd_long3 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_long4 simd_abs(simd_long4 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_long8 simd_abs(simd_long8 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_double2 simd_abs(simd_double2 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_double3 simd_abs(simd_double3 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_double4 simd_abs(simd_double4 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_double8 simd_abs(simd_double8 x);
/*! @abstract The elementwise absolute value of x.
 * @discussion Deprecated. Use simd_abs(x) instead. */
#define vector_abs simd_abs

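/* Sketch of the integer caveat noted above (illustrative values): the
 * magnitude of the most negative value of a signed type is not
 * representable, so simd_abs returns it unchanged.
 *
 *     simd_char2 c = { -128, -5 };
 *     simd_char2 r = simd_abs(c);   // { -128, 5 }
 */
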
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char2 simd_max(simd_char2 x, simd_char2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char3 simd_max(simd_char3 x, simd_char3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char4 simd_max(simd_char4 x, simd_char4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char8 simd_max(simd_char8 x, simd_char8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char16 simd_max(simd_char16 x, simd_char16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char32 simd_max(simd_char32 x, simd_char32 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char64 simd_max(simd_char64 x, simd_char64 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar2 simd_max(simd_uchar2 x, simd_uchar2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar3 simd_max(simd_uchar3 x, simd_uchar3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar4 simd_max(simd_uchar4 x, simd_uchar4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar8 simd_max(simd_uchar8 x, simd_uchar8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar16 simd_max(simd_uchar16 x, simd_uchar16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar32 simd_max(simd_uchar32 x, simd_uchar32 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar64 simd_max(simd_uchar64 x, simd_uchar64 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_short2 simd_max(simd_short2 x, simd_short2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_short3 simd_max(simd_short3 x, simd_short3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_short4 simd_max(simd_short4 x, simd_short4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_short8 simd_max(simd_short8 x, simd_short8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_short16 simd_max(simd_short16 x, simd_short16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_short32 simd_max(simd_short32 x, simd_short32 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ushort2 simd_max(simd_ushort2 x, simd_ushort2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ushort3 simd_max(simd_ushort3 x, simd_ushort3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ushort4 simd_max(simd_ushort4 x, simd_ushort4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ushort8 simd_max(simd_ushort8 x, simd_ushort8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ushort16 simd_max(simd_ushort16 x, simd_ushort16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ushort32 simd_max(simd_ushort32 x, simd_ushort32 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC _Float16 simd_max(_Float16 x, _Float16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_half2 simd_max(simd_half2 x, simd_half2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_half3 simd_max(simd_half3 x, simd_half3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_half4 simd_max(simd_half4 x, simd_half4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_half8 simd_max(simd_half8 x, simd_half8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_half16 simd_max(simd_half16 x, simd_half16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_half32 simd_max(simd_half32 x, simd_half32 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_int2 simd_max(simd_int2 x, simd_int2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_int3 simd_max(simd_int3 x, simd_int3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_int4 simd_max(simd_int4 x, simd_int4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_int8 simd_max(simd_int8 x, simd_int8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_int16 simd_max(simd_int16 x, simd_int16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uint2 simd_max(simd_uint2 x, simd_uint2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uint3 simd_max(simd_uint3 x, simd_uint3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uint4 simd_max(simd_uint4 x, simd_uint4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uint8 simd_max(simd_uint8 x, simd_uint8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uint16 simd_max(simd_uint16 x, simd_uint16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC float simd_max(float x, float y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_float2 simd_max(simd_float2 x, simd_float2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_float3 simd_max(simd_float3 x, simd_float3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_float4 simd_max(simd_float4 x, simd_float4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_float8 simd_max(simd_float8 x, simd_float8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_float16 simd_max(simd_float16 x, simd_float16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_long2 simd_max(simd_long2 x, simd_long2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_long3 simd_max(simd_long3 x, simd_long3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_long4 simd_max(simd_long4 x, simd_long4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_long8 simd_max(simd_long8 x, simd_long8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ulong2 simd_max(simd_ulong2 x, simd_ulong2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ulong3 simd_max(simd_ulong3 x, simd_ulong3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ulong4 simd_max(simd_ulong4 x, simd_ulong4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ulong8 simd_max(simd_ulong8 x, simd_ulong8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC double simd_max(double x, double y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_double2 simd_max(simd_double2 x, simd_double2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_double3 simd_max(simd_double3 x, simd_double3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_double4 simd_max(simd_double4 x, simd_double4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_double8 simd_max(simd_double8 x, simd_double8 y);
/*! @abstract The elementwise maximum of x and y.
 * @discussion Deprecated. Use simd_max(x,y) instead. */
#define vector_max simd_max

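/* Elementwise max sketch (illustrative values): each lane is compared
 * independently; simd_min, declared below, is the elementwise counterpart.
 *
 *     simd_int4 x  = { 1, 7, -3, 4 };
 *     simd_int4 y  = { 2, 5, -8, 4 };
 *     simd_int4 hi = simd_max(x, y);   // { 2, 7, -3, 4 }
 */
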
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char2 simd_min(simd_char2 x, simd_char2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char3 simd_min(simd_char3 x, simd_char3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char4 simd_min(simd_char4 x, simd_char4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char8 simd_min(simd_char8 x, simd_char8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char16 simd_min(simd_char16 x, simd_char16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char32 simd_min(simd_char32 x, simd_char32 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char64 simd_min(simd_char64 x, simd_char64 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar2 simd_min(simd_uchar2 x, simd_uchar2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar3 simd_min(simd_uchar3 x, simd_uchar3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar4 simd_min(simd_uchar4 x, simd_uchar4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar8 simd_min(simd_uchar8 x, simd_uchar8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar16 simd_min(simd_uchar16 x, simd_uchar16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar32 simd_min(simd_uchar32 x, simd_uchar32 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar64 simd_min(simd_uchar64 x, simd_uchar64 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_short2 simd_min(simd_short2 x, simd_short2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_short3 simd_min(simd_short3 x, simd_short3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_short4 simd_min(simd_short4 x, simd_short4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_short8 simd_min(simd_short8 x, simd_short8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_short16 simd_min(simd_short16 x, simd_short16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_short32 simd_min(simd_short32 x, simd_short32 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ushort2 simd_min(simd_ushort2 x, simd_ushort2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ushort3 simd_min(simd_ushort3 x, simd_ushort3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ushort4 simd_min(simd_ushort4 x, simd_ushort4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ushort8 simd_min(simd_ushort8 x, simd_ushort8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ushort16 simd_min(simd_ushort16 x, simd_ushort16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ushort32 simd_min(simd_ushort32 x, simd_ushort32 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC _Float16 simd_min(_Float16 x, _Float16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_half2 simd_min(simd_half2 x, simd_half2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_half3 simd_min(simd_half3 x, simd_half3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_half4 simd_min(simd_half4 x, simd_half4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_half8 simd_min(simd_half8 x, simd_half8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_half16 simd_min(simd_half16 x, simd_half16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_half32 simd_min(simd_half32 x, simd_half32 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_int2 simd_min(simd_int2 x, simd_int2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_int3 simd_min(simd_int3 x, simd_int3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_int4 simd_min(simd_int4 x, simd_int4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_int8 simd_min(simd_int8 x, simd_int8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_int16 simd_min(simd_int16 x, simd_int16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uint2 simd_min(simd_uint2 x, simd_uint2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uint3 simd_min(simd_uint3 x, simd_uint3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uint4 simd_min(simd_uint4 x, simd_uint4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uint8 simd_min(simd_uint8 x, simd_uint8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uint16 simd_min(simd_uint16 x, simd_uint16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC float simd_min(float x, float y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_float2 simd_min(simd_float2 x, simd_float2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_float3 simd_min(simd_float3 x, simd_float3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_float4 simd_min(simd_float4 x, simd_float4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_float8 simd_min(simd_float8 x, simd_float8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_float16 simd_min(simd_float16 x, simd_float16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_long2 simd_min(simd_long2 x, simd_long2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_long3 simd_min(simd_long3 x, simd_long3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_long4 simd_min(simd_long4 x, simd_long4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_long8 simd_min(simd_long8 x, simd_long8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ulong2 simd_min(simd_ulong2 x, simd_ulong2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ulong3 simd_min(simd_ulong3 x, simd_ulong3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ulong4 simd_min(simd_ulong4 x, simd_ulong4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ulong8 simd_min(simd_ulong8 x, simd_ulong8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC double simd_min(double x, double y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_double2 simd_min(simd_double2 x, simd_double2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_double3 simd_min(simd_double3 x, simd_double3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_double4 simd_min(simd_double4 x, simd_double4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_double8 simd_min(simd_double8 x, simd_double8 y);
/*! @abstract The elementwise minimum of x and y.
 * @discussion Deprecated. Use simd_min(x,y) instead. */
#define vector_min simd_min


/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char2 simd_clamp(simd_char2 x, simd_char2 min, simd_char2 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char3 simd_clamp(simd_char3 x, simd_char3 min, simd_char3 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char4 simd_clamp(simd_char4 x, simd_char4 min, simd_char4 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char8 simd_clamp(simd_char8 x, simd_char8 min, simd_char8 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char16 simd_clamp(simd_char16 x, simd_char16 min, simd_char16 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char32 simd_clamp(simd_char32 x, simd_char32 min, simd_char32 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char64 simd_clamp(simd_char64 x, simd_char64 min, simd_char64 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar2 simd_clamp(simd_uchar2 x, simd_uchar2 min, simd_uchar2 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar3 simd_clamp(simd_uchar3 x, simd_uchar3 min, simd_uchar3 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar4 simd_clamp(simd_uchar4 x, simd_uchar4 min, simd_uchar4 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar8 simd_clamp(simd_uchar8 x, simd_uchar8 min, simd_uchar8 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar16 simd_clamp(simd_uchar16 x, simd_uchar16 min, simd_uchar16 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar32 simd_clamp(simd_uchar32 x, simd_uchar32 min, simd_uchar32 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar64 simd_clamp(simd_uchar64 x, simd_uchar64 min, simd_uchar64 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_short2 simd_clamp(simd_short2 x, simd_short2 min, simd_short2 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_short3 simd_clamp(simd_short3 x, simd_short3 min, simd_short3 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_short4 simd_clamp(simd_short4 x, simd_short4 min, simd_short4 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_short8 simd_clamp(simd_short8 x, simd_short8 min, simd_short8 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_short16 simd_clamp(simd_short16 x, simd_short16 min, simd_short16 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_short32 simd_clamp(simd_short32 x, simd_short32 min, simd_short32 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ushort2 simd_clamp(simd_ushort2 x, simd_ushort2 min, simd_ushort2 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ushort3 simd_clamp(simd_ushort3 x, simd_ushort3 min, simd_ushort3 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ushort4 simd_clamp(simd_ushort4 x, simd_ushort4 min, simd_ushort4 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ushort8 simd_clamp(simd_ushort8 x, simd_ushort8 min, simd_ushort8 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ushort16 simd_clamp(simd_ushort16 x, simd_ushort16 min, simd_ushort16 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ushort32 simd_clamp(simd_ushort32 x, simd_ushort32 min, simd_ushort32 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC _Float16 simd_clamp(_Float16 x, _Float16 min, _Float16 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_half2 simd_clamp(simd_half2 x, simd_half2 min, simd_half2 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_half3 simd_clamp(simd_half3 x, simd_half3 min, simd_half3 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_half4 simd_clamp(simd_half4 x, simd_half4 min, simd_half4 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_half8 simd_clamp(simd_half8 x, simd_half8 min, simd_half8 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_half16 simd_clamp(simd_half16 x, simd_half16 min, simd_half16 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_half32 simd_clamp(simd_half32 x, simd_half32 min, simd_half32 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_int2 simd_clamp(simd_int2 x, simd_int2 min, simd_int2 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_int3 simd_clamp(simd_int3 x, simd_int3 min, simd_int3 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_int4 simd_clamp(simd_int4 x, simd_int4 min, simd_int4 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_int8 simd_clamp(simd_int8 x, simd_int8 min, simd_int8 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_int16 simd_clamp(simd_int16 x, simd_int16 min, simd_int16 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uint2 simd_clamp(simd_uint2 x, simd_uint2 min, simd_uint2 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uint3 simd_clamp(simd_uint3 x, simd_uint3 min, simd_uint3 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uint4 simd_clamp(simd_uint4 x, simd_uint4 min, simd_uint4 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uint8 simd_clamp(simd_uint8 x, simd_uint8 min, simd_uint8 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uint16 simd_clamp(simd_uint16 x, simd_uint16 min, simd_uint16 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC float simd_clamp(float x, float min, float max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_float2 simd_clamp(simd_float2 x, simd_float2 min, simd_float2 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_float3 simd_clamp(simd_float3 x, simd_float3 min, simd_float3 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_float4 simd_clamp(simd_float4 x, simd_float4 min, simd_float4 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_float8 simd_clamp(simd_float8 x, simd_float8 min, simd_float8 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_float16 simd_clamp(simd_float16 x, simd_float16 min, simd_float16 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_long2 simd_clamp(simd_long2 x, simd_long2 min, simd_long2 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_long3 simd_clamp(simd_long3 x, simd_long3 min, simd_long3 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_long4 simd_clamp(simd_long4 x, simd_long4 min, simd_long4 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_long8 simd_clamp(simd_long8 x, simd_long8 min, simd_long8 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ulong2 simd_clamp(simd_ulong2 x, simd_ulong2 min, simd_ulong2 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ulong3 simd_clamp(simd_ulong3 x, simd_ulong3 min, simd_ulong3 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ulong4 simd_clamp(simd_ulong4 x, simd_ulong4 min, simd_ulong4 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ulong8 simd_clamp(simd_ulong8 x, simd_ulong8 min, simd_ulong8 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC double simd_clamp(double x, double min, double max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_double2 simd_clamp(simd_double2 x, simd_double2 min, simd_double2 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_double3 simd_clamp(simd_double3 x, simd_double3 min, simd_double3 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_double4 simd_clamp(simd_double4 x, simd_double4 min, simd_double4 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Note that if you want to clamp all lanes to the same range,
 * you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_double8 simd_clamp(simd_double8 x, simd_double8 min, simd_double8 max);
/*! @abstract x clamped to the range [min, max].
 * @discussion Deprecated. Use simd_clamp(x,min,max) instead. */
#define vector_clamp simd_clamp

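/* Clamp sketch (illustrative values): as the discussions above note, a
 * scalar min and max can be used to clamp every lane to the same range.
 *
 *     simd_float4 x = { -0.5f, 0.25f, 1.5f, 0.75f };
 *     simd_float4 r = simd_clamp(x, 0.0f, 1.0f);   // { 0, 0.25, 1, 0.75 }
 */
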
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC _Float16 simd_sign(_Float16 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_half2 simd_sign(simd_half2 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_half3 simd_sign(simd_half3 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_half4 simd_sign(simd_half4 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_half8 simd_sign(simd_half8 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_half16 simd_sign(simd_half16 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_half32 simd_sign(simd_half32 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC float simd_sign(float x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_float2 simd_sign(simd_float2 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_float3 simd_sign(simd_float3 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_float4 simd_sign(simd_float4 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_float8 simd_sign(simd_float8 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_float16 simd_sign(simd_float16 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC double simd_sign(double x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_double2 simd_sign(simd_double2 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_double3 simd_sign(simd_double3 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_double4 simd_sign(simd_double4 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_double8 simd_sign(simd_double8 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise.
 * @discussion Deprecated. Use simd_sign(x) instead. */
#define vector_sign simd_sign

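/* Sign sketch (illustrative values): each lane maps to -1, 0, or +1, and
 * NaN lanes map to 0 as documented in the header comment.
 *
 *     simd_float4 x = { -3.0f, 0.0f, 2.5f, NAN };   // NAN from <math.h>
 *     simd_float4 s = simd_sign(x);                 // { -1, 0, 1, 0 }
 */
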
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC _Float16 simd_mix(_Float16 x, _Float16 y, _Float16 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_half2 simd_mix(simd_half2 x, simd_half2 y, simd_half2 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_half3 simd_mix(simd_half3 x, simd_half3 y, simd_half3 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_half4 simd_mix(simd_half4 x, simd_half4 y, simd_half4 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_half8 simd_mix(simd_half8 x, simd_half8 y, simd_half8 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_half16 simd_mix(simd_half16 x, simd_half16 y, simd_half16 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_half32 simd_mix(simd_half32 x, simd_half32 y, simd_half32 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC float simd_mix(float x, float y, float t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_float2 simd_mix(simd_float2 x, simd_float2 y, simd_float2 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_float3 simd_mix(simd_float3 x, simd_float3 y, simd_float3 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_float4 simd_mix(simd_float4 x, simd_float4 y, simd_float4 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_float8 simd_mix(simd_float8 x, simd_float8 y, simd_float8 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_float16 simd_mix(simd_float16 x, simd_float16 y, simd_float16 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC double simd_mix(double x, double y, double t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_double2 simd_mix(simd_double2 x, simd_double2 y, simd_double2 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_double3 simd_mix(simd_double3 x, simd_double3 y, simd_double3 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_double4 simd_mix(simd_double4 x, simd_double4 y, simd_double4 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1 */
static inline SIMD_CFUNC simd_double8 simd_mix(simd_double8 x, simd_double8 y, simd_double8 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
 * t=0 and y when t=1
 * @discussion Deprecated. Use simd_mix(x, y, t) instead. */
#define vector_mix simd_mix
#define simd_lerp simd_mix

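/* Interpolation sketch (illustrative values): simd_mix computes
 * x + (y - x) * t per lane, so t = 0 yields x and t = 1 yields y; behavior
 * for t outside [0, 1] is undefined, per the header comment.
 *
 *     simd_float2 a = {  0.0f, 10.0f };
 *     simd_float2 b = {  1.0f, 20.0f };
 *     simd_float2 t = { 0.25f,  0.5f };
 *     simd_float2 m = simd_mix(a, b, t);   // { 0.25, 15 }
 */
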
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC _Float16 simd_precise_recip(_Float16 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_half2 simd_precise_recip(simd_half2 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_half3 simd_precise_recip(simd_half3 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_half4 simd_precise_recip(simd_half4 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_half8 simd_precise_recip(simd_half8 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_half16 simd_precise_recip(simd_half16 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_half32 simd_precise_recip(simd_half32 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC float simd_precise_recip(float x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_float2 simd_precise_recip(simd_float2 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_float3 simd_precise_recip(simd_float3 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_float4 simd_precise_recip(simd_float4 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_float8 simd_precise_recip(simd_float8 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_float16 simd_precise_recip(simd_float16 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC double simd_precise_recip(double x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_double2 simd_precise_recip(simd_double2 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_double3 simd_precise_recip(simd_double3 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_double4 simd_precise_recip(simd_double4 x);
/*! @abstract A good approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_double8 simd_precise_recip(simd_double8 x);
/*! @abstract A good approximation to 1/x.
 * @discussion Deprecated. Use simd_precise_recip(x) instead. */
#define vector_precise_recip simd_precise_recip

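/* Reciprocal sketch (illustrative values): simd_precise_recip is accurate
 * to a few ULPs, simd_fast_recip (declared below) trades accuracy for
 * speed, and plain simd_recip resolves to one or the other depending on
 * whether -ffast-math is in effect, as described in the header comment.
 *
 *     simd_float4 x = { 2.0f, 4.0f, 0.5f, 3.0f };
 *     simd_float4 p = simd_precise_recip(x);   // ~{ 0.5, 0.25, 2, 0.333 }
 *     simd_float4 f = simd_fast_recip(x);      // lower-accuracy estimate
 */
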
/*! @abstract A fast approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC _Float16 simd_fast_recip(_Float16 x);
/*! @abstract A fast approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_half2 simd_fast_recip(simd_half2 x);
/*! @abstract A fast approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_half3 simd_fast_recip(simd_half3 x);
/*! @abstract A fast approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_half4 simd_fast_recip(simd_half4 x);
/*! @abstract A fast approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_half8 simd_fast_recip(simd_half8 x);
/*! @abstract A fast approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_half16 simd_fast_recip(simd_half16 x);
/*! @abstract A fast approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_half32 simd_fast_recip(simd_half32 x);
/*! @abstract A fast approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC float simd_fast_recip(float x);
/*! @abstract A fast approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_float2 simd_fast_recip(simd_float2 x);
/*! @abstract A fast approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_float3 simd_fast_recip(simd_float3 x);
/*! @abstract A fast approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_float4 simd_fast_recip(simd_float4 x);
/*! @abstract A fast approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
 * result may overflow or underflow; otherwise this function is accurate to
 * at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_float8 simd_fast_recip(simd_float8 x);
/*! @abstract A fast approximation to 1/x.
 * @discussion If x is very close to the limits of representation, the
977 * result may overflow or underflow; otherwise this function is accurate to
978 * at least 11 bits for float and 22 bits for double. */
979static inline SIMD_CFUNC simd_float16 simd_fast_recip(simd_float16 x);
980/*! @abstract A fast approximation to 1/x.
981 * @discussion If x is very close to the limits of representation, the
982 * result may overflow or underflow; otherwise this function is accurate to
983 * at least 11 bits for float and 22 bits for double. */
984static inline SIMD_CFUNC double simd_fast_recip(double x);
985/*! @abstract A fast approximation to 1/x.
986 * @discussion If x is very close to the limits of representation, the
987 * result may overflow or underflow; otherwise this function is accurate to
988 * at least 11 bits for float and 22 bits for double. */
989static inline SIMD_CFUNC simd_double2 simd_fast_recip(simd_double2 x);
990/*! @abstract A fast approximation to 1/x.
991 * @discussion If x is very close to the limits of representation, the
992 * result may overflow or underflow; otherwise this function is accurate to
993 * at least 11 bits for float and 22 bits for double. */
994static inline SIMD_CFUNC simd_double3 simd_fast_recip(simd_double3 x);
995/*! @abstract A fast approximation to 1/x.
996 * @discussion If x is very close to the limits of representation, the
997 * result may overflow or underflow; otherwise this function is accurate to
998 * at least 11 bits for float and 22 bits for double. */
999static inline SIMD_CFUNC simd_double4 simd_fast_recip(simd_double4 x);
1000/*! @abstract A fast approximation to 1/x.
1001 * @discussion If x is very close to the limits of representation, the
1002 * result may overflow or underflow; otherwise this function is accurate to
1003 * at least 11 bits for float and 22 bits for double. */
1004static inline SIMD_CFUNC simd_double8 simd_fast_recip(simd_double8 x);
1005/*! @abstract A fast approximation to 1/x.
1006 * @discussion Deprecated. Use simd_fast_recip(x) instead. */
1007#define vector_fast_recip simd_fast_recip
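
/* Example (illustrative sketch, not part of this header): the fast variant
 * trades accuracy (at least 11 bits for float) for speed, which is often
 * acceptable in graphics-style workloads. Assumes <simd/simd.h>.
 *
 *     simd_float4 denom  = { 3.0f, 5.0f, 7.0f, 9.0f };
 *     simd_float4 approx = simd_fast_recip(denom);   // each lane ~1/denom
 */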
1008
1009/*! @abstract An approximation to 1/x.
1010 * @discussion If x is very close to the limits of representation, the
1011 * result may overflow or underflow. This function maps to
1012 * simd_fast_recip(x) if -ffast-math is specified, and to
1013 * simd_precise_recip(x) otherwise. */
1014static inline SIMD_CFUNC _Float16 simd_recip(_Float16 x);
1015/*! @abstract An approximation to 1/x.
1016 * @discussion If x is very close to the limits of representation, the
1017 * result may overflow or underflow. This function maps to
1018 * simd_fast_recip(x) if -ffast-math is specified, and to
1019 * simd_precise_recip(x) otherwise. */
1020static inline SIMD_CFUNC simd_half2 simd_recip(simd_half2 x);
1021/*! @abstract An approximation to 1/x.
1022 * @discussion If x is very close to the limits of representation, the
1023 * result may overflow or underflow. This function maps to
1024 * simd_fast_recip(x) if -ffast-math is specified, and to
1025 * simd_precise_recip(x) otherwise. */
1026static inline SIMD_CFUNC simd_half3 simd_recip(simd_half3 x);
1027/*! @abstract An approximation to 1/x.
1028 * @discussion If x is very close to the limits of representation, the
1029 * result may overflow or underflow. This function maps to
1030 * simd_fast_recip(x) if -ffast-math is specified, and to
1031 * simd_precise_recip(x) otherwise. */
1032static inline SIMD_CFUNC simd_half4 simd_recip(simd_half4 x);
1033/*! @abstract An approximation to 1/x.
1034 * @discussion If x is very close to the limits of representation, the
1035 * result may overflow or underflow. This function maps to
1036 * simd_fast_recip(x) if -ffast-math is specified, and to
1037 * simd_precise_recip(x) otherwise. */
1038static inline SIMD_CFUNC simd_half8 simd_recip(simd_half8 x);
1039/*! @abstract An approximation to 1/x.
1040 * @discussion If x is very close to the limits of representation, the
1041 * result may overflow or underflow. This function maps to
1042 * simd_fast_recip(x) if -ffast-math is specified, and to
1043 * simd_precise_recip(x) otherwise. */
1044static inline SIMD_CFUNC simd_half16 simd_recip(simd_half16 x);
1045/*! @abstract An approximation to 1/x.
1046 * @discussion If x is very close to the limits of representation, the
1047 * result may overflow or underflow. This function maps to
1048 * simd_fast_recip(x) if -ffast-math is specified, and to
1049 * simd_precise_recip(x) otherwise. */
1050static inline SIMD_CFUNC simd_half32 simd_recip(simd_half32 x);
1051/*! @abstract An approximation to 1/x.
1052 * @discussion If x is very close to the limits of representation, the
1053 * result may overflow or underflow. This function maps to
1054 * simd_fast_recip(x) if -ffast-math is specified, and to
1055 * simd_precise_recip(x) otherwise. */
1056static inline SIMD_CFUNC float simd_recip(float x);
1057/*! @abstract An approximation to 1/x.
1058 * @discussion If x is very close to the limits of representation, the
1059 * result may overflow or underflow. This function maps to
1060 * simd_fast_recip(x) if -ffast-math is specified, and to
1061 * simd_precise_recip(x) otherwise. */
1062static inline SIMD_CFUNC simd_float2 simd_recip(simd_float2 x);
1063/*! @abstract An approximation to 1/x.
1064 * @discussion If x is very close to the limits of representation, the
1065 * result may overflow or underflow. This function maps to
1066 * simd_fast_recip(x) if -ffast-math is specified, and to
1067 * simd_precise_recip(x) otherwise. */
1068static inline SIMD_CFUNC simd_float3 simd_recip(simd_float3 x);
1069/*! @abstract An approximation to 1/x.
1070 * @discussion If x is very close to the limits of representation, the
1071 * result may overflow or underflow. This function maps to
1072 * simd_fast_recip(x) if -ffast-math is specified, and to
1073 * simd_precise_recip(x) otherwise. */
1074static inline SIMD_CFUNC simd_float4 simd_recip(simd_float4 x);
1075/*! @abstract An approximation to 1/x.
1076 * @discussion If x is very close to the limits of representation, the
1077 * result may overflow or underflow. This function maps to
1078 * simd_fast_recip(x) if -ffast-math is specified, and to
1079 * simd_precise_recip(x) otherwise. */
1080static inline SIMD_CFUNC simd_float8 simd_recip(simd_float8 x);
1081/*! @abstract An approximation to 1/x.
1082 * @discussion If x is very close to the limits of representation, the
1083 * result may overflow or underflow. This function maps to
1084 * simd_fast_recip(x) if -ffast-math is specified, and to
1085 * simd_precise_recip(x) otherwise. */
1086static inline SIMD_CFUNC simd_float16 simd_recip(simd_float16 x);
1087/*! @abstract An approximation to 1/x.
1088 * @discussion If x is very close to the limits of representation, the
1089 * result may overflow or underflow. This function maps to
1090 * simd_fast_recip(x) if -ffast-math is specified, and to
1091 * simd_precise_recip(x) otherwise. */
1092static inline SIMD_CFUNC double simd_recip(double x);
1093/*! @abstract An approximation to 1/x.
1094 * @discussion If x is very close to the limits of representation, the
1095 * result may overflow or underflow. This function maps to
1096 * simd_fast_recip(x) if -ffast-math is specified, and to
1097 * simd_precise_recip(x) otherwise. */
1098static inline SIMD_CFUNC simd_double2 simd_recip(simd_double2 x);
1099/*! @abstract An approximation to 1/x.
1100 * @discussion If x is very close to the limits of representation, the
1101 * result may overflow or underflow. This function maps to
1102 * simd_fast_recip(x) if -ffast-math is specified, and to
1103 * simd_precise_recip(x) otherwise. */
1104static inline SIMD_CFUNC simd_double3 simd_recip(simd_double3 x);
1105/*! @abstract An approximation to 1/x.
1106 * @discussion If x is very close to the limits of representation, the
1107 * result may overflow or underflow. This function maps to
1108 * simd_fast_recip(x) if -ffast-math is specified, and to
1109 * simd_precise_recip(x) otherwise. */
1110static inline SIMD_CFUNC simd_double4 simd_recip(simd_double4 x);
1111/*! @abstract An approximation to 1/x.
1112 * @discussion If x is very close to the limits of representation, the
1113 * result may overflow or underflow. This function maps to
1114 * simd_fast_recip(x) if -ffast-math is specified, and to
1115 * simd_precise_recip(x) otherwise. */
1116static inline SIMD_CFUNC simd_double8 simd_recip(simd_double8 x);
1117/*! @abstract An approximation to 1/x.
1118 * @discussion Deprecated. Use simd_recip(x) instead. */
1119#define vector_recip simd_recip
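
/* Example (illustrative sketch, not part of this header): simd_recip picks
 * the fast or precise variant based on whether the calling translation unit
 * is compiled with -ffast-math, so most code can simply call it directly.
 * Assumes <simd/simd.h>.
 *
 *     simd_double2 x = { 2.0, 10.0 };
 *     simd_double2 y = simd_recip(x);   // ~{ 0.5, 0.1 }; accuracy depends
 *                                       // on the build flags of this file
 */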
1120
1121/*! @abstract A good approximation to 1/sqrt(x).
1122 * @discussion This function is accurate to a few units in the last place
1123 * (ULPs). */
1124static inline SIMD_CFUNC _Float16 simd_precise_rsqrt(_Float16 x);
1125/*! @abstract A good approximation to 1/sqrt(x).
1126 * @discussion This function is accurate to a few units in the last place
1127 * (ULPs). */
1128static inline SIMD_CFUNC simd_half2 simd_precise_rsqrt(simd_half2 x);
1129/*! @abstract A good approximation to 1/sqrt(x).
1130 * @discussion This function is accurate to a few units in the last place
1131 * (ULPs). */
1132static inline SIMD_CFUNC simd_half3 simd_precise_rsqrt(simd_half3 x);
1133/*! @abstract A good approximation to 1/sqrt(x).
1134 * @discussion This function is accurate to a few units in the last place
1135 * (ULPs). */
1136static inline SIMD_CFUNC simd_half4 simd_precise_rsqrt(simd_half4 x);
1137/*! @abstract A good approximation to 1/sqrt(x).
1138 * @discussion This function is accurate to a few units in the last place
1139 * (ULPs). */
1140static inline SIMD_CFUNC simd_half8 simd_precise_rsqrt(simd_half8 x);
1141/*! @abstract A good approximation to 1/sqrt(x).
1142 * @discussion This function is accurate to a few units in the last place
1143 * (ULPs). */
1144static inline SIMD_CFUNC simd_half16 simd_precise_rsqrt(simd_half16 x);
1145/*! @abstract A good approximation to 1/sqrt(x).
1146 * @discussion This function is accurate to a few units in the last place
1147 * (ULPs). */
1148static inline SIMD_CFUNC simd_half32 simd_precise_rsqrt(simd_half32 x);
1149/*! @abstract A good approximation to 1/sqrt(x).
1150 * @discussion This function is accurate to a few units in the last place
1151 * (ULPs). */
1152static inline SIMD_CFUNC float simd_precise_rsqrt(float x);
1153/*! @abstract A good approximation to 1/sqrt(x).
1154 * @discussion This function is accurate to a few units in the last place
1155 * (ULPs). */
1156static inline SIMD_CFUNC simd_float2 simd_precise_rsqrt(simd_float2 x);
1157/*! @abstract A good approximation to 1/sqrt(x).
1158 * @discussion This function is accurate to a few units in the last place
1159 * (ULPs). */
1160static inline SIMD_CFUNC simd_float3 simd_precise_rsqrt(simd_float3 x);
1161/*! @abstract A good approximation to 1/sqrt(x).
1162 * @discussion This function is accurate to a few units in the last place
1163 * (ULPs). */
1164static inline SIMD_CFUNC simd_float4 simd_precise_rsqrt(simd_float4 x);
1165/*! @abstract A good approximation to 1/sqrt(x).
1166 * @discussion This function is accurate to a few units in the last place
1167 * (ULPs). */
1168static inline SIMD_CFUNC simd_float8 simd_precise_rsqrt(simd_float8 x);
1169/*! @abstract A good approximation to 1/sqrt(x).
1170 * @discussion This function is accurate to a few units in the last place
1171 * (ULPs). */
1172static inline SIMD_CFUNC simd_float16 simd_precise_rsqrt(simd_float16 x);
1173/*! @abstract A good approximation to 1/sqrt(x).
1174 * @discussion This function is accurate to a few units in the last place
1175 * (ULPs). */
1176static inline SIMD_CFUNC double simd_precise_rsqrt(double x);
1177/*! @abstract A good approximation to 1/sqrt(x).
1178 * @discussion This function is accurate to a few units in the last place
1179 * (ULPs). */
1180static inline SIMD_CFUNC simd_double2 simd_precise_rsqrt(simd_double2 x);
1181/*! @abstract A good approximation to 1/sqrt(x).
1182 * @discussion This function is accurate to a few units in the last place
1183 * (ULPs). */
1184static inline SIMD_CFUNC simd_double3 simd_precise_rsqrt(simd_double3 x);
1185/*! @abstract A good approximation to 1/sqrt(x).
1186 * @discussion This function is accurate to a few units in the last place
1187 * (ULPs). */
1188static inline SIMD_CFUNC simd_double4 simd_precise_rsqrt(simd_double4 x);
1189/*! @abstract A good approximation to 1/sqrt(x).
1190 * @discussion This function is accurate to a few units in the last place
1191 * (ULPs). */
1192static inline SIMD_CFUNC simd_double8 simd_precise_rsqrt(simd_double8 x);
1193/*! @abstract A good approximation to 1/sqrt(x).
1194 * @discussion Deprecated. Use simd_precise_rsqrt(x) instead. */
1195#define vector_precise_rsqrt simd_precise_rsqrt
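
/* Example (illustrative sketch, not part of this header): one common use of
 * a precise reciprocal square root is normalizing a vector by hand. Assumes
 * <simd/simd.h>; v is an illustrative local value.
 *
 *     simd_float3 v    = { 3.0f, 0.0f, 4.0f };
 *     float       len2 = simd_reduce_add(v * v);             // 25.0f
 *     simd_float3 unit = v * simd_precise_rsqrt(len2);       // ~{ 0.6, 0, 0.8 }
 */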
1196
1197/*! @abstract A fast approximation to 1/sqrt(x).
1198 * @discussion This function is accurate to at least 11 bits for float and
1199 * 22 bits for double. */
1200static inline SIMD_CFUNC _Float16 simd_fast_rsqrt(_Float16 x);
1201/*! @abstract A fast approximation to 1/sqrt(x).
1202 * @discussion This function is accurate to at least 11 bits for float and
1203 * 22 bits for double. */
1204static inline SIMD_CFUNC simd_half2 simd_fast_rsqrt(simd_half2 x);
1205/*! @abstract A fast approximation to 1/sqrt(x).
1206 * @discussion This function is accurate to at least 11 bits for float and
1207 * 22 bits for double. */
1208static inline SIMD_CFUNC simd_half3 simd_fast_rsqrt(simd_half3 x);
1209/*! @abstract A fast approximation to 1/sqrt(x).
1210 * @discussion This function is accurate to at least 11 bits for float and
1211 * 22 bits for double. */
1212static inline SIMD_CFUNC simd_half4 simd_fast_rsqrt(simd_half4 x);
1213/*! @abstract A fast approximation to 1/sqrt(x).
1214 * @discussion This function is accurate to at least 11 bits for float and
1215 * 22 bits for double. */
1216static inline SIMD_CFUNC simd_half8 simd_fast_rsqrt(simd_half8 x);
1217/*! @abstract A fast approximation to 1/sqrt(x).
1218 * @discussion This function is accurate to at least 11 bits for float and
1219 * 22 bits for double. */
1220static inline SIMD_CFUNC simd_half16 simd_fast_rsqrt(simd_half16 x);
1221/*! @abstract A fast approximation to 1/sqrt(x).
1222 * @discussion This function is accurate to at least 11 bits for float and
1223 * 22 bits for double. */
1224static inline SIMD_CFUNC simd_half32 simd_fast_rsqrt(simd_half32 x);
1225/*! @abstract A fast approximation to 1/sqrt(x).
1226 * @discussion This function is accurate to at least 11 bits for float and
1227 * 22 bits for double. */
1228static inline SIMD_CFUNC float simd_fast_rsqrt(float x);
1229/*! @abstract A fast approximation to 1/sqrt(x).
1230 * @discussion This function is accurate to at least 11 bits for float and
1231 * 22 bits for double. */
1232static inline SIMD_CFUNC simd_float2 simd_fast_rsqrt(simd_float2 x);
1233/*! @abstract A fast approximation to 1/sqrt(x).
1234 * @discussion This function is accurate to at least 11 bits for float and
1235 * 22 bits for double. */
1236static inline SIMD_CFUNC simd_float3 simd_fast_rsqrt(simd_float3 x);
1237/*! @abstract A fast approximation to 1/sqrt(x).
1238 * @discussion This function is accurate to at least 11 bits for float and
1239 * 22 bits for double. */
1240static inline SIMD_CFUNC simd_float4 simd_fast_rsqrt(simd_float4 x);
1241/*! @abstract A fast approximation to 1/sqrt(x).
1242 * @discussion This function is accurate to at least 11 bits for float and
1243 * 22 bits for double. */
1244static inline SIMD_CFUNC simd_float8 simd_fast_rsqrt(simd_float8 x);
1245/*! @abstract A fast approximation to 1/sqrt(x).
1246 * @discussion This function is accurate to at least 11 bits for float and
1247 * 22 bits for double. */
1248static inline SIMD_CFUNC simd_float16 simd_fast_rsqrt(simd_float16 x);
1249/*! @abstract A fast approximation to 1/sqrt(x).
1250 * @discussion This function is accurate to at least 11 bits for float and
1251 * 22 bits for double. */
1252static inline SIMD_CFUNC double simd_fast_rsqrt(double x);
1253/*! @abstract A fast approximation to 1/sqrt(x).
1254 * @discussion This function is accurate to at least 11 bits for float and
1255 * 22 bits for double. */
1256static inline SIMD_CFUNC simd_double2 simd_fast_rsqrt(simd_double2 x);
1257/*! @abstract A fast approximation to 1/sqrt(x).
1258 * @discussion This function is accurate to at least 11 bits for float and
1259 * 22 bits for double. */
1260static inline SIMD_CFUNC simd_double3 simd_fast_rsqrt(simd_double3 x);
1261/*! @abstract A fast approximation to 1/sqrt(x).
1262 * @discussion This function is accurate to at least 11 bits for float and
1263 * 22 bits for double. */
1264static inline SIMD_CFUNC simd_double4 simd_fast_rsqrt(simd_double4 x);
1265/*! @abstract A fast approximation to 1/sqrt(x).
1266 * @discussion This function is accurate to at least 11 bits for float and
1267 * 22 bits for double. */
1268static inline SIMD_CFUNC simd_double8 simd_fast_rsqrt(simd_double8 x);
1269/*! @abstract A fast approximation to 1/sqrt(x).
1270 * @discussion Deprecated. Use simd_fast_rsqrt(x) instead. */
1271#define vector_fast_rsqrt simd_fast_rsqrt
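
/* Example (illustrative sketch, not part of this header): the fast variant
 * is accurate to at least 11 bits for float, which is usually enough for
 * approximate lighting or distance-falloff computations. Assumes
 * <simd/simd.h>.
 *
 *     simd_float4 d2  = { 1.0f, 4.0f, 16.0f, 64.0f };
 *     simd_float4 inv = simd_fast_rsqrt(d2);   // ~{ 1, 0.5, 0.25, 0.125 }
 */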
1272
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC _Float16 simd_rsqrt(_Float16 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_half2 simd_rsqrt(simd_half2 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_half3 simd_rsqrt(simd_half3 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_half4 simd_rsqrt(simd_half4 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_half8 simd_rsqrt(simd_half8 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_half16 simd_rsqrt(simd_half16 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_half32 simd_rsqrt(simd_half32 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC float simd_rsqrt(float x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_float2 simd_rsqrt(simd_float2 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_float3 simd_rsqrt(simd_float3 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_float4 simd_rsqrt(simd_float4 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_float8 simd_rsqrt(simd_float8 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_float16 simd_rsqrt(simd_float16 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC double simd_rsqrt(double x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_double2 simd_rsqrt(simd_double2 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_double3 simd_rsqrt(simd_double3 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_double4 simd_rsqrt(simd_double4 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_double8 simd_rsqrt(simd_double8 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion Deprecated. Use simd_rsqrt(x) instead. */
#define vector_rsqrt simd_rsqrt
1348
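/* Example (illustrative sketch, not part of this header): like simd_recip,
 * simd_rsqrt resolves to the fast or precise variant depending on whether
 * -ffast-math is in effect for the calling translation unit. Assumes
 * <simd/simd.h>.
 *
 *     simd_float2 x = { 4.0f, 9.0f };
 *     simd_float2 r = simd_rsqrt(x);   // ~{ 0.5, 0.333 }
 */
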
1349/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1350 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1351 * positive and finite, then the two values are exactly equal. */
1352static inline SIMD_CFUNC _Float16 simd_fract(_Float16 x);
1353/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1354 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1355 * positive and finite, then the two values are exactly equal. */
1356static inline SIMD_CFUNC simd_half2 simd_fract(simd_half2 x);
1357/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1358 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1359 * positive and finite, then the two values are exactly equal. */
1360static inline SIMD_CFUNC simd_half3 simd_fract(simd_half3 x);
1361/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1362 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1363 * positive and finite, then the two values are exactly equal. */
1364static inline SIMD_CFUNC simd_half4 simd_fract(simd_half4 x);
1365/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1366 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1367 * positive and finite, then the two values are exactly equal. */
1368static inline SIMD_CFUNC simd_half8 simd_fract(simd_half8 x);
1369/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1370 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1371 * positive and finite, then the two values are exactly equal. */
1372static inline SIMD_CFUNC simd_half16 simd_fract(simd_half16 x);
1373/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1374 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1375 * positive and finite, then the two values are exactly equal. */
1376static inline SIMD_CFUNC simd_half32 simd_fract(simd_half32 x);
1377/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1378 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1379 * positive and finite, then the two values are exactly equal. */
1380static inline SIMD_CFUNC float simd_fract(float x);
1381/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1382 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1383 * positive and finite, then the two values are exactly equal. */
1384static inline SIMD_CFUNC simd_float2 simd_fract(simd_float2 x);
1385/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1386 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1387 * positive and finite, then the two values are exactly equal. */
1388static inline SIMD_CFUNC simd_float3 simd_fract(simd_float3 x);
1389/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1390 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1391 * positive and finite, then the two values are exactly equal. */
1392static inline SIMD_CFUNC simd_float4 simd_fract(simd_float4 x);
1393/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1394 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1395 * positive and finite, then the two values are exactly equal. */
1396static inline SIMD_CFUNC simd_float8 simd_fract(simd_float8 x);
1397/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1398 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1399 * positive and finite, then the two values are exactly equal. */
1400static inline SIMD_CFUNC simd_float16 simd_fract(simd_float16 x);
1401/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1402 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1403 * positive and finite, then the two values are exactly equal. */
1404static inline SIMD_CFUNC double simd_fract(double x);
1405/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1406 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1407 * positive and finite, then the two values are exactly equal. */
1408static inline SIMD_CFUNC simd_double2 simd_fract(simd_double2 x);
1409/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1410 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1411 * positive and finite, then the two values are exactly equal. */
1412static inline SIMD_CFUNC simd_double3 simd_fract(simd_double3 x);
1413/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1414 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1415 * positive and finite, then the two values are exactly equal. */
1416static inline SIMD_CFUNC simd_double4 simd_fract(simd_double4 x);
1417/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1418 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
1419 * positive and finite, then the two values are exactly equal. */
1420static inline SIMD_CFUNC simd_double8 simd_fract(simd_double8 x);
1421/*! @abstract The "fractional part" of x, lying in the range [0, 1).
1422 * @discussion Deprecated. Use simd_fract(x) instead. */
1423#define vector_fract simd_fract
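
/* Example (illustrative sketch, not part of this header): simd_fract is a
 * convenient way to wrap a coordinate into [0, 1), e.g. for tiling texture
 * lookups. Assumes <simd/simd.h>; the results shown are approximate.
 *
 *     simd_float2 uv    = { 1.25f, -0.25f };
 *     simd_float2 tiled = simd_fract(uv);   // ~{ 0.25, 0.75 }
 */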
1424
1425/*! @abstract 0 if x < edge, and 1 otherwise.
1426 * @discussion Use a scalar value for edge if you want to apply the same
1427 * threshold to all lanes. */
1428static inline SIMD_CFUNC _Float16 simd_step(_Float16 edge, _Float16 x);
1429/*! @abstract 0 if x < edge, and 1 otherwise.
1430 * @discussion Use a scalar value for edge if you want to apply the same
1431 * threshold to all lanes. */
1432static inline SIMD_CFUNC simd_half2 simd_step(simd_half2 edge, simd_half2 x);
1433/*! @abstract 0 if x < edge, and 1 otherwise.
1434 * @discussion Use a scalar value for edge if you want to apply the same
1435 * threshold to all lanes. */
1436static inline SIMD_CFUNC simd_half3 simd_step(simd_half3 edge, simd_half3 x);
1437/*! @abstract 0 if x < edge, and 1 otherwise.
1438 * @discussion Use a scalar value for edge if you want to apply the same
1439 * threshold to all lanes. */
1440static inline SIMD_CFUNC simd_half4 simd_step(simd_half4 edge, simd_half4 x);
1441/*! @abstract 0 if x < edge, and 1 otherwise.
1442 * @discussion Use a scalar value for edge if you want to apply the same
1443 * threshold to all lanes. */
1444static inline SIMD_CFUNC simd_half8 simd_step(simd_half8 edge, simd_half8 x);
1445/*! @abstract 0 if x < edge, and 1 otherwise.
1446 * @discussion Use a scalar value for edge if you want to apply the same
1447 * threshold to all lanes. */
1448static inline SIMD_CFUNC simd_half16 simd_step(simd_half16 edge, simd_half16 x);
1449/*! @abstract 0 if x < edge, and 1 otherwise.
1450 * @discussion Use a scalar value for edge if you want to apply the same
1451 * threshold to all lanes. */
1452static inline SIMD_CFUNC simd_half32 simd_step(simd_half32 edge, simd_half32 x);
1453/*! @abstract 0 if x < edge, and 1 otherwise.
1454 * @discussion Use a scalar value for edge if you want to apply the same
1455 * threshold to all lanes. */
1456static inline SIMD_CFUNC float simd_step(float edge, float x);
1457/*! @abstract 0 if x < edge, and 1 otherwise.
1458 * @discussion Use a scalar value for edge if you want to apply the same
1459 * threshold to all lanes. */
1460static inline SIMD_CFUNC simd_float2 simd_step(simd_float2 edge, simd_float2 x);
1461/*! @abstract 0 if x < edge, and 1 otherwise.
1462 * @discussion Use a scalar value for edge if you want to apply the same
1463 * threshold to all lanes. */
1464static inline SIMD_CFUNC simd_float3 simd_step(simd_float3 edge, simd_float3 x);
1465/*! @abstract 0 if x < edge, and 1 otherwise.
1466 * @discussion Use a scalar value for edge if you want to apply the same
1467 * threshold to all lanes. */
1468static inline SIMD_CFUNC simd_float4 simd_step(simd_float4 edge, simd_float4 x);
1469/*! @abstract 0 if x < edge, and 1 otherwise.
1470 * @discussion Use a scalar value for edge if you want to apply the same
1471 * threshold to all lanes. */
1472static inline SIMD_CFUNC simd_float8 simd_step(simd_float8 edge, simd_float8 x);
1473/*! @abstract 0 if x < edge, and 1 otherwise.
1474 * @discussion Use a scalar value for edge if you want to apply the same
1475 * threshold to all lanes. */
1476static inline SIMD_CFUNC simd_float16 simd_step(simd_float16 edge, simd_float16 x);
1477/*! @abstract 0 if x < edge, and 1 otherwise.
1478 * @discussion Use a scalar value for edge if you want to apply the same
1479 * threshold to all lanes. */
1480static inline SIMD_CFUNC double simd_step(double edge, double x);
1481/*! @abstract 0 if x < edge, and 1 otherwise.
1482 * @discussion Use a scalar value for edge if you want to apply the same
1483 * threshold to all lanes. */
1484static inline SIMD_CFUNC simd_double2 simd_step(simd_double2 edge, simd_double2 x);
1485/*! @abstract 0 if x < edge, and 1 otherwise.
1486 * @discussion Use a scalar value for edge if you want to apply the same
1487 * threshold to all lanes. */
1488static inline SIMD_CFUNC simd_double3 simd_step(simd_double3 edge, simd_double3 x);
1489/*! @abstract 0 if x < edge, and 1 otherwise.
1490 * @discussion Use a scalar value for edge if you want to apply the same
1491 * threshold to all lanes. */
1492static inline SIMD_CFUNC simd_double4 simd_step(simd_double4 edge, simd_double4 x);
1493/*! @abstract 0 if x < edge, and 1 otherwise.
1494 * @discussion Use a scalar value for edge if you want to apply the same
1495 * threshold to all lanes. */
1496static inline SIMD_CFUNC simd_double8 simd_step(simd_double8 edge, simd_double8 x);
1497/*! @abstract 0 if x < edge, and 1 otherwise.
1498 * @discussion Deprecated. Use simd_step(edge, x) instead. */
1499#define vector_step simd_step
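
/* Example (illustrative sketch, not part of this header): simd_step builds
 * a per-lane 0/1 mask from a threshold; passing a scalar edge applies the
 * same threshold to every lane. Assumes <simd/simd.h>.
 *
 *     simd_float4 x = { 0.1f, 0.5f, 0.9f, 1.5f };
 *     simd_float4 m = simd_step(0.5f, x);   // { 0, 1, 1, 1 }
 */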
1500
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC _Float16 simd_smoothstep(_Float16 edge0, _Float16 edge1, _Float16 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_half2 simd_smoothstep(simd_half2 edge0, simd_half2 edge1, simd_half2 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_half3 simd_smoothstep(simd_half3 edge0, simd_half3 edge1, simd_half3 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_half4 simd_smoothstep(simd_half4 edge0, simd_half4 edge1, simd_half4 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_half8 simd_smoothstep(simd_half8 edge0, simd_half8 edge1, simd_half8 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_half16 simd_smoothstep(simd_half16 edge0, simd_half16 edge1, simd_half16 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_half32 simd_smoothstep(simd_half32 edge0, simd_half32 edge1, simd_half32 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC float simd_smoothstep(float edge0, float edge1, float x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_float2 simd_smoothstep(simd_float2 edge0, simd_float2 edge1, simd_float2 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_float3 simd_smoothstep(simd_float3 edge0, simd_float3 edge1, simd_float3 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_float4 simd_smoothstep(simd_float4 edge0, simd_float4 edge1, simd_float4 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_float8 simd_smoothstep(simd_float8 edge0, simd_float8 edge1, simd_float8 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_float16 simd_smoothstep(simd_float16 edge0, simd_float16 edge1, simd_float16 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC double simd_smoothstep(double edge0, double edge1, double x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_double2 simd_smoothstep(simd_double2 edge0, simd_double2 edge1, simd_double2 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_double3 simd_smoothstep(simd_double3 edge0, simd_double3 edge1, simd_double3 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_double4 simd_smoothstep(simd_double4 edge0, simd_double4 edge1, simd_double4 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion You can use a scalar value for edge0 and edge1 if you want
 * to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_double8 simd_smoothstep(simd_double8 edge0, simd_double8 edge1, simd_double8 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1.
 * @discussion Deprecated. Use simd_smoothstep(edge0, edge1, x) instead. */
#define vector_smoothstep simd_smoothstep
1576
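/* Example (illustrative sketch, not part of this header): smoothstep gives
 * a Hermite ease-in/ease-out ramp between two edges; here a fade that is 0
 * below 2.0, 1 above 3.0, and smooth in between. Assumes <simd/simd.h>;
 * the commented results are approximate.
 *
 *     simd_float4 dist = { 1.0f, 2.25f, 2.75f, 4.0f };
 *     simd_float4 fade = simd_smoothstep(2.0f, 3.0f, dist);
 *     // ~{ 0, 0.156, 0.844, 1 }
 */
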
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char32 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char64 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar32 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar64 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC short simd_reduce_add(simd_short2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC short simd_reduce_add(simd_short3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC short simd_reduce_add(simd_short4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC short simd_reduce_add(simd_short8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC short simd_reduce_add(simd_short16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC short simd_reduce_add(simd_short32 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort32 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half32 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC int simd_reduce_add(simd_int2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC int simd_reduce_add(simd_int3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC int simd_reduce_add(simd_int4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC int simd_reduce_add(simd_int8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC int simd_reduce_add(simd_int16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC float simd_reduce_add(simd_float2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC float simd_reduce_add(simd_float3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC float simd_reduce_add(simd_float4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC float simd_reduce_add(simd_float8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC float simd_reduce_add(simd_float16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC double simd_reduce_add(simd_double2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC double simd_reduce_add(simd_double3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC double simd_reduce_add(simd_double4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types,
 * you may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC double simd_reduce_add(simd_double8 x);
/*! @abstract Sum of elements in x.
 * @discussion Deprecated. Use simd_reduce_add(x) instead. */
#define vector_reduce_add simd_reduce_add
1816
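/* Example (illustrative sketch, not part of this header): a straightforward
 * horizontal sum for a float vector, plus a reminder that narrow integer
 * lanes can wrap, which is why widening first is often safer. Assumes
 * <simd/simd.h>.
 *
 *     simd_float4 v = { 1.0f, 2.0f, 3.0f, 4.0f };
 *     float       s = simd_reduce_add(v);      // 10.0f
 *
 *     simd_char4 c = { 100, 100, 100, 100 };
 *     char wrapped = simd_reduce_add(c);       // 400 does not fit in a char
 *                                              // and wraps; widen c first
 */
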
1817/*! @abstract Minimum of elements in x. */
1818static inline SIMD_CFUNC char simd_reduce_min(simd_char2 x);
1819/*! @abstract Minimum of elements in x. */
1820static inline SIMD_CFUNC char simd_reduce_min(simd_char3 x);
1821/*! @abstract Minimum of elements in x. */
1822static inline SIMD_CFUNC char simd_reduce_min(simd_char4 x);
1823/*! @abstract Minimum of elements in x. */
1824static inline SIMD_CFUNC char simd_reduce_min(simd_char8 x);
1825/*! @abstract Minimum of elements in x. */
1826static inline SIMD_CFUNC char simd_reduce_min(simd_char16 x);
1827/*! @abstract Minimum of elements in x. */
1828static inline SIMD_CFUNC char simd_reduce_min(simd_char32 x);
1829/*! @abstract Minimum of elements in x. */
1830static inline SIMD_CFUNC char simd_reduce_min(simd_char64 x);
1831/*! @abstract Minimum of elements in x. */
1832static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar2 x);
1833/*! @abstract Minimum of elements in x. */
1834static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar3 x);
1835/*! @abstract Minimum of elements in x. */
1836static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar4 x);
1837/*! @abstract Minimum of elements in x. */
1838static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar8 x);
1839/*! @abstract Minimum of elements in x. */
1840static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar16 x);
1841/*! @abstract Minimum of elements in x. */
1842static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar32 x);
1843/*! @abstract Minimum of elements in x. */
1844static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar64 x);
1845/*! @abstract Minimum of elements in x. */
1846static inline SIMD_CFUNC short simd_reduce_min(simd_short2 x);
1847/*! @abstract Minimum of elements in x. */
1848static inline SIMD_CFUNC short simd_reduce_min(simd_short3 x);
1849/*! @abstract Minimum of elements in x. */
1850static inline SIMD_CFUNC short simd_reduce_min(simd_short4 x);
1851/*! @abstract Minimum of elements in x. */
1852static inline SIMD_CFUNC short simd_reduce_min(simd_short8 x);
1853/*! @abstract Minimum of elements in x. */
1854static inline SIMD_CFUNC short simd_reduce_min(simd_short16 x);
1855/*! @abstract Minimum of elements in x. */
1856static inline SIMD_CFUNC short simd_reduce_min(simd_short32 x);
1857/*! @abstract Minimum of elements in x. */
1858static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort2 x);
1859/*! @abstract Minimum of elements in x. */
1860static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort3 x);
1861/*! @abstract Minimum of elements in x. */
1862static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort4 x);
1863/*! @abstract Minimum of elements in x. */
1864static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort8 x);
1865/*! @abstract Minimum of elements in x. */
1866static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort16 x);
1867/*! @abstract Minimum of elements in x. */
1868static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort32 x);
1869/*! @abstract Minimum of elements in x. */
1870static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half2 x);
1871/*! @abstract Minimum of elements in x. */
1872static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half3 x);
1873/*! @abstract Minimum of elements in x. */
1874static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half4 x);
1875/*! @abstract Minimum of elements in x. */
1876static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half8 x);
1877/*! @abstract Minimum of elements in x. */
1878static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half16 x);
1879/*! @abstract Minimum of elements in x. */
1880static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half32 x);
1881/*! @abstract Minimum of elements in x. */
1882static inline SIMD_CFUNC int simd_reduce_min(simd_int2 x);
1883/*! @abstract Minimum of elements in x. */
1884static inline SIMD_CFUNC int simd_reduce_min(simd_int3 x);
1885/*! @abstract Minimum of elements in x. */
1886static inline SIMD_CFUNC int simd_reduce_min(simd_int4 x);
1887/*! @abstract Minimum of elements in x. */
1888static inline SIMD_CFUNC int simd_reduce_min(simd_int8 x);
1889/*! @abstract Minimum of elements in x. */
1890static inline SIMD_CFUNC int simd_reduce_min(simd_int16 x);
1891/*! @abstract Minimum of elements in x. */
1892static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint2 x);
1893/*! @abstract Minimum of elements in x. */
1894static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint3 x);
1895/*! @abstract Minimum of elements in x. */
1896static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint4 x);
1897/*! @abstract Minimum of elements in x. */
1898static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint8 x);
1899/*! @abstract Minimum of elements in x. */
1900static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint16 x);
1901/*! @abstract Minimum of elements in x. */
1902static inline SIMD_CFUNC float simd_reduce_min(simd_float2 x);
1903/*! @abstract Minimum of elements in x. */
1904static inline SIMD_CFUNC float simd_reduce_min(simd_float3 x);
1905/*! @abstract Minimum of elements in x. */
1906static inline SIMD_CFUNC float simd_reduce_min(simd_float4 x);
1907/*! @abstract Minimum of elements in x. */
1908static inline SIMD_CFUNC float simd_reduce_min(simd_float8 x);
1909/*! @abstract Minimum of elements in x. */
1910static inline SIMD_CFUNC float simd_reduce_min(simd_float16 x);
1911/*! @abstract Minimum of elements in x. */
1912static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long2 x);
1913/*! @abstract Minimum of elements in x. */
1914static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long3 x);
1915/*! @abstract Minimum of elements in x. */
1916static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long4 x);
1917/*! @abstract Minimum of elements in x. */
1918static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long8 x);
1919/*! @abstract Minimum of elements in x. */
1920static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong2 x);
1921/*! @abstract Minimum of elements in x. */
1922static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong3 x);
1923/*! @abstract Minimum of elements in x. */
1924static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong4 x);
1925/*! @abstract Minimum of elements in x. */
1926static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong8 x);
1927/*! @abstract Minimum of elements in x. */
1928static inline SIMD_CFUNC double simd_reduce_min(simd_double2 x);
1929/*! @abstract Minimum of elements in x. */
1930static inline SIMD_CFUNC double simd_reduce_min(simd_double3 x);
1931/*! @abstract Minimum of elements in x. */
1932static inline SIMD_CFUNC double simd_reduce_min(simd_double4 x);
1933/*! @abstract Minimum of elements in x. */
1934static inline SIMD_CFUNC double simd_reduce_min(simd_double8 x);
1935/*! @abstract Minimum of elements in x.
1936 * @discussion Deprecated. Use simd_reduce_min(x) instead. */
1937#define vector_reduce_min simd_reduce_min
1938
1939/*! @abstract Maximum of elements in x. */
1940static inline SIMD_CFUNC char simd_reduce_max(simd_char2 x);
1941/*! @abstract Maximum of elements in x. */
1942static inline SIMD_CFUNC char simd_reduce_max(simd_char3 x);
1943/*! @abstract Maximum of elements in x. */
1944static inline SIMD_CFUNC char simd_reduce_max(simd_char4 x);
1945/*! @abstract Maximum of elements in x. */
1946static inline SIMD_CFUNC char simd_reduce_max(simd_char8 x);
1947/*! @abstract Maximum of elements in x. */
1948static inline SIMD_CFUNC char simd_reduce_max(simd_char16 x);
1949/*! @abstract Maximum of elements in x. */
1950static inline SIMD_CFUNC char simd_reduce_max(simd_char32 x);
1951/*! @abstract Maximum of elements in x. */
1952static inline SIMD_CFUNC char simd_reduce_max(simd_char64 x);
1953/*! @abstract Maximum of elements in x. */
1954static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar2 x);
1955/*! @abstract Maximum of elements in x. */
1956static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar3 x);
1957/*! @abstract Maximum of elements in x. */
1958static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar4 x);
1959/*! @abstract Maximum of elements in x. */
1960static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar8 x);
1961/*! @abstract Maximum of elements in x. */
1962static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar16 x);
1963/*! @abstract Maximum of elements in x. */
1964static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar32 x);
1965/*! @abstract Maximum of elements in x. */
1966static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar64 x);
1967/*! @abstract Maximum of elements in x. */
1968static inline SIMD_CFUNC short simd_reduce_max(simd_short2 x);
1969/*! @abstract Maximum of elements in x. */
1970static inline SIMD_CFUNC short simd_reduce_max(simd_short3 x);
1971/*! @abstract Maximum of elements in x. */
1972static inline SIMD_CFUNC short simd_reduce_max(simd_short4 x);
1973/*! @abstract Maximum of elements in x. */
1974static inline SIMD_CFUNC short simd_reduce_max(simd_short8 x);
1975/*! @abstract Maximum of elements in x. */
1976static inline SIMD_CFUNC short simd_reduce_max(simd_short16 x);
1977/*! @abstract Maximum of elements in x. */
1978static inline SIMD_CFUNC short simd_reduce_max(simd_short32 x);
1979/*! @abstract Maximum of elements in x. */
1980static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort2 x);
1981/*! @abstract Maximum of elements in x. */
1982static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort3 x);
1983/*! @abstract Maximum of elements in x. */
1984static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort4 x);
1985/*! @abstract Maximum of elements in x. */
1986static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort8 x);
1987/*! @abstract Maximum of elements in x. */
1988static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort16 x);
1989/*! @abstract Maximum of elements in x. */
1990static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort32 x);
1991/*! @abstract Maximum of elements in x. */
1992static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half2 x);
1993/*! @abstract Maximum of elements in x. */
1994static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half3 x);
1995/*! @abstract Maximum of elements in x. */
1996static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half4 x);
1997/*! @abstract Maximum of elements in x. */
1998static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half8 x);
1999/*! @abstract Maximum of elements in x. */
2000static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half16 x);
2001/*! @abstract Maximum of elements in x. */
2002static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half32 x);
2003/*! @abstract Maximum of elements in x. */
2004static inline SIMD_CFUNC int simd_reduce_max(simd_int2 x);
2005/*! @abstract Maximum of elements in x. */
2006static inline SIMD_CFUNC int simd_reduce_max(simd_int3 x);
2007/*! @abstract Maximum of elements in x. */
2008static inline SIMD_CFUNC int simd_reduce_max(simd_int4 x);
2009/*! @abstract Maximum of elements in x. */
2010static inline SIMD_CFUNC int simd_reduce_max(simd_int8 x);
2011/*! @abstract Maximum of elements in x. */
2012static inline SIMD_CFUNC int simd_reduce_max(simd_int16 x);
2013/*! @abstract Maximum of elements in x. */
2014static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint2 x);
2015/*! @abstract Maximum of elements in x. */
2016static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint3 x);
2017/*! @abstract Maximum of elements in x. */
2018static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint4 x);
2019/*! @abstract Maximum of elements in x. */
2020static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint8 x);
2021/*! @abstract Maximum of elements in x. */
2022static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint16 x);
2023/*! @abstract Maximum of elements in x. */
2024static inline SIMD_CFUNC float simd_reduce_max(simd_float2 x);
2025/*! @abstract Maximum of elements in x. */
2026static inline SIMD_CFUNC float simd_reduce_max(simd_float3 x);
2027/*! @abstract Maximum of elements in x. */
2028static inline SIMD_CFUNC float simd_reduce_max(simd_float4 x);
2029/*! @abstract Maximum of elements in x. */
2030static inline SIMD_CFUNC float simd_reduce_max(simd_float8 x);
2031/*! @abstract Maximum of elements in x. */
2032static inline SIMD_CFUNC float simd_reduce_max(simd_float16 x);
2033/*! @abstract Maximum of elements in x. */
2034static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long2 x);
2035/*! @abstract Maximum of elements in x. */
2036static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long3 x);
2037/*! @abstract Maximum of elements in x. */
2038static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long4 x);
2039/*! @abstract Maximum of elements in x. */
2040static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long8 x);
2041/*! @abstract Maximum of elements in x. */
2042static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong2 x);
2043/*! @abstract Maximum of elements in x. */
2044static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong3 x);
2045/*! @abstract Maximum of elements in x. */
2046static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong4 x);
2047/*! @abstract Maximum of elements in x. */
2048static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong8 x);
2049/*! @abstract Maximum of elements in x. */
2050static inline SIMD_CFUNC double simd_reduce_max(simd_double2 x);
2051/*! @abstract Maximum of elements in x. */
2052static inline SIMD_CFUNC double simd_reduce_max(simd_double3 x);
2053/*! @abstract Maximum of elements in x. */
2054static inline SIMD_CFUNC double simd_reduce_max(simd_double4 x);
2055/*! @abstract Maximum of elements in x. */
2056static inline SIMD_CFUNC double simd_reduce_max(simd_double8 x);
2057/*! @abstract Maximum of elements in x.
2058 * @discussion Deprecated. Use simd_reduce_max(x) instead. */
2059#define vector_reduce_max simd_reduce_max
2060
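/*  Example (illustrative sketch): the reductions collapse a vector to one
 *  scalar, e.g. the smallest and largest component of a color value:
 *
 *      simd_float4 rgba = { 0.2f, 0.9f, 0.4f, 1.0f };
 *      float lo = simd_reduce_min(rgba);   // 0.2f
 *      float hi = simd_reduce_max(rgba);   // 1.0f
 */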
2061/*! @abstract True if and only if each lane of x is equal to the
2062 * corresponding lane of y. */
2063static inline SIMD_CFUNC simd_bool simd_equal(simd_char2 x, simd_char2 y) {
2064 return simd_all(x == y);
2065}
2066/*! @abstract True if and only if each lane of x is equal to the
2067 * corresponding lane of y. */
2068static inline SIMD_CFUNC simd_bool simd_equal(simd_char3 x, simd_char3 y) {
2069 return simd_all(x == y);
2070}
2071/*! @abstract True if and only if each lane of x is equal to the
2072 * corresponding lane of y. */
2073static inline SIMD_CFUNC simd_bool simd_equal(simd_char4 x, simd_char4 y) {
2074 return simd_all(x == y);
2075}
2076/*! @abstract True if and only if each lane of x is equal to the
2077 * corresponding lane of y. */
2078static inline SIMD_CFUNC simd_bool simd_equal(simd_char8 x, simd_char8 y) {
2079 return simd_all(x == y);
2080}
2081/*! @abstract True if and only if each lane of x is equal to the
2082 * corresponding lane of y. */
2083static inline SIMD_CFUNC simd_bool simd_equal(simd_char16 x, simd_char16 y) {
2084 return simd_all(x == y);
2085}
2086/*! @abstract True if and only if each lane of x is equal to the
2087 * corresponding lane of y. */
2088static inline SIMD_CFUNC simd_bool simd_equal(simd_char32 x, simd_char32 y) {
2089 return simd_all(x == y);
2090}
2091/*! @abstract True if and only if each lane of x is equal to the
2092 * corresponding lane of y. */
2093static inline SIMD_CFUNC simd_bool simd_equal(simd_char64 x, simd_char64 y) {
2094 return simd_all(x == y);
2095}
2096/*! @abstract True if and only if each lane of x is equal to the
2097 * corresponding lane of y. */
2098static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar2 x, simd_uchar2 y) {
2099 return simd_all(x == y);
2100}
2101/*! @abstract True if and only if each lane of x is equal to the
2102 * corresponding lane of y. */
2103static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar3 x, simd_uchar3 y) {
2104 return simd_all(x == y);
2105}
2106/*! @abstract True if and only if each lane of x is equal to the
2107 * corresponding lane of y. */
2108static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar4 x, simd_uchar4 y) {
2109 return simd_all(x == y);
2110}
2111/*! @abstract True if and only if each lane of x is equal to the
2112 * corresponding lane of y. */
2113static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar8 x, simd_uchar8 y) {
2114 return simd_all(x == y);
2115}
2116/*! @abstract True if and only if each lane of x is equal to the
2117 * corresponding lane of y. */
2118static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar16 x, simd_uchar16 y) {
2119 return simd_all(x == y);
2120}
2121/*! @abstract True if and only if each lane of x is equal to the
2122 * corresponding lane of y. */
2123static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar32 x, simd_uchar32 y) {
2124 return simd_all(x == y);
2125}
2126/*! @abstract True if and only if each lane of x is equal to the
2127 * corresponding lane of y. */
2128static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar64 x, simd_uchar64 y) {
2129 return simd_all(x == y);
2130}
2131/*! @abstract True if and only if each lane of x is equal to the
2132 * corresponding lane of y. */
2133static inline SIMD_CFUNC simd_bool simd_equal(simd_short2 x, simd_short2 y) {
2134 return simd_all(x == y);
2135}
2136/*! @abstract True if and only if each lane of x is equal to the
2137 * corresponding lane of y. */
2138static inline SIMD_CFUNC simd_bool simd_equal(simd_short3 x, simd_short3 y) {
2139 return simd_all(x == y);
2140}
2141/*! @abstract True if and only if each lane of x is equal to the
2142 * corresponding lane of y. */
2143static inline SIMD_CFUNC simd_bool simd_equal(simd_short4 x, simd_short4 y) {
2144 return simd_all(x == y);
2145}
2146/*! @abstract True if and only if each lane of x is equal to the
2147 * corresponding lane of y. */
2148static inline SIMD_CFUNC simd_bool simd_equal(simd_short8 x, simd_short8 y) {
2149 return simd_all(x == y);
2150}
2151/*! @abstract True if and only if each lane of x is equal to the
2152 * corresponding lane of y. */
2153static inline SIMD_CFUNC simd_bool simd_equal(simd_short16 x, simd_short16 y) {
2154 return simd_all(x == y);
2155}
2156/*! @abstract True if and only if each lane of x is equal to the
2157 * corresponding lane of y. */
2158static inline SIMD_CFUNC simd_bool simd_equal(simd_short32 x, simd_short32 y) {
2159 return simd_all(x == y);
2160}
2161/*! @abstract True if and only if each lane of x is equal to the
2162 * corresponding lane of y. */
2163static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort2 x, simd_ushort2 y) {
2164 return simd_all(x == y);
2165}
2166/*! @abstract True if and only if each lane of x is equal to the
2167 * corresponding lane of y. */
2168static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort3 x, simd_ushort3 y) {
2169 return simd_all(x == y);
2170}
2171/*! @abstract True if and only if each lane of x is equal to the
2172 * corresponding lane of y. */
2173static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort4 x, simd_ushort4 y) {
2174 return simd_all(x == y);
2175}
2176/*! @abstract True if and only if each lane of x is equal to the
2177 * corresponding lane of y. */
2178static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort8 x, simd_ushort8 y) {
2179 return simd_all(x == y);
2180}
2181/*! @abstract True if and only if each lane of x is equal to the
2182 * corresponding lane of y. */
2183static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort16 x, simd_ushort16 y) {
2184 return simd_all(x == y);
2185}
2186/*! @abstract True if and only if each lane of x is equal to the
2187 * corresponding lane of y. */
2188static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort32 x, simd_ushort32 y) {
2189 return simd_all(x == y);
2190}
2191/*! @abstract True if and only if each lane of x is equal to the
2192 * corresponding lane of y. */
2193static inline SIMD_CFUNC simd_bool simd_equal(simd_half2 x, simd_half2 y) {
2194 return simd_all(x == y);
2195}
2196/*! @abstract True if and only if each lane of x is equal to the
2197 * corresponding lane of y. */
2198static inline SIMD_CFUNC simd_bool simd_equal(simd_half3 x, simd_half3 y) {
2199 return simd_all(x == y);
2200}
2201/*! @abstract True if and only if each lane of x is equal to the
2202 * corresponding lane of y. */
2203static inline SIMD_CFUNC simd_bool simd_equal(simd_half4 x, simd_half4 y) {
2204 return simd_all(x == y);
2205}
2206/*! @abstract True if and only if each lane of x is equal to the
2207 * corresponding lane of y. */
2208static inline SIMD_CFUNC simd_bool simd_equal(simd_half8 x, simd_half8 y) {
2209 return simd_all(x == y);
2210}
2211/*! @abstract True if and only if each lane of x is equal to the
2212 * corresponding lane of y. */
2213static inline SIMD_CFUNC simd_bool simd_equal(simd_half16 x, simd_half16 y) {
2214 return simd_all(x == y);
2215}
2216/*! @abstract True if and only if each lane of x is equal to the
2217 * corresponding lane of y. */
2218static inline SIMD_CFUNC simd_bool simd_equal(simd_half32 x, simd_half32 y) {
2219 return simd_all(x == y);
2220}
2221/*! @abstract True if and only if each lane of x is equal to the
2222 * corresponding lane of y. */
2223static inline SIMD_CFUNC simd_bool simd_equal(simd_int2 x, simd_int2 y) {
2224 return simd_all(x == y);
2225}
2226/*! @abstract True if and only if each lane of x is equal to the
2227 * corresponding lane of y. */
2228static inline SIMD_CFUNC simd_bool simd_equal(simd_int3 x, simd_int3 y) {
2229 return simd_all(x == y);
2230}
2231/*! @abstract True if and only if each lane of x is equal to the
2232 * corresponding lane of y. */
2233static inline SIMD_CFUNC simd_bool simd_equal(simd_int4 x, simd_int4 y) {
2234 return simd_all(x == y);
2235}
2236/*! @abstract True if and only if each lane of x is equal to the
2237 * corresponding lane of y. */
2238static inline SIMD_CFUNC simd_bool simd_equal(simd_int8 x, simd_int8 y) {
2239 return simd_all(x == y);
2240}
2241/*! @abstract True if and only if each lane of x is equal to the
2242 * corresponding lane of y. */
2243static inline SIMD_CFUNC simd_bool simd_equal(simd_int16 x, simd_int16 y) {
2244 return simd_all(x == y);
2245}
2246/*! @abstract True if and only if each lane of x is equal to the
2247 * corresponding lane of y. */
2248static inline SIMD_CFUNC simd_bool simd_equal(simd_uint2 x, simd_uint2 y) {
2249 return simd_all(x == y);
2250}
2251/*! @abstract True if and only if each lane of x is equal to the
2252 * corresponding lane of y. */
2253static inline SIMD_CFUNC simd_bool simd_equal(simd_uint3 x, simd_uint3 y) {
2254 return simd_all(x == y);
2255}
2256/*! @abstract True if and only if each lane of x is equal to the
2257 * corresponding lane of y. */
2258static inline SIMD_CFUNC simd_bool simd_equal(simd_uint4 x, simd_uint4 y) {
2259 return simd_all(x == y);
2260}
2261/*! @abstract True if and only if each lane of x is equal to the
2262 * corresponding lane of y. */
2263static inline SIMD_CFUNC simd_bool simd_equal(simd_uint8 x, simd_uint8 y) {
2264 return simd_all(x == y);
2265}
2266/*! @abstract True if and only if each lane of x is equal to the
2267 * corresponding lane of y. */
2268static inline SIMD_CFUNC simd_bool simd_equal(simd_uint16 x, simd_uint16 y) {
2269 return simd_all(x == y);
2270}
2271/*! @abstract True if and only if each lane of x is equal to the
2272 * corresponding lane of y. */
2273static inline SIMD_CFUNC simd_bool simd_equal(simd_float2 x, simd_float2 y) {
2274 return simd_all(x == y);
2275}
2276/*! @abstract True if and only if each lane of x is equal to the
2277 * corresponding lane of y. */
2278static inline SIMD_CFUNC simd_bool simd_equal(simd_float3 x, simd_float3 y) {
2279 return simd_all(x == y);
2280}
2281/*! @abstract True if and only if each lane of x is equal to the
2282 * corresponding lane of y. */
2283static inline SIMD_CFUNC simd_bool simd_equal(simd_float4 x, simd_float4 y) {
2284 return simd_all(x == y);
2285}
2286/*! @abstract True if and only if each lane of x is equal to the
2287 * corresponding lane of y. */
2288static inline SIMD_CFUNC simd_bool simd_equal(simd_float8 x, simd_float8 y) {
2289 return simd_all(x == y);
2290}
2291/*! @abstract True if and only if each lane of x is equal to the
2292 * corresponding lane of y. */
2293static inline SIMD_CFUNC simd_bool simd_equal(simd_float16 x, simd_float16 y) {
2294 return simd_all(x == y);
2295}
2296/*! @abstract True if and only if each lane of x is equal to the
2297 * corresponding lane of y. */
2298static inline SIMD_CFUNC simd_bool simd_equal(simd_long2 x, simd_long2 y) {
2299 return simd_all(x == y);
2300}
2301/*! @abstract True if and only if each lane of x is equal to the
2302 * corresponding lane of y. */
2303static inline SIMD_CFUNC simd_bool simd_equal(simd_long3 x, simd_long3 y) {
2304 return simd_all(x == y);
2305}
2306/*! @abstract True if and only if each lane of x is equal to the
2307 * corresponding lane of y. */
2308static inline SIMD_CFUNC simd_bool simd_equal(simd_long4 x, simd_long4 y) {
2309 return simd_all(x == y);
2310}
2311/*! @abstract True if and only if each lane of x is equal to the
2312 * corresponding lane of y. */
2313static inline SIMD_CFUNC simd_bool simd_equal(simd_long8 x, simd_long8 y) {
2314 return simd_all(x == y);
2315}
2316/*! @abstract True if and only if each lane of x is equal to the
2317 * corresponding lane of y. */
2318static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong2 x, simd_ulong2 y) {
2319 return simd_all(x == y);
2320}
2321/*! @abstract True if and only if each lane of x is equal to the
2322 * corresponding lane of y. */
2323static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong3 x, simd_ulong3 y) {
2324 return simd_all(x == y);
2325}
2326/*! @abstract True if and only if each lane of x is equal to the
2327 * corresponding lane of y. */
2328static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong4 x, simd_ulong4 y) {
2329 return simd_all(x == y);
2330}
2331/*! @abstract True if and only if each lane of x is equal to the
2332 * corresponding lane of y. */
2333static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong8 x, simd_ulong8 y) {
2334 return simd_all(x == y);
2335}
2336/*! @abstract True if and only if each lane of x is equal to the
2337 * corresponding lane of y. */
2338static inline SIMD_CFUNC simd_bool simd_equal(simd_double2 x, simd_double2 y) {
2339 return simd_all(x == y);
2340}
2341/*! @abstract True if and only if each lane of x is equal to the
2342 * corresponding lane of y. */
2343static inline SIMD_CFUNC simd_bool simd_equal(simd_double3 x, simd_double3 y) {
2344 return simd_all(x == y);
2345}
2346/*! @abstract True if and only if each lane of x is equal to the
2347 * corresponding lane of y. */
2348static inline SIMD_CFUNC simd_bool simd_equal(simd_double4 x, simd_double4 y) {
2349 return simd_all(x == y);
2350}
2351/*! @abstract True if and only if each lane of x is equal to the
2352 * corresponding lane of y. */
2353static inline SIMD_CFUNC simd_bool simd_equal(simd_double8 x, simd_double8 y) {
2354 return simd_all(x == y);
2355}
2356
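/*  Example (illustrative sketch): the lanewise operator == yields a lane mask,
 *  while simd_equal collapses it to a single boolean via simd_all:
 *
 *      simd_int4 a = { 1, 2, 3, 4 };
 *      simd_int4 b = { 1, 2, 0, 4 };
 *      simd_int4 mask = (a == b);          // { -1, -1, 0, -1 }
 *      simd_bool same = simd_equal(a, b);  // false, lane 2 differs
 */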
2357#ifdef __cplusplus
2358} /* extern "C" */
2359
2360namespace simd {
2361 /*! @abstract The lanewise absolute value of x. */
2362 template <typename typeN> static SIMD_CPPFUNC typeN abs(const typeN x) { return ::simd_abs(x); }
2363 /*! @abstract The lanewise maximum of x and y. */
2364 template <typename typeN> static SIMD_CPPFUNC typeN max(const typeN x, const typeN y) { return ::simd_max(x,y); }
2365 /*! @abstract The lanewise minimum of x and y. */
2366 template <typename typeN> static SIMD_CPPFUNC typeN min(const typeN x, const typeN y) { return ::simd_min(x,y); }
2367 /*! @abstract x clamped to the interval [min, max]. */
2368 template <typename typeN> static SIMD_CPPFUNC typeN clamp(const typeN x, const typeN min, const typeN max) { return ::simd_clamp(x,min,max); }
2369 /*! @abstract -1 if x < 0, +1 if x > 0, and 0 otherwise. */
2370 template <typename fptypeN> static SIMD_CPPFUNC fptypeN sign(const fptypeN x) { return ::simd_sign(x); }
2371  /*! @abstract Linearly interpolates between x and y, taking the value x when t=0 and y when t=1. */
2372 template <typename fptypeN> static SIMD_CPPFUNC fptypeN mix(const fptypeN x, const fptypeN y, const fptypeN t) { return ::simd_mix(x,y,t); }
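  /*! @abstract Synonym for mix: linearly interpolates between x and y. */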
2373 template <typename fptypeN> static SIMD_CPPFUNC fptypeN lerp(const fptypeN x, const fptypeN y, const fptypeN t) { return ::simd_mix(x,y,t); }
2374 /*! @abstract An approximation to 1/x. */
2375 template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return simd_recip(x); }
2376 /*! @abstract An approximation to 1/sqrt(x). */
2377 template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return simd_rsqrt(x); }
2378  /*! @abstract The "fractional part" of x, in the range [0,1). */
2379 template <typename fptypeN> static SIMD_CPPFUNC fptypeN fract(const fptypeN x) { return ::simd_fract(x); }
2380 /*! @abstract 0 if x < edge, 1 otherwise. */
2381 template <typename fptypeN> static SIMD_CPPFUNC fptypeN step(const fptypeN edge, const fptypeN x) { return ::simd_step(edge,x); }
2382  /*! @abstract Smoothly interpolates from 0 at edge0 to 1 at edge1. */
2383 template <typename fptypeN> static SIMD_CPPFUNC fptypeN smoothstep(const fptypeN edge0, const fptypeN edge1, const fptypeN x) { return ::simd_smoothstep(edge0,edge1,x); }
2384 /*! @abstract True if and only if each lane of x is equal to the
2385 * corresponding lane of y.
2386 *
2387 * @discussion This isn't operator== because that's already defined by
2388 * the compiler to return a lane mask. */
2389 template <typename fptypeN> static SIMD_CPPFUNC simd_bool equal(const fptypeN x, const fptypeN y) { return ::simd_equal(x, y); }
2390#if __cpp_decltype_auto
2391 /* If you are targeting an earlier version of the C++ standard that lacks
2392   decltype(auto) support, you may use the C-style simd_reduce_* functions
2393 instead. */
2394 /*! @abstract The sum of the elements in x. May overflow. */
2395 template <typename typeN> static SIMD_CPPFUNC auto reduce_add(typeN x) { return ::simd_reduce_add(x); }
2396 /*! @abstract The least element in x. */
2397 template <typename typeN> static SIMD_CPPFUNC auto reduce_min(typeN x) { return ::simd_reduce_min(x); }
2398 /*! @abstract The greatest element in x. */
2399 template <typename typeN> static SIMD_CPPFUNC auto reduce_max(typeN x) { return ::simd_reduce_max(x); }
2400#endif
2401 namespace precise {
2402 /*! @abstract An approximation to 1/x. */
2403 template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_precise_recip(x); }
2404 /*! @abstract An approximation to 1/sqrt(x). */
2405 template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_precise_rsqrt(x); }
2406 }
2407 namespace fast {
2408 /*! @abstract An approximation to 1/x. */
2409 template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_fast_recip(x); }
2410 /*! @abstract An approximation to 1/sqrt(x). */
2411 template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_fast_rsqrt(x); }
2412 }
2413}
2414
2415extern "C" {
2416#endif /* __cplusplus */
2417
2418#pragma mark - Implementation
2419
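/*  Implementation note: vectors narrower than a hardware register are widened
 *  with the simd_make_*_undef helpers (whose padding lanes are unspecified),
 *  processed at full register width, and then truncated back to the requested
 *  size with the corresponding simd_make_* function. */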
2420static inline SIMD_CFUNC simd_char2 simd_abs(simd_char2 x) {
2421 return simd_make_char2(simd_abs(simd_make_char8_undef(x)));
2422}
2423
2424static inline SIMD_CFUNC simd_char3 simd_abs(simd_char3 x) {
2425 return simd_make_char3(simd_abs(simd_make_char8_undef(x)));
2426}
2427
2428static inline SIMD_CFUNC simd_char4 simd_abs(simd_char4 x) {
2429 return simd_make_char4(simd_abs(simd_make_char8_undef(x)));
2430}
2431
2432static inline SIMD_CFUNC simd_char8 simd_abs(simd_char8 x) {
2433#if defined __arm__ || defined __arm64__ || defined __aarch64__
2434 return vabs_s8(x);
2435#else
2436 return simd_make_char8(simd_abs(simd_make_char16_undef(x)));
2437#endif
2438}
2439
2440static inline SIMD_CFUNC simd_char16 simd_abs(simd_char16 x) {
2441#if defined __arm__ || defined __arm64__ || defined __aarch64__
2442 return vabsq_s8(x);
2443#elif defined __SSE4_1__
2444 return (simd_char16) _mm_abs_epi8((__m128i)x);
2445#else
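  /* Branchless fallback: an arithmetic right shift by 7 replicates the sign
   * bit, so mask is all ones for negative lanes and zero otherwise; then
   * (x ^ mask) - mask negates exactly the negative lanes. */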
2446 simd_char16 mask = x >> 7; return (x ^ mask) - mask;
2447#endif
2448}
2449
2450static inline SIMD_CFUNC simd_char32 simd_abs(simd_char32 x) {
2451#if defined __AVX2__
2452 return _mm256_abs_epi8(x);
2453#else
2454 return simd_make_char32(simd_abs(x.lo), simd_abs(x.hi));
2455#endif
2456}
2457
2458static inline SIMD_CFUNC simd_char64 simd_abs(simd_char64 x) {
2459#if defined __AVX512BW__
2460 return _mm512_abs_epi8(x);
2461#else
2462 return simd_make_char64(simd_abs(x.lo), simd_abs(x.hi));
2463#endif
2464}
2465
2466static inline SIMD_CFUNC simd_short2 simd_abs(simd_short2 x) {
2467 return simd_make_short2(simd_abs(simd_make_short4_undef(x)));
2468}
2469
2470static inline SIMD_CFUNC simd_short3 simd_abs(simd_short3 x) {
2471 return simd_make_short3(simd_abs(simd_make_short4_undef(x)));
2472}
2473
2474static inline SIMD_CFUNC simd_short4 simd_abs(simd_short4 x) {
2475#if defined __arm__ || defined __arm64__ || defined __aarch64__
2476 return vabs_s16(x);
2477#else
2478 return simd_make_short4(simd_abs(simd_make_short8_undef(x)));
2479#endif
2480}
2481
2482static inline SIMD_CFUNC simd_short8 simd_abs(simd_short8 x) {
2483#if defined __arm__ || defined __arm64__ || defined __aarch64__
2484 return vabsq_s16(x);
2485#elif defined __SSE4_1__
2486 return (simd_short8) _mm_abs_epi16((__m128i)x);
2487#else
2488 simd_short8 mask = x >> 15; return (x ^ mask) - mask;
2489#endif
2490}
2491
2492static inline SIMD_CFUNC simd_short16 simd_abs(simd_short16 x) {
2493#if defined __AVX2__
2494 return _mm256_abs_epi16(x);
2495#else
2496 return simd_make_short16(simd_abs(x.lo), simd_abs(x.hi));
2497#endif
2498}
2499
2500static inline SIMD_CFUNC simd_short32 simd_abs(simd_short32 x) {
2501#if defined __AVX512BW__
2502 return _mm512_abs_epi16(x);
2503#else
2504 return simd_make_short32(simd_abs(x.lo), simd_abs(x.hi));
2505#endif
2506}
2507
2508static inline SIMD_CFUNC simd_half2 simd_abs(simd_half2 x) {
2509 return __tg_fabs(x);
2510}
2511
2512static inline SIMD_CFUNC simd_half3 simd_abs(simd_half3 x) {
2513 return __tg_fabs(x);
2514}
2515
2516static inline SIMD_CFUNC simd_half4 simd_abs(simd_half4 x) {
2517 return __tg_fabs(x);
2518}
2519
2520static inline SIMD_CFUNC simd_half8 simd_abs(simd_half8 x) {
2521 return __tg_fabs(x);
2522}
2523
2524static inline SIMD_CFUNC simd_half16 simd_abs(simd_half16 x) {
2525 return __tg_fabs(x);
2526}
2527
2528static inline SIMD_CFUNC simd_half32 simd_abs(simd_half32 x) {
2529 return __tg_fabs(x);
2530}
2531
2532static inline SIMD_CFUNC simd_int2 simd_abs(simd_int2 x) {
2533#if defined __arm__ || defined __arm64__ || defined __aarch64__
2534 return vabs_s32(x);
2535#else
2536 return simd_make_int2(simd_abs(simd_make_int4_undef(x)));
2537#endif
2538}
2539
2540static inline SIMD_CFUNC simd_int3 simd_abs(simd_int3 x) {
2541 return simd_make_int3(simd_abs(simd_make_int4_undef(x)));
2542}
2543
2544static inline SIMD_CFUNC simd_int4 simd_abs(simd_int4 x) {
2545#if defined __arm__ || defined __arm64__ || defined __aarch64__
2546 return vabsq_s32(x);
2547#elif defined __SSE4_1__
2548 return (simd_int4) _mm_abs_epi32((__m128i)x);
2549#else
2550 simd_int4 mask = x >> 31; return (x ^ mask) - mask;
2551#endif
2552}
2553
2554static inline SIMD_CFUNC simd_int8 simd_abs(simd_int8 x) {
2555#if defined __AVX2__
2556 return _mm256_abs_epi32(x);
2557#else
2558 return simd_make_int8(simd_abs(x.lo), simd_abs(x.hi));
2559#endif
2560}
2561
2562static inline SIMD_CFUNC simd_int16 simd_abs(simd_int16 x) {
2563#if defined __AVX512F__
2564 return _mm512_abs_epi32(x);
2565#else
2566 return simd_make_int16(simd_abs(x.lo), simd_abs(x.hi));
2567#endif
2568}
2569
2570static inline SIMD_CFUNC simd_float2 simd_abs(simd_float2 x) {
2571 return __tg_fabs(x);
2572}
2573
2574static inline SIMD_CFUNC simd_float3 simd_abs(simd_float3 x) {
2575 return __tg_fabs(x);
2576}
2577
2578static inline SIMD_CFUNC simd_float4 simd_abs(simd_float4 x) {
2579 return __tg_fabs(x);
2580}
2581
2582static inline SIMD_CFUNC simd_float8 simd_abs(simd_float8 x) {
2583 return __tg_fabs(x);
2584}
2585
2586static inline SIMD_CFUNC simd_float16 simd_abs(simd_float16 x) {
2587 return __tg_fabs(x);
2588}
2589
2590static inline SIMD_CFUNC simd_long2 simd_abs(simd_long2 x) {
2591#if defined __arm64__ || defined __aarch64__
2592 return vabsq_s64(x);
2593#elif defined __AVX512VL__
2594 return (simd_long2) _mm_abs_epi64((__m128i)x);
2595#else
2596 simd_long2 mask = x >> 63; return (x ^ mask) - mask;
2597#endif
2598}
2599
2600static inline SIMD_CFUNC simd_long3 simd_abs(simd_long3 x) {
2601 return simd_make_long3(simd_abs(simd_make_long4_undef(x)));
2602}
2603
2604static inline SIMD_CFUNC simd_long4 simd_abs(simd_long4 x) {
2605#if defined __AVX512VL__
2606 return _mm256_abs_epi64(x);
2607#else
2608 return simd_make_long4(simd_abs(x.lo), simd_abs(x.hi));
2609#endif
2610}
2611
2612static inline SIMD_CFUNC simd_long8 simd_abs(simd_long8 x) {
2613#if defined __AVX512F__
2614 return _mm512_abs_epi64(x);
2615#else
2616 return simd_make_long8(simd_abs(x.lo), simd_abs(x.hi));
2617#endif
2618}
2619
2620static inline SIMD_CFUNC simd_double2 simd_abs(simd_double2 x) {
2621 return __tg_fabs(x);
2622}
2623
2624static inline SIMD_CFUNC simd_double3 simd_abs(simd_double3 x) {
2625 return __tg_fabs(x);
2626}
2627
2628static inline SIMD_CFUNC simd_double4 simd_abs(simd_double4 x) {
2629 return __tg_fabs(x);
2630}
2631
2632static inline SIMD_CFUNC simd_double8 simd_abs(simd_double8 x) {
2633 return __tg_fabs(x);
2634}
2635
2636static inline SIMD_CFUNC simd_char2 simd_min(simd_char2 x, simd_char2 y) {
2637 return simd_make_char2(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y)));
2638}
2639
2640static inline SIMD_CFUNC simd_char3 simd_min(simd_char3 x, simd_char3 y) {
2641 return simd_make_char3(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y)));
2642}
2643
2644static inline SIMD_CFUNC simd_char4 simd_min(simd_char4 x, simd_char4 y) {
2645 return simd_make_char4(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y)));
2646}
2647
2648static inline SIMD_CFUNC simd_char8 simd_min(simd_char8 x, simd_char8 y) {
2649#if defined __arm__ || defined __arm64__ || defined __aarch64__
2650 return vmin_s8(x, y);
2651#else
2652 return simd_make_char8(simd_min(simd_make_char16_undef(x), simd_make_char16_undef(y)));
2653#endif
2654
2655}
2656
2657static inline SIMD_CFUNC simd_char16 simd_min(simd_char16 x, simd_char16 y) {
2658#if defined __arm__ || defined __arm64__ || defined __aarch64__
2659 return vminq_s8(x, y);
2660#elif defined __SSE4_1__
2661 return (simd_char16) _mm_min_epi8((__m128i)x, (__m128i)y);
2662#else
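  /* Fallback: y < x produces an all-ones lane mask where y is smaller, so
   * simd_bitselect picks y in those lanes and x elsewhere, i.e. the lanewise
   * minimum. */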
2663 return simd_bitselect(x, y, y < x);
2664#endif
2665}
2666
2667static inline SIMD_CFUNC simd_char32 simd_min(simd_char32 x, simd_char32 y) {
2668#if defined __AVX2__
2669 return _mm256_min_epi8(x, y);
2670#else
2671 return simd_bitselect(x, y, y < x);
2672#endif
2673}
2674
2675static inline SIMD_CFUNC simd_char64 simd_min(simd_char64 x, simd_char64 y) {
2676#if defined __AVX512BW__
2677 return _mm512_min_epi8(x, y);
2678#else
2679 return simd_bitselect(x, y, y < x);
2680#endif
2681}
2682
2683static inline SIMD_CFUNC simd_uchar2 simd_min(simd_uchar2 x, simd_uchar2 y) {
2684 return simd_make_uchar2(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
2685}
2686
2687static inline SIMD_CFUNC simd_uchar3 simd_min(simd_uchar3 x, simd_uchar3 y) {
2688 return simd_make_uchar3(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
2689}
2690
2691static inline SIMD_CFUNC simd_uchar4 simd_min(simd_uchar4 x, simd_uchar4 y) {
2692 return simd_make_uchar4(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
2693}
2694
2695static inline SIMD_CFUNC simd_uchar8 simd_min(simd_uchar8 x, simd_uchar8 y) {
2696#if defined __arm__ || defined __arm64__ || defined __aarch64__
2697 return vmin_u8(x, y);
2698#else
2699 return simd_make_uchar8(simd_min(simd_make_uchar16_undef(x), simd_make_uchar16_undef(y)));
2700#endif
2701
2702}
2703
2704static inline SIMD_CFUNC simd_uchar16 simd_min(simd_uchar16 x, simd_uchar16 y) {
2705#if defined __arm__ || defined __arm64__ || defined __aarch64__
2706 return vminq_u8(x, y);
2707#elif defined __SSE4_1__
2708 return (simd_uchar16) _mm_min_epu8((__m128i)x, (__m128i)y);
2709#else
2710 return simd_bitselect(x, y, y < x);
2711#endif
2712}
2713
2714static inline SIMD_CFUNC simd_uchar32 simd_min(simd_uchar32 x, simd_uchar32 y) {
2715#if defined __AVX2__
2716 return _mm256_min_epu8(x, y);
2717#else
2718 return simd_bitselect(x, y, y < x);
2719#endif
2720}
2721
2722static inline SIMD_CFUNC simd_uchar64 simd_min(simd_uchar64 x, simd_uchar64 y) {
2723#if defined __AVX512BW__
2724 return _mm512_min_epu8(x, y);
2725#else
2726 return simd_bitselect(x, y, y < x);
2727#endif
2728}
2729
2730static inline SIMD_CFUNC simd_short2 simd_min(simd_short2 x, simd_short2 y) {
2731 return simd_make_short2(simd_min(simd_make_short4_undef(x), simd_make_short4_undef(y)));
2732}
2733
2734static inline SIMD_CFUNC simd_short3 simd_min(simd_short3 x, simd_short3 y) {
2735 return simd_make_short3(simd_min(simd_make_short4_undef(x), simd_make_short4_undef(y)));
2736}
2737
2738static inline SIMD_CFUNC simd_short4 simd_min(simd_short4 x, simd_short4 y) {
2739#if defined __arm__ || defined __arm64__ || defined __aarch64__
2740 return vmin_s16(x, y);
2741#else
2742 return simd_make_short4(simd_min(simd_make_short8_undef(x), simd_make_short8_undef(y)));
2743#endif
2744
2745}
2746
2747static inline SIMD_CFUNC simd_short8 simd_min(simd_short8 x, simd_short8 y) {
2748#if defined __arm__ || defined __arm64__ || defined __aarch64__
2749 return vminq_s16(x, y);
2750#elif defined __SSE4_1__
2751 return (simd_short8) _mm_min_epi16((__m128i)x, (__m128i)y);
2752#else
2753 return simd_bitselect(x, y, y < x);
2754#endif
2755}
2756
2757static inline SIMD_CFUNC simd_short16 simd_min(simd_short16 x, simd_short16 y) {
2758#if defined __AVX2__
2759 return _mm256_min_epi16(x, y);
2760#else
2761 return simd_bitselect(x, y, y < x);
2762#endif
2763}
2764
2765static inline SIMD_CFUNC simd_short32 simd_min(simd_short32 x, simd_short32 y) {
2766#if defined __AVX512BW__
2767 return _mm512_min_epi16(x, y);
2768#else
2769 return simd_bitselect(x, y, y < x);
2770#endif
2771}
2772
2773static inline SIMD_CFUNC simd_ushort2 simd_min(simd_ushort2 x, simd_ushort2 y) {
2774 return simd_make_ushort2(simd_min(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y)));
2775}
2776
2777static inline SIMD_CFUNC simd_ushort3 simd_min(simd_ushort3 x, simd_ushort3 y) {
2778 return simd_make_ushort3(simd_min(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y)));
2779}
2780
2781static inline SIMD_CFUNC simd_ushort4 simd_min(simd_ushort4 x, simd_ushort4 y) {
2782#if defined __arm__ || defined __arm64__ || defined __aarch64__
2783 return vmin_u16(x, y);
2784#else
2785 return simd_make_ushort4(simd_min(simd_make_ushort8_undef(x), simd_make_ushort8_undef(y)));
2786#endif
2787
2788}
2789
2790static inline SIMD_CFUNC simd_ushort8 simd_min(simd_ushort8 x, simd_ushort8 y) {
2791#if defined __arm__ || defined __arm64__ || defined __aarch64__
2792 return vminq_u16(x, y);
2793#elif defined __SSE4_1__
2794 return (simd_ushort8) _mm_min_epu16((__m128i)x, (__m128i)y);
2795#else
2796 return simd_bitselect(x, y, y < x);
2797#endif
2798}
2799
2800static inline SIMD_CFUNC simd_ushort16 simd_min(simd_ushort16 x, simd_ushort16 y) {
2801#if defined __AVX2__
2802 return _mm256_min_epu16(x, y);
2803#else
2804 return simd_bitselect(x, y, y < x);
2805#endif
2806}
2807
2808static inline SIMD_CFUNC simd_ushort32 simd_min(simd_ushort32 x, simd_ushort32 y) {
2809#if defined __AVX512BW__
2810 return _mm512_min_epu16(x, y);
2811#else
2812 return simd_bitselect(x, y, y < x);
2813#endif
2814}
2815
2816static inline SIMD_CFUNC _Float16 simd_min(_Float16 x, _Float16 y) {
2817 return __fminf16(x,y);
2818}
2819
2820static inline SIMD_CFUNC simd_half2 simd_min(simd_half2 x, simd_half2 y) {
2821 return __tg_fmin(x,y);
2822}
2823
2824static inline SIMD_CFUNC simd_half3 simd_min(simd_half3 x, simd_half3 y) {
2825 return __tg_fmin(x,y);
2826}
2827
2828static inline SIMD_CFUNC simd_half4 simd_min(simd_half4 x, simd_half4 y) {
2829 return __tg_fmin(x,y);
2830}
2831
2832static inline SIMD_CFUNC simd_half8 simd_min(simd_half8 x, simd_half8 y) {
2833 return __tg_fmin(x,y);
2834}
2835
2836static inline SIMD_CFUNC simd_half16 simd_min(simd_half16 x, simd_half16 y) {
2837 return __tg_fmin(x,y);
2838}
2839
2840static inline SIMD_CFUNC simd_half32 simd_min(simd_half32 x, simd_half32 y) {
2841 return __tg_fmin(x,y);
2842}
2843
2844static inline SIMD_CFUNC simd_int2 simd_min(simd_int2 x, simd_int2 y) {
2845#if defined __arm__ || defined __arm64__ || defined __aarch64__
2846 return vmin_s32(x, y);
2847#else
2848 return simd_make_int2(simd_min(simd_make_int4_undef(x), simd_make_int4_undef(y)));
2849#endif
2850
2851}
2852
2853static inline SIMD_CFUNC simd_int3 simd_min(simd_int3 x, simd_int3 y) {
2854 return simd_make_int3(simd_min(simd_make_int4_undef(x), simd_make_int4_undef(y)));
2855}
2856
2857static inline SIMD_CFUNC simd_int4 simd_min(simd_int4 x, simd_int4 y) {
2858#if defined __arm__ || defined __arm64__ || defined __aarch64__
2859 return vminq_s32(x, y);
2860#elif defined __SSE4_1__
2861 return (simd_int4) _mm_min_epi32((__m128i)x, (__m128i)y);
2862#else
2863 return simd_bitselect(x, y, y < x);
2864#endif
2865}
2866
2867static inline SIMD_CFUNC simd_int8 simd_min(simd_int8 x, simd_int8 y) {
2868#if defined __AVX2__
2869 return _mm256_min_epi32(x, y);
2870#else
2871 return simd_bitselect(x, y, y < x);
2872#endif
2873}
2874
2875static inline SIMD_CFUNC simd_int16 simd_min(simd_int16 x, simd_int16 y) {
2876#if defined __AVX512F__
2877 return _mm512_min_epi32(x, y);
2878#else
2879 return simd_bitselect(x, y, y < x);
2880#endif
2881}
2882
2883static inline SIMD_CFUNC simd_uint2 simd_min(simd_uint2 x, simd_uint2 y) {
2884#if defined __arm__ || defined __arm64__ || defined __aarch64__
2885 return vmin_u32(x, y);
2886#else
2887 return simd_make_uint2(simd_min(simd_make_uint4_undef(x), simd_make_uint4_undef(y)));
2888#endif
2889
2890}
2891
2892static inline SIMD_CFUNC simd_uint3 simd_min(simd_uint3 x, simd_uint3 y) {
2893 return simd_make_uint3(simd_min(simd_make_uint4_undef(x), simd_make_uint4_undef(y)));
2894}
2895
2896static inline SIMD_CFUNC simd_uint4 simd_min(simd_uint4 x, simd_uint4 y) {
2897#if defined __arm__ || defined __arm64__ || defined __aarch64__
2898 return vminq_u32(x, y);
2899#elif defined __SSE4_1__
2900 return (simd_uint4) _mm_min_epu32((__m128i)x, (__m128i)y);
2901#else
2902 return simd_bitselect(x, y, y < x);
2903#endif
2904}
2905
2906static inline SIMD_CFUNC simd_uint8 simd_min(simd_uint8 x, simd_uint8 y) {
2907#if defined __AVX2__
2908 return _mm256_min_epu32(x, y);
2909#else
2910 return simd_bitselect(x, y, y < x);
2911#endif
2912}
2913
2914static inline SIMD_CFUNC simd_uint16 simd_min(simd_uint16 x, simd_uint16 y) {
2915#if defined __AVX512F__
2916 return _mm512_min_epu32(x, y);
2917#else
2918 return simd_bitselect(x, y, y < x);
2919#endif
2920}
2921
2922static inline SIMD_CFUNC float simd_min(float x, float y) {
2923 return __tg_fmin(x,y);
2924}
2925
2926static inline SIMD_CFUNC simd_float2 simd_min(simd_float2 x, simd_float2 y) {
2927 return __tg_fmin(x,y);
2928}
2929
2930static inline SIMD_CFUNC simd_float3 simd_min(simd_float3 x, simd_float3 y) {
2931 return __tg_fmin(x,y);
2932}
2933
2934static inline SIMD_CFUNC simd_float4 simd_min(simd_float4 x, simd_float4 y) {
2935 return __tg_fmin(x,y);
2936}
2937
2938static inline SIMD_CFUNC simd_float8 simd_min(simd_float8 x, simd_float8 y) {
2939 return __tg_fmin(x,y);
2940}
2941
2942static inline SIMD_CFUNC simd_float16 simd_min(simd_float16 x, simd_float16 y) {
2943 return __tg_fmin(x,y);
2944}
2945
2946static inline SIMD_CFUNC simd_long2 simd_min(simd_long2 x, simd_long2 y) {
2947#if defined __AVX512VL__
2948 return _mm_min_epi64(x, y);
2949#else
2950 return simd_bitselect(x, y, y < x);
2951#endif
2952}
2953
2954static inline SIMD_CFUNC simd_long3 simd_min(simd_long3 x, simd_long3 y) {
2955 return simd_make_long3(simd_min(simd_make_long4_undef(x), simd_make_long4_undef(y)));
2956}
2957
2958static inline SIMD_CFUNC simd_long4 simd_min(simd_long4 x, simd_long4 y) {
2959#if defined __AVX512VL__
2960 return _mm256_min_epi64(x, y);
2961#else
2962 return simd_bitselect(x, y, y < x);
2963#endif
2964}
2965
2966static inline SIMD_CFUNC simd_long8 simd_min(simd_long8 x, simd_long8 y) {
2967#if defined __AVX512F__
2968 return _mm512_min_epi64(x, y);
2969#else
2970 return simd_bitselect(x, y, y < x);
2971#endif
2972}
2973
2974static inline SIMD_CFUNC simd_ulong2 simd_min(simd_ulong2 x, simd_ulong2 y) {
2975#if defined __AVX512VL__
2976 return _mm_min_epu64(x, y);
2977#else
2978 return simd_bitselect(x, y, y < x);
2979#endif
2980}
2981
2982static inline SIMD_CFUNC simd_ulong3 simd_min(simd_ulong3 x, simd_ulong3 y) {
2983 return simd_make_ulong3(simd_min(simd_make_ulong4_undef(x), simd_make_ulong4_undef(y)));
2984}
2985
2986static inline SIMD_CFUNC simd_ulong4 simd_min(simd_ulong4 x, simd_ulong4 y) {
2987#if defined __AVX512VL__
2988 return _mm256_min_epu64(x, y);
2989#else
2990 return simd_bitselect(x, y, y < x);
2991#endif
2992}
2993
2994static inline SIMD_CFUNC simd_ulong8 simd_min(simd_ulong8 x, simd_ulong8 y) {
2995#if defined __AVX512F__
2996 return _mm512_min_epu64(x, y);
2997#else
2998 return simd_bitselect(x, y, y < x);
2999#endif
3000}
3001
3002static inline SIMD_CFUNC double simd_min(double x, double y) {
3003 return __tg_fmin(x,y);
3004}
3005
3006static inline SIMD_CFUNC simd_double2 simd_min(simd_double2 x, simd_double2 y) {
3007 return __tg_fmin(x,y);
3008}
3009
3010static inline SIMD_CFUNC simd_double3 simd_min(simd_double3 x, simd_double3 y) {
3011 return __tg_fmin(x,y);
3012}
3013
3014static inline SIMD_CFUNC simd_double4 simd_min(simd_double4 x, simd_double4 y) {
3015 return __tg_fmin(x,y);
3016}
3017
3018static inline SIMD_CFUNC simd_double8 simd_min(simd_double8 x, simd_double8 y) {
3019 return __tg_fmin(x,y);
3020}
3021
3022static inline SIMD_CFUNC simd_char2 simd_max(simd_char2 x, simd_char2 y) {
3023 return simd_make_char2(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y)));
3024}
3025
3026static inline SIMD_CFUNC simd_char3 simd_max(simd_char3 x, simd_char3 y) {
3027 return simd_make_char3(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y)));
3028}
3029
3030static inline SIMD_CFUNC simd_char4 simd_max(simd_char4 x, simd_char4 y) {
3031 return simd_make_char4(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y)));
3032}
3033
3034static inline SIMD_CFUNC simd_char8 simd_max(simd_char8 x, simd_char8 y) {
3035#if defined __arm__ || defined __arm64__ || defined __aarch64__
3036 return vmax_s8(x, y);
3037#else
3038 return simd_make_char8(simd_max(simd_make_char16_undef(x), simd_make_char16_undef(y)));
3039#endif
3040
3041}
3042
3043static inline SIMD_CFUNC simd_char16 simd_max(simd_char16 x, simd_char16 y) {
3044#if defined __arm__ || defined __arm64__ || defined __aarch64__
3045 return vmaxq_s8(x, y);
3046#elif defined __SSE4_1__
3047 return (simd_char16) _mm_max_epi8((__m128i)x, (__m128i)y);
3048#else
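  /* Fallback: x < y produces an all-ones lane mask where y is larger, so
   * simd_bitselect picks y in those lanes and x elsewhere, i.e. the lanewise
   * maximum. */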
3049 return simd_bitselect(x, y, x < y);
3050#endif
3051}
3052
3053static inline SIMD_CFUNC simd_char32 simd_max(simd_char32 x, simd_char32 y) {
3054#if defined __AVX2__
3055 return _mm256_max_epi8(x, y);
3056#else
3057 return simd_bitselect(x, y, x < y);
3058#endif
3059}
3060
3061static inline SIMD_CFUNC simd_char64 simd_max(simd_char64 x, simd_char64 y) {
3062#if defined __AVX512BW__
3063 return _mm512_max_epi8(x, y);
3064#else
3065 return simd_bitselect(x, y, x < y);
3066#endif
3067}
3068
3069static inline SIMD_CFUNC simd_uchar2 simd_max(simd_uchar2 x, simd_uchar2 y) {
3070 return simd_make_uchar2(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
3071}
3072
3073static inline SIMD_CFUNC simd_uchar3 simd_max(simd_uchar3 x, simd_uchar3 y) {
3074 return simd_make_uchar3(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
3075}
3076
3077static inline SIMD_CFUNC simd_uchar4 simd_max(simd_uchar4 x, simd_uchar4 y) {
3078 return simd_make_uchar4(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
3079}
3080
3081static inline SIMD_CFUNC simd_uchar8 simd_max(simd_uchar8 x, simd_uchar8 y) {
3082#if defined __arm__ || defined __arm64__ || defined __aarch64__
3083 return vmax_u8(x, y);
3084#else
3085 return simd_make_uchar8(simd_max(simd_make_uchar16_undef(x), simd_make_uchar16_undef(y)));
3086#endif
3087
3088}
3089
3090static inline SIMD_CFUNC simd_uchar16 simd_max(simd_uchar16 x, simd_uchar16 y) {
3091#if defined __arm__ || defined __arm64__ || defined __aarch64__
3092 return vmaxq_u8(x, y);
3093#elif defined __SSE4_1__
3094 return (simd_uchar16) _mm_max_epu8((__m128i)x, (__m128i)y);
3095#else
3096 return simd_bitselect(x, y, x < y);
3097#endif
3098}
3099
3100static inline SIMD_CFUNC simd_uchar32 simd_max(simd_uchar32 x, simd_uchar32 y) {
3101#if defined __AVX2__
3102 return _mm256_max_epu8(x, y);
3103#else
3104 return simd_bitselect(x, y, x < y);
3105#endif
3106}
3107
3108static inline SIMD_CFUNC simd_uchar64 simd_max(simd_uchar64 x, simd_uchar64 y) {
3109#if defined __AVX512BW__
3110 return _mm512_max_epu8(x, y);
3111#else
3112 return simd_bitselect(x, y, x < y);
3113#endif
3114}
3115
3116static inline SIMD_CFUNC simd_short2 simd_max(simd_short2 x, simd_short2 y) {
3117 return simd_make_short2(simd_max(simd_make_short4_undef(x), simd_make_short4_undef(y)));
3118}
3119
3120static inline SIMD_CFUNC simd_short3 simd_max(simd_short3 x, simd_short3 y) {
3121 return simd_make_short3(simd_max(simd_make_short4_undef(x), simd_make_short4_undef(y)));
3122}
3123
3124static inline SIMD_CFUNC simd_short4 simd_max(simd_short4 x, simd_short4 y) {
3125#if defined __arm__ || defined __arm64__ || defined __aarch64__
3126 return vmax_s16(x, y);
3127#else
3128 return simd_make_short4(simd_max(simd_make_short8_undef(x), simd_make_short8_undef(y)));
3129#endif
3130
3131}
3132
3133static inline SIMD_CFUNC simd_short8 simd_max(simd_short8 x, simd_short8 y) {
3134#if defined __arm__ || defined __arm64__ || defined __aarch64__
3135 return vmaxq_s16(x, y);
3136#elif defined __SSE4_1__
3137 return (simd_short8) _mm_max_epi16((__m128i)x, (__m128i)y);
3138#else
3139 return simd_bitselect(x, y, x < y);
3140#endif
3141}
3142
3143static inline SIMD_CFUNC simd_short16 simd_max(simd_short16 x, simd_short16 y) {
3144#if defined __AVX2__
3145 return _mm256_max_epi16(x, y);
3146#else
3147 return simd_bitselect(x, y, x < y);
3148#endif
3149}
3150
3151static inline SIMD_CFUNC simd_short32 simd_max(simd_short32 x, simd_short32 y) {
3152#if defined __AVX512BW__
3153 return _mm512_max_epi16(x, y);
3154#else
3155 return simd_bitselect(x, y, x < y);
3156#endif
3157}
3158
3159static inline SIMD_CFUNC simd_ushort2 simd_max(simd_ushort2 x, simd_ushort2 y) {
3160 return simd_make_ushort2(simd_max(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y)));
3161}
3162
3163static inline SIMD_CFUNC simd_ushort3 simd_max(simd_ushort3 x, simd_ushort3 y) {
3164 return simd_make_ushort3(simd_max(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y)));
3165}
3166
3167static inline SIMD_CFUNC simd_ushort4 simd_max(simd_ushort4 x, simd_ushort4 y) {
3168#if defined __arm__ || defined __arm64__ || defined __aarch64__
3169 return vmax_u16(x, y);
3170#else
3171 return simd_make_ushort4(simd_max(simd_make_ushort8_undef(x), simd_make_ushort8_undef(y)));
3172#endif
3173
3174}
3175
3176static inline SIMD_CFUNC simd_ushort8 simd_max(simd_ushort8 x, simd_ushort8 y) {
3177#if defined __arm__ || defined __arm64__ || defined __aarch64__
3178 return vmaxq_u16(x, y);
3179#elif defined __SSE4_1__
3180 return (simd_ushort8) _mm_max_epu16((__m128i)x, (__m128i)y);
3181#else
3182 return simd_bitselect(x, y, x < y);
3183#endif
3184}
3185
3186static inline SIMD_CFUNC simd_ushort16 simd_max(simd_ushort16 x, simd_ushort16 y) {
3187#if defined __AVX2__
3188 return _mm256_max_epu16(x, y);
3189#else
3190 return simd_bitselect(x, y, x < y);
3191#endif
3192}
3193
3194static inline SIMD_CFUNC simd_ushort32 simd_max(simd_ushort32 x, simd_ushort32 y) {
3195#if defined __AVX512BW__
3196 return _mm512_max_epu16(x, y);
3197#else
3198 return simd_bitselect(x, y, x < y);
3199#endif
3200}
3201
3202static inline SIMD_CFUNC _Float16 simd_max(_Float16 x, _Float16 y) {
3203 return __fmaxf16(x,y);
3204}
3205
3206static inline SIMD_CFUNC simd_half2 simd_max(simd_half2 x, simd_half2 y) {
3207 return __tg_fmax(x,y);
3208}
3209
3210static inline SIMD_CFUNC simd_half3 simd_max(simd_half3 x, simd_half3 y) {
3211 return __tg_fmax(x,y);
3212}
3213
3214static inline SIMD_CFUNC simd_half4 simd_max(simd_half4 x, simd_half4 y) {
3215 return __tg_fmax(x,y);
3216}
3217
3218static inline SIMD_CFUNC simd_half8 simd_max(simd_half8 x, simd_half8 y) {
3219 return __tg_fmax(x,y);
3220}
3221
3222static inline SIMD_CFUNC simd_half16 simd_max(simd_half16 x, simd_half16 y) {
3223 return __tg_fmax(x,y);
3224}
3225
3226static inline SIMD_CFUNC simd_half32 simd_max(simd_half32 x, simd_half32 y) {
3227 return __tg_fmax(x,y);
3228}
3229
3230static inline SIMD_CFUNC simd_int2 simd_max(simd_int2 x, simd_int2 y) {
3231#if defined __arm__ || defined __arm64__ || defined __aarch64__
3232 return vmax_s32(x, y);
3233#else
3234 return simd_make_int2(simd_max(simd_make_int4_undef(x), simd_make_int4_undef(y)));
3235#endif
3237}
3238
3239static inline SIMD_CFUNC simd_int3 simd_max(simd_int3 x, simd_int3 y) {
3240 return simd_make_int3(simd_max(simd_make_int4_undef(x), simd_make_int4_undef(y)));
3241}
3242
3243static inline SIMD_CFUNC simd_int4 simd_max(simd_int4 x, simd_int4 y) {
3244#if defined __arm__ || defined __arm64__ || defined __aarch64__
3245 return vmaxq_s32(x, y);
3246#elif defined __SSE4_1__
3247 return (simd_int4) _mm_max_epi32((__m128i)x, (__m128i)y);
3248#else
3249 return simd_bitselect(x, y, x < y);
3250#endif
3251}
3252
3253static inline SIMD_CFUNC simd_int8 simd_max(simd_int8 x, simd_int8 y) {
3254#if defined __AVX2__
3255 return _mm256_max_epi32(x, y);
3256#else
3257 return simd_bitselect(x, y, x < y);
3258#endif
3259}
3260
3261static inline SIMD_CFUNC simd_int16 simd_max(simd_int16 x, simd_int16 y) {
3262#if defined __AVX512F__
3263 return _mm512_max_epi32(x, y);
3264#else
3265 return simd_bitselect(x, y, x < y);
3266#endif
3267}
3268
3269static inline SIMD_CFUNC simd_uint2 simd_max(simd_uint2 x, simd_uint2 y) {
3270#if defined __arm__ || defined __arm64__ || defined __aarch64__
3271 return vmax_u32(x, y);
3272#else
3273 return simd_make_uint2(simd_max(simd_make_uint4_undef(x), simd_make_uint4_undef(y)));
3274#endif
3276}
3277
3278static inline SIMD_CFUNC simd_uint3 simd_max(simd_uint3 x, simd_uint3 y) {
3279 return simd_make_uint3(simd_max(simd_make_uint4_undef(x), simd_make_uint4_undef(y)));
3280}
3281
3282static inline SIMD_CFUNC simd_uint4 simd_max(simd_uint4 x, simd_uint4 y) {
3283#if defined __arm__ || defined __arm64__ || defined __aarch64__
3284 return vmaxq_u32(x, y);
3285#elif defined __SSE4_1__
3286 return (simd_uint4) _mm_max_epu32((__m128i)x, (__m128i)y);
3287#else
3288 return simd_bitselect(x, y, x < y);
3289#endif
3290}
3291
3292static inline SIMD_CFUNC simd_uint8 simd_max(simd_uint8 x, simd_uint8 y) {
3293#if defined __AVX2__
3294 return _mm256_max_epu32(x, y);
3295#else
3296 return simd_bitselect(x, y, x < y);
3297#endif
3298}
3299
3300static inline SIMD_CFUNC simd_uint16 simd_max(simd_uint16 x, simd_uint16 y) {
3301#if defined __AVX512F__
3302 return _mm512_max_epu32(x, y);
3303#else
3304 return simd_bitselect(x, y, x < y);
3305#endif
3306}
3307
3308static inline SIMD_CFUNC float simd_max(float x, float y) {
3309 return __tg_fmax(x,y);
3310}
3311
3312static inline SIMD_CFUNC simd_float2 simd_max(simd_float2 x, simd_float2 y) {
3313 return __tg_fmax(x,y);
3314}
3315
3316static inline SIMD_CFUNC simd_float3 simd_max(simd_float3 x, simd_float3 y) {
3317 return __tg_fmax(x,y);
3318}
3319
3320static inline SIMD_CFUNC simd_float4 simd_max(simd_float4 x, simd_float4 y) {
3321 return __tg_fmax(x,y);
3322}
3323
3324static inline SIMD_CFUNC simd_float8 simd_max(simd_float8 x, simd_float8 y) {
3325 return __tg_fmax(x,y);
3326}
3327
3328static inline SIMD_CFUNC simd_float16 simd_max(simd_float16 x, simd_float16 y) {
3329 return __tg_fmax(x,y);
3330}
3331
3332static inline SIMD_CFUNC simd_long2 simd_max(simd_long2 x, simd_long2 y) {
3333#if defined __AVX512VL__
3334 return _mm_max_epi64(x, y);
3335#else
3336 return simd_bitselect(x, y, x < y);
3337#endif
3338}
3339
3340static inline SIMD_CFUNC simd_long3 simd_max(simd_long3 x, simd_long3 y) {
3341 return simd_make_long3(simd_max(simd_make_long4_undef(x), simd_make_long4_undef(y)));
3342}
3343
3344static inline SIMD_CFUNC simd_long4 simd_max(simd_long4 x, simd_long4 y) {
3345#if defined __AVX512VL__
3346 return _mm256_max_epi64(x, y);
3347#else
3348 return simd_bitselect(x, y, x < y);
3349#endif
3350}
3351
3352static inline SIMD_CFUNC simd_long8 simd_max(simd_long8 x, simd_long8 y) {
3353#if defined __AVX512F__
3354 return _mm512_max_epi64(x, y);
3355#else
3356 return simd_bitselect(x, y, x < y);
3357#endif
3358}
3359
3360static inline SIMD_CFUNC simd_ulong2 simd_max(simd_ulong2 x, simd_ulong2 y) {
3361#if defined __AVX512VL__
3362 return _mm_max_epu64(x, y);
3363#else
3364 return simd_bitselect(x, y, x < y);
3365#endif
3366}
3367
3368static inline SIMD_CFUNC simd_ulong3 simd_max(simd_ulong3 x, simd_ulong3 y) {
3369 return simd_make_ulong3(simd_max(simd_make_ulong4_undef(x), simd_make_ulong4_undef(y)));
3370}
3371
3372static inline SIMD_CFUNC simd_ulong4 simd_max(simd_ulong4 x, simd_ulong4 y) {
3373#if defined __AVX512VL__
3374 return _mm256_max_epu64(x, y);
3375#else
3376 return simd_bitselect(x, y, x < y);
3377#endif
3378}
3379
3380static inline SIMD_CFUNC simd_ulong8 simd_max(simd_ulong8 x, simd_ulong8 y) {
3381#if defined __AVX512F__
3382 return _mm512_max_epu64(x, y);
3383#else
3384 return simd_bitselect(x, y, x < y);
3385#endif
3386}
3387
3388static inline SIMD_CFUNC double simd_max(double x, double y) {
3389 return __tg_fmax(x,y);
3390}
3391
3392static inline SIMD_CFUNC simd_double2 simd_max(simd_double2 x, simd_double2 y) {
3393 return __tg_fmax(x,y);
3394}
3395
3396static inline SIMD_CFUNC simd_double3 simd_max(simd_double3 x, simd_double3 y) {
3397 return __tg_fmax(x,y);
3398}
3399
3400static inline SIMD_CFUNC simd_double4 simd_max(simd_double4 x, simd_double4 y) {
3401 return __tg_fmax(x,y);
3402}
3403
3404static inline SIMD_CFUNC simd_double8 simd_max(simd_double8 x, simd_double8 y) {
3405 return __tg_fmax(x,y);
3406}
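
/* Illustrative usage (not part of the API): element-wise max and min on small
 * vectors. The floating-point overloads above call __tg_fmax, so a NaN lane
 * yields the other operand, as in fmax/fmin.
 *
 *   simd_float4 a  = simd_make_float4(1.0f, -2.0f,  3.0f, NAN);
 *   simd_float4 b  = simd_make_float4(0.5f,  4.0f, -1.0f, 7.0f);
 *   simd_float4 hi = simd_max(a, b);   // {  1,   4,  3, 7 }
 *   simd_float4 lo = simd_min(a, b);   // { 0.5, -2, -1, 7 }
 */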
3407
3408static inline SIMD_CFUNC simd_char2 simd_clamp(simd_char2 x, simd_char2 min, simd_char2 max) {
3409 return simd_min(simd_max(x, min), max);
3410}
3411
3412static inline SIMD_CFUNC simd_char3 simd_clamp(simd_char3 x, simd_char3 min, simd_char3 max) {
3413 return simd_min(simd_max(x, min), max);
3414}
3415
3416static inline SIMD_CFUNC simd_char4 simd_clamp(simd_char4 x, simd_char4 min, simd_char4 max) {
3417 return simd_min(simd_max(x, min), max);
3418}
3419
3420static inline SIMD_CFUNC simd_char8 simd_clamp(simd_char8 x, simd_char8 min, simd_char8 max) {
3421 return simd_min(simd_max(x, min), max);
3422}
3423
3424static inline SIMD_CFUNC simd_char16 simd_clamp(simd_char16 x, simd_char16 min, simd_char16 max) {
3425 return simd_min(simd_max(x, min), max);
3426}
3427
3428static inline SIMD_CFUNC simd_char32 simd_clamp(simd_char32 x, simd_char32 min, simd_char32 max) {
3429 return simd_min(simd_max(x, min), max);
3430}
3431
3432static inline SIMD_CFUNC simd_char64 simd_clamp(simd_char64 x, simd_char64 min, simd_char64 max) {
3433 return simd_min(simd_max(x, min), max);
3434}
3435
3436static inline SIMD_CFUNC simd_uchar2 simd_clamp(simd_uchar2 x, simd_uchar2 min, simd_uchar2 max) {
3437 return simd_min(simd_max(x, min), max);
3438}
3439
3440static inline SIMD_CFUNC simd_uchar3 simd_clamp(simd_uchar3 x, simd_uchar3 min, simd_uchar3 max) {
3441 return simd_min(simd_max(x, min), max);
3442}
3443
3444static inline SIMD_CFUNC simd_uchar4 simd_clamp(simd_uchar4 x, simd_uchar4 min, simd_uchar4 max) {
3445 return simd_min(simd_max(x, min), max);
3446}
3447
3448static inline SIMD_CFUNC simd_uchar8 simd_clamp(simd_uchar8 x, simd_uchar8 min, simd_uchar8 max) {
3449 return simd_min(simd_max(x, min), max);
3450}
3451
3452static inline SIMD_CFUNC simd_uchar16 simd_clamp(simd_uchar16 x, simd_uchar16 min, simd_uchar16 max) {
3453 return simd_min(simd_max(x, min), max);
3454}
3455
3456static inline SIMD_CFUNC simd_uchar32 simd_clamp(simd_uchar32 x, simd_uchar32 min, simd_uchar32 max) {
3457 return simd_min(simd_max(x, min), max);
3458}
3459
3460static inline SIMD_CFUNC simd_uchar64 simd_clamp(simd_uchar64 x, simd_uchar64 min, simd_uchar64 max) {
3461 return simd_min(simd_max(x, min), max);
3462}
3463
3464static inline SIMD_CFUNC simd_short2 simd_clamp(simd_short2 x, simd_short2 min, simd_short2 max) {
3465 return simd_min(simd_max(x, min), max);
3466}
3467
3468static inline SIMD_CFUNC simd_short3 simd_clamp(simd_short3 x, simd_short3 min, simd_short3 max) {
3469 return simd_min(simd_max(x, min), max);
3470}
3471
3472static inline SIMD_CFUNC simd_short4 simd_clamp(simd_short4 x, simd_short4 min, simd_short4 max) {
3473 return simd_min(simd_max(x, min), max);
3474}
3475
3476static inline SIMD_CFUNC simd_short8 simd_clamp(simd_short8 x, simd_short8 min, simd_short8 max) {
3477 return simd_min(simd_max(x, min), max);
3478}
3479
3480static inline SIMD_CFUNC simd_short16 simd_clamp(simd_short16 x, simd_short16 min, simd_short16 max) {
3481 return simd_min(simd_max(x, min), max);
3482}
3483
3484static inline SIMD_CFUNC simd_short32 simd_clamp(simd_short32 x, simd_short32 min, simd_short32 max) {
3485 return simd_min(simd_max(x, min), max);
3486}
3487
3488static inline SIMD_CFUNC simd_ushort2 simd_clamp(simd_ushort2 x, simd_ushort2 min, simd_ushort2 max) {
3489 return simd_min(simd_max(x, min), max);
3490}
3491
3492static inline SIMD_CFUNC simd_ushort3 simd_clamp(simd_ushort3 x, simd_ushort3 min, simd_ushort3 max) {
3493 return simd_min(simd_max(x, min), max);
3494}
3495
3496static inline SIMD_CFUNC simd_ushort4 simd_clamp(simd_ushort4 x, simd_ushort4 min, simd_ushort4 max) {
3497 return simd_min(simd_max(x, min), max);
3498}
3499
3500static inline SIMD_CFUNC simd_ushort8 simd_clamp(simd_ushort8 x, simd_ushort8 min, simd_ushort8 max) {
3501 return simd_min(simd_max(x, min), max);
3502}
3503
3504static inline SIMD_CFUNC simd_ushort16 simd_clamp(simd_ushort16 x, simd_ushort16 min, simd_ushort16 max) {
3505 return simd_min(simd_max(x, min), max);
3506}
3507
3508static inline SIMD_CFUNC simd_ushort32 simd_clamp(simd_ushort32 x, simd_ushort32 min, simd_ushort32 max) {
3509 return simd_min(simd_max(x, min), max);
3510}
3511
3512static inline SIMD_CFUNC _Float16 simd_clamp(_Float16 x, _Float16 min, _Float16 max) {
3513 return simd_min(simd_max(x, min), max);
3514}
3515
3516static inline SIMD_CFUNC simd_half2 simd_clamp(simd_half2 x, simd_half2 min, simd_half2 max) {
3517 return simd_min(simd_max(x, min), max);
3518}
3519
3520static inline SIMD_CFUNC simd_half3 simd_clamp(simd_half3 x, simd_half3 min, simd_half3 max) {
3521 return simd_min(simd_max(x, min), max);
3522}
3523
3524static inline SIMD_CFUNC simd_half4 simd_clamp(simd_half4 x, simd_half4 min, simd_half4 max) {
3525 return simd_min(simd_max(x, min), max);
3526}
3527
3528static inline SIMD_CFUNC simd_half8 simd_clamp(simd_half8 x, simd_half8 min, simd_half8 max) {
3529 return simd_min(simd_max(x, min), max);
3530}
3531
3532static inline SIMD_CFUNC simd_half16 simd_clamp(simd_half16 x, simd_half16 min, simd_half16 max) {
3533 return simd_min(simd_max(x, min), max);
3534}
3535
3536static inline SIMD_CFUNC simd_half32 simd_clamp(simd_half32 x, simd_half32 min, simd_half32 max) {
3537 return simd_min(simd_max(x, min), max);
3538}
3539
3540static inline SIMD_CFUNC simd_int2 simd_clamp(simd_int2 x, simd_int2 min, simd_int2 max) {
3541 return simd_min(simd_max(x, min), max);
3542}
3543
3544static inline SIMD_CFUNC simd_int3 simd_clamp(simd_int3 x, simd_int3 min, simd_int3 max) {
3545 return simd_min(simd_max(x, min), max);
3546}
3547
3548static inline SIMD_CFUNC simd_int4 simd_clamp(simd_int4 x, simd_int4 min, simd_int4 max) {
3549 return simd_min(simd_max(x, min), max);
3550}
3551
3552static inline SIMD_CFUNC simd_int8 simd_clamp(simd_int8 x, simd_int8 min, simd_int8 max) {
3553 return simd_min(simd_max(x, min), max);
3554}
3555
3556static inline SIMD_CFUNC simd_int16 simd_clamp(simd_int16 x, simd_int16 min, simd_int16 max) {
3557 return simd_min(simd_max(x, min), max);
3558}
3559
3560static inline SIMD_CFUNC simd_uint2 simd_clamp(simd_uint2 x, simd_uint2 min, simd_uint2 max) {
3561 return simd_min(simd_max(x, min), max);
3562}
3563
3564static inline SIMD_CFUNC simd_uint3 simd_clamp(simd_uint3 x, simd_uint3 min, simd_uint3 max) {
3565 return simd_min(simd_max(x, min), max);
3566}
3567
3568static inline SIMD_CFUNC simd_uint4 simd_clamp(simd_uint4 x, simd_uint4 min, simd_uint4 max) {
3569 return simd_min(simd_max(x, min), max);
3570}
3571
3572static inline SIMD_CFUNC simd_uint8 simd_clamp(simd_uint8 x, simd_uint8 min, simd_uint8 max) {
3573 return simd_min(simd_max(x, min), max);
3574}
3575
3576static inline SIMD_CFUNC simd_uint16 simd_clamp(simd_uint16 x, simd_uint16 min, simd_uint16 max) {
3577 return simd_min(simd_max(x, min), max);
3578}
3579
3580static inline SIMD_CFUNC float simd_clamp(float x, float min, float max) {
3581 return simd_min(simd_max(x, min), max);
3582}
3583
3584static inline SIMD_CFUNC simd_float2 simd_clamp(simd_float2 x, simd_float2 min, simd_float2 max) {
3585 return simd_min(simd_max(x, min), max);
3586}
3587
3588static inline SIMD_CFUNC simd_float3 simd_clamp(simd_float3 x, simd_float3 min, simd_float3 max) {
3589 return simd_min(simd_max(x, min), max);
3590}
3591
3592static inline SIMD_CFUNC simd_float4 simd_clamp(simd_float4 x, simd_float4 min, simd_float4 max) {
3593 return simd_min(simd_max(x, min), max);
3594}
3595
3596static inline SIMD_CFUNC simd_float8 simd_clamp(simd_float8 x, simd_float8 min, simd_float8 max) {
3597 return simd_min(simd_max(x, min), max);
3598}
3599
3600static inline SIMD_CFUNC simd_float16 simd_clamp(simd_float16 x, simd_float16 min, simd_float16 max) {
3601 return simd_min(simd_max(x, min), max);
3602}
3603
3604static inline SIMD_CFUNC simd_long2 simd_clamp(simd_long2 x, simd_long2 min, simd_long2 max) {
3605 return simd_min(simd_max(x, min), max);
3606}
3607
3608static inline SIMD_CFUNC simd_long3 simd_clamp(simd_long3 x, simd_long3 min, simd_long3 max) {
3609 return simd_min(simd_max(x, min), max);
3610}
3611
3612static inline SIMD_CFUNC simd_long4 simd_clamp(simd_long4 x, simd_long4 min, simd_long4 max) {
3613 return simd_min(simd_max(x, min), max);
3614}
3615
3616static inline SIMD_CFUNC simd_long8 simd_clamp(simd_long8 x, simd_long8 min, simd_long8 max) {
3617 return simd_min(simd_max(x, min), max);
3618}
3619
3620static inline SIMD_CFUNC simd_ulong2 simd_clamp(simd_ulong2 x, simd_ulong2 min, simd_ulong2 max) {
3621 return simd_min(simd_max(x, min), max);
3622}
3623
3624static inline SIMD_CFUNC simd_ulong3 simd_clamp(simd_ulong3 x, simd_ulong3 min, simd_ulong3 max) {
3625 return simd_min(simd_max(x, min), max);
3626}
3627
3628static inline SIMD_CFUNC simd_ulong4 simd_clamp(simd_ulong4 x, simd_ulong4 min, simd_ulong4 max) {
3629 return simd_min(simd_max(x, min), max);
3630}
3631
3632static inline SIMD_CFUNC simd_ulong8 simd_clamp(simd_ulong8 x, simd_ulong8 min, simd_ulong8 max) {
3633 return simd_min(simd_max(x, min), max);
3634}
3635
3636static inline SIMD_CFUNC double simd_clamp(double x, double min, double max) {
3637 return simd_min(simd_max(x, min), max);
3638}
3639
3640static inline SIMD_CFUNC simd_double2 simd_clamp(simd_double2 x, simd_double2 min, simd_double2 max) {
3641 return simd_min(simd_max(x, min), max);
3642}
3643
3644static inline SIMD_CFUNC simd_double3 simd_clamp(simd_double3 x, simd_double3 min, simd_double3 max) {
3645 return simd_min(simd_max(x, min), max);
3646}
3647
3648static inline SIMD_CFUNC simd_double4 simd_clamp(simd_double4 x, simd_double4 min, simd_double4 max) {
3649 return simd_min(simd_max(x, min), max);
3650}
3651
3652static inline SIMD_CFUNC simd_double8 simd_clamp(simd_double8 x, simd_double8 min, simd_double8 max) {
3653 return simd_min(simd_max(x, min), max);
3654}
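
/* Illustrative usage (not part of the API): simd_clamp is simply
 * simd_min(simd_max(x, min), max) applied lane by lane, so each lane of the
 * result lies in [min, max] whenever min <= max in that lane.
 *
 *   float       s = simd_clamp(1.3f, 0.0f, 1.0f);    // 1.0
 *   simd_float4 v = simd_clamp(simd_make_float4(1.3f, -0.2f, 0.5f, 1.0f),
 *                              (simd_float4)0.0f, (simd_float4)1.0f);
 *   // v == { 1, 0, 0.5, 1 }
 */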
3655
3656
3657static inline SIMD_CFUNC _Float16 simd_sign(_Float16 x) {
3658 return (x == 0 | x != x) ? 0 : __copysignf16(1,x);
3659}
3660
3661static inline SIMD_CFUNC simd_half2 simd_sign(simd_half2 x) {
3662 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3663}
3664
3665static inline SIMD_CFUNC simd_half3 simd_sign(simd_half3 x) {
3666 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3667}
3668
3669static inline SIMD_CFUNC simd_half4 simd_sign(simd_half4 x) {
3670 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3671}
3672
3673static inline SIMD_CFUNC simd_half8 simd_sign(simd_half8 x) {
3674 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3675}
3676
3677static inline SIMD_CFUNC simd_half16 simd_sign(simd_half16 x) {
3678 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3679}
3680
3681static inline SIMD_CFUNC simd_half32 simd_sign(simd_half32 x) {
3682 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3683}
3684
3685static inline SIMD_CFUNC float simd_sign(float x) {
3686 return (x == 0 | x != x) ? 0 : copysignf(1,x);
3687}
3688
3689static inline SIMD_CFUNC simd_float2 simd_sign(simd_float2 x) {
3690 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3691}
3692
3693static inline SIMD_CFUNC simd_float3 simd_sign(simd_float3 x) {
3694 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3695}
3696
3697static inline SIMD_CFUNC simd_float4 simd_sign(simd_float4 x) {
3698 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3699}
3700
3701static inline SIMD_CFUNC simd_float8 simd_sign(simd_float8 x) {
3702 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3703}
3704
3705static inline SIMD_CFUNC simd_float16 simd_sign(simd_float16 x) {
3706 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3707}
3708
3709static inline SIMD_CFUNC double simd_sign(double x) {
3710 return (x == 0 | x != x) ? 0 : copysign(1,x);
3711}
3712
3713static inline SIMD_CFUNC simd_double2 simd_sign(simd_double2 x) {
3714 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3715}
3716
3717static inline SIMD_CFUNC simd_double3 simd_sign(simd_double3 x) {
3718 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3719}
3720
3721static inline SIMD_CFUNC simd_double4 simd_sign(simd_double4 x) {
3722 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3723}
3724
3725static inline SIMD_CFUNC simd_double8 simd_sign(simd_double8 x) {
3726 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
3727}
3728
3729static inline SIMD_CFUNC _Float16 simd_mix(_Float16 x, _Float16 y, _Float16 t) {
3730 return x + t*(y - x);
3731}
3732
3733static inline SIMD_CFUNC simd_half2 simd_mix(simd_half2 x, simd_half2 y, simd_half2 t) {
3734 return x + t*(y - x);
3735}
3736
3737static inline SIMD_CFUNC simd_half3 simd_mix(simd_half3 x, simd_half3 y, simd_half3 t) {
3738 return x + t*(y - x);
3739}
3740
3741static inline SIMD_CFUNC simd_half4 simd_mix(simd_half4 x, simd_half4 y, simd_half4 t) {
3742 return x + t*(y - x);
3743}
3744
3745static inline SIMD_CFUNC simd_half8 simd_mix(simd_half8 x, simd_half8 y, simd_half8 t) {
3746 return x + t*(y - x);
3747}
3748
3749static inline SIMD_CFUNC simd_half16 simd_mix(simd_half16 x, simd_half16 y, simd_half16 t) {
3750 return x + t*(y - x);
3751}
3752
3753static inline SIMD_CFUNC simd_half32 simd_mix(simd_half32 x, simd_half32 y, simd_half32 t) {
3754 return x + t*(y - x);
3755}
3756
3757static inline SIMD_CFUNC float simd_mix(float x, float y, float t) {
3758 return x + t*(y - x);
3759}
3760
3761static inline SIMD_CFUNC simd_float2 simd_mix(simd_float2 x, simd_float2 y, simd_float2 t) {
3762 return x + t*(y - x);
3763}
3764
3765static inline SIMD_CFUNC simd_float3 simd_mix(simd_float3 x, simd_float3 y, simd_float3 t) {
3766 return x + t*(y - x);
3767}
3768
3769static inline SIMD_CFUNC simd_float4 simd_mix(simd_float4 x, simd_float4 y, simd_float4 t) {
3770 return x + t*(y - x);
3771}
3772
3773static inline SIMD_CFUNC simd_float8 simd_mix(simd_float8 x, simd_float8 y, simd_float8 t) {
3774 return x + t*(y - x);
3775}
3776
3777static inline SIMD_CFUNC simd_float16 simd_mix(simd_float16 x, simd_float16 y, simd_float16 t) {
3778 return x + t*(y - x);
3779}
3780
3781static inline SIMD_CFUNC double simd_mix(double x, double y, double t) {
3782 return x + t*(y - x);
3783}
3784
3785static inline SIMD_CFUNC simd_double2 simd_mix(simd_double2 x, simd_double2 y, simd_double2 t) {
3786 return x + t*(y - x);
3787}
3788
3789static inline SIMD_CFUNC simd_double3 simd_mix(simd_double3 x, simd_double3 y, simd_double3 t) {
3790 return x + t*(y - x);
3791}
3792
3793static inline SIMD_CFUNC simd_double4 simd_mix(simd_double4 x, simd_double4 y, simd_double4 t) {
3794 return x + t*(y - x);
3795}
3796
3797static inline SIMD_CFUNC simd_double8 simd_mix(simd_double8 x, simd_double8 y, simd_double8 t) {
3798 return x + t*(y - x);
3799}
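
/* Illustrative usage (not part of the API): simd_mix computes x + t*(y - x),
 * so t == 0 returns x and t == 1 returns y (up to rounding).
 *
 *   simd_float3 p0  = simd_make_float3(0.0f, 0.0f, 0.0f);
 *   simd_float3 p1  = simd_make_float3(2.0f, 4.0f, 8.0f);
 *   simd_float3 mid = simd_mix(p0, p1, (simd_float3)0.5f);   // { 1, 2, 4 }
 */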
3800
3801static inline SIMD_CFUNC _Float16 simd_recip(_Float16 x) {
3802#if __FAST_MATH__
3803 return simd_fast_recip(x);
3804#else
3805 return simd_precise_recip(x);
3806#endif
3807}
3808
3809static inline SIMD_CFUNC simd_half2 simd_recip(simd_half2 x) {
3810#if __FAST_MATH__
3811 return simd_fast_recip(x);
3812#else
3813 return simd_precise_recip(x);
3814#endif
3815}
3816
3817static inline SIMD_CFUNC simd_half3 simd_recip(simd_half3 x) {
3818#if __FAST_MATH__
3819 return simd_fast_recip(x);
3820#else
3821 return simd_precise_recip(x);
3822#endif
3823}
3824
3825static inline SIMD_CFUNC simd_half4 simd_recip(simd_half4 x) {
3826#if __FAST_MATH__
3827 return simd_fast_recip(x);
3828#else
3829 return simd_precise_recip(x);
3830#endif
3831}
3832
3833static inline SIMD_CFUNC simd_half8 simd_recip(simd_half8 x) {
3834#if __FAST_MATH__
3835 return simd_fast_recip(x);
3836#else
3837 return simd_precise_recip(x);
3838#endif
3839}
3840
3841static inline SIMD_CFUNC simd_half16 simd_recip(simd_half16 x) {
3842#if __FAST_MATH__
3843 return simd_fast_recip(x);
3844#else
3845 return simd_precise_recip(x);
3846#endif
3847}
3848
3849static inline SIMD_CFUNC simd_half32 simd_recip(simd_half32 x) {
3850#if __FAST_MATH__
3851 return simd_fast_recip(x);
3852#else
3853 return simd_precise_recip(x);
3854#endif
3855}
3856
3857static inline SIMD_CFUNC float simd_recip(float x) {
3858#if __FAST_MATH__
3859 return simd_fast_recip(x);
3860#else
3861 return simd_precise_recip(x);
3862#endif
3863}
3864
3865static inline SIMD_CFUNC simd_float2 simd_recip(simd_float2 x) {
3866#if __FAST_MATH__
3867 return simd_fast_recip(x);
3868#else
3869 return simd_precise_recip(x);
3870#endif
3871}
3872
3873static inline SIMD_CFUNC simd_float3 simd_recip(simd_float3 x) {
3874#if __FAST_MATH__
3875 return simd_fast_recip(x);
3876#else
3877 return simd_precise_recip(x);
3878#endif
3879}
3880
3881static inline SIMD_CFUNC simd_float4 simd_recip(simd_float4 x) {
3882#if __FAST_MATH__
3883 return simd_fast_recip(x);
3884#else
3885 return simd_precise_recip(x);
3886#endif
3887}
3888
3889static inline SIMD_CFUNC simd_float8 simd_recip(simd_float8 x) {
3890#if __FAST_MATH__
3891 return simd_fast_recip(x);
3892#else
3893 return simd_precise_recip(x);
3894#endif
3895}
3896
3897static inline SIMD_CFUNC simd_float16 simd_recip(simd_float16 x) {
3898#if __FAST_MATH__
3899 return simd_fast_recip(x);
3900#else
3901 return simd_precise_recip(x);
3902#endif
3903}
3904
3905static inline SIMD_CFUNC double simd_recip(double x) {
3906#if __FAST_MATH__
3907 return simd_fast_recip(x);
3908#else
3909 return simd_precise_recip(x);
3910#endif
3911}
3912
3913static inline SIMD_CFUNC simd_double2 simd_recip(simd_double2 x) {
3914#if __FAST_MATH__
3915 return simd_fast_recip(x);
3916#else
3917 return simd_precise_recip(x);
3918#endif
3919}
3920
3921static inline SIMD_CFUNC simd_double3 simd_recip(simd_double3 x) {
3922#if __FAST_MATH__
3923 return simd_fast_recip(x);
3924#else
3925 return simd_precise_recip(x);
3926#endif
3927}
3928
3929static inline SIMD_CFUNC simd_double4 simd_recip(simd_double4 x) {
3930#if __FAST_MATH__
3931 return simd_fast_recip(x);
3932#else
3933 return simd_precise_recip(x);
3934#endif
3935}
3936
3937static inline SIMD_CFUNC simd_double8 simd_recip(simd_double8 x) {
3938#if __FAST_MATH__
3939 return simd_fast_recip(x);
3940#else
3941 return simd_precise_recip(x);
3942#endif
3943}
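
/* Note: simd_recip selects its implementation at compile time on
 * __FAST_MATH__, so the same source line maps to simd_fast_recip in a
 * translation unit built with -ffast-math and to simd_precise_recip
 * otherwise; both variants can also be called explicitly by name.
 * Given some simd_float4 x:
 *
 *   simd_float4 r  = simd_recip(x);          // fast or precise, per build flags
 *   simd_float4 rp = simd_precise_recip(x);  // always the precise variant
 */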
3944
3945static inline SIMD_CFUNC _Float16 simd_fast_recip(_Float16 x) {
3946#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
3947 return simd_fast_recip(simd_make_half4_undef(x)).x;
3948#else
3949 return simd_precise_recip(x);
3950#endif
3951}
3952
3953static inline SIMD_CFUNC simd_half2 simd_fast_recip(simd_half2 x) {
3954 return simd_fast_recip(simd_make_half4_undef(x)).lo;
3955}
3956
3957static inline SIMD_CFUNC simd_half3 simd_fast_recip(simd_half3 x) {
3958 return simd_make_half3(simd_fast_recip(simd_make_half4_undef(x)));
3959}
3960
3961static inline SIMD_CFUNC simd_half4 simd_fast_recip(simd_half4 x) {
3962#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
3963 simd_half4 r = vrecpe_f16(x);
3964 return r * vrecps_f16(x, r);
3965#else
3966 return simd_precise_recip(x);
3967#endif
3968}
3969
3970static inline SIMD_CFUNC simd_half8 simd_fast_recip(simd_half8 x) {
3971#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
3972 simd_half8 r = vrecpeq_f16(x);
3973 return r * vrecpsq_f16(x, r);
3974#else
3975 return simd_precise_recip(x);
3976#endif
3977}
3978
3979static inline SIMD_CFUNC simd_half16 simd_fast_recip(simd_half16 x) {
3980 return simd_make_half16(simd_fast_recip(x.lo), simd_fast_recip(x.hi));
3981}
3982
3983static inline SIMD_CFUNC simd_half32 simd_fast_recip(simd_half32 x) {
3984 return simd_make_half32(simd_fast_recip(x.lo), simd_fast_recip(x.hi));
3985}
3986
3987static inline SIMD_CFUNC float simd_fast_recip(float x) {
3988#if defined __AVX512VL__
3989 simd_float4 x4 = simd_make_float4(x);
3990 return ((simd_float4)_mm_rcp14_ss(x4, x4)).x;
3991#elif defined __SSE__
3992 return ((simd_float4)_mm_rcp_ss(simd_make_float4(x))).x;
3993#elif defined __ARM_NEON
3994 return simd_fast_recip(simd_make_float2_undef(x)).x;
3995#else
3996 return simd_precise_recip(x);
3997#endif
3998}
3999
4000static inline SIMD_CFUNC simd_float2 simd_fast_recip(simd_float2 x) {
4001#if defined __SSE__
4002 return simd_make_float2(simd_fast_recip(simd_make_float4_undef(x)));
4003#elif defined __ARM_NEON
4004 simd_float2 r = vrecpe_f32(x);
4005 return r * vrecps_f32(x, r);
4006#else
4007 return simd_precise_recip(x);
4008#endif
4009}
4010
4011static inline SIMD_CFUNC simd_float3 simd_fast_recip(simd_float3 x) {
4012 return simd_make_float3(simd_fast_recip(simd_make_float4_undef(x)));
4013}
4014
4015static inline SIMD_CFUNC simd_float4 simd_fast_recip(simd_float4 x) {
4016#if defined __AVX512VL__
4017 return _mm_rcp14_ps(x);
4018#elif defined __SSE__
4019 return _mm_rcp_ps(x);
4020#elif defined __ARM_NEON
4021 simd_float4 r = vrecpeq_f32(x);
4022 return r * vrecpsq_f32(x, r);
4023#else
4024 return simd_precise_recip(x);
4025#endif
4026}
4027
4028static inline SIMD_CFUNC simd_float8 simd_fast_recip(simd_float8 x) {
4029#if defined __AVX512VL__
4030 return _mm256_rcp14_ps(x);
4031#elif defined __AVX__
4032 return _mm256_rcp_ps(x);
4033#else
4034 return simd_make_float8(simd_fast_recip(x.lo), simd_fast_recip(x.hi));
4035#endif
4036}
4037
4038static inline SIMD_CFUNC simd_float16 simd_fast_recip(simd_float16 x) {
4039#if defined __AVX512F__
4040 return _mm512_rcp14_ps(x);
4041#else
4042 return simd_make_float16(simd_fast_recip(x.lo), simd_fast_recip(x.hi));
4043#endif
4044}
4045
4046static inline SIMD_CFUNC double simd_fast_recip(double x) {
4047 return simd_precise_recip(x);
4048}
4049
4050static inline SIMD_CFUNC simd_double2 simd_fast_recip(simd_double2 x) {
4051 return simd_precise_recip(x);
4052}
4053
4054static inline SIMD_CFUNC simd_double3 simd_fast_recip(simd_double3 x) {
4055 return simd_precise_recip(x);
4056}
4057
4058static inline SIMD_CFUNC simd_double4 simd_fast_recip(simd_double4 x) {
4059 return simd_precise_recip(x);
4060}
4061
4062static inline SIMD_CFUNC simd_double8 simd_fast_recip(simd_double8 x) {
4063 return simd_precise_recip(x);
4064}
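
/* Implementation note (informal): the fast reciprocal above relies on the
 * hardware estimate where one exists (_mm_rcp14_ps/_mm_rcp_ps and their wider
 * forms on x86, vrecpe plus one vrecps step on NEON, and the FP16
 * equivalents); the double-precision overloads have no such estimate and
 * simply forward to simd_precise_recip.
 *
 *   simd_float4 approx = simd_fast_recip(simd_make_float4(2.0f, 4.0f, 8.0f, 16.0f));
 *   // approximately { 0.5, 0.25, 0.125, 0.0625 }, to the precision of the estimate
 */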
4065
4066static inline SIMD_CFUNC _Float16 simd_precise_recip(_Float16 x) {
4067#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4068 return simd_precise_recip(simd_make_half4_undef(x)).x;
4069#else
4070 return 1/x;
4071#endif
4072}
4073
4074static inline SIMD_CFUNC simd_half2 simd_precise_recip(simd_half2 x) {
4075 return simd_precise_recip(simd_make_half4_undef(x)).lo;
4076}
4077
4078static inline SIMD_CFUNC simd_half3 simd_precise_recip(simd_half3 x) {
4079 return simd_make_half3(simd_precise_recip(simd_make_half4_undef(x)));
4080}
4081
4082static inline SIMD_CFUNC simd_half4 simd_precise_recip(simd_half4 x) {
4083#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4084 simd_half4 r = simd_fast_recip(x);
4085 return r*vrecps_f16(x, r);
4086#else
4087 return 1/x;
4088#endif
4089}
4090
4091static inline SIMD_CFUNC simd_half8 simd_precise_recip(simd_half8 x) {
4092#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4093 simd_half8 r = simd_fast_recip(x);
4094 return r*vrecpsq_f16(x, r);
4095#else
4096 return 1/x;
4097#endif
4098}
4099
4100static inline SIMD_CFUNC simd_half16 simd_precise_recip(simd_half16 x) {
4101 return simd_make_half16(simd_precise_recip(x.lo), simd_precise_recip(x.hi));
4102}
4103
4104static inline SIMD_CFUNC simd_half32 simd_precise_recip(simd_half32 x) {
4105 return simd_make_half32(simd_precise_recip(x.lo), simd_precise_recip(x.hi));
4106}
4107
4108static inline SIMD_CFUNC float simd_precise_recip(float x) {
4109#if defined __SSE__
4110 float r = simd_fast_recip(x);
4111 return r*(2 - (x == 0 ? -INFINITY : x)*r);
4112#elif defined __ARM_NEON
4113 return simd_precise_recip(simd_make_float2_undef(x)).x;
4114#else
4115 return 1/x;
4116#endif
4117}
4118
4119static inline SIMD_CFUNC simd_float2 simd_precise_recip(simd_float2 x) {
4120#if defined __SSE__
4121 return simd_make_float2(simd_precise_recip(simd_make_float4_undef(x)));
4122#elif defined __ARM_NEON
4123 simd_float2 r = simd_fast_recip(x);
4124 return r*vrecps_f32(x, r);
4125#else
4126 return 1/x;
4127#endif
4128}
4129
4130static inline SIMD_CFUNC simd_float3 simd_precise_recip(simd_float3 x) {
4131 return simd_make_float3(simd_precise_recip(simd_make_float4_undef(x)));
4132}
4133
4134static inline SIMD_CFUNC simd_float4 simd_precise_recip(simd_float4 x) {
4135#if defined __SSE__
4136 simd_float4 r = simd_fast_recip(x);
4137 return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r);
4138#elif defined __ARM_NEON
4139 simd_float4 r = simd_fast_recip(x);
4140 return r*vrecpsq_f32(x, r);
4141#else
4142 return 1/x;
4143#endif
4144}
4145
4146static inline SIMD_CFUNC simd_float8 simd_precise_recip(simd_float8 x) {
4147#if defined __AVX__
4148 simd_float8 r = simd_fast_recip(x);
4149 return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r);
4150#else
4151 return simd_make_float8(simd_precise_recip(x.lo), simd_precise_recip(x.hi));
4152#endif
4153}
4154
4155static inline SIMD_CFUNC simd_float16 simd_precise_recip(simd_float16 x) {
4156#if defined __AVX512F__
4157 simd_float16 r = simd_fast_recip(x);
4158 return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r);
4159#else
4160 return simd_make_float16(simd_precise_recip(x.lo), simd_precise_recip(x.hi));
4161#endif
4162}
4163
4164static inline SIMD_CFUNC double simd_precise_recip(double x) {
4165 return 1/x;
4166}
4167
4168static inline SIMD_CFUNC simd_double2 simd_precise_recip(simd_double2 x) {
4169 return 1/x;
4170}
4171
4172static inline SIMD_CFUNC simd_double3 simd_precise_recip(simd_double3 x) {
4173 return 1/x;
4174}
4175
4176static inline SIMD_CFUNC simd_double4 simd_precise_recip(simd_double4 x) {
4177 return 1/x;
4178}
4179
4180static inline SIMD_CFUNC simd_double8 simd_precise_recip(simd_double8 x) {
4181 return 1/x;
4182}
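
/* Implementation note (informal): the x86 branches above refine the fast
 * estimate with one Newton-Raphson step for the reciprocal,
 *
 *   r' = r*(2 - x*r),
 *
 * which roughly doubles the number of correct bits in r. The bitselect that
 * substitutes -INFINITY where x == 0 avoids the 0*infinity = NaN the raw step
 * would otherwise produce, so the refined result stays infinite for zero
 * inputs. The NEON branches get the same correction from vrecps.
 */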
4183
4184static inline SIMD_CFUNC _Float16 simd_rsqrt(_Float16 x) {
4185#if __FAST_MATH__
4186 return simd_fast_rsqrt(x);
4187#else
4188 return simd_precise_rsqrt(x);
4189#endif
4190}
4191
4192static inline SIMD_CFUNC simd_half2 simd_rsqrt(simd_half2 x) {
4193#if __FAST_MATH__
4194 return simd_fast_rsqrt(x);
4195#else
4196 return simd_precise_rsqrt(x);
4197#endif
4198}
4199
4200static inline SIMD_CFUNC simd_half3 simd_rsqrt(simd_half3 x) {
4201#if __FAST_MATH__
4202 return simd_fast_rsqrt(x);
4203#else
4204 return simd_precise_rsqrt(x);
4205#endif
4206}
4207
4208static inline SIMD_CFUNC simd_half4 simd_rsqrt(simd_half4 x) {
4209#if __FAST_MATH__
4210 return simd_fast_rsqrt(x);
4211#else
4212 return simd_precise_rsqrt(x);
4213#endif
4214}
4215
4216static inline SIMD_CFUNC simd_half8 simd_rsqrt(simd_half8 x) {
4217#if __FAST_MATH__
4218 return simd_fast_rsqrt(x);
4219#else
4220 return simd_precise_rsqrt(x);
4221#endif
4222}
4223
4224static inline SIMD_CFUNC simd_half16 simd_rsqrt(simd_half16 x) {
4225#if __FAST_MATH__
4226 return simd_fast_rsqrt(x);
4227#else
4228 return simd_precise_rsqrt(x);
4229#endif
4230}
4231
4232static inline SIMD_CFUNC simd_half32 simd_rsqrt(simd_half32 x) {
4233#if __FAST_MATH__
4234 return simd_fast_rsqrt(x);
4235#else
4236 return simd_precise_rsqrt(x);
4237#endif
4238}
4239
4240static inline SIMD_CFUNC float simd_rsqrt(float x) {
4241#if __FAST_MATH__
4242 return simd_fast_rsqrt(x);
4243#else
4244 return simd_precise_rsqrt(x);
4245#endif
4246}
4247
4248static inline SIMD_CFUNC simd_float2 simd_rsqrt(simd_float2 x) {
4249#if __FAST_MATH__
4250 return simd_fast_rsqrt(x);
4251#else
4252 return simd_precise_rsqrt(x);
4253#endif
4254}
4255
4256static inline SIMD_CFUNC simd_float3 simd_rsqrt(simd_float3 x) {
4257#if __FAST_MATH__
4258 return simd_fast_rsqrt(x);
4259#else
4260 return simd_precise_rsqrt(x);
4261#endif
4262}
4263
4264static inline SIMD_CFUNC simd_float4 simd_rsqrt(simd_float4 x) {
4265#if __FAST_MATH__
4266 return simd_fast_rsqrt(x);
4267#else
4268 return simd_precise_rsqrt(x);
4269#endif
4270}
4271
4272static inline SIMD_CFUNC simd_float8 simd_rsqrt(simd_float8 x) {
4273#if __FAST_MATH__
4274 return simd_fast_rsqrt(x);
4275#else
4276 return simd_precise_rsqrt(x);
4277#endif
4278}
4279
4280static inline SIMD_CFUNC simd_float16 simd_rsqrt(simd_float16 x) {
4281#if __FAST_MATH__
4282 return simd_fast_rsqrt(x);
4283#else
4284 return simd_precise_rsqrt(x);
4285#endif
4286}
4287
4288static inline SIMD_CFUNC double simd_rsqrt(double x) {
4289#if __FAST_MATH__
4290 return simd_fast_rsqrt(x);
4291#else
4292 return simd_precise_rsqrt(x);
4293#endif
4294}
4295
4296static inline SIMD_CFUNC simd_double2 simd_rsqrt(simd_double2 x) {
4297#if __FAST_MATH__
4298 return simd_fast_rsqrt(x);
4299#else
4300 return simd_precise_rsqrt(x);
4301#endif
4302}
4303
4304static inline SIMD_CFUNC simd_double3 simd_rsqrt(simd_double3 x) {
4305#if __FAST_MATH__
4306 return simd_fast_rsqrt(x);
4307#else
4308 return simd_precise_rsqrt(x);
4309#endif
4310}
4311
4312static inline SIMD_CFUNC simd_double4 simd_rsqrt(simd_double4 x) {
4313#if __FAST_MATH__
4314 return simd_fast_rsqrt(x);
4315#else
4316 return simd_precise_rsqrt(x);
4317#endif
4318}
4319
4320static inline SIMD_CFUNC simd_double8 simd_rsqrt(simd_double8 x) {
4321#if __FAST_MATH__
4322 return simd_fast_rsqrt(x);
4323#else
4324 return simd_precise_rsqrt(x);
4325#endif
4326}
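
/* Illustrative usage (not part of the API): like simd_recip, simd_rsqrt picks
 * the fast or precise variant at compile time based on __FAST_MATH__, and both
 * variants remain directly callable by name.
 *
 *   simd_float4 r = simd_rsqrt(simd_make_float4(1.0f, 4.0f, 9.0f, 16.0f));
 *   // approximately { 1, 0.5, 0.333, 0.25 }
 */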
4327
4328static inline SIMD_CFUNC _Float16 simd_fast_rsqrt(_Float16 x) {
4329#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4330 return simd_fast_rsqrt(simd_make_half4_undef(x)).x;
4331#else
4332 return simd_precise_rsqrt(x);
4333#endif
4334}
4335
4336static inline SIMD_CFUNC simd_half2 simd_fast_rsqrt(simd_half2 x) {
4337 return simd_fast_rsqrt(simd_make_half4_undef(x)).lo;
4338}
4339
4340static inline SIMD_CFUNC simd_half3 simd_fast_rsqrt(simd_half3 x) {
4341 return simd_make_half3(simd_fast_rsqrt(simd_make_half4_undef(x)));
4342}
4343
4344static inline SIMD_CFUNC simd_half4 simd_fast_rsqrt(simd_half4 x) {
4345#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4346 simd_half4 r = vrsqrte_f16(x);
4347 return r * vrsqrts_f16(x, r*r);
4348#else
4349 return simd_precise_rsqrt(x);
4350#endif
4351}
4352
4353static inline SIMD_CFUNC simd_half8 simd_fast_rsqrt(simd_half8 x) {
4354#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4355 simd_half8 r = vrsqrteq_f16(x);
4356 return r * vrsqrtsq_f16(x, r*r);
4357#else
4358 return simd_precise_rsqrt(x);
4359#endif
4360}
4361
4362static inline SIMD_CFUNC simd_half16 simd_fast_rsqrt(simd_half16 x) {
4363 return simd_make_half16(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi));
4364}
4365
4366static inline SIMD_CFUNC simd_half32 simd_fast_rsqrt(simd_half32 x) {
4367 return simd_make_half32(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi));
4368}
4369
4370static inline SIMD_CFUNC float simd_fast_rsqrt(float x) {
4371#if defined __AVX512VL__
4372 simd_float4 x4 = simd_make_float4(x);
4373 return ((simd_float4)_mm_rsqrt14_ss(x4, x4)).x;
4374#elif defined __SSE__
4375 return ((simd_float4)_mm_rsqrt_ss(simd_make_float4(x))).x;
4376#elif defined __ARM_NEON
4377 return simd_fast_rsqrt(simd_make_float2_undef(x)).x;
4378#else
4379 return simd_precise_rsqrt(x);
4380#endif
4381}
4382
4383static inline SIMD_CFUNC simd_float2 simd_fast_rsqrt(simd_float2 x) {
4384#if defined __SSE__
4385 return simd_make_float2(simd_fast_rsqrt(simd_make_float4_undef(x)));
4386#elif defined __ARM_NEON
4387 simd_float2 r = vrsqrte_f32(x);
4388 return r * vrsqrts_f32(x, r*r);
4389#else
4390 return simd_precise_rsqrt(x);
4391#endif
4392}
4393
4394static inline SIMD_CFUNC simd_float3 simd_fast_rsqrt(simd_float3 x) {
4395 return simd_make_float3(simd_fast_rsqrt(simd_make_float4_undef(x)));
4396}
4397
4398static inline SIMD_CFUNC simd_float4 simd_fast_rsqrt(simd_float4 x) {
4399#if defined __AVX512VL__
4400 return _mm_rsqrt14_ps(x);
4401#elif defined __SSE__
4402 return _mm_rsqrt_ps(x);
4403#elif defined __ARM_NEON
4404 simd_float4 r = vrsqrteq_f32(x);
4405 return r * vrsqrtsq_f32(x, r*r);
4406#else
4407 return simd_precise_rsqrt(x);
4408#endif
4409}
4410
4411static inline SIMD_CFUNC simd_float8 simd_fast_rsqrt(simd_float8 x) {
4412#if defined __AVX512VL__
4413 return _mm256_rsqrt14_ps(x);
4414#elif defined __AVX__
4415 return _mm256_rsqrt_ps(x);
4416#else
4417 return simd_make_float8(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi));
4418#endif
4419}
4420
4421static inline SIMD_CFUNC simd_float16 simd_fast_rsqrt(simd_float16 x) {
4422#if defined __AVX512F__
4423 return _mm512_rsqrt14_ps(x);
4424#else
4425 return simd_make_float16(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi));
4426#endif
4427}
4428
4429static inline SIMD_CFUNC double simd_fast_rsqrt(double x) {
4430 return simd_precise_rsqrt(x);
4431}
4432
4433static inline SIMD_CFUNC simd_double2 simd_fast_rsqrt(simd_double2 x) {
4434 return simd_precise_rsqrt(x);
4435}
4436
4437static inline SIMD_CFUNC simd_double3 simd_fast_rsqrt(simd_double3 x) {
4438 return simd_precise_rsqrt(x);
4439}
4440
4441static inline SIMD_CFUNC simd_double4 simd_fast_rsqrt(simd_double4 x) {
4442 return simd_precise_rsqrt(x);
4443}
4444
4445static inline SIMD_CFUNC simd_double8 simd_fast_rsqrt(simd_double8 x) {
4446 return simd_precise_rsqrt(x);
4447}
4448
4449static inline SIMD_CFUNC _Float16 simd_precise_rsqrt(_Float16 x) {
4450#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4451 return simd_precise_rsqrt(simd_make_half4_undef(x)).x;
4452#else
4453 return 1/__sqrtf16(x);
4454#endif
4455}
4456
4457static inline SIMD_CFUNC simd_half2 simd_precise_rsqrt(simd_half2 x) {
4458 return simd_precise_rsqrt(simd_make_half4_undef(x)).lo;
4459}
4460
4461static inline SIMD_CFUNC simd_half3 simd_precise_rsqrt(simd_half3 x) {
4462 return simd_make_half3(simd_precise_rsqrt(simd_make_half4_undef(x)));
4463}
4464
4465static inline SIMD_CFUNC simd_half4 simd_precise_rsqrt(simd_half4 x) {
4466#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4467 simd_half4 r = simd_fast_rsqrt(x);
4468 return r*vrsqrts_f16(x, r*r);
4469#else
4470 return 1/__tg_sqrt(x);
4471#endif
4472}
4473
4474static inline SIMD_CFUNC simd_half8 simd_precise_rsqrt(simd_half8 x) {
4475#if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
4476 simd_half8 r = simd_fast_rsqrt(x);
4477 return r*vrsqrtsq_f16(x, r*r);
4478#else
4479 return 1/__tg_sqrt(x);
4480#endif
4481}
4482
4483static inline SIMD_CFUNC simd_half16 simd_precise_rsqrt(simd_half16 x) {
4484 return simd_make_half16(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi));
4485}
4486
4487static inline SIMD_CFUNC simd_half32 simd_precise_rsqrt(simd_half32 x) {
4488 return simd_make_half32(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi));
4489}
4490
4491static inline SIMD_CFUNC float simd_precise_rsqrt(float x) {
4492#if defined __SSE__
4493 float r = simd_fast_rsqrt(x);
4494 return r*(1.5f - 0.5f*(r == INFINITY ? -INFINITY : x)*r*r);
4495#elif defined __ARM_NEON
4496 return simd_precise_rsqrt(simd_make_float2_undef(x)).x;
4497#else
4498 return 1/sqrtf(x);
4499#endif
4500}
4501
4502static inline SIMD_CFUNC simd_float2 simd_precise_rsqrt(simd_float2 x) {
4503#if defined __SSE__
4504 return simd_make_float2(simd_precise_rsqrt(simd_make_float4_undef(x)));
4505#elif defined __ARM_NEON
4506 simd_float2 r = simd_fast_rsqrt(x);
4507 return r*vrsqrts_f32(x, r*r);
4508#else
4509 return 1/__tg_sqrt(x);
4510#endif
4511}
4512
4513static inline SIMD_CFUNC simd_float3 simd_precise_rsqrt(simd_float3 x) {
4514 return simd_make_float3(simd_precise_rsqrt(simd_make_float4_undef(x)));
4515}
4516
4517static inline SIMD_CFUNC simd_float4 simd_precise_rsqrt(simd_float4 x) {
4518#if defined __SSE__
4519 simd_float4 r = simd_fast_rsqrt(x);
4520 return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r);
4521#elif defined __ARM_NEON
4522 simd_float4 r = simd_fast_rsqrt(x);
4523 return r*vrsqrtsq_f32(x, r*r);
4524#else
4525 return 1/__tg_sqrt(x);
4526#endif
4527}
4528
4529static inline SIMD_CFUNC simd_float8 simd_precise_rsqrt(simd_float8 x) {
4530#if defined __AVX__
4531 simd_float8 r = simd_fast_rsqrt(x);
4532 return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r);
4533#else
4534 return simd_make_float8(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi));
4535#endif
4536}
4537
4538static inline SIMD_CFUNC simd_float16 simd_precise_rsqrt(simd_float16 x) {
4539#if defined __AVX512F__
4540 simd_float16 r = simd_fast_rsqrt(x);
4541 return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r);
4542#else
4543 return simd_make_float16(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi));
4544#endif
4545}
4546
4547static inline SIMD_CFUNC double simd_precise_rsqrt(double x) {
4548 return 1/sqrt(x);
4549}
4550
4551static inline SIMD_CFUNC simd_double2 simd_precise_rsqrt(simd_double2 x) {
4552 return 1/__tg_sqrt(x);
4553}
4554
4555static inline SIMD_CFUNC simd_double3 simd_precise_rsqrt(simd_double3 x) {
4556 return 1/__tg_sqrt(x);
4557}
4558
4559static inline SIMD_CFUNC simd_double4 simd_precise_rsqrt(simd_double4 x) {
4560 return 1/__tg_sqrt(x);
4561}
4562
4563static inline SIMD_CFUNC simd_double8 simd_precise_rsqrt(simd_double8 x) {
4564 return 1/__tg_sqrt(x);
4565}
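
/* Implementation note (informal): the x86 branches above refine the fast
 * estimate with one Newton-Raphson step for the reciprocal square root,
 *
 *   r' = r*(1.5 - 0.5*x*r*r),
 *
 * which roughly doubles the number of correct bits in r. Feeding -INFINITY
 * into the step where r == INFINITY (the estimate's result for a zero input)
 * keeps 1/sqrt(0) at infinity instead of collapsing to NaN. The NEON branches
 * obtain the equivalent step from vrsqrts.
 */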
4566
4567static inline SIMD_CFUNC _Float16 simd_fract(_Float16 x) {
4568 return __fminf16(x - __floorf16(x), 0x1.ffcp-1f16);
4569}
4570
4571static inline SIMD_CFUNC simd_half2 simd_fract(simd_half2 x) {
4572 return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16);
4573}
4574
4575static inline SIMD_CFUNC simd_half3 simd_fract(simd_half3 x) {
4576 return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16);
4577}
4578
4579static inline SIMD_CFUNC simd_half4 simd_fract(simd_half4 x) {
4580 return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16);
4581}
4582
4583static inline SIMD_CFUNC simd_half8 simd_fract(simd_half8 x) {
4584 return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16);
4585}
4586
4587static inline SIMD_CFUNC simd_half16 simd_fract(simd_half16 x) {
4588 return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16);
4589}
4590
4591static inline SIMD_CFUNC simd_half32 simd_fract(simd_half32 x) {
4592 return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16);
4593}
4594
4595static inline SIMD_CFUNC float simd_fract(float x) {
4596 return fminf(x - floorf(x), 0x1.fffffep-1f);
4597}
4598
4599static inline SIMD_CFUNC simd_float2 simd_fract(simd_float2 x) {
4600 return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
4601}
4602
4603static inline SIMD_CFUNC simd_float3 simd_fract(simd_float3 x) {
4604 return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
4605}
4606
4607static inline SIMD_CFUNC simd_float4 simd_fract(simd_float4 x) {
4608 return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
4609}
4610
4611static inline SIMD_CFUNC simd_float8 simd_fract(simd_float8 x) {
4612 return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
4613}
4614
4615static inline SIMD_CFUNC simd_float16 simd_fract(simd_float16 x) {
4616 return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
4617}
4618
4619static inline SIMD_CFUNC double simd_fract(double x) {
4620 return fmin(x - floor(x), 0x1.fffffffffffffp-1);
4621}
4622
4623static inline SIMD_CFUNC simd_double2 simd_fract(simd_double2 x) {
4624 return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1);
4625}
4626
4627static inline SIMD_CFUNC simd_double3 simd_fract(simd_double3 x) {
4628 return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1);
4629}
4630
4631static inline SIMD_CFUNC simd_double4 simd_fract(simd_double4 x) {
4632 return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1);
4633}
4634
4635static inline SIMD_CFUNC simd_double8 simd_fract(simd_double8 x) {
4636 return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1);
4637}
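
/* Illustrative usage (not part of the API): simd_fract is fmin(x - floor(x),
 * largest-value-below-one), so the result is always in [0, 1) even for
 * negative inputs; the fmin keeps inputs like -1e-9f, whose x - floor(x)
 * rounds to 1.0f, from producing exactly 1.
 *
 *   simd_float2 f = simd_fract(simd_make_float2(2.75f, -0.25f));   // { 0.75, 0.75 }
 */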
4638
4639static inline SIMD_CFUNC _Float16 simd_step(_Float16 edge, _Float16 x) {
4640 return !(x < edge);
4641}
4642
4643static inline SIMD_CFUNC simd_half2 simd_step(simd_half2 edge, simd_half2 x) {
4644 return simd_bitselect((simd_half2)1, 0, x < edge);
4645}
4646
4647static inline SIMD_CFUNC simd_half3 simd_step(simd_half3 edge, simd_half3 x) {
4648 return simd_bitselect((simd_half3)1, 0, x < edge);
4649}
4650
4651static inline SIMD_CFUNC simd_half4 simd_step(simd_half4 edge, simd_half4 x) {
4652 return simd_bitselect((simd_half4)1, 0, x < edge);
4653}
4654
4655static inline SIMD_CFUNC simd_half8 simd_step(simd_half8 edge, simd_half8 x) {
4656 return simd_bitselect((simd_half8)1, 0, x < edge);
4657}
4658
4659static inline SIMD_CFUNC simd_half16 simd_step(simd_half16 edge, simd_half16 x) {
4660 return simd_bitselect((simd_half16)1, 0, x < edge);
4661}
4662
4663static inline SIMD_CFUNC simd_half32 simd_step(simd_half32 edge, simd_half32 x) {
4664 return simd_bitselect((simd_half32)1, 0, x < edge);
4665}
4666
4667static inline SIMD_CFUNC float simd_step(float edge, float x) {
4668 return !(x < edge);
4669}
4670
4671static inline SIMD_CFUNC simd_float2 simd_step(simd_float2 edge, simd_float2 x) {
4672 return simd_bitselect((simd_float2)1, 0, x < edge);
4673}
4674
4675static inline SIMD_CFUNC simd_float3 simd_step(simd_float3 edge, simd_float3 x) {
4676 return simd_bitselect((simd_float3)1, 0, x < edge);
4677}
4678
4679static inline SIMD_CFUNC simd_float4 simd_step(simd_float4 edge, simd_float4 x) {
4680 return simd_bitselect((simd_float4)1, 0, x < edge);
4681}
4682
4683static inline SIMD_CFUNC simd_float8 simd_step(simd_float8 edge, simd_float8 x) {
4684 return simd_bitselect((simd_float8)1, 0, x < edge);
4685}
4686
4687static inline SIMD_CFUNC simd_float16 simd_step(simd_float16 edge, simd_float16 x) {
4688 return simd_bitselect((simd_float16)1, 0, x < edge);
4689}
4690
4691static inline SIMD_CFUNC double simd_step(double edge, double x) {
4692 return !(x < edge);
4693}
4694
4695static inline SIMD_CFUNC simd_double2 simd_step(simd_double2 edge, simd_double2 x) {
4696 return simd_bitselect((simd_double2)1, 0, x < edge);
4697}
4698
4699static inline SIMD_CFUNC simd_double3 simd_step(simd_double3 edge, simd_double3 x) {
4700 return simd_bitselect((simd_double3)1, 0, x < edge);
4701}
4702
4703static inline SIMD_CFUNC simd_double4 simd_step(simd_double4 edge, simd_double4 x) {
4704 return simd_bitselect((simd_double4)1, 0, x < edge);
4705}
4706
4707static inline SIMD_CFUNC simd_double8 simd_step(simd_double8 edge, simd_double8 x) {
4708 return simd_bitselect((simd_double8)1, 0, x < edge);
4709}
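
/* Illustrative usage (not part of the API): simd_step returns 0 where
 * x < edge and 1 elsewhere (including where x == edge), matching the
 * !(x < edge) scalar form above.
 *
 *   simd_float4 s = simd_step((simd_float4)0.5f,
 *                             simd_make_float4(0.2f, 0.5f, 0.8f, 1.0f));
 *   // s == { 0, 1, 1, 1 }
 */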
4710
4711static inline SIMD_CFUNC _Float16 simd_smoothstep(_Float16 edge0, _Float16 edge1, _Float16 x) {
4712 _Float16 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4713 return t*t*(3 - 2*t);
4714}
4715
4716static inline SIMD_CFUNC simd_half2 simd_smoothstep(simd_half2 edge0, simd_half2 edge1, simd_half2 x) {
4717 simd_half2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4718 return t*t*(3 - 2*t);
4719}
4720
4721static inline SIMD_CFUNC simd_half3 simd_smoothstep(simd_half3 edge0, simd_half3 edge1, simd_half3 x) {
4722 simd_half3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4723 return t*t*(3 - 2*t);
4724}
4725
4726static inline SIMD_CFUNC simd_half4 simd_smoothstep(simd_half4 edge0, simd_half4 edge1, simd_half4 x) {
4727 simd_half4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4728 return t*t*(3 - 2*t);
4729}
4730
4731static inline SIMD_CFUNC simd_half8 simd_smoothstep(simd_half8 edge0, simd_half8 edge1, simd_half8 x) {
4732 simd_half8 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4733 return t*t*(3 - 2*t);
4734}
4735
4736static inline SIMD_CFUNC simd_half16 simd_smoothstep(simd_half16 edge0, simd_half16 edge1, simd_half16 x) {
4737 simd_half16 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4738 return t*t*(3 - 2*t);
4739}
4740
4741static inline SIMD_CFUNC simd_half32 simd_smoothstep(simd_half32 edge0, simd_half32 edge1, simd_half32 x) {
4742 simd_half32 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4743 return t*t*(3 - 2*t);
4744}
4745
4746static inline SIMD_CFUNC float simd_smoothstep(float edge0, float edge1, float x) {
4747 float t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4748 return t*t*(3 - 2*t);
4749}
4750
4751static inline SIMD_CFUNC simd_float2 simd_smoothstep(simd_float2 edge0, simd_float2 edge1, simd_float2 x) {
4752 simd_float2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4753 return t*t*(3 - 2*t);
4754}
4755
4756static inline SIMD_CFUNC simd_float3 simd_smoothstep(simd_float3 edge0, simd_float3 edge1, simd_float3 x) {
4757 simd_float3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4758 return t*t*(3 - 2*t);
4759}
4760
4761static inline SIMD_CFUNC simd_float4 simd_smoothstep(simd_float4 edge0, simd_float4 edge1, simd_float4 x) {
4762 simd_float4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4763 return t*t*(3 - 2*t);
4764}
4765
4766static inline SIMD_CFUNC simd_float8 simd_smoothstep(simd_float8 edge0, simd_float8 edge1, simd_float8 x) {
4767 simd_float8 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4768 return t*t*(3 - 2*t);
4769}
4770
4771static inline SIMD_CFUNC simd_float16 simd_smoothstep(simd_float16 edge0, simd_float16 edge1, simd_float16 x) {
4772 simd_float16 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4773 return t*t*(3 - 2*t);
4774}
4775
4776static inline SIMD_CFUNC double simd_smoothstep(double edge0, double edge1, double x) {
4777 double t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4778 return t*t*(3 - 2*t);
4779}
4780
4781static inline SIMD_CFUNC simd_double2 simd_smoothstep(simd_double2 edge0, simd_double2 edge1, simd_double2 x) {
4782 simd_double2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4783 return t*t*(3 - 2*t);
4784}
4785
4786static inline SIMD_CFUNC simd_double3 simd_smoothstep(simd_double3 edge0, simd_double3 edge1, simd_double3 x) {
4787 simd_double3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4788 return t*t*(3 - 2*t);
4789}
4790
4791static inline SIMD_CFUNC simd_double4 simd_smoothstep(simd_double4 edge0, simd_double4 edge1, simd_double4 x) {
4792 simd_double4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4793 return t*t*(3 - 2*t);
4794}
4795
4796static inline SIMD_CFUNC simd_double8 simd_smoothstep(simd_double8 edge0, simd_double8 edge1, simd_double8 x) {
4797 simd_double8 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
4798 return t*t*(3 - 2*t);
4799}
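
/* Illustrative usage (not part of the API): simd_smoothstep clamps
 * t = (x - edge0)/(edge1 - edge0) to [0, 1] and returns the Hermite
 * polynomial t*t*(3 - 2*t), which is 0 at edge0, 1 at edge1, and has zero
 * slope at both edges.
 *
 *   float a = simd_smoothstep(0.0f, 1.0f, 0.5f);    // 0.5
 *   float b = simd_smoothstep(0.0f, 1.0f, 0.25f);   // 0.15625
 */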
4800
4801static inline SIMD_CFUNC char simd_reduce_add(simd_char2 x) {
4802 return x.x + x.y;
4803}
4804
4805static inline SIMD_CFUNC char simd_reduce_add(simd_char3 x) {
4806 return x.x + x.y + x.z;
4807}
4808
4809static inline SIMD_CFUNC char simd_reduce_add(simd_char4 x) {
4810 return simd_reduce_add(x.lo + x.hi);
4811}
4812
4813static inline SIMD_CFUNC char simd_reduce_add(simd_char8 x) {
4814 return simd_reduce_add(x.lo + x.hi);
4815}
4816
4817static inline SIMD_CFUNC char simd_reduce_add(simd_char16 x) {
4818#if defined __arm64__ || defined __aarch64__
4819 return vaddvq_s8(x);
4820#else
4821 return simd_reduce_add(x.lo + x.hi);
4822#endif
4823}
4824
4825static inline SIMD_CFUNC char simd_reduce_add(simd_char32 x) {
4826 return simd_reduce_add(x.lo + x.hi);
4827}
4828
4829static inline SIMD_CFUNC char simd_reduce_add(simd_char64 x) {
4830 return simd_reduce_add(x.lo + x.hi);
4831}
4832
4833static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar2 x) {
4834 return x.x + x.y;
4835}
4836
4837static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar3 x) {
4838 return x.x + x.y + x.z;
4839}
4840
4841static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar4 x) {
4842 return simd_reduce_add(x.lo + x.hi);
4843}
4844
4845static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar8 x) {
4846 return simd_reduce_add(x.lo + x.hi);
4847}
4848
4849static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar16 x) {
4850#if defined __arm64__ || defined __aarch64__
4851 return vaddvq_u8(x);
4852#else
4853 return simd_reduce_add(x.lo + x.hi);
4854#endif
4855}
4856
4857static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar32 x) {
4858 return simd_reduce_add(x.lo + x.hi);
4859}
4860
4861static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar64 x) {
4862 return simd_reduce_add(x.lo + x.hi);
4863}
4864
4865static inline SIMD_CFUNC short simd_reduce_add(simd_short2 x) {
4866 return x.x + x.y;
4867}
4868
4869static inline SIMD_CFUNC short simd_reduce_add(simd_short3 x) {
4870 return x.x + x.y + x.z;
4871}
4872
4873static inline SIMD_CFUNC short simd_reduce_add(simd_short4 x) {
4874 return simd_reduce_add(x.lo + x.hi);
4875}
4876
4877static inline SIMD_CFUNC short simd_reduce_add(simd_short8 x) {
4878#if defined __arm64__ || defined __aarch64__
4879 return vaddvq_s16(x);
4880#else
4881 return simd_reduce_add(x.lo + x.hi);
4882#endif
4883}
4884
static inline SIMD_CFUNC short simd_reduce_add(simd_short16 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC short simd_reduce_add(simd_short32 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort2 x) {
  return x.x + x.y;
}

static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort3 x) {
  return x.x + x.y + x.z;
}

static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort4 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort8 x) {
#if defined __arm64__ || defined __aarch64__
  return vaddvq_u16(x);
#else
  return simd_reduce_add(x.lo + x.hi);
#endif
}

static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort16 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort32 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC int simd_reduce_add(simd_int2 x) {
  return x.x + x.y;
}

static inline SIMD_CFUNC int simd_reduce_add(simd_int3 x) {
  return x.x + x.y + x.z;
}

static inline SIMD_CFUNC int simd_reduce_add(simd_int4 x) {
#if defined __arm64__ || defined __aarch64__
  return vaddvq_s32(x);
#else
  return simd_reduce_add(x.lo + x.hi);
#endif
}

static inline SIMD_CFUNC int simd_reduce_add(simd_int8 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC int simd_reduce_add(simd_int16 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint2 x) {
  return x.x + x.y;
}

static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint3 x) {
  return x.x + x.y + x.z;
}

static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint4 x) {
#if defined __arm64__ || defined __aarch64__
  return vaddvq_u32(x);
#else
  return simd_reduce_add(x.lo + x.hi);
#endif
}

static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint8 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint16 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long2 x) {
  return x.x + x.y;
}

static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long3 x) {
  return x.x + x.y + x.z;
}

static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long4 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long8 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong2 x) {
  return x.x + x.y;
}

static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong3 x) {
  return x.x + x.y + x.z;
}

static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong4 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong8 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half2 x) {
  return x.x + x.y;
}

static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half3 x) {
  return x.x + x.y + x.z;
}

static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half4 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half8 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half16 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half32 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC float simd_reduce_add(simd_float2 x) {
  return x.x + x.y;
}

static inline SIMD_CFUNC float simd_reduce_add(simd_float3 x) {
  return x.x + x.y + x.z;
}

static inline SIMD_CFUNC float simd_reduce_add(simd_float4 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC float simd_reduce_add(simd_float8 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC float simd_reduce_add(simd_float16 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC double simd_reduce_add(simd_double2 x) {
  return x.x + x.y;
}

static inline SIMD_CFUNC double simd_reduce_add(simd_double3 x) {
  return x.x + x.y + x.z;
}

static inline SIMD_CFUNC double simd_reduce_add(simd_double4 x) {
  return simd_reduce_add(x.lo + x.hi);
}

static inline SIMD_CFUNC double simd_reduce_add(simd_double8 x) {
  return simd_reduce_add(x.lo + x.hi);
}

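/* Usage sketch (illustrative only, not part of the original header): the add
 * reductions above sum the lanes of 2- and 3-element vectors directly, and
 * handle wider vectors by recursively adding the .lo and .hi halves, with a
 * NEON horizontal add (e.g. vaddvq_s32) for some 128-bit integer vectors on
 * arm64. A caller might write:
 *
 *   simd_float4 v = { 1.0f, 2.0f, 3.0f, 4.0f };
 *   float sum = simd_reduce_add(v);   // 10.0f
 */
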
static inline SIMD_CFUNC char simd_reduce_min(simd_char2 x) {
  return x.y < x.x ? x.y : x.x;
}

static inline SIMD_CFUNC char simd_reduce_min(simd_char3 x) {
  char t = x.z < x.x ? x.z : x.x;
  return x.y < t ? x.y : t;
}

static inline SIMD_CFUNC char simd_reduce_min(simd_char4 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC char simd_reduce_min(simd_char8 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC char simd_reduce_min(simd_char16 x) {
#if defined __arm64__ || defined __aarch64__
  return vminvq_s8(x);
#else
  return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC char simd_reduce_min(simd_char32 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC char simd_reduce_min(simd_char64 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar2 x) {
  return x.y < x.x ? x.y : x.x;
}

static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar3 x) {
  unsigned char t = x.z < x.x ? x.z : x.x;
  return x.y < t ? x.y : t;
}

static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar4 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar8 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar16 x) {
#if defined __arm64__ || defined __aarch64__
  return vminvq_u8(x);
#else
  return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar32 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar64 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC short simd_reduce_min(simd_short2 x) {
  return x.y < x.x ? x.y : x.x;
}

static inline SIMD_CFUNC short simd_reduce_min(simd_short3 x) {
  short t = x.z < x.x ? x.z : x.x;
  return x.y < t ? x.y : t;
}

static inline SIMD_CFUNC short simd_reduce_min(simd_short4 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC short simd_reduce_min(simd_short8 x) {
#if defined __arm64__ || defined __aarch64__
  return vminvq_s16(x);
#else
  return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC short simd_reduce_min(simd_short16 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC short simd_reduce_min(simd_short32 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort2 x) {
  return x.y < x.x ? x.y : x.x;
}

static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort3 x) {
  unsigned short t = x.z < x.x ? x.z : x.x;
  return x.y < t ? x.y : t;
}

static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort4 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort8 x) {
#if defined __arm64__ || defined __aarch64__
  return vminvq_u16(x);
#else
  return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort16 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort32 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC int simd_reduce_min(simd_int2 x) {
  return x.y < x.x ? x.y : x.x;
}

static inline SIMD_CFUNC int simd_reduce_min(simd_int3 x) {
  int t = x.z < x.x ? x.z : x.x;
  return x.y < t ? x.y : t;
}

static inline SIMD_CFUNC int simd_reduce_min(simd_int4 x) {
#if defined __arm64__ || defined __aarch64__
  return vminvq_s32(x);
#else
  return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC int simd_reduce_min(simd_int8 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC int simd_reduce_min(simd_int16 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint2 x) {
  return x.y < x.x ? x.y : x.x;
}

static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint3 x) {
  unsigned int t = x.z < x.x ? x.z : x.x;
  return x.y < t ? x.y : t;
}

static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint4 x) {
#if defined __arm64__ || defined __aarch64__
  return vminvq_u32(x);
#else
  return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint8 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint16 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long2 x) {
  return x.y < x.x ? x.y : x.x;
}

static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long3 x) {
  simd_long1 t = x.z < x.x ? x.z : x.x;
  return x.y < t ? x.y : t;
}

static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long4 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long8 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong2 x) {
  return x.y < x.x ? x.y : x.x;
}

static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong3 x) {
  simd_ulong1 t = x.z < x.x ? x.z : x.x;
  return x.y < t ? x.y : t;
}

static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong4 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong8 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half2 x) {
  return __fminf16(x.x, x.y);
}

static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half3 x) {
  return __fminf16(__fminf16(x.x, x.z), x.y);
}

static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half4 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half8 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half16 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half32 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC float simd_reduce_min(simd_float2 x) {
  return fmin(x.x, x.y);
}

static inline SIMD_CFUNC float simd_reduce_min(simd_float3 x) {
  return fmin(fmin(x.x, x.z), x.y);
}

static inline SIMD_CFUNC float simd_reduce_min(simd_float4 x) {
#if defined __arm64__ || defined __aarch64__
  return vminvq_f32(x);
#else
  return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC float simd_reduce_min(simd_float8 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC float simd_reduce_min(simd_float16 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC double simd_reduce_min(simd_double2 x) {
#if defined __arm64__ || defined __aarch64__
  return vminvq_f64(x);
#else
  return fmin(x.x, x.y);
#endif
}

static inline SIMD_CFUNC double simd_reduce_min(simd_double3 x) {
  return fmin(fmin(x.x, x.z), x.y);
}

static inline SIMD_CFUNC double simd_reduce_min(simd_double4 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

static inline SIMD_CFUNC double simd_reduce_min(simd_double8 x) {
  return simd_reduce_min(simd_min(x.lo, x.hi));
}

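/* Usage sketch (illustrative only, not part of the original header): the min
 * reductions follow the same halving pattern, combining the .lo and .hi
 * halves with simd_min and finishing the floating-point cases with
 * fmin/__fminf16, or with a vminvq_* horizontal minimum on arm64.
 *
 *   simd_int4 v = { 3, -1, 7, 2 };
 *   int lowest = simd_reduce_min(v);   // -1
 */
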
static inline SIMD_CFUNC char simd_reduce_max(simd_char2 x) {
  return x.y > x.x ? x.y : x.x;
}

static inline SIMD_CFUNC char simd_reduce_max(simd_char3 x) {
  char t = x.z > x.x ? x.z : x.x;
  return x.y > t ? x.y : t;
}

static inline SIMD_CFUNC char simd_reduce_max(simd_char4 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC char simd_reduce_max(simd_char8 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC char simd_reduce_max(simd_char16 x) {
#if defined __arm64__ || defined __aarch64__
  return vmaxvq_s8(x);
#else
  return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC char simd_reduce_max(simd_char32 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC char simd_reduce_max(simd_char64 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar2 x) {
  return x.y > x.x ? x.y : x.x;
}

static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar3 x) {
  unsigned char t = x.z > x.x ? x.z : x.x;
  return x.y > t ? x.y : t;
}

static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar4 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar8 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar16 x) {
#if defined __arm64__ || defined __aarch64__
  return vmaxvq_u8(x);
#else
  return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar32 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar64 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC short simd_reduce_max(simd_short2 x) {
  return x.y > x.x ? x.y : x.x;
}

static inline SIMD_CFUNC short simd_reduce_max(simd_short3 x) {
  short t = x.z > x.x ? x.z : x.x;
  return x.y > t ? x.y : t;
}

static inline SIMD_CFUNC short simd_reduce_max(simd_short4 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC short simd_reduce_max(simd_short8 x) {
#if defined __arm64__ || defined __aarch64__
  return vmaxvq_s16(x);
#else
  return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC short simd_reduce_max(simd_short16 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC short simd_reduce_max(simd_short32 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort2 x) {
  return x.y > x.x ? x.y : x.x;
}

static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort3 x) {
  unsigned short t = x.z > x.x ? x.z : x.x;
  return x.y > t ? x.y : t;
}

static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort4 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort8 x) {
#if defined __arm64__ || defined __aarch64__
  return vmaxvq_u16(x);
#else
  return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort16 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort32 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC int simd_reduce_max(simd_int2 x) {
  return x.y > x.x ? x.y : x.x;
}

static inline SIMD_CFUNC int simd_reduce_max(simd_int3 x) {
  int t = x.z > x.x ? x.z : x.x;
  return x.y > t ? x.y : t;
}

static inline SIMD_CFUNC int simd_reduce_max(simd_int4 x) {
#if defined __arm64__ || defined __aarch64__
  return vmaxvq_s32(x);
#else
  return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC int simd_reduce_max(simd_int8 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC int simd_reduce_max(simd_int16 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint2 x) {
  return x.y > x.x ? x.y : x.x;
}

static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint3 x) {
  unsigned int t = x.z > x.x ? x.z : x.x;
  return x.y > t ? x.y : t;
}

static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint4 x) {
#if defined __arm64__ || defined __aarch64__
  return vmaxvq_u32(x);
#else
  return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint8 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint16 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long2 x) {
  return x.y > x.x ? x.y : x.x;
}

static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long3 x) {
  simd_long1 t = x.z > x.x ? x.z : x.x;
  return x.y > t ? x.y : t;
}

static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long4 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long8 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong2 x) {
  return x.y > x.x ? x.y : x.x;
}

static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong3 x) {
  simd_ulong1 t = x.z > x.x ? x.z : x.x;
  return x.y > t ? x.y : t;
}

static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong4 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong8 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half2 x) {
  return __fmaxf16(x.x, x.y);
}

static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half3 x) {
  return __fmaxf16(__fmaxf16(x.x, x.z), x.y);
}

static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half4 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half8 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half16 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half32 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC float simd_reduce_max(simd_float2 x) {
  return fmax(x.x, x.y);
}

static inline SIMD_CFUNC float simd_reduce_max(simd_float3 x) {
  return fmax(fmax(x.x, x.z), x.y);
}

static inline SIMD_CFUNC float simd_reduce_max(simd_float4 x) {
#if defined __arm64__ || defined __aarch64__
  return vmaxvq_f32(x);
#else
  return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}

static inline SIMD_CFUNC float simd_reduce_max(simd_float8 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC float simd_reduce_max(simd_float16 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC double simd_reduce_max(simd_double2 x) {
#if defined __arm64__ || defined __aarch64__
  return vmaxvq_f64(x);
#else
  return fmax(x.x, x.y);
#endif
}

static inline SIMD_CFUNC double simd_reduce_max(simd_double3 x) {
  return fmax(fmax(x.x, x.z), x.y);
}

static inline SIMD_CFUNC double simd_reduce_max(simd_double4 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

static inline SIMD_CFUNC double simd_reduce_max(simd_double8 x) {
  return simd_reduce_max(simd_max(x.lo, x.hi));
}

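/* Usage sketch (illustrative only, not part of the original header):
 * simd_reduce_max mirrors simd_reduce_min, using simd_max, fmax/__fmaxf16,
 * and the vmaxvq_* horizontal maximums on arm64.
 *
 *   simd_double4 v = { 0.5, 2.25, -1.0, 1.75 };
 *   double peak = simd_reduce_max(v);   // 2.25
 */
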
#ifdef __cplusplus
}
#endif
#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */
#endif /* SIMD_COMMON_HEADER */