zig/lib/libc/musl/src/math/sqrtl.c at master

  1#include <stdint.h>
  2#include <math.h>
  3#include <float.h>
  4#include "libm.h"
  5
  6#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
  7long double sqrtl(long double x)
  8{
  9	return sqrt(x);
 10}
 11#elif (LDBL_MANT_DIG == 113 || LDBL_MANT_DIG == 64) && LDBL_MAX_EXP == 16384
 12#include "sqrt_data.h"
 13
 14#define FENV_SUPPORT 1
 15
 16typedef struct {
 17	uint64_t hi;
 18	uint64_t lo;
 19} u128;
 20
 21/* top: 16 bit sign+exponent, x: significand.  */
 22static inline long double mkldbl(uint64_t top, u128 x)
 23{
 24	union ldshape u;
 25#if LDBL_MANT_DIG == 113
 26	u.i2.hi = x.hi;
 27	u.i2.lo = x.lo;
 28	u.i2.hi &= 0x0000ffffffffffff;
 29	u.i2.hi |= top << 48;
 30#elif LDBL_MANT_DIG == 64
 31	u.i.se = top;
 32	u.i.m = x.lo;
 33	/* force the top bit on non-zero (and non-subnormal) results.  */
 34	if (top & 0x7fff)
 35		u.i.m |= 0x8000000000000000;
 36#endif
 37	return u.f;
 38}
 39
 40/* return: top 16 bit is sign+exp and following bits are the significand.  */
 41static inline u128 asu128(long double x)
 42{
 43	union ldshape u = {.f=x};
 44	u128 r;
 45#if LDBL_MANT_DIG == 113
 46	r.hi = u.i2.hi;
 47	r.lo = u.i2.lo;
 48#elif LDBL_MANT_DIG == 64
 49	r.lo = u.i.m<<49;
 50	/* ignore the top bit: pseudo numbers are not handled. */
 51	r.hi = u.i.m>>15;
 52	r.hi &= 0x0000ffffffffffff;
 53	r.hi |= (uint64_t)u.i.se << 48;
 54#endif
 55	return r;
 56}
 57
 58/* returns a*b*2^-32 - e, with error 0 <= e < 1.  */
 59static inline uint32_t mul32(uint32_t a, uint32_t b)
 60{
 61	return (uint64_t)a*b >> 32;
 62}
 63
 64/* returns a*b*2^-64 - e, with error 0 <= e < 3.  */
 65static inline uint64_t mul64(uint64_t a, uint64_t b)
 66{
 67	uint64_t ahi = a>>32;
 68	uint64_t alo = a&0xffffffff;
 69	uint64_t bhi = b>>32;
 70	uint64_t blo = b&0xffffffff;
 71	return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
 72}
 73
 74static inline u128 add64(u128 a, uint64_t b)
 75{
 76	u128 r;
 77	r.lo = a.lo + b;
 78	r.hi = a.hi;
 79	if (r.lo < a.lo)
 80		r.hi++;
 81	return r;
 82}
 83
 84static inline u128 add128(u128 a, u128 b)
 85{
 86	u128 r;
 87	r.lo = a.lo + b.lo;
 88	r.hi = a.hi + b.hi;
 89	if (r.lo < a.lo)
 90		r.hi++;
 91	return r;
 92}
 93
 94static inline u128 sub64(u128 a, uint64_t b)
 95{
 96	u128 r;
 97	r.lo = a.lo - b;
 98	r.hi = a.hi;
 99	if (a.lo < b)
100		r.hi--;
101	return r;
102}
103
104static inline u128 sub128(u128 a, u128 b)
105{
106	u128 r;
107	r.lo = a.lo - b.lo;
108	r.hi = a.hi - b.hi;
109	if (a.lo < b.lo)
110		r.hi--;
111	return r;
112}
113
114/* a<<n, 0 <= n <= 127 */
115static inline u128 lsh(u128 a, int n)
116{
117	if (n == 0)
118		return a;
119	if (n >= 64) {
120		a.hi = a.lo<<(n-64);
121		a.lo = 0;
122	} else {
123		a.hi = (a.hi<<n) | (a.lo>>(64-n));
124		a.lo = a.lo<<n;
125	}
126	return a;
127}
128
129/* a>>n, 0 <= n <= 127 */
130static inline u128 rsh(u128 a, int n)
131{
132	if (n == 0)
133		return a;
134	if (n >= 64) {
135		a.lo = a.hi>>(n-64);
136		a.hi = 0;
137	} else {
138		a.lo = (a.lo>>n) | (a.hi<<(64-n));
139		a.hi = a.hi>>n;
140	}
141	return a;
142}
143
144/* returns a*b exactly.  */
145static inline u128 mul64_128(uint64_t a, uint64_t b)
146{
147	u128 r;
148	uint64_t ahi = a>>32;
149	uint64_t alo = a&0xffffffff;
150	uint64_t bhi = b>>32;
151	uint64_t blo = b&0xffffffff;
152	uint64_t lo1 = ((ahi*blo)&0xffffffff) + ((alo*bhi)&0xffffffff) + (alo*blo>>32);
153	uint64_t lo2 = (alo*blo)&0xffffffff;
154	r.hi = ahi*bhi + (ahi*blo>>32) + (alo*bhi>>32) + (lo1>>32);
155	r.lo = (lo1<<32) + lo2;
156	return r;
157}
158
159/* returns a*b*2^-128 - e, with error 0 <= e < 7.  */
160static inline u128 mul128(u128 a, u128 b)
161{
162	u128 hi = mul64_128(a.hi, b.hi);
163	uint64_t m1 = mul64(a.hi, b.lo);
164	uint64_t m2 = mul64(a.lo, b.hi);
165	return add64(add64(hi, m1), m2);
166}
167
168/* returns a*b % 2^128.  */
169static inline u128 mul128_tail(u128 a, u128 b)
170{
171	u128 lo = mul64_128(a.lo, b.lo);
172	lo.hi += a.hi*b.lo + a.lo*b.hi;
173	return lo;
174}
175
176
177/* see sqrt.c for detailed comments.  */
178
179long double sqrtl(long double x)
180{
181	u128 ix, ml;
182	uint64_t top;
183
184	ix = asu128(x);
185	top = ix.hi >> 48;
186	if (predict_false(top - 0x0001 >= 0x7fff - 0x0001)) {
187		/* x < 0x1p-16382 or inf or nan.  */
188		if (2*ix.hi == 0 && ix.lo == 0)
189			return x;
190		if (ix.hi == 0x7fff000000000000 && ix.lo == 0)
191			return x;
192		if (top >= 0x7fff)
193			return __math_invalidl(x);
194		/* x is subnormal, normalize it.  */
195		ix = asu128(x * 0x1p112);
196		top = ix.hi >> 48;
197		top -= 112;
198	}
199
200	/* x = 4^e m; with int e and m in [1, 4) */
201	int even = top & 1;
202	ml = lsh(ix, 15);
203	ml.hi |= 0x8000000000000000;
204	if (even) ml = rsh(ml, 1);
205	top = (top + 0x3fff) >> 1;
206
207	/* r ~ 1/sqrt(m) */
208	const uint64_t three = 0xc0000000;
209	uint64_t r, s, d, u, i;
210	i = (ix.hi >> 42) % 128;
211	r = (uint32_t)__rsqrt_tab[i] << 16;
212	/* |r sqrt(m) - 1| < 0x1p-8 */
213	s = mul32(ml.hi>>32, r);
214	d = mul32(s, r);
215	u = three - d;
216	r = mul32(u, r) << 1;
217	/* |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit */
218	r = r<<32;
219	s = mul64(ml.hi, r);
220	d = mul64(s, r);
221	u = (three<<32) - d;
222	r = mul64(u, r) << 1;
223	/* |r sqrt(m) - 1| < 0x1.a5p-31 */
224	s = mul64(u, s) << 1;
225	d = mul64(s, r);
226	u = (three<<32) - d;
227	r = mul64(u, r) << 1;
228	/* |r sqrt(m) - 1| < 0x1.c001p-59, switch to 128bit */
229
230	const u128 threel = {.hi=three<<32, .lo=0};
231	u128 rl, sl, dl, ul;
232	rl.hi = r;
233	rl.lo = 0;
234	sl = mul128(ml, rl);
235	dl = mul128(sl, rl);
236	ul = sub128(threel, dl);
237	sl = mul128(ul, sl); /* repr: 3.125 */
238	/* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
239	sl = rsh(sub64(sl, 4), 125-(LDBL_MANT_DIG-1));
240	/* s < sqrt(m) < s + 1 ULP + tiny */
241
242	long double y;
243	u128 d2, d1, d0;
244	d0 = sub128(lsh(ml, 2*(LDBL_MANT_DIG-1)-126), mul128_tail(sl,sl));
245	d1 = sub128(sl, d0);
246	d2 = add128(add64(sl, 1), d1);
247	sl = add64(sl, d1.hi >> 63);
248	y = mkldbl(top, sl);
249	if (FENV_SUPPORT) {
250		/* handle rounding modes and inexact exception.  */
251		top = predict_false((d2.hi|d2.lo)==0) ? 0 : 1;
252		top |= ((d1.hi^d2.hi)&0x8000000000000000) >> 48;
253		y += mkldbl(top, (u128){0});
254	}
255	return y;
256}
257#else
258#error unsupported long double format
259#endif