master
  1/* origin: FreeBSD /usr/src/lib/msun/ld80/e_rem_pio2.c */
  2/*
  3 * ====================================================
  4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  5 * Copyright (c) 2008 Steven G. Kargl, David Schultz, Bruce D. Evans.
  6 *
  7 * Developed at SunSoft, a Sun Microsystems, Inc. business.
  8 * Permission to use, copy, modify, and distribute this
  9 * software is freely granted, provided that this notice
 10 * is preserved.
 11 * ====================================================
 12 *
 13 * Optimized by Bruce D. Evans.
 14 */
 15#include "libm.h"
 16#if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
 17/* ld80 and ld128 version of __rem_pio2(x,y)
 18 *
 19 * return the remainder of x rem pi/2 in y[0]+y[1]
 20 * use __rem_pio2_large() for large x
 21 */
 22
 23static const long double toint = 1.5/LDBL_EPSILON;
 24
 25#if LDBL_MANT_DIG == 64
 26/* u ~< 0x1p25*pi/2 */
 27#define SMALL(u) (((u.i.se & 0x7fffU)<<16 | u.i.m>>48) < ((0x3fff + 25)<<16 | 0x921f>>1 | 0x8000))
 28#define QUOBITS(x) ((uint32_t)(int32_t)x & 0x7fffffff)
 29#define ROUND1 22
 30#define ROUND2 61
 31#define NX 3
 32#define NY 2
 33/*
 34 * invpio2:  64 bits of 2/pi
 35 * pio2_1:   first  39 bits of pi/2
 36 * pio2_1t:  pi/2 - pio2_1
 37 * pio2_2:   second 39 bits of pi/2
 38 * pio2_2t:  pi/2 - (pio2_1+pio2_2)
 39 * pio2_3:   third  39 bits of pi/2
 40 * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
 41 */
 42static const double
 43pio2_1 =  1.57079632679597125389e+00, /* 0x3FF921FB, 0x54444000 */
 44pio2_2 = -1.07463465549783099519e-12, /* -0x12e7b967674000.0p-92 */
 45pio2_3 =  6.36831716351370313614e-25; /*  0x18a2e037074000.0p-133 */
 46static const long double
 47pio4    =  0x1.921fb54442d1846ap-1L,
 48invpio2 =  6.36619772367581343076e-01L, /*  0xa2f9836e4e44152a.0p-64 */
 49pio2_1t = -1.07463465549719416346e-12L, /* -0x973dcb3b399d747f.0p-103 */
 50pio2_2t =  6.36831716351095013979e-25L, /*  0xc51701b839a25205.0p-144 */
 51pio2_3t = -2.75299651904407171810e-37L; /* -0xbb5bf6c7ddd660ce.0p-185 */
 52#elif LDBL_MANT_DIG == 113
 53/* u ~< 0x1p45*pi/2 */
 54#define SMALL(u) (((u.i.se & 0x7fffU)<<16 | u.i.top) < ((0x3fff + 45)<<16 | 0x921f))
 55#define QUOBITS(x) ((uint32_t)(int64_t)x & 0x7fffffff)
 56#define ROUND1 51
 57#define ROUND2 119
 58#define NX 5
 59#define NY 3
 60static const long double
 61#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes
 62pio4    =  0x1.921fb54442d18469898cc51701b8p-1L,
 63#endif
 64invpio2 =  6.3661977236758134307553505349005747e-01L,	/*  0x145f306dc9c882a53f84eafa3ea6a.0p-113 */
 65pio2_1  =  1.5707963267948966192292994253909555e+00L,	/*  0x1921fb54442d18469800000000000.0p-112 */
 66pio2_1t =  2.0222662487959507323996846200947577e-21L,	/*  0x13198a2e03707344a4093822299f3.0p-181 */
 67pio2_2  =  2.0222662487959507323994779168837751e-21L,	/*  0x13198a2e03707344a400000000000.0p-181 */
 68pio2_2t =  2.0670321098263988236496903051604844e-43L,	/*  0x127044533e63a0105df531d89cd91.0p-254 */
 69pio2_3  =  2.0670321098263988236499468110329591e-43L,	/*  0x127044533e63a0105e00000000000.0p-254 */
 70pio2_3t = -2.5650587247459238361625433492959285e-65L;	/* -0x159c4ec64ddaeb5f78671cbfb2210.0p-327 */
 71#endif
 72
 73int __rem_pio2l(long double x, long double *y)
 74{
 75	union ldshape u,uz;
 76	long double z,w,t,r,fn;
 77	double tx[NX],ty[NY];
 78	int ex,ey,n,i;
 79
 80	u.f = x;
 81	ex = u.i.se & 0x7fff;
 82	if (SMALL(u)) {
 83		/* rint(x/(pi/2)) */
 84		fn = x*invpio2 + toint - toint;
 85		n = QUOBITS(fn);
 86		r = x-fn*pio2_1;
 87		w = fn*pio2_1t;  /* 1st round good to 102/180 bits (ld80/ld128) */
 88#ifdef __wasilibc_unmodified_upstream // Wasm doesn't have alternate rounding modes
 89		/* Matters with directed rounding. */
 90		if (predict_false(r - w < -pio4)) {
 91			n--;
 92			fn--;
 93			r = x - fn*pio2_1;
 94			w = fn*pio2_1t;
 95		} else if (predict_false(r - w > pio4)) {
 96			n++;
 97			fn++;
 98			r = x - fn*pio2_1;
 99			w = fn*pio2_1t;
100		}
101#endif
102		y[0] = r-w;
103		u.f = y[0];
104		ey = u.i.se & 0x7fff;
105		if (ex - ey > ROUND1) {  /* 2nd iteration needed, good to 141/248 (ld80/ld128) */
106			t = r;
107			w = fn*pio2_2;
108			r = t-w;
109			w = fn*pio2_2t-((t-r)-w);
110			y[0] = r-w;
111			u.f = y[0];
112			ey = u.i.se & 0x7fff;
113			if (ex - ey > ROUND2) {  /* 3rd iteration, good to 180/316 bits */
114				t = r; /* will cover all possible cases (not verified for ld128) */
115				w = fn*pio2_3;
116				r = t-w;
117				w = fn*pio2_3t-((t-r)-w);
118				y[0] = r-w;
119			}
120		}
121		y[1] = (r - y[0]) - w;
122		return n;
123	}
124	/*
125	 * all other (large) arguments
126	 */
127	if (ex == 0x7fff) {                /* x is inf or NaN */
128		y[0] = y[1] = x - x;
129		return 0;
130	}
131	/* set z = scalbn(|x|,-ilogb(x)+23) */
132	uz.f = x;
133	uz.i.se = 0x3fff + 23;
134	z = uz.f;
135	for (i=0; i < NX - 1; i++) {
136		tx[i] = (double)(int32_t)z;
137		z     = (z-tx[i])*0x1p24;
138	}
139	tx[i] = z;
140	while (tx[i] == 0)
141		i--;
142	n = __rem_pio2_large(tx, ty, ex-0x3fff-23, i+1, NY);
143	w = ty[1];
144	if (NY == 3)
145		w += ty[2];
146	r = ty[0] + w;
147	/* TODO: for ld128 this does not follow the recommendation of the
148	comments of __rem_pio2_large which seem wrong if |ty[0]| > |ty[1]+ty[2]| */
149	w -= r - ty[0];
150	if (u.i.se >> 15) {
151		y[0] = -r;
152		y[1] = -w;
153		return -n;
154	}
155	y[0] = r;
156	y[1] = w;
157	return n;
158}
159#endif