zig/lib/compiler_rt/sqrt.zig at master

  1//! Ported from musl, which is MIT licensed.
  2//! https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
  3//!
  4//! https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
  5//! https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
  6//! https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
  7
  8const std = @import("std");
  9const builtin = @import("builtin");
 10const arch = builtin.cpu.arch;
 11const math = std.math;
 12const common = @import("common.zig");
 13
 14pub const panic = common.panic;
 15
 16comptime {
 17    @export(&__sqrth, .{ .name = "__sqrth", .linkage = common.linkage, .visibility = common.visibility });
 18    @export(&sqrtf, .{ .name = "sqrtf", .linkage = common.linkage, .visibility = common.visibility });
 19    @export(&sqrt, .{ .name = "sqrt", .linkage = common.linkage, .visibility = common.visibility });
 20    @export(&__sqrtx, .{ .name = "__sqrtx", .linkage = common.linkage, .visibility = common.visibility });
 21    if (common.want_ppc_abi) {
 22        @export(&sqrtq, .{ .name = "sqrtf128", .linkage = common.linkage, .visibility = common.visibility });
 23    } else if (common.want_sparc_abi) {
 24        @export(&_Qp_sqrt, .{ .name = "_Qp_sqrt", .linkage = common.linkage, .visibility = common.visibility });
 25    }
 26    @export(&sqrtq, .{ .name = "sqrtq", .linkage = common.linkage, .visibility = common.visibility });
 27    @export(&sqrtl, .{ .name = "sqrtl", .linkage = common.linkage, .visibility = common.visibility });
 28}
 29
 30pub fn __sqrth(x: f16) callconv(.c) f16 {
 31    var ix: u16 = @bitCast(x);
 32    var top = ix >> 10;
 33
 34    // special case handling.
 35    if (top -% 0x01 >= 0x1F - 0x01) {
 36        @branchHint(.unlikely);
 37        // x < 0x1p-14 or inf or nan.
 38        if (ix & 0x7FFF == 0) return x;
 39        if (ix == 0x7C00) return x;
 40        if (ix > 0x7C00) return math.nan(f16);
 41        // x is subnormal, normalize it.
 42        ix = @bitCast(x * 0x1p10);
 43        top = (ix >> 10) -% 10;
 44    }
 45
 46    // argument reduction:
 47    // x = 4^e m; with integer e, and m in [1, 4)
 48    // m: fixed point representation [2.14]
 49    // 2^e is the exponent part of the result.
 50    const even = (top & 1) != 0;
 51    const m = if (even) (ix << 4) & 0x7FFF else (ix << 5) | 0x8000;
 52    top = (top +% 0x0F) >> 1;
 53
 54    // approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
 55    // the fixed point representations are
 56    //   m: 2.14 r: 0.16, s: 2.14, d: 2.14, u: 2.14, three: 2.14
 57    const three: u16 = 0xC000;
 58    const i: usize = @intCast((ix >> 4) & 0x7F);
 59    const r = __rsqrt_tab[i];
 60    // |r*sqrt(m) - 1| < 0x1p-8
 61    var s = mul16(m, r);
 62    // |s/sqrt(m) - 1| < 0x1p-8
 63    const d = mul16(s, r);
 64    const u = three - d;
 65    s = mul16(s, u); // repr: 3.13
 66    // -0x1.20p-13 < s/sqrt(m) - 1 < 0x7Dp-16
 67    s = (s - 1) >> 3; // repr: 6.10
 68    // s < sqrt(m) < s + 0x1.24p-10
 69
 70    // compute nearest rounded result:
 71    // the nearest result to 10 bits is either s or s+0x1p-10,
 72    // we can decide by comparing (2^10 s + 0.5)^2 to 2^20 m.
 73    const d0 = (m << 6) -% s *% s;
 74    const d1 = s -% d0;
 75    const d2 = d1 +% s +% 1;
 76    s += d1 >> 15;
 77    s &= 0x03FF;
 78    s |= top << 10;
 79    const y: f16 = @bitCast(s);
 80
 81    // handle rounding modes and inexact exception:
 82    // only (s+1)^2 == 2^6 m case is exact otherwise
 83    // add a tiny value to cause the fenv effects.
 84    if (d2 != 0) {
 85        @branchHint(.likely);
 86        var tiny: u16 = 0x0001;
 87        tiny |= (d1 ^ d2) & 0x8000;
 88        const t: f16 = @bitCast(tiny);
 89        return y + t;
 90    }
 91
 92    return y;
 93}
 94
 95pub fn sqrtf(x: f32) callconv(.c) f32 {
 96    var ix: u32 = @bitCast(x);
 97    var top = ix >> 23;
 98
 99    // special case handling.
100    if (top -% 0x01 >= 0xFF - 0x01) {
101        @branchHint(.unlikely);
102        // x < 0x1p-126 or inf or nan.
103        if (ix & 0x7FFF_FFFF == 0) return x;
104        if (ix == 0x7F80_0000) return x;
105        if (ix > 0x7F80_0000) return math.nan(f32);
106        // x is subnormal, normalize it.
107        ix = @bitCast(x * 0x1p23);
108        top = (ix >> 23) -% 23;
109    }
110
111    // argument reduction:
112    // x = 4^e m; with integer e, and m in [1, 4)
113    // m: fixed point representation [2.30]
114    // 2^e is the exponent part of the result.
115    const even = (top & 1) != 0;
116    const m = if (even) (ix << 7) & 0x7FFF_FFFF else (ix << 8) | 0x8000_0000;
117    top = (top +% 0x7F) >> 1;
118
119    // approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
120    // the fixed point representations are
121    //   m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
122    const three: u32 = 0xC000_0000;
123    var i: usize = @intCast((ix >> 17) & 0x3F);
124    if (even) i += 64;
125    var r = @as(u32, @intCast(__rsqrt_tab[i])) << 16;
126    // |r*sqrt(m) - 1| < 0x1p-8
127    var s = mul32(m, r);
128    // |s/sqrt(m) - 1| < 0x1p-8
129    var d = mul32(s, r);
130    var u = three - d;
131    r = mul32(r, u) << 1;
132    // |r*sqrt(m) - 1| < 0x1.7bp-16
133    s = mul32(s, u) << 1;
134    // |s/sqrt(m) - 1| < 0x1.7bp-16
135    d = mul32(s, r);
136    u = three - d;
137    s = mul32(s, u); // repr: 3.29
138    // -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31
139    s = (s - 1) >> 6; // repr: 9.23
140    // s < sqrt(m) < s + 0x1.08p-23
141
142    // compute nearest rounded result:
143    // the nearest result to 23 bits is either s or s+0x1p-23,
144    // we can decide by comparing (2^23 s + 0.5)^2 to 2^46 m.
145    const d0 = (m << 16) -% s *% s;
146    const d1 = s -% d0;
147    const d2 = d1 +% s +% 1;
148    s += d1 >> 31;
149    s &= 0x007F_FFFF;
150    s |= top << 23;
151    const y: f32 = @bitCast(s);
152
153    // handle rounding modes and inexact exception:
154    // only (s+1)^2 == 2^16 m case is exact otherwise
155    // add a tiny value to cause the fenv effects.
156    if (d2 != 0) {
157        @branchHint(.likely);
158        var tiny: u32 = 0x0100_0000;
159        tiny |= (d1 ^ d2) & 0x8000_0000;
160        const t: f32 = @bitCast(tiny);
161        return y + t;
162    }
163
164    return y;
165}
166
167pub fn sqrt(x: f64) callconv(.c) f64 {
168    var ix: u64 = @bitCast(x);
169    var top = ix >> 52;
170
171    // special case handling.
172    if (top -% 0x001 >= 0x7FF - 0x001) {
173        @branchHint(.unlikely);
174        // x < 0x1p-1022 or inf or nan.
175        if (ix & 0x7FFF_FFFF_FFFF_FFFF == 0) return x;
176        if (ix == 0x7FF0_0000_0000_0000) return x;
177        if (ix > 0x7FF0_0000_0000_0000) return math.nan(f64);
178        // x is subnormal, normalize it.
179        ix = @bitCast(x * 0x1p52);
180        top = (ix >> 52) -% 52;
181    }
182
183    // argument reduction:
184    // x = 4^e m; with integer e, and m in [1, 4)
185    // m: fixed point representation [2.62]
186    // 2^e is the exponent part of the result.
187    const even = (top & 1) != 0;
188    const m = if (even) (ix << 10) & 0x7FFF_FFFF_FFFF_FFFF else (ix << 11) | 0x8000_0000_0000_0000;
189    top = (top +% 0x3FF) >> 1;
190
191    // approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
192    //
193    // initial estimate:
194    // 7bit table lookup (1bit exponent and 6bit significand).
195    //
196    // iterative approximation:
197    // using 2 goldschmidt iterations with 32bit int arithmetics
198    // and a final iteration with 64bit int arithmetics.
199    //
200    // details:
201    //
202    // the relative error (e = r0 sqrt(m)-1) of a linear estimate
203    // (r0 = a m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best,
204    // a table lookup is faster and needs one less iteration
205    // 6 bit lookup table (128b) gives |e| < 0x1.f9p-8
206    // 7 bit lookup table (256b) gives |e| < 0x1.fdp-9
207    // for single and double prec 6bit is enough but for quad
208    // prec 7bit is needed (or modified iterations). to avoid
209    // one more iteration >=13bit table would be needed (16k).
210    //
211    // a newton-raphson iteration for r is
212    //   w = r*r
213    //   u = 3 - m*w
214    //   r = r*u/2
215    // can use a goldschmidt iteration for s at the end or
216    //   s = m*r
217    //
218    // first goldschmidt iteration is
219    //   s = m*r
220    //   u = 3 - s*r
221    //   r = r*u/2
222    //   s = s*u/2
223    // next goldschmidt iteration is
224    //   u = 3 - s*r
225    //   r = r*u/2
226    //   s = s*u/2
227    // and at the end r is not computed only s.
228    //
229    // they use the same amount of operations and converge at the
230    // same quadratic rate, i.e. if
231    //   r1 sqrt(m) - 1 = e, then
232    //   r2 sqrt(m) - 1 = -3/2 e^2 - 1/2 e^3
233    // the advantage of goldschmidt is that the mul for s and r
234    // are independent (computed in parallel), however it is not
235    // "self synchronizing": it only uses the input m in the
236    // first iteration so rounding errors accumulate. at the end
237    // or when switching to larger precision arithmetics rounding
238    // errors dominate so the first iteration should be used.
239    //
240    // the fixed point representations are
241    //   m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
242    // and after switching to 64 bit
243    //   m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62
244    const three: struct { u32, u64 } = .{
245        0xC000_0000,
246        0xC000_0000_0000_0000,
247    };
248    var r: struct { u32, u64 } = undefined;
249    var s: struct { u32, u64 } = undefined;
250    var d: struct { u32, u64 } = undefined;
251    var u: struct { u32, u64 } = undefined;
252    const i: usize = @intCast((ix >> 46) & 0x7F);
253    r[0] = @intCast(__rsqrt_tab[i]);
254    r[0] <<= 16;
255    // |r sqrt(m) - 1| < 0x1.fdp-9
256    s[0] = mul32(@intCast(m >> 32), r[0]);
257    // |s/sqrt(m) - 1| < 0x1.fdp-9
258    d[0] = mul32(s[0], r[0]);
259    u[0] = three[0] - d[0];
260    r[0] = mul32(r[0], u[0]) << 1;
261    // |r sqrt(m) - 1| < 0x1.7bp-16
262    s[0] = mul32(s[0], u[0]) << 1;
263    // |s/sqrt(m) - 1| < 0x1.7bp-16
264    d[0] = mul32(s[0], r[0]);
265    u[0] = three[0] - d[0];
266    r[0] = mul32(r[0], u[0]) << 1;
267    // |r sqrt(m) - 1| < 0x1.3704p-29 (measured worst-case)
268    r[1] = @intCast(r[0]);
269    r[1] <<= 32;
270    s[1] = mul64(m, r[1]);
271    d[1] = mul64(s[1], r[1]);
272    u[1] = three[1] - d[1];
273    s[1] = mul64(s[1], u[1]); // repr: 3.61
274    // -0x1p-57 < s - sqrt(m) < 0x1.8001p-61
275    s[1] = (s[1] - 2) >> 9; // repr: 12.52
276    // -0x1.09p-52 < s - sqrt(m) < -0x1.fffcp-63
277
278    // s < sqrt(m) < s + 0x1.09p-52
279    // compute nearest rounded result:
280    // the nearest result to 52 bits is either s or s+0x1p-52,
281    // we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m.
282    const d0 = (m << 42) -% s[1] *% s[1];
283    const d1 = s[1] -% d0;
284    const d2 = d1 +% s[1] +% 1;
285    s[1] += d1 >> 63;
286    s[1] &= 0x000F_FFFF_FFFF_FFFF;
287    s[1] |= top << 52;
288    const y: f64 = @bitCast(s[1]);
289
290    // handle rounding modes and inexact exception:
291    // only (s+1)^2 == 2^42 m case is exact otherwise
292    // add a tiny value to cause the fenv effects.
293    if (d2 != 0) {
294        @branchHint(.likely);
295        var tiny: u64 = 0x0010_0000_0000_0000;
296        tiny |= (d1 ^ d2) & 0x8000_0000_0000_0000;
297        const t: f64 = @bitCast(tiny);
298        return y + t;
299    }
300
301    return y;
302}
303
304pub fn __sqrtx(x: f80) callconv(.c) f80 {
305    var ix: u80 = @bitCast(x);
306    var top = ix >> 64;
307
308    // special case handling.
309    if (top -% 0x0001 >= 0x7FFF - 0x0001) {
310        @branchHint(.unlikely);
311        // x < 0x1p-16382 or inf or nan.
312        if (ix & 0x7FFF_FFFF_FFFF_FFFF_FFFF == 0) return x;
313        if (ix == 0x7FFF_8000_0000_0000_0000) return x;
314        if (ix > 0x7FFF_8000_0000_0000_0000) return math.nan(f80);
315        // x is subnormal, normalize it.
316        ix = @bitCast(x * 0x1p63);
317        top = (ix >> 64) -% 63;
318    }
319
320    // argument reduction:
321    // x = 4^e m; with integer e, and m in [1, 4)
322    // m: fixed point representation [2.78]
323    // 2^e is the exponent part of the result.
324    const even = (top & 1) != 0;
325    const m = if (even) (ix << 15) & 0x7FFF_FFFF_FFFF_FFFF_FFFF else ix << 16;
326    top = (top +% 0x3FFF) >> 1;
327
328    // approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
329    // the fixed point representations are
330    //   m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
331    // and after switching to 64 bit
332    //   m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62
333    // and after switching to 80 bit
334    //   m: 2.78 r: 0.80, s: 2.78, d: 2.78, u: 2.78, three: 2.78
335    const three: struct { u32, u64, u80 } = .{
336        0xC000_0000,
337        0xC000_0000_0000_0000,
338        0xC000_0000_0000_0000_0000,
339    };
340    var r: struct { u32, u64, u80 } = undefined;
341    var s: struct { u32, u64, u80 } = undefined;
342    var d: struct { u32, u64, u80 } = undefined;
343    var u: struct { u32, u64, u80 } = undefined;
344    var i: usize = @intCast((ix >> 57) & 0x3F);
345    if (even) i += 64;
346    r[0] = @intCast(__rsqrt_tab[i]);
347    r[0] <<= 16;
348    // |r sqrt(m) - 1| < 0x1p-8
349    s[0] = mul32(@intCast(m >> 48), r[0]);
350    d[0] = mul32(s[0], r[0]);
351    u[0] = three[0] - d[0];
352    r[0] = mul32(u[0], r[0]) << 1;
353    // |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit
354    r[1] = @intCast(r[0]);
355    r[1] <<= 32;
356    s[1] = mul64(@intCast(m >> 16), r[1]);
357    d[1] = mul64(s[1], r[1]);
358    u[1] = three[1] - d[1];
359    r[1] = mul64(u[1], r[1]) << 1;
360    // |r sqrt(m) - 1| < 0x1.a5p-31
361    s[1] = mul64(u[1], s[1]) << 1;
362    d[1] = mul64(s[1], r[1]);
363    u[1] = three[1] - d[1];
364    r[1] = mul64(u[1], r[1]) << 1;
365    // |r sqrt(m) - 1| < 0x1.c001p-59, switch to 80bit
366    r[2] = @intCast(r[1]);
367    r[2] <<= 16;
368    s[2] = mul80(m, r[2]);
369    d[2] = mul80(s[2], r[2]);
370    u[2] = three[2] - d[2];
371    s[2] = mul80(u[2], s[2]); // repr: 3.77
372    s[2] = (s[2] - 4) >> 14; // repr: 17.63
373    // s < sqrt(m) < s + 1 ULP + tiny
374
375    // compute nearest rounded result:
376    // the nearest result to 63 bits is either s or s+0x1p-63,
377    // we can decide by comparing (2^63 s + 0.5)^2 to 2^126 m
378    const d0 = (m << 48) -% mul80_tail(s[2], s[2]);
379    const d1 = s[2] -% d0;
380    const d2 = d1 +% s[2] +% 1;
381    s[2] += d1 >> 79;
382    s[2] &= 0x0000_7FFF_FFFF_FFFF_FFFF;
383    s[2] |= 0x0000_8000_0000_0000_0000;
384    s[2] |= top << 64;
385    const y: f80 = @bitCast(s[2]);
386
387    // handle rounding modes and inexact exception:
388    // only (s+1)^2 == 2^48 m case is exact otherwise
389    // add a tiny value to cause the fenv effects.
390    if (d2 != 0) {
391        @branchHint(.likely);
392        var tiny: u80 = 0x0001_8000_0000_0000_0000;
393        tiny |= (d1 ^ d2) & 0x8000_0000_0000_0000_0000;
394        const t: f80 = @bitCast(tiny);
395        return y + t;
396    }
397
398    return y;
399}
400
401pub fn sqrtq(x: f128) callconv(.c) f128 {
402    var ix: u128 = @bitCast(x);
403    var top = ix >> 112;
404
405    // special case handling.
406    if (top -% 0x0001 >= 0x7FFF - 0x0001) {
407        @branchHint(.unlikely);
408        // x < 0x1p-16382 or inf or nan.
409        if (ix & 0x7FFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF == 0) return x;
410        if (ix == 0x7FFF_0000_0000_0000_0000_0000_0000_0000) return x;
411        if (ix > 0x7FFF_0000_0000_0000_0000_0000_0000_0000) return math.nan(f128);
412        // x is subnormal, normalize it.
413        ix = @bitCast(x * 0x1p112);
414        top = (ix >> 112) -% 112;
415    }
416
417    // argument reduction:
418    // x = 4^e m; with integer e, and m in [1, 4)
419    // m: fixed point representation [2.126]
420    // 2^e is the exponent part of the result.
421    const even = (top & 1) != 0;
422    const m = if (even) (ix << 14) & 0x7FFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF else (ix << 15) | 0x8000_0000_0000_0000_0000_0000_0000_0000;
423    top = (top +% 0x3FFF) >> 1;
424
425    // approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
426    // the fixed point representations are
427    //   m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
428    // and after switching to 64 bit
429    //   m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62
430    // and after switching to 128 bit
431    //   m: 2.126 r: 0.128, s: 2.126, d: 2.126, u: 2.126, three: 2.126
432    const three: struct { u32, u64, u128 } = .{
433        0xC000_0000,
434        0xC000_0000_0000_0000,
435        0xC000_0000_0000_0000_0000_0000_0000_0000,
436    };
437    var r: struct { u32, u64, u128 } = undefined;
438    var s: struct { u32, u64, u128 } = undefined;
439    var d: struct { u32, u64, u128 } = undefined;
440    var u: struct { u32, u64, u128 } = undefined;
441    const i: usize = @intCast((ix >> 106) & 0x7F);
442    r[0] = @intCast(__rsqrt_tab[i]);
443    r[0] <<= 16;
444    // |r sqrt(m) - 1| < 0x1p-8
445    s[0] = mul32(@intCast(m >> 96), r[0]);
446    d[0] = mul32(s[0], r[0]);
447    u[0] = three[0] - d[0];
448    r[0] = mul32(u[0], r[0]) << 1;
449    // |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit
450    r[1] = @intCast(r[0]);
451    r[1] <<= 32;
452    s[1] = mul64(@intCast(m >> 64), r[1]);
453    d[1] = mul64(s[1], r[1]);
454    u[1] = three[1] - d[1];
455    r[1] = mul64(u[1], r[1]) << 1;
456    // |r sqrt(m) - 1| < 0x1.a5p-31
457    s[1] = mul64(u[1], s[1]) << 1;
458    d[1] = mul64(s[1], r[1]);
459    u[1] = three[1] - d[1];
460    r[1] = mul64(u[1], r[1]) << 1;
461    // |r sqrt(m) - 1| < 0x1.c001p-59, switch to 128bit
462    r[2] = @intCast(r[1]);
463    r[2] <<= 64;
464    s[2] = mul128(m, r[2]);
465    d[2] = mul128(s[2], r[2]);
466    u[2] = three[2] - d[2];
467    s[2] = mul128(u[2], s[2]); // repr: 3.125
468    // -0x1p-116 < s - sqrt(m) < 0x3.8001p-125
469    s[2] = (s[2] - 4) >> 13; // repr: 16.122
470    // s < sqrt(m) < s + 1 ULP + tiny
471
472    // compute nearest rounded result:
473    // the nearest result to 122 bits is either s or s+0x1p-122,
474    // we can decide by comparing (2^122 s + 0.5)^2 to 2^244 m
475    const d0 = (m << 98) -% s[2] *% s[2];
476    const d1 = s[2] -% d0;
477    const d2 = d1 +% s[2] +% 1;
478    s[2] += d1 >> 127;
479    s[2] &= 0x0000_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF;
480    s[2] |= top << 112;
481    const y: f128 = @bitCast(s[2]);
482
483    // handle rounding modes and inexact exception:
484    // only (s+1)^2 == 2^98 m case is exact otherwise
485    // add a tiny value to cause the fenv effects.
486    if (d2 != 0) {
487        @branchHint(.likely);
488        var tiny: u128 = 0x0001_0000_0000_0000_0000_0000_0000_0000;
489        tiny |= (d1 ^ d2) & 0x8000_0000_0000_0000_0000_0000_0000_0000;
490        const t: f128 = @bitCast(tiny);
491        return y + t;
492    }
493
494    return y;
495}
496
497fn _Qp_sqrt(c: *f128, a: *f128) callconv(.c) void {
498    c.* = sqrt(@floatCast(a.*));
499}
500
501pub fn sqrtl(x: c_longdouble) callconv(.c) c_longdouble {
502    switch (@typeInfo(c_longdouble).float.bits) {
503        16 => return __sqrth(x),
504        32 => return sqrtf(x),
505        64 => return sqrt(x),
506        80 => return __sqrtx(x),
507        128 => return sqrtq(x),
508        else => @compileError("unreachable"),
509    }
510}
511
512const __rsqrt_tab: [128]u16 = .{
513    0xB451, 0xB2F0, 0xB196, 0xB044, 0xAEF9, 0xADB6, 0xAC79, 0xAB43,
514    0xAA14, 0xA8EB, 0xA7C8, 0xA6AA, 0xA592, 0xA480, 0xA373, 0xA26B,
515    0xA168, 0xA06A, 0x9F70, 0x9E7B, 0x9D8A, 0x9C9D, 0x9BB5, 0x9AD1,
516    0x99F0, 0x9913, 0x983A, 0x9765, 0x9693, 0x95C4, 0x94F8, 0x9430,
517    0x936B, 0x92A9, 0x91EA, 0x912E, 0x9075, 0x8FBE, 0x8F0A, 0x8E59,
518    0x8DAA, 0x8CFE, 0x8C54, 0x8BAC, 0x8B07, 0x8A64, 0x89C4, 0x8925,
519    0x8889, 0x87EE, 0x8756, 0x86C0, 0x862B, 0x8599, 0x8508, 0x8479,
520    0x83EC, 0x8361, 0x82D8, 0x8250, 0x81C9, 0x8145, 0x80C2, 0x8040,
521    0xFF02, 0xFD0E, 0xFB25, 0xF947, 0xF773, 0xF5AA, 0xF3EA, 0xF234,
522    0xF087, 0xEEE3, 0xED47, 0xEBB3, 0xEA27, 0xE8A3, 0xE727, 0xE5B2,
523    0xE443, 0xE2DC, 0xE17A, 0xE020, 0xDECB, 0xDD7D, 0xDC34, 0xDAF1,
524    0xD9B3, 0xD87B, 0xD748, 0xD61A, 0xD4F1, 0xD3CD, 0xD2AD, 0xD192,
525    0xD07B, 0xCF69, 0xCE5B, 0xCD51, 0xCC4A, 0xCB48, 0xCA4A, 0xC94F,
526    0xC858, 0xC764, 0xC674, 0xC587, 0xC49D, 0xC3B7, 0xC2D4, 0xC1F4,
527    0xC116, 0xC03C, 0xBF65, 0xBE90, 0xBDBE, 0xBCEF, 0xBC23, 0xBB59,
528    0xBA91, 0xB9CC, 0xB90A, 0xB84A, 0xB78C, 0xB6D0, 0xB617, 0xB560,
529};
530
531inline fn mul16(a: u16, b: u16) u16 {
532    return @intCast(@as(u32, @intCast(a)) * @as(u32, @intCast(b)) >> 16);
533}
534
535inline fn mul32(a: u32, b: u32) u32 {
536    return @intCast(@as(u64, @intCast(a)) * @as(u64, @intCast(b)) >> 32);
537}
538
539inline fn mul64(a: u64, b: u64) u64 {
540    return @intCast(@as(u128, @intCast(a)) * @as(u128, @intCast(b)) >> 64);
541}
542
543inline fn mul80(a: u80, b: u80) u80 {
544    const ahi = a >> 40;
545    const alo = a & 0xFF_FFFF_FFFF;
546    const bhi = b >> 40;
547    const blo = b & 0xFF_FFFF_FFFF;
548    return ahi * bhi + (ahi * blo >> 40) + (alo * bhi >> 40);
549}
550
551inline fn mul128(a: u128, b: u128) u128 {
552    const ahi = a >> 64;
553    const alo = a & 0xFFFF_FFFF_FFFF_FFFF;
554    const bhi = b >> 64;
555    const blo = b & 0xFFFF_FFFF_FFFF_FFFF;
556    return ahi * bhi + (ahi * blo >> 64) + (alo * bhi >> 64);
557}
558
559inline fn mul80_tail(a: u80, b: u80) u80 {
560    const ahi = a >> 40;
561    const alo = a & 0xFF_FFFF_FFFF;
562    const bhi = b >> 40;
563    const blo = b & 0xFF_FFFF_FFFF;
564    return alo * blo +% ((ahi * blo) << 40) +% ((alo * bhi) << 40);
565}
566
567test "__sqrth" {
568    // sqrt(±0) is ±0
569    try std.testing.expectEqual(__sqrth(0x0.0p0), 0x0.0p0);
570    try std.testing.expectEqual(__sqrth(-0x0.0p0), -0x0.0p0);
571    // sqrt(+max) is finite
572    try std.testing.expectEqual(__sqrth(0x1.FFCp15), 0x1.FFCp7);
573    // sqrt(4)=2
574    try std.testing.expectEqual(__sqrth(0x1p2), 0x1p1);
575    // sqrt(x) for x=1, 1±ulp
576    try std.testing.expectEqual(__sqrth(0x1p0), 0x1p0);
577    try std.testing.expectEqual(__sqrth(0x1.004p0), 0x1p0);
578    try std.testing.expectEqual(__sqrth(0x1.FF8p-1), 0x1.FFCp-1);
579    // sqrt(+min) is non-zero
580    try std.testing.expectEqual(__sqrth(0x1p-14), 0x1p-7);
581    // sqrt(min subnormal) is non-zero
582    try std.testing.expectEqual(__sqrth(0x0.004p-14), 0x1p-12);
583    // sqrt(inf) is inf
584    try std.testing.expect(math.isInf(__sqrth(math.inf(f16))));
585    // sqrt(nan) is nan
586    try std.testing.expect(math.isNan(__sqrth(math.nan(f16))));
587    // sqrt(-ve) is nan
588    try std.testing.expect(math.isNan(__sqrth(-0x1p-14)));
589    try std.testing.expect(math.isNan(__sqrth(-0x1p+0)));
590    try std.testing.expect(math.isNan(__sqrth(-math.inf(f16))));
591    // random arguments
592    try std.testing.expectEqual(__sqrth(0x1.1p14), 0x1.08p7);
593    try std.testing.expectEqual(__sqrth(0x1.C9p-12), 0x1.56p-6);
594    try std.testing.expectEqual(__sqrth(0x1.CE8p-7), 0x1.E68p-4);
595    try std.testing.expectEqual(__sqrth(0x1.134p-7), 0x1.778p-4);
596    try std.testing.expectEqual(__sqrth(0x1.E9Cp-10), 0x1.62p-5);
597    try std.testing.expectEqual(__sqrth(0x1.3Dp9), 0x1.92Cp4);
598    try std.testing.expectEqual(__sqrth(0x1.AA4p8), 0x1.4A4p4);
599    try std.testing.expectEqual(__sqrth(0x1.8A8p4), 0x1.3DCp2);
600    try std.testing.expectEqual(__sqrth(0x1.8Fp-7), 0x1.C4p-4);
601    try std.testing.expectEqual(__sqrth(0x1.584p-11), 0x1.A3Cp-6);
602}
603
604test "sqrtf" {
605    // sqrt(±0) is ±0
606    try std.testing.expectEqual(sqrtf(0x0.0p0), 0x0.0p0);
607    try std.testing.expectEqual(sqrtf(-0x0.0p0), -0x0.0p0);
608    // sqrt(+max) is finite
609    try std.testing.expectEqual(sqrtf(0x1.FFFFFEp127), 0x1.FFFFFEp63);
610    // sqrt(4)=2
611    try std.testing.expectEqual(sqrtf(0x1p2), 0x1p1);
612    // sqrt(x) for x=1, 1±ulp
613    try std.testing.expectEqual(sqrtf(0x1p0), 0x1p0);
614    try std.testing.expectEqual(sqrtf(0x1.000002p0), 0x1p0);
615    try std.testing.expectEqual(sqrtf(0x1.FFFFFEp-1), 0x1.FFFFFEp-1);
616    // sqrt(+min) is non-zero
617    try std.testing.expectEqual(sqrtf(0x1p-126), 0x1p-63);
618    // sqrt(min subnormal) is non-zero
619    try std.testing.expectEqual(sqrtf(0x0.000002p-126), 0x1.6a09e6p-75);
620    // sqrt(inf) is inf
621    try std.testing.expect(math.isInf(sqrtf(math.inf(f32))));
622    // sqrt(nan) is nan
623    try std.testing.expect(math.isNan(sqrtf(math.nan(f32))));
624    // sqrt(-ve) is nan
625    try std.testing.expect(math.isNan(sqrtf(-0x1p-149)));
626    try std.testing.expect(math.isNan(sqrtf(-0x1p0)));
627    try std.testing.expect(math.isNan(sqrtf(-math.inf(f32))));
628    // random arguments
629    try std.testing.expectEqual(sqrtf(0x1.4DD57Ep77), 0x1.9D6DA8p38);
630    try std.testing.expectEqual(sqrtf(0x1.871848p102), 0x1.3C6AFAp51);
631    try std.testing.expectEqual(sqrtf(0x1.A1D748p-112), 0x1.470EFCp-56);
632    try std.testing.expectEqual(sqrtf(0x1.E626C2p18), 0x1.60C80Ep9);
633    try std.testing.expectEqual(sqrtf(0x1.E80E66p-29), 0x1.F3E282p-15);
634    try std.testing.expectEqual(sqrtf(0x1.B47204p89), 0x1.D8B732p44);
635    try std.testing.expectEqual(sqrtf(0x1.77F45p15), 0x1.B6BC3Ap7);
636    try std.testing.expectEqual(sqrtf(0x1.AD5F5p-48), 0x1.4B8A72p-24);
637    try std.testing.expectEqual(sqrtf(0x1.91A39p-76), 0x1.40A7A8p-38);
638    try std.testing.expectEqual(sqrtf(0x1.DAE088p79), 0x1.ED16DCp39);
639}
640
641test "sqrt" {
642    // sqrt(±0) is ±0
643    try std.testing.expectEqual(sqrt(0x0.0p0), 0x0.0p0);
644    try std.testing.expectEqual(sqrt(-0x0.0p0), -0x0.0p0);
645    // sqrt(+max) is finite
646    try std.testing.expectEqual(sqrt(math.floatMax(f64)), 0x1.FFFFFFFFFFFFFp511);
647    // sqrt(4)=2
648    try std.testing.expectEqual(sqrt(0x1p2), 0x1p1);
649    // sqrt(x) for x=1, 1±ulp
650    try std.testing.expectEqual(sqrt(0x1p0), 0x1p0);
651    try std.testing.expectEqual(sqrt(0x1p0 + math.floatEps(f64)), 0x1p0);
652    try std.testing.expectEqual(sqrt(0x1p0 - math.floatEps(f64)), 0x1.FFFFFFFFFFFFFp-1);
653    // sqrt(+min) is non-zero
654    try std.testing.expectEqual(sqrt(math.floatMin(f64)), 0x1p-511);
655    // sqrt(min subnormal) is non-zero
656    try std.testing.expectEqual(sqrt(math.floatTrueMin(f64)), 0x1p-537);
657    // sqrt(inf) is inf
658    try std.testing.expect(math.isInf(sqrt(math.inf(f64))));
659    // sqrt(nan) is nan
660    try std.testing.expect(math.isNan(sqrt(math.nan(f64))));
661    // sqrt(-ve) is nan
662    try std.testing.expect(math.isNan(sqrt(-0x1p-1074)));
663    try std.testing.expect(math.isNan(sqrt(-0x1p0)));
664    try std.testing.expect(math.isNan(sqrt(-math.inf(f64))));
665    // random arguments
666    try std.testing.expectEqual(sqrt(0x1.27D3510D4789Bp471), 0x1.852E97E58CFB7p235);
667    try std.testing.expectEqual(sqrt(0x1.8C4FCD5A07846p791), 0x1.C27504E56D938p395);
668    try std.testing.expectEqual(sqrt(0x1.B1B69324F96E7p-137), 0x1.D73BD0414D8BFp-69);
669    try std.testing.expectEqual(sqrt(0x1.1CBD179A811FEp278), 0x1.0DFCB9A114A61p139);
670    try std.testing.expectEqual(sqrt(0x1.1D0C7EFB04A56p917), 0x1.7E0708A25DDCDp458);
671    try std.testing.expectEqual(sqrt(0x1.21B355DA8C94Bp-249), 0x1.8121CBE2608E3p-125);
672    try std.testing.expectEqual(sqrt(0x1.63024D4C5E987p487), 0x1.AA56AEA589DCDp243);
673    try std.testing.expectEqual(sqrt(0x1.45AC3BE941F6Ep339), 0x1.9857F3F453E2Dp169);
674    try std.testing.expectEqual(sqrt(0x1.3B719C733AA24p267), 0x1.91E12E3AC8F71p133);
675    try std.testing.expectEqual(sqrt(0x1.0B150433A2275p357), 0x1.71CAB87F8277Cp178);
676}
677
678test "__sqrtx" {
679    // sqrt(±0) is ±0
680    try std.testing.expectEqual(__sqrtx(0x0.0p0), 0x0.0p0);
681    try std.testing.expectEqual(__sqrtx(-0x0.0p0), -0x0.0p0);
682    // sqrt(+max) is finite
683    try std.testing.expectEqual(__sqrtx(math.floatMax(f80)), 0x1.FFFFFFFFFFFFFFFEp8191);
684    // sqrt(4)=2
685    try std.testing.expectEqual(__sqrtx(0x1p2), 0x1p1);
686    // sqrt(x) for x=1, 1±ulp
687    try std.testing.expectEqual(__sqrtx(0x1p0), 0x1p0);
688    try std.testing.expectEqual(__sqrtx(0x1p0 + math.floatEps(f80)), 0x1p0);
689    try std.testing.expectEqual(__sqrtx(0x1p0 - math.floatEps(f80)), 0x1.FFFFFFFFFFFFFFFEp-1);
690    // sqrt(+min) is non-zero
691    try std.testing.expectEqual(__sqrtx(math.floatMin(f80)), 0x1p-8191);
692    // sqrt(min subnormal) is non-zero
693    try std.testing.expectEqual(__sqrtx(math.floatTrueMin(f80)), 0x1.6A09E667F3BCC908p-8223);
694    // sqrt(inf) is inf
695    try std.testing.expect(math.isInf(__sqrtx(math.inf(f80))));
696    // sqrt(nan) is nan
697    try std.testing.expect(math.isNan(__sqrtx(math.nan(f80))));
698    // sqrt(-ve) is nan
699    try std.testing.expect(math.isNan(__sqrtx(-0x1p-16442)));
700    try std.testing.expect(math.isNan(__sqrtx(-0x1p0)));
701    try std.testing.expect(math.isNan(__sqrtx(-math.inf(f80))));
702    // random arguments
703    try std.testing.expectEqual(__sqrtx(0x1.087F3953486918A4p15482), 0x1.0436BBE03D02F32p7741);
704    try std.testing.expectEqual(__sqrtx(0x1.530CF9E2AE84D8Fp-6330), 0x1.269CFEF51933BE58p-3165);
705    try std.testing.expectEqual(__sqrtx(0x1.3F971515EADD574Ap5713), 0x1.9483232AB780B006p2856);
706    try std.testing.expectEqual(__sqrtx(0x1.4CC0DC7379222954p864), 0x1.23DD4D0A4758C2Cp432);
707    try std.testing.expectEqual(__sqrtx(0x1.920E5649559A839Ep-3181), 0x1.C5B5BC0F98DD83D2p-1591);
708    try std.testing.expectEqual(__sqrtx(0x1.2E59726F87CD1746p-629), 0x1.8973327E95CB350Cp-315);
709    try std.testing.expectEqual(__sqrtx(0x1.D3A16391F57B4D64p-9034), 0x1.59FF08B7DEEF5DB2p-4517);
710    try std.testing.expectEqual(__sqrtx(0x1.E7053D8DAA49BCEEp-11411), 0x1.F35AA3EA5E18E344p-5706);
711    try std.testing.expectEqual(__sqrtx(0x1.797ED0B05DD4A984p7521), 0x1.B7A22E40C6A7867Ap3760);
712    try std.testing.expectEqual(__sqrtx(0x1.FC50806445C7226Ap15371), 0x1.FE2766142653F5BEp7685);
713}
714
715test "sqrtq" {
716    // sqrt(±0) is ±0
717    try std.testing.expectEqual(sqrtq(0x0.0p0), 0x0.0p0);
718    try std.testing.expectEqual(sqrtq(-0x0.0p0), -0x0.0p0);
719    // sqrt(+max) is finite
720    try std.testing.expectEqual(sqrtq(math.floatMax(f128)), 0x1.FFFFFFFFFFFFFFFFFFFFFFFFFFFFp8191);
721    // sqrt(4)=2
722    try std.testing.expectEqual(sqrtq(0x1p2), 0x1p1);
723    // sqrt(x) for x=1, 1±ulp
724    try std.testing.expectEqual(sqrtq(0x1p0), 0x1p0);
725    try std.testing.expectEqual(sqrtq(0x1p0 + math.floatEps(f128)), 0x1p0);
726    try std.testing.expectEqual(sqrtq(0x1p0 - math.floatEps(f128)), 0x1.FFFFFFFFFFFFFFFFFFFFFFFFFFFFp-1);
727    // sqrt(+min) is non-zero
728    try std.testing.expectEqual(sqrtq(math.floatMin(f128)), 0x1p-8191);
729    // sqrt(min subnormal) is non-zero
730    try std.testing.expectEqual(sqrtq(math.floatTrueMin(f128)), 0x1p-8247);
731    // sqrt(inf) is inf
732    try std.testing.expect(math.isInf(sqrtq(math.inf(f128))));
733    // sqrt(nan) is nan
734    try std.testing.expect(math.isNan(sqrtq(math.nan(f128))));
735    // sqrt(-ve) is nan
736    try std.testing.expect(math.isNan(sqrtq(-0x1p-16442)));
737    try std.testing.expect(math.isNan(sqrtq(-0x1p0)));
738    try std.testing.expect(math.isNan(sqrtq(-math.inf(f128))));
739    // random arguments
740    try std.testing.expectEqual(sqrtq(0x1.B6942D29A331751600C9F3AF7E5Fp3363), 0x1.D9DE9AFEF0F2D25586A50CA39D4Dp1681);
741    try std.testing.expectEqual(sqrtq(0x1.5E65C405F84D471A8070ADD7A42Dp11765), 0x1.A78F7F9452B4D9EC2403C81D9D42p5882);
742    try std.testing.expectEqual(sqrtq(0x1.B42334D68F8016D8AE6F5E22B044p-5624), 0x1.4E247A7F2FF2A325E9377BB09C8p-2812);
743    try std.testing.expectEqual(sqrtq(0x1.E61715047F80F2E0B9382B38E06Bp10062), 0x1.60C25D9DFDC0116B78EF5AFDE0E9p5031);
744    try std.testing.expectEqual(sqrtq(0x1.2ED0B53B494CB55A7B04E653D40Ep-1026), 0x1.166CE78D658D2453D700B04C5748p-513);
745    try std.testing.expectEqual(sqrtq(0x1.1BA756B9790E78A4E6F0B083AA89p1835), 0x1.7D1767EA3303DB7A46940033988p917);
746    try std.testing.expectEqual(sqrtq(0x1.5B6C574319C1120335C8E1609704p4512), 0x1.2A3A8A415BB1648C548FBA2A4182p2256);
747    try std.testing.expectEqual(sqrtq(0x1.FF91E8CDEE1552A2B74E77B602Ep14953), 0x1.FFC8F171267D4FE75CBE7AB4D851p7476);
748    try std.testing.expectEqual(sqrtq(0x1.9B1837CFC629A1B6B1BB97099E7Dp2892), 0x1.4468511B909EAF8641BD59105A6Bp1446);
749    try std.testing.expectEqual(sqrtq(0x1.0E2115475E64A92340914E7F7B37p-13951), 0x1.73E536F82F414134012F55BA5368p-6976);
750}