master
1const std = @import("std");
2const builtin = @import("builtin");
3
4const common = @import("common.zig");
5const normalize = common.normalize;
6const wideMultiply = common.wideMultiply;
7
8pub const panic = common.panic;
9
10comptime {
11 if (common.want_ppc_abi) {
12 @export(&__divtf3, .{ .name = "__divkf3", .linkage = common.linkage, .visibility = common.visibility });
13 } else if (common.want_sparc_abi) {
14 @export(&_Qp_div, .{ .name = "_Qp_div", .linkage = common.linkage, .visibility = common.visibility });
15 }
16 @export(&__divtf3, .{ .name = "__divtf3", .linkage = common.linkage, .visibility = common.visibility });
17}
18
19pub fn __divtf3(a: f128, b: f128) callconv(.c) f128 {
20 return div(a, b);
21}
22
23fn _Qp_div(c: *f128, a: *const f128, b: *const f128) callconv(.c) void {
24 c.* = div(a.*, b.*);
25}
26
27inline fn div(a: f128, b: f128) f128 {
28 const Z = std.meta.Int(.unsigned, 128);
29
30 const significandBits = std.math.floatMantissaBits(f128);
31 const exponentBits = std.math.floatExponentBits(f128);
32
33 const signBit = (@as(Z, 1) << (significandBits + exponentBits));
34 const maxExponent = ((1 << exponentBits) - 1);
35 const exponentBias = (maxExponent >> 1);
36
37 const implicitBit = (@as(Z, 1) << significandBits);
38 const quietBit = implicitBit >> 1;
39 const significandMask = implicitBit - 1;
40
41 const absMask = signBit - 1;
42 const exponentMask = absMask ^ significandMask;
43 const qnanRep = exponentMask | quietBit;
44 const infRep: Z = @bitCast(std.math.inf(f128));
45
46 const aExponent: u32 = @truncate((@as(Z, @bitCast(a)) >> significandBits) & maxExponent);
47 const bExponent: u32 = @truncate((@as(Z, @bitCast(b)) >> significandBits) & maxExponent);
48 const quotientSign: Z = (@as(Z, @bitCast(a)) ^ @as(Z, @bitCast(b))) & signBit;
49
50 var aSignificand: Z = @as(Z, @bitCast(a)) & significandMask;
51 var bSignificand: Z = @as(Z, @bitCast(b)) & significandMask;
52 var scale: i32 = 0;
53
54 // Detect if a or b is zero, denormal, infinity, or NaN.
55 if (aExponent -% 1 >= maxExponent - 1 or bExponent -% 1 >= maxExponent - 1) {
56 const aAbs: Z = @as(Z, @bitCast(a)) & absMask;
57 const bAbs: Z = @as(Z, @bitCast(b)) & absMask;
58
59 // NaN / anything = qNaN
60 if (aAbs > infRep) return @bitCast(@as(Z, @bitCast(a)) | quietBit);
61 // anything / NaN = qNaN
62 if (bAbs > infRep) return @bitCast(@as(Z, @bitCast(b)) | quietBit);
63
64 if (aAbs == infRep) {
65 // infinity / infinity = NaN
66 if (bAbs == infRep) {
67 return @bitCast(qnanRep);
68 }
69 // infinity / anything else = +/- infinity
70 else {
71 return @bitCast(aAbs | quotientSign);
72 }
73 }
74
75 // anything else / infinity = +/- 0
76 if (bAbs == infRep) return @bitCast(quotientSign);
77
78 if (aAbs == 0) {
79 // zero / zero = NaN
80 if (bAbs == 0) {
81 return @bitCast(qnanRep);
82 }
83 // zero / anything else = +/- zero
84 else {
85 return @bitCast(quotientSign);
86 }
87 }
88 // anything else / zero = +/- infinity
89 if (bAbs == 0) return @bitCast(infRep | quotientSign);
90
91 // one or both of a or b is denormal, the other (if applicable) is a
92 // normal number. Renormalize one or both of a and b, and set scale to
93 // include the necessary exponent adjustment.
94 if (aAbs < implicitBit) scale +%= normalize(f128, &aSignificand);
95 if (bAbs < implicitBit) scale -%= normalize(f128, &bSignificand);
96 }
97
98 // Set the implicit significand bit. If we fell through from the
99 // denormal path it was already set by normalize( ), but setting it twice
100 // won't hurt anything.
101 aSignificand |= implicitBit;
102 bSignificand |= implicitBit;
103 var quotientExponent: i32 = @as(i32, @bitCast(aExponent -% bExponent)) +% scale;
104
105 // Align the significand of b as a Q63 fixed-point number in the range
106 // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax
107 // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
108 // is accurate to about 3.5 binary digits.
109 const q63b: u64 = @truncate(bSignificand >> 49);
110 var recip64 = @as(u64, 0x7504f333F9DE6484) -% q63b;
111 // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2)
112
113 // Now refine the reciprocal estimate using a Newton-Raphson iteration:
114 //
115 // x1 = x0 * (2 - x0 * b)
116 //
117 // This doubles the number of correct binary digits in the approximation
118 // with each iteration.
119 var correction64: u64 = undefined;
120 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
121 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
122 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
123 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
124 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
125 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
126 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
127 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
128 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
129 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
130
131 // The reciprocal may have overflowed to zero if the upper half of b is
132 // exactly 1.0. This would sabatoge the full-width final stage of the
133 // computation that follows, so we adjust the reciprocal down by one bit.
134 recip64 -%= 1;
135
136 // We need to perform one more iteration to get us to 112 binary digits;
137 // The last iteration needs to happen with extra precision.
138 const q127blo: u64 = @truncate(bSignificand << 15);
139 var correction: u128 = undefined;
140 var reciprocal: u128 = undefined;
141
142 // NOTE: This operation is equivalent to __multi3, which is not implemented
143 // in some architecture
144 var r64q63: u128 = undefined;
145 var r64q127: u128 = undefined;
146 var r64cH: u128 = undefined;
147 var r64cL: u128 = undefined;
148 var dummy: u128 = undefined;
149 wideMultiply(u128, recip64, q63b, &dummy, &r64q63);
150 wideMultiply(u128, recip64, q127blo, &dummy, &r64q127);
151
152 correction = -%(r64q63 + (r64q127 >> 64));
153
154 const cHi: u64 = @truncate(correction >> 64);
155 const cLo: u64 = @truncate(correction);
156
157 wideMultiply(u128, recip64, cHi, &dummy, &r64cH);
158 wideMultiply(u128, recip64, cLo, &dummy, &r64cL);
159
160 reciprocal = r64cH + (r64cL >> 64);
161
162 // Adjust the final 128-bit reciprocal estimate downward to ensure that it
163 // is strictly smaller than the infinitely precise exact reciprocal. Because
164 // the computation of the Newton-Raphson step is truncating at every step,
165 // this adjustment is small; most of the work is already done.
166 reciprocal -%= 2;
167
168 // The numerical reciprocal is accurate to within 2^-112, lies in the
169 // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
170 // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b
171 // in Q127 with the following properties:
172 //
173 // 1. q < a/b
174 // 2. q is in the interval [0.5, 2.0)
175 // 3. The error in q is bounded away from 2^-113 (actually, we have a
176 // couple of bits to spare, but this is all we need).
177
178 // We need a 128 x 128 multiply high to compute q.
179 var quotient: u128 = undefined;
180 var quotientLo: u128 = undefined;
181 wideMultiply(u128, aSignificand << 2, reciprocal, "ient, "ientLo);
182
183 // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
184 // In either case, we are going to compute a residual of the form
185 //
186 // r = a - q*b
187 //
188 // We know from the construction of q that r satisfies:
189 //
190 // 0 <= r < ulp(q)*b
191 //
192 // If r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
193 // already have the correct result. The exact halfway case cannot occur.
194 // We also take this time to right shift quotient if it falls in the [1,2)
195 // range and adjust the exponent accordingly.
196 var residual: u128 = undefined;
197 var qb: u128 = undefined;
198
199 if (quotient < (implicitBit << 1)) {
200 wideMultiply(u128, quotient, bSignificand, &dummy, &qb);
201 residual = (aSignificand << 113) -% qb;
202 quotientExponent -%= 1;
203 } else {
204 quotient >>= 1;
205 wideMultiply(u128, quotient, bSignificand, &dummy, &qb);
206 residual = (aSignificand << 112) -% qb;
207 }
208
209 const writtenExponent = quotientExponent +% exponentBias;
210
211 if (writtenExponent >= maxExponent) {
212 // If we have overflowed the exponent, return infinity.
213 return @bitCast(infRep | quotientSign);
214 } else if (writtenExponent < 1) {
215 if (writtenExponent == 0) {
216 // Check whether the rounded result is normal.
217 const round = @intFromBool((residual << 1) > bSignificand);
218 // Clear the implicit bit.
219 var absResult = quotient & significandMask;
220 // Round.
221 absResult += round;
222 if ((absResult & ~significandMask) > 0) {
223 // The rounded result is normal; return it.
224 return @bitCast(absResult | quotientSign);
225 }
226 }
227 // Flush denormals to zero. In the future, it would be nice to add
228 // code to round them correctly.
229 return @bitCast(quotientSign);
230 } else {
231 const round = @intFromBool((residual << 1) >= bSignificand);
232 // Clear the implicit bit
233 var absResult = quotient & significandMask;
234 // Insert the exponent
235 absResult |= @as(Z, @intCast(writtenExponent)) << significandBits;
236 // Round
237 absResult +%= round;
238 // Insert the sign and return
239 return @bitCast(absResult | quotientSign);
240 }
241}
242
243test {
244 _ = @import("divtf3_test.zig");
245}