master
1const std = @import("std");
2const builtin = @import("builtin");
3const arch = builtin.cpu.arch;
4
5const common = @import("common.zig");
6const normalize = common.normalize;
7const wideMultiply = common.wideMultiply;
8
9pub const panic = common.panic;
10
11comptime {
12 @export(&__divxf3, .{ .name = "__divxf3", .linkage = common.linkage, .visibility = common.visibility });
13}
14
15pub fn __divxf3(a: f80, b: f80) callconv(.c) f80 {
16 const T = f80;
17 const Z = std.meta.Int(.unsigned, @bitSizeOf(T));
18
19 const significandBits = std.math.floatMantissaBits(T);
20 const fractionalBits = std.math.floatFractionalBits(T);
21 const exponentBits = std.math.floatExponentBits(T);
22
23 const signBit = (@as(Z, 1) << (significandBits + exponentBits));
24 const maxExponent = ((1 << exponentBits) - 1);
25 const exponentBias = (maxExponent >> 1);
26
27 const integerBit = (@as(Z, 1) << fractionalBits);
28 const quietBit = integerBit >> 1;
29 const significandMask = (@as(Z, 1) << significandBits) - 1;
30
31 const absMask = signBit - 1;
32 const qnanRep = @as(Z, @bitCast(std.math.nan(T))) | quietBit;
33 const infRep: Z = @bitCast(std.math.inf(T));
34
35 const aExponent: u32 = @truncate((@as(Z, @bitCast(a)) >> significandBits) & maxExponent);
36 const bExponent: u32 = @truncate((@as(Z, @bitCast(b)) >> significandBits) & maxExponent);
37 const quotientSign: Z = (@as(Z, @bitCast(a)) ^ @as(Z, @bitCast(b))) & signBit;
38
39 var aSignificand: Z = @as(Z, @bitCast(a)) & significandMask;
40 var bSignificand: Z = @as(Z, @bitCast(b)) & significandMask;
41 var scale: i32 = 0;
42
43 // Detect if a or b is zero, denormal, infinity, or NaN.
44 if (aExponent -% 1 >= maxExponent - 1 or bExponent -% 1 >= maxExponent - 1) {
45 const aAbs: Z = @as(Z, @bitCast(a)) & absMask;
46 const bAbs: Z = @as(Z, @bitCast(b)) & absMask;
47
48 // NaN / anything = qNaN
49 if (aAbs > infRep) return @bitCast(@as(Z, @bitCast(a)) | quietBit);
50 // anything / NaN = qNaN
51 if (bAbs > infRep) return @bitCast(@as(Z, @bitCast(b)) | quietBit);
52
53 if (aAbs == infRep) {
54 // infinity / infinity = NaN
55 if (bAbs == infRep) {
56 return @bitCast(qnanRep);
57 }
58 // infinity / anything else = +/- infinity
59 else {
60 return @bitCast(aAbs | quotientSign);
61 }
62 }
63
64 // anything else / infinity = +/- 0
65 if (bAbs == infRep) return @bitCast(quotientSign);
66
67 if (aAbs == 0) {
68 // zero / zero = NaN
69 if (bAbs == 0) {
70 return @bitCast(qnanRep);
71 }
72 // zero / anything else = +/- zero
73 else {
74 return @bitCast(quotientSign);
75 }
76 }
77 // anything else / zero = +/- infinity
78 if (bAbs == 0) return @bitCast(infRep | quotientSign);
79
80 // one or both of a or b is denormal, the other (if applicable) is a
81 // normal number. Renormalize one or both of a and b, and set scale to
82 // include the necessary exponent adjustment.
83 if (aAbs < integerBit) scale +%= normalize(T, &aSignificand);
84 if (bAbs < integerBit) scale -%= normalize(T, &bSignificand);
85 }
86 var quotientExponent: i32 = @as(i32, @bitCast(aExponent -% bExponent)) +% scale;
87
88 // Align the significand of b as a Q63 fixed-point number in the range
89 // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax
90 // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
91 // is accurate to about 3.5 binary digits.
92 const q63b: u64 = @intCast(bSignificand);
93 var recip64 = @as(u64, 0x7504f333F9DE6484) -% q63b;
94 // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2)
95
96 // Now refine the reciprocal estimate using a Newton-Raphson iteration:
97 //
98 // x1 = x0 * (2 - x0 * b)
99 //
100 // This doubles the number of correct binary digits in the approximation
101 // with each iteration.
102 var correction64: u64 = undefined;
103 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
104 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
105 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
106 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
107 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
108 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
109 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
110 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
111 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
112 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
113
114 // The reciprocal may have overflowed to zero if the upper half of b is
115 // exactly 1.0. This would sabatoge the full-width final stage of the
116 // computation that follows, so we adjust the reciprocal down by one bit.
117 recip64 -%= 1;
118
119 // We need to perform one more iteration to get us to 112 binary digits;
120 // The last iteration needs to happen with extra precision.
121
122 // NOTE: This operation is equivalent to __multi3, which is not implemented
123 // in some architechures
124 var reciprocal: u128 = undefined;
125 var correction: u128 = undefined;
126 var dummy: u128 = undefined;
127 wideMultiply(u128, recip64, q63b, &dummy, &correction);
128
129 correction = -%correction;
130
131 const cHi: u64 = @truncate(correction >> 64);
132 const cLo: u64 = @truncate(correction);
133
134 var r64cH: u128 = undefined;
135 var r64cL: u128 = undefined;
136 wideMultiply(u128, recip64, cHi, &dummy, &r64cH);
137 wideMultiply(u128, recip64, cLo, &dummy, &r64cL);
138
139 reciprocal = r64cH + (r64cL >> 64);
140
141 // Adjust the final 128-bit reciprocal estimate downward to ensure that it
142 // is strictly smaller than the infinitely precise exact reciprocal. Because
143 // the computation of the Newton-Raphson step is truncating at every step,
144 // this adjustment is small; most of the work is already done.
145 reciprocal -%= 2;
146
147 // The numerical reciprocal is accurate to within 2^-112, lies in the
148 // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
149 // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b
150 // in Q127 with the following properties:
151 //
152 // 1. q < a/b
153 // 2. q is in the interval [0.5, 2.0)
154 // 3. The error in q is bounded away from 2^-63 (actually, we have
155 // many bits to spare, but this is all we need).
156
157 // We need a 128 x 128 multiply high to compute q.
158 var quotient128: u128 = undefined;
159 var quotientLo: u128 = undefined;
160 wideMultiply(u128, aSignificand << 2, reciprocal, "ient128, "ientLo);
161
162 // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
163 // Right shift the quotient if it falls in the [1,2) range and adjust the
164 // exponent accordingly.
165 const quotient: u64 = if (quotient128 < (integerBit << 1)) b: {
166 quotientExponent -= 1;
167 break :b @intCast(quotient128);
168 } else @intCast(quotient128 >> 1);
169
170 // We are going to compute a residual of the form
171 //
172 // r = a - q*b
173 //
174 // We know from the construction of q that r satisfies:
175 //
176 // 0 <= r < ulp(q)*b
177 //
178 // If r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
179 // already have the correct result. The exact halfway case cannot occur.
180 const residual: u64 = -%(quotient *% q63b);
181
182 const writtenExponent = quotientExponent + exponentBias;
183 if (writtenExponent >= maxExponent) {
184 // If we have overflowed the exponent, return infinity.
185 return @bitCast(infRep | quotientSign);
186 } else if (writtenExponent < 1) {
187 if (writtenExponent == 0) {
188 // Check whether the rounded result is normal.
189 if (residual > (bSignificand >> 1)) { // round
190 if (quotient == (integerBit - 1)) // If the rounded result is normal, return it
191 return @bitCast(@as(Z, @bitCast(std.math.floatMin(T))) | quotientSign);
192 }
193 }
194 // Flush denormals to zero. In the future, it would be nice to add
195 // code to round them correctly.
196 return @bitCast(quotientSign);
197 } else {
198 const round = @intFromBool(residual > (bSignificand >> 1));
199 // Insert the exponent
200 var absResult = quotient | (@as(Z, @intCast(writtenExponent)) << significandBits);
201 // Round
202 absResult +%= round;
203 // Insert the sign and return
204 return @bitCast(absResult | quotientSign | integerBit);
205 }
206}
207
208test {
209 _ = @import("divxf3_test.zig");
210}