master
1const std = @import("std");
2const math = std.math;
3const Log2Int = std.math.Log2Int;
4const assert = std.debug.assert;
5const expect = std.testing.expect;
6
7/// Returns x * 2^n.
8pub fn ldexp(x: anytype, n: i32) @TypeOf(x) {
9 const T = @TypeOf(x);
10 const TBits = std.meta.Int(.unsigned, @typeInfo(T).float.bits);
11
12 const exponent_bits = math.floatExponentBits(T);
13 const mantissa_bits = math.floatMantissaBits(T);
14 const fractional_bits = math.floatFractionalBits(T);
15
16 const max_biased_exponent = 2 * math.floatExponentMax(T);
17 const mantissa_mask = @as(TBits, (1 << mantissa_bits) - 1);
18
19 const repr = @as(TBits, @bitCast(x));
20 const sign_bit = repr & (1 << (exponent_bits + mantissa_bits));
21
22 if (math.isNan(x) or !math.isFinite(x))
23 return x;
24
25 var exponent: i32 = @as(i32, @intCast((repr << 1) >> (mantissa_bits + 1)));
26 if (exponent == 0)
27 exponent += (@as(i32, exponent_bits) + @intFromBool(T == f80)) - @clz(repr << 1);
28
29 if (n >= 0) {
30 if (n > max_biased_exponent - exponent) {
31 // Overflow. Return +/- inf
32 return @as(T, @bitCast(@as(TBits, @bitCast(math.inf(T))) | sign_bit));
33 } else if (exponent + n <= 0) {
34 // Result is subnormal
35 return @as(T, @bitCast((repr << @as(Log2Int(TBits), @intCast(n))) | sign_bit));
36 } else if (exponent <= 0) {
37 // Result is normal, but needs shifting
38 var result = @as(TBits, @intCast(n + exponent)) << mantissa_bits;
39 result |= (repr << @as(Log2Int(TBits), @intCast(1 - exponent))) & mantissa_mask;
40 return @as(T, @bitCast(result | sign_bit));
41 }
42
43 // Result needs no shifting
44 return @as(T, @bitCast(repr + (@as(TBits, @intCast(n)) << mantissa_bits)));
45 } else {
46 if (n <= -exponent) {
47 if (n < -(mantissa_bits + exponent))
48 return @as(T, @bitCast(sign_bit)); // Severe underflow. Return +/- 0
49
50 // Result underflowed, we need to shift and round
51 const shift = @as(Log2Int(TBits), @intCast(@min(-n, -(exponent + n) + 1)));
52 const exact_tie: bool = @ctz(repr) == shift - 1;
53 var result = repr & mantissa_mask;
54
55 if (T != f80) // Include integer bit
56 result |= @as(TBits, @intFromBool(exponent > 0)) << fractional_bits;
57 result = @as(TBits, @intCast((result >> (shift - 1))));
58
59 // Round result, including round-to-even for exact ties
60 result = ((result + 1) >> 1) & ~@as(TBits, @intFromBool(exact_tie));
61 return @as(T, @bitCast(result | sign_bit));
62 }
63
64 // Result is exact, and needs no shifting
65 return @as(T, @bitCast(repr - (@as(TBits, @intCast(-n)) << mantissa_bits)));
66 }
67}
68
69test ldexp {
70 // subnormals
71 try expect(ldexp(@as(f16, 0x1.1FFp14), -14 - 9 - 15) == math.floatTrueMin(f16));
72 try expect(ldexp(@as(f32, 0x1.3FFFFFp-1), -126 - 22) == math.floatTrueMin(f32));
73 try expect(ldexp(@as(f64, 0x1.7FFFFFFFFFFFFp-1), -1022 - 51) == math.floatTrueMin(f64));
74 try expect(ldexp(@as(f80, 0x1.7FFFFFFFFFFFFFFEp-1), -16382 - 62) == math.floatTrueMin(f80));
75 try expect(ldexp(@as(f128, 0x1.7FFFFFFFFFFFFFFFFFFFFFFFFFFFp-1), -16382 - 111) == math.floatTrueMin(f128));
76
77 try expect(ldexp(math.floatMax(f32), -128 - 149) > 0.0);
78 try expect(ldexp(math.floatMax(f32), -128 - 149 - 1) == 0.0);
79
80 @setEvalBranchQuota(10_000);
81
82 inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
83 const fractional_bits = math.floatFractionalBits(T);
84
85 const min_exponent = math.floatExponentMin(T);
86 const max_exponent = math.floatExponentMax(T);
87 const exponent_bias = max_exponent;
88
89 // basic usage
90 try expect(ldexp(@as(T, 1.5), 4) == 24.0);
91
92 // normals -> subnormals
93 try expect(math.isNormal(ldexp(@as(T, 1.0), min_exponent)));
94 try expect(!math.isNormal(ldexp(@as(T, 1.0), min_exponent - 1)));
95
96 // normals -> zero
97 try expect(ldexp(@as(T, 1.0), min_exponent - fractional_bits) > 0.0);
98 try expect(ldexp(@as(T, 1.0), min_exponent - fractional_bits - 1) == 0.0);
99
100 // subnormals -> zero
101 try expect(ldexp(math.floatTrueMin(T), 0) > 0.0);
102 try expect(ldexp(math.floatTrueMin(T), -1) == 0.0);
103
104 // Multiplications might flush the denormals to zero, esp. at
105 // runtime, so we manually construct the constants here instead.
106 const Z = std.meta.Int(.unsigned, @bitSizeOf(T));
107 const EightTimesTrueMin = @as(T, @bitCast(@as(Z, 8)));
108 const TwoTimesTrueMin = @as(T, @bitCast(@as(Z, 2)));
109
110 // subnormals -> subnormals
111 try expect(ldexp(math.floatTrueMin(T), 3) == EightTimesTrueMin);
112 try expect(ldexp(EightTimesTrueMin, -2) == TwoTimesTrueMin);
113 try expect(ldexp(EightTimesTrueMin, -3) == math.floatTrueMin(T));
114
115 // subnormals -> normals (+)
116 try expect(ldexp(math.floatTrueMin(T), fractional_bits) == math.floatMin(T));
117 try expect(ldexp(math.floatTrueMin(T), fractional_bits - 1) == math.floatMin(T) * 0.5);
118
119 // subnormals -> normals (-)
120 try expect(ldexp(-math.floatTrueMin(T), fractional_bits) == -math.floatMin(T));
121 try expect(ldexp(-math.floatTrueMin(T), fractional_bits - 1) == -math.floatMin(T) * 0.5);
122
123 // subnormals -> float limits (+inf)
124 try expect(math.isFinite(ldexp(math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits - 1)));
125 try expect(ldexp(math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits) == math.inf(T));
126
127 // subnormals -> float limits (-inf)
128 try expect(math.isFinite(ldexp(-math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits - 1)));
129 try expect(ldexp(-math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits) == -math.inf(T));
130
131 // infinity -> infinity
132 try expect(ldexp(math.inf(T), math.maxInt(i32)) == math.inf(T));
133 try expect(ldexp(math.inf(T), math.minInt(i32)) == math.inf(T));
134 try expect(ldexp(math.inf(T), max_exponent) == math.inf(T));
135 try expect(ldexp(math.inf(T), min_exponent) == math.inf(T));
136 try expect(ldexp(-math.inf(T), math.maxInt(i32)) == -math.inf(T));
137 try expect(ldexp(-math.inf(T), math.minInt(i32)) == -math.inf(T));
138
139 // extremely large n
140 try expect(ldexp(math.floatMax(T), math.maxInt(i32)) == math.inf(T));
141 try expect(ldexp(math.floatMax(T), -math.maxInt(i32)) == 0.0);
142 try expect(ldexp(math.floatMax(T), math.minInt(i32)) == 0.0);
143 try expect(ldexp(math.floatTrueMin(T), math.maxInt(i32)) == math.inf(T));
144 try expect(ldexp(math.floatTrueMin(T), -math.maxInt(i32)) == 0.0);
145 try expect(ldexp(math.floatTrueMin(T), math.minInt(i32)) == 0.0);
146 }
147}