zig/lib/std/math/ldexp.zig at master

  1const std = @import("std");
  2const math = std.math;
  3const Log2Int = std.math.Log2Int;
  4const assert = std.debug.assert;
  5const expect = std.testing.expect;
  6
  7/// Returns x * 2^n.
  8pub fn ldexp(x: anytype, n: i32) @TypeOf(x) {
  9    const T = @TypeOf(x);
 10    const TBits = std.meta.Int(.unsigned, @typeInfo(T).float.bits);
 11
 12    const exponent_bits = math.floatExponentBits(T);
 13    const mantissa_bits = math.floatMantissaBits(T);
 14    const fractional_bits = math.floatFractionalBits(T);
 15
 16    const max_biased_exponent = 2 * math.floatExponentMax(T);
 17    const mantissa_mask = @as(TBits, (1 << mantissa_bits) - 1);
 18
 19    const repr = @as(TBits, @bitCast(x));
 20    const sign_bit = repr & (1 << (exponent_bits + mantissa_bits));
 21
 22    if (math.isNan(x) or !math.isFinite(x))
 23        return x;
 24
 25    var exponent: i32 = @as(i32, @intCast((repr << 1) >> (mantissa_bits + 1)));
 26    if (exponent == 0)
 27        exponent += (@as(i32, exponent_bits) + @intFromBool(T == f80)) - @clz(repr << 1);
 28
 29    if (n >= 0) {
 30        if (n > max_biased_exponent - exponent) {
 31            // Overflow. Return +/- inf
 32            return @as(T, @bitCast(@as(TBits, @bitCast(math.inf(T))) | sign_bit));
 33        } else if (exponent + n <= 0) {
 34            // Result is subnormal
 35            return @as(T, @bitCast((repr << @as(Log2Int(TBits), @intCast(n))) | sign_bit));
 36        } else if (exponent <= 0) {
 37            // Result is normal, but needs shifting
 38            var result = @as(TBits, @intCast(n + exponent)) << mantissa_bits;
 39            result |= (repr << @as(Log2Int(TBits), @intCast(1 - exponent))) & mantissa_mask;
 40            return @as(T, @bitCast(result | sign_bit));
 41        }
 42
 43        // Result needs no shifting
 44        return @as(T, @bitCast(repr + (@as(TBits, @intCast(n)) << mantissa_bits)));
 45    } else {
 46        if (n <= -exponent) {
 47            if (n < -(mantissa_bits + exponent))
 48                return @as(T, @bitCast(sign_bit)); // Severe underflow. Return +/- 0
 49
 50            // Result underflowed, we need to shift and round
 51            const shift = @as(Log2Int(TBits), @intCast(@min(-n, -(exponent + n) + 1)));
 52            const exact_tie: bool = @ctz(repr) == shift - 1;
 53            var result = repr & mantissa_mask;
 54
 55            if (T != f80) // Include integer bit
 56                result |= @as(TBits, @intFromBool(exponent > 0)) << fractional_bits;
 57            result = @as(TBits, @intCast((result >> (shift - 1))));
 58
 59            // Round result, including round-to-even for exact ties
 60            result = ((result + 1) >> 1) & ~@as(TBits, @intFromBool(exact_tie));
 61            return @as(T, @bitCast(result | sign_bit));
 62        }
 63
 64        // Result is exact, and needs no shifting
 65        return @as(T, @bitCast(repr - (@as(TBits, @intCast(-n)) << mantissa_bits)));
 66    }
 67}
 68
 69test ldexp {
 70    // subnormals
 71    try expect(ldexp(@as(f16, 0x1.1FFp14), -14 - 9 - 15) == math.floatTrueMin(f16));
 72    try expect(ldexp(@as(f32, 0x1.3FFFFFp-1), -126 - 22) == math.floatTrueMin(f32));
 73    try expect(ldexp(@as(f64, 0x1.7FFFFFFFFFFFFp-1), -1022 - 51) == math.floatTrueMin(f64));
 74    try expect(ldexp(@as(f80, 0x1.7FFFFFFFFFFFFFFEp-1), -16382 - 62) == math.floatTrueMin(f80));
 75    try expect(ldexp(@as(f128, 0x1.7FFFFFFFFFFFFFFFFFFFFFFFFFFFp-1), -16382 - 111) == math.floatTrueMin(f128));
 76
 77    try expect(ldexp(math.floatMax(f32), -128 - 149) > 0.0);
 78    try expect(ldexp(math.floatMax(f32), -128 - 149 - 1) == 0.0);
 79
 80    @setEvalBranchQuota(10_000);
 81
 82    inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
 83        const fractional_bits = math.floatFractionalBits(T);
 84
 85        const min_exponent = math.floatExponentMin(T);
 86        const max_exponent = math.floatExponentMax(T);
 87        const exponent_bias = max_exponent;
 88
 89        // basic usage
 90        try expect(ldexp(@as(T, 1.5), 4) == 24.0);
 91
 92        // normals -> subnormals
 93        try expect(math.isNormal(ldexp(@as(T, 1.0), min_exponent)));
 94        try expect(!math.isNormal(ldexp(@as(T, 1.0), min_exponent - 1)));
 95
 96        // normals -> zero
 97        try expect(ldexp(@as(T, 1.0), min_exponent - fractional_bits) > 0.0);
 98        try expect(ldexp(@as(T, 1.0), min_exponent - fractional_bits - 1) == 0.0);
 99
100        // subnormals -> zero
101        try expect(ldexp(math.floatTrueMin(T), 0) > 0.0);
102        try expect(ldexp(math.floatTrueMin(T), -1) == 0.0);
103
104        // Multiplications might flush the denormals to zero, esp. at
105        // runtime, so we manually construct the constants here instead.
106        const Z = std.meta.Int(.unsigned, @bitSizeOf(T));
107        const EightTimesTrueMin = @as(T, @bitCast(@as(Z, 8)));
108        const TwoTimesTrueMin = @as(T, @bitCast(@as(Z, 2)));
109
110        // subnormals -> subnormals
111        try expect(ldexp(math.floatTrueMin(T), 3) == EightTimesTrueMin);
112        try expect(ldexp(EightTimesTrueMin, -2) == TwoTimesTrueMin);
113        try expect(ldexp(EightTimesTrueMin, -3) == math.floatTrueMin(T));
114
115        // subnormals -> normals (+)
116        try expect(ldexp(math.floatTrueMin(T), fractional_bits) == math.floatMin(T));
117        try expect(ldexp(math.floatTrueMin(T), fractional_bits - 1) == math.floatMin(T) * 0.5);
118
119        // subnormals -> normals (-)
120        try expect(ldexp(-math.floatTrueMin(T), fractional_bits) == -math.floatMin(T));
121        try expect(ldexp(-math.floatTrueMin(T), fractional_bits - 1) == -math.floatMin(T) * 0.5);
122
123        // subnormals -> float limits (+inf)
124        try expect(math.isFinite(ldexp(math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits - 1)));
125        try expect(ldexp(math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits) == math.inf(T));
126
127        // subnormals -> float limits (-inf)
128        try expect(math.isFinite(ldexp(-math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits - 1)));
129        try expect(ldexp(-math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits) == -math.inf(T));
130
131        // infinity -> infinity
132        try expect(ldexp(math.inf(T), math.maxInt(i32)) == math.inf(T));
133        try expect(ldexp(math.inf(T), math.minInt(i32)) == math.inf(T));
134        try expect(ldexp(math.inf(T), max_exponent) == math.inf(T));
135        try expect(ldexp(math.inf(T), min_exponent) == math.inf(T));
136        try expect(ldexp(-math.inf(T), math.maxInt(i32)) == -math.inf(T));
137        try expect(ldexp(-math.inf(T), math.minInt(i32)) == -math.inf(T));
138
139        // extremely large n
140        try expect(ldexp(math.floatMax(T), math.maxInt(i32)) == math.inf(T));
141        try expect(ldexp(math.floatMax(T), -math.maxInt(i32)) == 0.0);
142        try expect(ldexp(math.floatMax(T), math.minInt(i32)) == 0.0);
143        try expect(ldexp(math.floatTrueMin(T), math.maxInt(i32)) == math.inf(T));
144        try expect(ldexp(math.floatTrueMin(T), -math.maxInt(i32)) == 0.0);
145        try expect(ldexp(math.floatTrueMin(T), math.minInt(i32)) == 0.0);
146    }
147}