//! Bit-level widening of binary floating-point values (`extendf`, `extend_f80`).

const std = @import("std");
/// Widens a floating-point value from `src_t` to `dst_t` using only integer
/// operations on its bit pattern.
///
/// `a` is the raw bit pattern of the `src_t` value (an unsigned integer with
/// as many bits as `src_t`); the widened value is returned as a `dst_t`.
///
/// The shifts below are by `dst - src` bit counts, so `dst_t` must be at least
/// as wide as `src_t` in both total and significand bits. Both formats are
/// treated as sign / biased exponent / significand with an implicit leading
/// bit; a format with an explicit integer bit (see `extend_f80`) needs its own
/// routine.
pub inline fn extendf(
    comptime dst_t: type,
    comptime src_t: type,
    a: std.meta.Int(.unsigned, @typeInfo(src_t).float.bits),
) dst_t {
    const src_rep_t = std.meta.Int(.unsigned, @typeInfo(src_t).float.bits);
    const dst_rep_t = std.meta.Int(.unsigned, @typeInfo(dst_t).float.bits);

    // Field widths and masks of the source format. Everything here is
    // comptime-known from the type parameters.
    const src_bits = @bitSizeOf(src_t);
    const src_sig_bits = std.math.floatMantissaBits(src_t);
    const src_exp_bits = src_bits - src_sig_bits - 1;
    const src_inf_exp = (1 << src_exp_bits) - 1;
    const src_exp_bias = src_inf_exp >> 1;

    const src_min_normal = 1 << src_sig_bits;
    const src_sig_mask = src_min_normal - 1; // quiet bit plus the rest of the NaN payload
    const src_inf = src_inf_exp << src_sig_bits;
    const src_sign_mask = 1 << (src_sig_bits + src_exp_bits);
    const src_abs_mask = src_sign_mask - 1;

    // Field widths of the destination format.
    const dst_bits = @bitSizeOf(dst_t);
    const dst_sig_bits = std.math.floatMantissaBits(dst_t);
    const dst_exp_bits = dst_bits - dst_sig_bits - 1;
    const dst_inf_exp = (1 << dst_exp_bits) - 1;
    const dst_exp_bias = dst_inf_exp >> 1;
    const dst_min_normal: dst_rep_t = @as(dst_rep_t, 1) << dst_sig_bits;

    // Number of extra significand bits gained by widening.
    const sig_shift = dst_sig_bits - src_sig_bits;

    // Break a into a sign and the representation of its absolute value.
    const a_abs: src_rep_t = a & src_abs_mask;
    const sign: src_rep_t = a & src_sign_mask;

    // The wrapping subtraction sends zero and subnormals to huge values, so a
    // single unsigned compare selects exactly the normal range.
    const abs_result: dst_rep_t = if (a_abs -% src_min_normal < src_inf - src_min_normal) normal: {
        // a is a normal number.
        // Shift the exponent and significand into their destination position,
        // then rebias the exponent.
        const shifted = @as(dst_rep_t, a_abs) << sig_shift;
        break :normal shifted + ((dst_exp_bias - src_exp_bias) << dst_sig_bits);
    } else if (a_abs >= src_inf) special: {
        // a is NaN or infinity.
        // Start from destination infinity and carry the source significand
        // (quiet bit and payload) over into the most significant bits of the
        // wider significand. For infinity the significand is zero.
        const payload = @as(dst_rep_t, a_abs & src_sig_mask) << sig_shift;
        break :special (dst_inf_exp << dst_sig_bits) | payload;
    } else if (a_abs != 0) subnormal: {
        // a is subnormal.
        // Renormalize the significand, clear its now-explicit leading bit, and
        // insert the correspondingly adjusted exponent.
        const scale: u32 = @clz(a_abs) - @clz(@as(src_rep_t, src_min_normal));
        const significand = (@as(dst_rep_t, a_abs) << @intCast(sig_shift + scale)) ^ dst_min_normal;
        const exponent: u32 = dst_exp_bias - src_exp_bias - scale + 1;
        break :subnormal significand | (@as(dst_rep_t, @intCast(exponent)) << dst_sig_bits);
    } else 0; // a is zero.

    // Apply the sign bit to (dst_t)abs(a). The integer is given the alignment
    // of dst_t before it is reinterpreted as the float.
    const result: dst_rep_t align(@alignOf(dst_t)) = abs_result | (@as(dst_rep_t, sign) << (dst_bits - src_bits));
    return @bitCast(result);
}

/// Widens a floating-point value from `src_t` to `f80` using only integer
/// operations on its bit pattern.
///
/// `a` is the raw bit pattern of the `src_t` value (an unsigned integer with
/// as many bits as `src_t`). Unlike the formats `src_t` is treated as here,
/// the result is assembled as a `std.math.F80` whose 64-bit fraction carries
/// an explicit integer bit (bit 63), set for every nonzero result.
///
/// The source bits are widened to `u64` before shifting, so `src_t` must be
/// no wider than 64 bits.
pub inline fn extend_f80(comptime src_t: type, a: std.meta.Int(.unsigned, @typeInfo(src_t).float.bits)) f80 {
    const F80 = std.math.F80;
    const src_rep_t = std.meta.Int(.unsigned, @typeInfo(src_t).float.bits);

    // Destination layout: explicit integer bit on top of the fraction bits.
    const dst_int_bit = 0x8000000000000000;
    const dst_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit
    const dst_exp_bias = 16383;

    // Field widths and masks of the source format, all comptime-known.
    const src_bits = @bitSizeOf(src_t);
    const src_sig_bits = std.math.floatMantissaBits(src_t);
    const src_exp_bits = src_bits - src_sig_bits - 1;
    const src_inf_exp = (1 << src_exp_bits) - 1;
    const src_exp_bias = src_inf_exp >> 1;

    const src_min_normal = 1 << src_sig_bits;
    const src_sig_mask = src_min_normal - 1; // quiet bit plus the rest of the NaN payload
    const src_inf = src_inf_exp << src_sig_bits;
    const src_sign_mask = 1 << (src_sig_bits + src_exp_bits);
    const src_abs_mask = src_sign_mask - 1;

    // Number of extra fraction bits gained by widening.
    const sig_shift = dst_sig_bits - src_sig_bits;

    // Break a into a sign and the representation of its absolute value.
    const a_abs = a & src_abs_mask;
    const sign: u16 = if (a & src_sign_mask != 0) 0x8000 else 0;

    // The wrapping subtraction sends zero and subnormals to huge values, so a
    // single unsigned compare selects exactly the normal range.
    var dst: F80 = if (a_abs -% src_min_normal < src_inf - src_min_normal) F80{
        // a is a normal number.
        // Move the significand into place (source exponent bits that survive
        // the 64-bit shift can only land on bit 63, which is set anyway) and
        // rebias the exponent.
        .fraction = (@as(u64, a_abs) << sig_shift) | dst_int_bit,
        .exp = @as(u16, @intCast(a_abs >> src_sig_bits)) + (dst_exp_bias - src_exp_bias),
    } else if (a_abs >= src_inf) F80{
        // a is NaN or infinity.
        // Use the all-ones exponent and carry the source significand (quiet
        // bit and payload) into the top of the fraction, below the integer
        // bit. For infinity the significand is zero.
        .fraction = dst_int_bit | (@as(u64, a_abs & src_sig_mask) << sig_shift),
        .exp = 0x7fff,
    } else if (a_abs != 0) subnormal: {
        // a is subnormal.
        // Renormalize so the leading set bit lands on the integer bit. That
        // leading bit sits at position `src_sig_bits - scale`, so it never
        // reaches the exponent and the exponent is just the adjusted bias.
        const scale: u16 = @clz(a_abs) - @clz(@as(src_rep_t, src_min_normal));
        break :subnormal F80{
            .fraction = (@as(u64, a_abs) << @intCast(sig_shift + scale)) | dst_int_bit,
            .exp = dst_exp_bias - src_exp_bias - scale + 1,
        };
    } else F80{
        // a is zero.
        .fraction = 0,
        .exp = 0,
    };

    // Apply the sign bit, which is the top bit of the 16-bit exponent field.
    dst.exp |= sign;
    return dst.toFloat();
}

test {
    // Reference the sibling test file so its tests are compiled and run
    // together with this file's.
    _ = @import("extendf_test.zig");
}