master
1const std = @import("std");
2
/// Narrows the floating-point value `a` of type `src_t` to the floating-point
/// type `dst_t`, working purely on the integer bit patterns.
///
/// The code handles four cases, in order:
///   * the exponent fits the destination's normal range: shift the
///     significand right, rebias the exponent, round to nearest (ties to even);
///   * `a` is NaN: produce a quiet NaN carrying the truncated payload;
///   * `a` is too large (or infinite): produce infinity;
///   * otherwise: produce a destination subnormal or zero.
/// The sign bit of `a` is copied to the result in every case.
pub inline fn truncf(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
    const src_bits = @typeInfo(src_t).float.bits;
    const dst_bits = @typeInfo(dst_t).float.bits;
    const SrcRep = std.meta.Int(.unsigned, src_bits);
    const DstRep = std.meta.Int(.unsigned, dst_bits);

    // Everything down to `dst_inf_bits` depends only on the type parameters
    // and is therefore known at compile time.
    const src_sig_bits = std.math.floatMantissaBits(src_t);
    const dst_sig_bits = std.math.floatMantissaBits(dst_t);
    const sig_bits_dropped = src_sig_bits - dst_sig_bits;

    const src_exp_bits = src_bits - src_sig_bits - 1;
    const src_inf_exp = (1 << src_exp_bits) - 1;
    const src_exp_bias = src_inf_exp >> 1;

    const dst_exp_bits = dst_bits - dst_sig_bits - 1;
    const dst_inf_exp = (1 << dst_exp_bits) - 1;
    const dst_exp_bias = dst_inf_exp >> 1;

    const exp_bias_delta = src_exp_bias - dst_exp_bias;

    const src_min_normal = 1 << src_sig_bits;
    const src_sign_mask = 1 << (src_sig_bits + src_exp_bits);
    const src_infinity = src_inf_exp << src_sig_bits;
    const src_nan_payload_mask = (1 << (src_sig_bits - 1)) - 1;

    const round_mask = (1 << sig_bits_dropped) - 1;
    const halfway = 1 << (sig_bits_dropped - 1);

    // Source bit patterns of the smallest magnitude that is normal in the
    // destination, and of the smallest magnitude that overflows it.
    const underflow = (exp_bias_delta + 1) << src_sig_bits;
    const overflow = (exp_bias_delta + dst_inf_exp) << src_sig_bits;

    const dst_qnan = 1 << (dst_sig_bits - 1);
    const dst_nan_payload_mask = dst_qnan - 1;
    const dst_inf_bits: DstRep = @as(DstRep, @intCast(dst_inf_exp)) << dst_sig_bits;

    // Split the input into its sign bit and the bits of |a|.
    const bits: SrcRep = @bitCast(a);
    const magnitude: SrcRep = bits & (src_sign_mask - 1);
    const sign_bit: SrcRep = bits & src_sign_mask;

    // The wrapping subtractions fold `underflow <= magnitude < overflow`
    // into a single unsigned comparison.
    const abs_result: DstRep = if (magnitude -% underflow < magnitude -% overflow) normal: {
        // Normal in the destination: drop the low significand bits, rebias
        // the exponent, then round.
        var r: DstRep = @truncate(magnitude >> sig_bits_dropped);
        r -%= @as(DstRep, exp_bias_delta) << dst_sig_bits;

        const round_bits: SrcRep = magnitude & round_mask;
        // Above halfway rounds up; exactly halfway rounds to even.
        const increment: DstRep = if (round_bits > halfway)
            1
        else if (round_bits == halfway)
            (r & 1)
        else
            0;
        r += increment;
        break :normal r;
    } else if (magnitude > src_infinity) nan: {
        // NaN: infinity exponent, quiet bit set, plus whatever part of the
        // source payload survives the narrowing.
        const payload: SrcRep =
            ((magnitude & src_nan_payload_mask) >> sig_bits_dropped) & dst_nan_payload_mask;
        break :nan dst_inf_bits | dst_qnan | @as(DstRep, @intCast(payload));
    } else if (magnitude >= overflow) inf: {
        // Too large for the destination (or already infinite).
        break :inf dst_inf_bits;
    } else subnormal: {
        // Below the destination's normal range: the result is a subnormal or
        // zero. The exponent determines how far the significand must be
        // shifted to denormalize it.
        const a_exp: u32 = @intCast(magnitude >> src_sig_bits);
        const shift: u32 = @intCast(exp_bias_delta - a_exp + 1);

        if (shift > src_sig_bits) break :subnormal @as(DstRep, 0);

        const significand: SrcRep = (bits & (src_min_normal - 1)) | src_min_normal;

        // Shift right, OR-ing a sticky bit in if any set bit was shifted out,
        // so the rounding below still sees that the value was inexact.
        const sticky: SrcRep = @intFromBool(significand << @intCast(src_bits - shift) != 0);
        const denormalized: SrcRep = significand >> @intCast(shift) | sticky;

        const r: DstRep = @intCast(denormalized >> sig_bits_dropped);
        const round_bits: SrcRep = denormalized & round_mask;
        const increment: DstRep = if (round_bits > halfway)
            1
        else if (round_bits == halfway)
            (r & 1)
        else
            0;
        break :subnormal r + increment;
    };

    // Move the sign bit from the top of the source to the top of the destination.
    const result: DstRep align(@alignOf(dst_t)) = abs_result |
        @as(DstRep, @truncate(sign_bit >> @intCast(src_bits - dst_bits)));
    return @bitCast(result);
}
101
/// Narrows the 80-bit extended-precision value `a` to the floating-point type
/// `dst_t`, working on the exponent and fraction fields obtained from
/// `std.math.F80.fromFloat`.
///
/// Unlike the IEEE interchange formats, the f80 significand stores its integer
/// bit explicitly (bit 63 of `fraction`). That bit is masked off up front so
/// the normal path can treat the low 63 bits as an ordinary fraction field; the
/// subnormal path puts it back before denormalizing.
///
/// The code handles four cases, in order:
///   * the exponent fits the destination's normal range: shift the fraction
///     right, rebias the exponent, round to nearest (ties to even);
///   * `a` is NaN: produce a quiet NaN carrying the truncated payload;
///   * `a` is too large (or infinite): produce infinity;
///   * otherwise: produce a destination subnormal or zero.
/// The sign bit of `a` is copied to the result in every case.
pub inline fn trunc_f80(comptime dst_t: type, a: f80) dst_t {
    const dst_rep_t = std.meta.Int(.unsigned, @typeInfo(dst_t).float.bits);
    const src_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit
    const dst_sig_bits = std.math.floatMantissaBits(dst_t);

    const src_exp_bias = 16383;

    const round_mask = (1 << (src_sig_bits - dst_sig_bits)) - 1;
    const halfway = 1 << (src_sig_bits - dst_sig_bits - 1);

    const dst_bits = @typeInfo(dst_t).float.bits;
    const dst_exp_bits = dst_bits - dst_sig_bits - 1;
    const dst_inf_exp = (1 << dst_exp_bits) - 1;
    const dst_exp_bias = dst_inf_exp >> 1;

    // Smallest source exponent that is normal in the destination, and the
    // smallest source exponent that overflows it.
    const underflow = src_exp_bias + 1 - dst_exp_bias;
    const overflow = src_exp_bias + dst_inf_exp - dst_exp_bias;

    const dst_qnan = 1 << (dst_sig_bits - 1);
    const dst_nan_mask = dst_qnan - 1;

    // Break a into a sign and representation of the absolute value
    var a_rep = std.math.F80.fromFloat(a);
    const sign = a_rep.exp & 0x8000;
    a_rep.exp &= 0x7FFF;
    a_rep.fraction &= 0x7FFFFFFFFFFFFFFF;
    var abs_result: dst_rep_t = undefined;

    // The wrapping subtractions fold `underflow <= exp < overflow` into a
    // single unsigned comparison.
    if (a_rep.exp -% underflow < a_rep.exp -% overflow) {
        // The exponent of a is within the range of normal numbers in the
        // destination format. We can convert by simply right-shifting with
        // rounding and adjusting the exponent.
        abs_result = @as(dst_rep_t, a_rep.exp) << dst_sig_bits;
        abs_result |= @truncate(a_rep.fraction >> (src_sig_bits - dst_sig_bits));
        abs_result -%= @as(dst_rep_t, src_exp_bias - dst_exp_bias) << dst_sig_bits;

        const round_bits = a_rep.fraction & round_mask;
        if (round_bits > halfway) {
            // Round to nearest
            abs_result += 1;
        } else if (round_bits == halfway) {
            // Ties to even
            abs_result += abs_result & 1;
        }
    } else if (a_rep.exp == 0x7FFF and a_rep.fraction != 0) {
        // a is NaN.
        // Conjure the result by beginning with infinity, setting the qNaN
        // bit and inserting the (truncated) trailing NaN field.
        abs_result = @as(dst_rep_t, @intCast(dst_inf_exp)) << dst_sig_bits;
        abs_result |= dst_qnan;
        abs_result |= @intCast((a_rep.fraction >> (src_sig_bits - dst_sig_bits)) & dst_nan_mask);
    } else if (a_rep.exp >= overflow) {
        // a overflows to infinity.
        abs_result = @as(dst_rep_t, @intCast(dst_inf_exp)) << dst_sig_bits;
    } else {
        // a underflows on conversion to the destination type or is an exact
        // zero. The result may be a denormal or zero. Extract the exponent
        // to get the shift amount for the denormalization.
        //
        // Only exponents below `underflow` reach this branch, so the
        // subtraction cannot wrap and `shift` is always at least 1.
        const shift = src_exp_bias - dst_exp_bias - a_rep.exp + 1;

        // Right shift by the denormalization amount with sticky.
        if (shift > src_sig_bits) {
            abs_result = 0;
        } else {
            // Restore the integer bit that was masked off above: it is the
            // leading 1 of the significand and must take part in the shift.
            const significand = a_rep.fraction | (1 << src_sig_bits);
            // Sticky is set when any of the `shift` low bits about to be
            // discarded is non-zero. `shift` is in 1..63 here, so `64 - shift`
            // is a valid shift amount for a 64-bit value.
            const sticky = @intFromBool(significand << @intCast(64 - shift) != 0);
            const denormalized_significand = significand >> @intCast(shift) | sticky;
            abs_result = @intCast(denormalized_significand >> (src_sig_bits - dst_sig_bits));
            const round_bits = denormalized_significand & round_mask;
            if (round_bits > halfway) {
                // Round to nearest
                abs_result += 1;
            } else if (round_bits == halfway) {
                // Ties to even
                abs_result += abs_result & 1;
            }
        }
    }

    // The sign sits at bit 15 of the 16-bit exponent word; move it to the top
    // bit of the destination representation.
    const result align(@alignOf(dst_t)) = abs_result | @as(dst_rep_t, sign) << dst_bits - 16;
    return @bitCast(result);
}
183
// Imports the companion file truncf_test.zig and discards the result;
// presumably this is what makes its test declarations part of this file's
// test build.
test {
    _ = @import("truncf_test.zig");
}