Commit d293f1a0ed
std/math/floor.zig
@@ -12,12 +12,47 @@ const math = std.math;
/// Rounds x down to an integral value (toward negative infinity) and returns
/// it in the same floating-point type as x.
///
/// The type of x is inspected at compile time and the call is forwarded to
/// floor16, floor32 or floor64. Any other type is rejected with a compile
/// error naming the offending type.
pub fn floor(x: var) @typeOf(x) {
const T = @typeOf(x);
return switch (T) {
+ f16 => floor16(x),
f32 => floor32(x),
f64 => floor64(x),
else => @compileError("floor not implemented for " ++ @typeName(T)),
};
}
+/// Half-precision floor: returns the largest integral value not greater than x.
+///
+/// Works directly on the binary16 bit pattern (1 sign bit, 5 exponent bits
+/// biased by 15, 10 mantissa bits). Zero, infinities, NaN and values that are
+/// already integral are returned unchanged.
+fn floor16(x: f16) f16 {
+    var u = @bitCast(u16, x);
+    // Unbiased exponent: 16 for inf/nan, -15 for zero and subnormals.
+    const e = @intCast(i16, (u >> 10) & 31) - 15;
+
+    // TODO: Shouldn't need this explicit check.
+    if (x == 0.0) {
+        return x;
+    }
+
+    // With 10 mantissa bits, e >= 10 leaves no fractional bits. This also
+    // passes inf and nan (e == 16) straight through.
+    if (e >= 10) {
+        return x;
+    }
+
+    if (e >= 0) {
+        // Mask covering the mantissa bits that encode the fractional part.
+        const m = u16(1023) >> @intCast(u4, e);
+        if (u & m == 0) {
+            // No fractional bits set: x is already integral.
+            return x;
+        }
+        // Evaluate an inexact addition for its floating-point side effect.
+        // The addend must be finite in f16: 0x1.0p120 is out of range
+        // (max finite f16 is 65504) and would act as +inf, making the sum
+        // exact. 0x1.0p15 is representable and cannot overflow here because
+        // |x| < 1024 in this branch.
+        math.forceEval(x + 0x1.0p15);
+        if (u >> 15 != 0) {
+            // Negative input: bump the magnitude past the next integer so
+            // that clearing the fraction rounds toward negative infinity.
+            u += m;
+        }
+        return @bitCast(f16, u & ~m);
+    } else {
+        // 0 < |x| < 1: the result depends only on the sign.
+        math.forceEval(x + 0x1.0p15);
+        if (u >> 15 == 0) {
+            return 0.0;
+        } else {
+            return -1.0;
+        }
+    }
+}
+
fn floor32(x: f32) f32 {
var u = @bitCast(u32, x);
const e = @intCast(i32, (u >> 23) & 0xFF) - 0x7F;
@@ -84,10 +119,17 @@ fn floor64(x: f64) f64 {
}
// Checks that the generic floor() returns the same value as the
// width-specific implementation for each supported float type.
test "math.floor" {
+ assert(floor(f16(1.3)) == floor16(1.3));
assert(floor(f32(1.3)) == floor32(1.3));
assert(floor(f64(1.3)) == floor64(1.3));
}
+test "math.floor16" { // spot-checks floor16 on ordinary finite inputs
+ assert(floor16(1.3) == 1.0); // positive non-integer rounds down
+ assert(floor16(-1.3) == -2.0); // negative non-integer rounds toward -inf
+ assert(floor16(0.2) == 0.0); // positive magnitude below 1 gives 0
+}
+
test "math.floor32" {
assert(floor32(1.3) == 1.0);
assert(floor32(-1.3) == -2.0);
@@ -100,6 +142,14 @@ test "math.floor64" {
assert(floor64(0.2) == 0.0);
}
+test "math.floor16.special" { // zero, infinity and NaN inputs pass through floor16
+ assert(floor16(0.0) == 0.0);
+ assert(floor16(-0.0) == -0.0); // NOTE(review): == treats -0.0 and 0.0 as equal, so the sign of zero is not actually verified here
+ assert(math.isPositiveInf(floor16(math.inf(f16))));
+ assert(math.isNegativeInf(floor16(-math.inf(f16))));
+ assert(math.isNan(floor16(math.nan(f16))));
+}
+
test "math.floor32.special" {
assert(floor32(0.0) == 0.0);
assert(floor32(-0.0) == -0.0);
std/math/index.zig
@@ -56,6 +56,11 @@ pub fn approxEq(comptime T: type, x: T, y: T, epsilon: T) bool {
pub fn forceEval(value: var) void {
const T = @typeOf(value);
switch (T) {
+ f16 => {
+ var x: f16 = undefined;
+ const p = @ptrCast(*volatile f16, &x);
+ p.* = x;
+ },
f32 => {
var x: f32 = undefined;
const p = @ptrCast(*volatile f32, &x);