Commit 71b8760d3b
Changed files (19)
lib
std
src
arch
test
behavior
lib/std/math/fma.zig
@@ -19,6 +19,8 @@ pub fn fma(comptime T: type, x: T, y: T, z: T) T {
// TODO this is not correct for some targets
c_longdouble => @floatCast(c_longdouble, fma128(x, y, z)),
+ f80 => @floatCast(f80, fma128(x, y, z)),
+
else => @compileError("fma not implemented for " ++ @typeName(T)),
};
}
lib/std/special/c.zig
@@ -12,7 +12,7 @@ const maxInt = std.math.maxInt;
const native_os = builtin.os.tag;
const native_arch = builtin.cpu.arch;
const native_abi = builtin.abi;
-const long_double_is_f128 = builtin.target.longDoubleIsF128();
+const long_double_is_f128 = builtin.target.longDoubleIs(f128);
const is_wasm = switch (native_arch) {
.wasm32, .wasm64 => true,
@@ -90,10 +90,6 @@ comptime {
@export(fmod, .{ .name = "fmod", .linkage = .Strong });
@export(fmodf, .{ .name = "fmodf", .linkage = .Strong });
- @export(fma, .{ .name = "fma", .linkage = .Strong });
- @export(fmaf, .{ .name = "fmaf", .linkage = .Strong });
- @export(fmal, .{ .name = "fmal", .linkage = .Strong });
-
@export(sincos, .{ .name = "sincos", .linkage = .Strong });
@export(sincosf, .{ .name = "sincosf", .linkage = .Strong });
@@ -561,20 +557,6 @@ test "fmod, fmodf" {
}
}
-fn fmaf(a: f32, b: f32, c: f32) callconv(.C) f32 {
- return math.fma(f32, a, b, c);
-}
-
-fn fma(a: f64, b: f64, c: f64) callconv(.C) f64 {
- return math.fma(f64, a, b, c);
-}
-fn fmal(a: c_longdouble, b: c_longdouble, c: c_longdouble) callconv(.C) c_longdouble {
- if (!long_double_is_f128) {
- @panic("TODO implement this");
- }
- return math.fma(c_longdouble, a, b, c);
-}
-
fn sincos(a: f64, r_sin: *f64, r_cos: *f64) callconv(.C) void {
r_sin.* = math.sin(a);
r_cos.* = math.cos(a);
lib/std/special/compiler_rt.zig
@@ -19,7 +19,8 @@ const strong_linkage = if (is_test)
else
std.builtin.GlobalLinkage.Strong;
-const long_double_is_f128 = builtin.target.longDoubleIsF128();
+const long_double_is_f80 = builtin.target.longDoubleIs(f80);
+const long_double_is_f128 = builtin.target.longDoubleIs(f128);
comptime {
// These files do their own comptime exporting logic.
@@ -758,14 +759,35 @@ comptime {
@export(floorf, .{ .name = "floorf", .linkage = linkage });
@export(floor, .{ .name = "floor", .linkage = linkage });
@export(floorl, .{ .name = "floorl", .linkage = linkage });
- @export(fmaq, .{ .name = "fmaq", .linkage = linkage });
+
+ @export(fma, .{ .name = "fma", .linkage = linkage });
+ @export(fmaf, .{ .name = "fmaf", .linkage = linkage });
+ @export(fmal, .{ .name = "fmal", .linkage = linkage });
+ if (!long_double_is_f80) {
+ @export(__fmax, .{ .name = "__fmax", .linkage = linkage });
+ }
+ if (!long_double_is_f128) {
+ @export(fmaq, .{ .name = "fmaq", .linkage = linkage });
+ }
}
const math = std.math;
+fn fmaf(a: f32, b: f32, c: f32) callconv(.C) f32 {
+ return math.fma(f32, a, b, c);
+}
+fn fma(a: f64, b: f64, c: f64) callconv(.C) f64 {
+ return math.fma(f64, a, b, c);
+}
+fn __fmax(a: f80, b: f80, c: f80) callconv(.C) f80 {
+ return math.fma(f80, a, b, c);
+}
fn fmaq(a: f128, b: f128, c: f128) callconv(.C) f128 {
return math.fma(f128, a, b, c);
}
+fn fmal(a: c_longdouble, b: c_longdouble, c: c_longdouble) callconv(.C) c_longdouble {
+ return math.fma(c_longdouble, a, b, c);
+}
// TODO add intrinsics for these (and probably the double version too)
// and have the math stuff use the intrinsic. same as @mod and @rem
lib/std/target.zig
@@ -1714,9 +1714,55 @@ pub const Target = struct {
};
}
- pub inline fn longDoubleIsF128(target: Target) bool {
- return switch (target.cpu.arch) {
- .riscv64, .aarch64, .aarch64_be, .aarch64_32, .s390x, .mips64, .mips64el => true,
+ pub inline fn longDoubleIs(target: Target, comptime F: type) bool {
+ if (target.abi == .msvc) {
+ return F == f64;
+ }
+ return switch (F) {
+ f128 => switch (target.cpu.arch) {
+ .riscv64,
+ .aarch64,
+ .aarch64_be,
+ .aarch64_32,
+ .s390x,
+ .mips64,
+ .mips64el,
+ .sparc,
+ .sparcv9,
+ .sparcel,
+ .powerpc,
+ .powerpcle,
+ .powerpc64,
+ .powerpc64le,
+ => true,
+
+ else => false,
+ },
+ f80 => switch (target.cpu.arch) {
+ .x86_64, .i386 => true,
+ else => false,
+ },
+ f64 => switch (target.cpu.arch) {
+ .x86_64,
+ .i386,
+ .riscv64,
+ .aarch64,
+ .aarch64_be,
+ .aarch64_32,
+ .s390x,
+ .mips64,
+ .mips64el,
+ .sparc,
+ .sparcv9,
+ .sparcel,
+ .powerpc,
+ .powerpcle,
+ .powerpc64,
+ .powerpc64le,
+ => false,
+
+ else => true,
+ },
else => false,
};
}
src/arch/aarch64/CodeGen.zig
@@ -3654,8 +3654,12 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void {
}
fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
- _ = inst;
- return self.fail("TODO implement airMulAdd for aarch64", .{});
+ const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+ const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
+ const result: MCValue = if (self.liveness.isUnused(inst)) .dead else {
+ return self.fail("TODO implement airMulAdd for aarch64", .{});
+ };
+ return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand });
}
fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue {
src/arch/arm/CodeGen.zig
@@ -4088,8 +4088,12 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void {
}
fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
- _ = inst;
- return self.fail("TODO implement airMulAdd for arm", .{});
+ const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+ const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
+ const result: MCValue = if (self.liveness.isUnused(inst)) .dead else {
+ return self.fail("TODO implement airMulAdd for arm", .{});
+ };
+ return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand });
}
fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue {
src/arch/riscv64/CodeGen.zig
@@ -2205,8 +2205,12 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void {
}
fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
- _ = inst;
- return self.fail("TODO implement airMulAdd for riscv64", .{});
+ const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+ const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
+ const result: MCValue = if (self.liveness.isUnused(inst)) .dead else {
+ return self.fail("TODO implement airMulAdd for riscv64", .{});
+ };
+ return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand });
}
fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue {
src/arch/x86_64/CodeGen.zig
@@ -5561,8 +5561,12 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void {
}
fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
- _ = inst;
- return self.fail("TODO implement airMulAdd for x86_64", .{});
+ const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+ const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
+ const result: MCValue = if (self.liveness.isUnused(inst)) .dead else {
+ return self.fail("TODO implement airMulAdd for x86_64", .{});
+ };
+ return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand });
}
fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue {
src/codegen/c.zig
@@ -16,6 +16,7 @@ const trace = @import("../tracy.zig").trace;
const LazySrcLoc = Module.LazySrcLoc;
const Air = @import("../Air.zig");
const Liveness = @import("../Liveness.zig");
+const CType = @import("../type.zig").CType;
const Mutability = enum { Const, Mut };
const BigIntConst = std.math.big.int.Const;
@@ -1635,7 +1636,7 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
.trunc_float,
=> |tag| return f.fail("TODO: C backend: implement unary op for tag '{s}'", .{@tagName(tag)}),
- .mul_add => return f.fail("TODO: C backend: implement @mulAdd", .{}),
+ .mul_add => try airMulAdd(f, inst),
.add_with_overflow => try airAddWithOverflow(f, inst),
.sub_with_overflow => try airSubWithOverflow(f, inst),
@@ -3623,6 +3624,35 @@ fn airWasmMemoryGrow(f: *Function, inst: Air.Inst.Index) !CValue {
return local;
}
+fn airMulAdd(f: *Function, inst: Air.Inst.Index) !CValue {
+ if (f.liveness.isUnused(inst)) return CValue.none;
+ const pl_op = f.air.instructions.items(.data)[inst].pl_op;
+ const extra = f.air.extraData(Air.Bin, pl_op.payload).data;
+ const inst_ty = f.air.typeOfIndex(inst);
+ const mulend1 = try f.resolveInst(extra.lhs);
+ const mulend2 = try f.resolveInst(extra.rhs);
+ const addend = try f.resolveInst(pl_op.operand);
+ const writer = f.object.writer();
+ const target = f.object.dg.module.getTarget();
+ const fn_name = switch (inst_ty.floatBits(target)) {
+ 16, 32 => "fmaf",
+ 64 => "fma",
+ 80 => if (CType.longdouble.sizeInBits(target) == 80) "fmal" else "__fmax",
+ 128 => if (CType.longdouble.sizeInBits(target) == 128) "fmal" else "fmaq",
+ else => unreachable,
+ };
+ const local = try f.allocLocal(inst_ty, .Const);
+ try writer.writeAll(" = ");
+ try writer.print("{s}(", .{fn_name});
+ try f.writeCValue(writer, mulend1);
+ try writer.writeAll(", ");
+ try f.writeCValue(writer, mulend2);
+ try writer.writeAll(", ");
+ try f.writeCValue(writer, addend);
+ try writer.writeAll(");\n");
+ return local;
+}
+
fn toMemoryOrder(order: std.builtin.AtomicOrder) [:0]const u8 {
return switch (order) {
.Unordered => "memory_order_relaxed",
src/codegen/llvm.zig
@@ -18,6 +18,7 @@ const target_util = @import("../target.zig");
const Value = @import("../value.zig").Value;
const Type = @import("../type.zig").Type;
const LazySrcLoc = Module.LazySrcLoc;
+const CType = @import("../type.zig").CType;
const Error = error{ OutOfMemory, CodegenFail };
@@ -2189,12 +2190,12 @@ pub const FuncGen = struct {
.min => try self.airMin(inst),
.max => try self.airMax(inst),
.slice => try self.airSlice(inst),
+ .mul_add => try self.airMulAdd(inst),
.add_with_overflow => try self.airOverflow(inst, "llvm.sadd.with.overflow", "llvm.uadd.with.overflow"),
.sub_with_overflow => try self.airOverflow(inst, "llvm.ssub.with.overflow", "llvm.usub.with.overflow"),
.mul_with_overflow => try self.airOverflow(inst, "llvm.smul.with.overflow", "llvm.umul.with.overflow"),
.shl_with_overflow => try self.airShlWithOverflow(inst),
- .mul_add => try self.airMulAdd(inst),
.bit_and, .bool_and => try self.airAnd(inst),
.bit_or, .bool_or => try self.airOr(inst),
@@ -3844,43 +3845,43 @@ pub const FuncGen = struct {
}
fn airMulAdd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
- if (self.liveness.isUnused(inst))
- return null;
+ if (self.liveness.isUnused(inst)) return null;
- const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
- const extra = self.air.extraData(Air.MulAdd, ty_pl.payload).data;
+ const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+ const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
- const mulend1 = try self.resolveInst(extra.mulend1);
- const mulend2 = try self.resolveInst(extra.mulend2);
- const addend = try self.resolveInst(extra.addend);
+ const mulend1 = try self.resolveInst(extra.lhs);
+ const mulend2 = try self.resolveInst(extra.rhs);
+ const addend = try self.resolveInst(pl_op.operand);
const ty = self.air.typeOfIndex(inst);
const llvm_ty = try self.dg.llvmType(ty);
const target = self.dg.module.getTarget();
- const fn_val = switch (ty.floatBits(target)) {
- 16, 32, 64 => blk: {
- break :blk self.getIntrinsic("llvm.fma", &.{llvm_ty});
- },
- // TODO: using `llvm.fma` for f80 does not seem to work for all targets, needs further investigation.
- 80 => return self.dg.todo("Implement mulAdd for f80", .{}),
- 128 => blk: {
- // LLVM incorrectly lowers the fma builtin for f128 to fmal, which is for
- // `long double`. On some targets this will be correct; on others it will be incorrect.
- if (target.longDoubleIsF128()) {
- break :blk self.getIntrinsic("llvm.fma", &.{llvm_ty});
- } else {
- break :blk self.dg.object.llvm_module.getNamedFunction("fmaq") orelse fn_blk: {
- const param_types = [_]*const llvm.Type{ llvm_ty, llvm_ty, llvm_ty };
- const fn_type = llvm.functionType(llvm_ty, &param_types, param_types.len, .False);
- break :fn_blk self.dg.object.llvm_module.addFunction("fmaq", fn_type);
- };
- }
- },
+ const Strat = union(enum) {
+ intrinsic,
+ libc: [*:0]const u8,
+ };
+ const strat: Strat = switch (ty.floatBits(target)) {
+ 16, 32, 64 => Strat.intrinsic,
+ 80 => if (CType.longdouble.sizeInBits(target) == 80) Strat{ .intrinsic = {} } else Strat{ .libc = "__fmax" },
+ // LLVM always lowers the fma builtin for f128 to fmal, which is for `long double`.
+ // On some targets this will be correct; on others it will be incorrect.
+ 128 => if (CType.longdouble.sizeInBits(target) == 128) Strat{ .intrinsic = {} } else Strat{ .libc = "fmaq" },
else => unreachable,
};
+
+ const llvm_fn = switch (strat) {
+ .intrinsic => self.getIntrinsic("llvm.fma", &.{llvm_ty}),
+ .libc => |fn_name| self.dg.object.llvm_module.getNamedFunction(fn_name) orelse b: {
+ const param_types = [_]*const llvm.Type{ llvm_ty, llvm_ty, llvm_ty };
+ const fn_type = llvm.functionType(llvm_ty, &param_types, param_types.len, .False);
+ break :b self.dg.object.llvm_module.addFunction(fn_name, fn_type);
+ },
+ };
+
const params = [_]*const llvm.Value{ mulend1, mulend2, addend };
- return self.builder.buildCall(fn_val, &params, params.len, .C, .Auto, "");
+ return self.builder.buildCall(llvm_fn, &params, params.len, .C, .Auto, "");
}
fn airShlWithOverflow(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
@@ -4061,8 +4062,15 @@ pub const FuncGen = struct {
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
const operand = try self.resolveInst(ty_op.operand);
- const dest_llvm_ty = try self.dg.llvmType(self.air.typeOfIndex(inst));
-
+ const operand_ty = self.air.typeOf(ty_op.operand);
+ const dest_ty = self.air.typeOfIndex(inst);
+ const target = self.dg.module.getTarget();
+ const dest_bits = dest_ty.floatBits(target);
+ const src_bits = operand_ty.floatBits(target);
+ if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) {
+ return softF80TruncOrExt(self, operand, src_bits, dest_bits);
+ }
+ const dest_llvm_ty = try self.dg.llvmType(dest_ty);
return self.builder.buildFPTrunc(operand, dest_llvm_ty, "");
}
@@ -4072,8 +4080,15 @@ pub const FuncGen = struct {
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
const operand = try self.resolveInst(ty_op.operand);
+ const operand_ty = self.air.typeOf(ty_op.operand);
+ const dest_ty = self.air.typeOfIndex(inst);
+ const target = self.dg.module.getTarget();
+ const dest_bits = dest_ty.floatBits(target);
+ const src_bits = operand_ty.floatBits(target);
+ if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) {
+ return softF80TruncOrExt(self, operand, src_bits, dest_bits);
+ }
const dest_llvm_ty = try self.dg.llvmType(self.air.typeOfIndex(inst));
-
return self.builder.buildFPExt(operand, dest_llvm_ty, "");
}
@@ -5105,6 +5120,87 @@ pub const FuncGen = struct {
return null;
}
+ fn softF80TruncOrExt(
+ self: *FuncGen,
+ operand: *const llvm.Value,
+ src_bits: u16,
+ dest_bits: u16,
+ ) !?*const llvm.Value {
+ const target = self.dg.module.getTarget();
+
+ var param_llvm_ty: *const llvm.Type = self.context.intType(80);
+ var ret_llvm_ty: *const llvm.Type = param_llvm_ty;
+ var fn_name: [*:0]const u8 = undefined;
+ var arg = operand;
+ var final_cast: ?*const llvm.Type = null;
+
+ assert(src_bits == 80 or dest_bits == 80);
+
+ if (src_bits == 80) switch (dest_bits) {
+ 16 => {
+ // See corresponding condition at definition of
+ // __truncxfhf2 in compiler-rt.
+ if (target.cpu.arch.isAARCH64()) {
+ ret_llvm_ty = self.context.halfType();
+ } else {
+ ret_llvm_ty = self.context.intType(16);
+ final_cast = self.context.halfType();
+ }
+ fn_name = "__truncxfhf2";
+ },
+ 32 => {
+ ret_llvm_ty = self.context.floatType();
+ fn_name = "__truncxfsf2";
+ },
+ 64 => {
+ ret_llvm_ty = self.context.doubleType();
+ fn_name = "__truncxfdf2";
+ },
+ 80 => return operand,
+ 128 => {
+ ret_llvm_ty = self.context.fp128Type();
+ fn_name = "__extendxftf2";
+ },
+ else => unreachable,
+ } else switch (src_bits) {
+ 16 => {
+ // See corresponding condition at definition of
+ // __extendhfxf2 in compiler-rt.
+ param_llvm_ty = if (target.cpu.arch.isAARCH64())
+ self.context.halfType()
+ else
+ self.context.intType(16);
+ arg = self.builder.buildBitCast(arg, param_llvm_ty, "");
+ fn_name = "__extendhfxf2";
+ },
+ 32 => {
+ param_llvm_ty = self.context.floatType();
+ fn_name = "__extendsfxf2";
+ },
+ 64 => {
+ param_llvm_ty = self.context.doubleType();
+ fn_name = "__extenddfxf2";
+ },
+ 80 => return operand,
+ 128 => {
+ param_llvm_ty = self.context.fp128Type();
+ fn_name = "__trunctfxf2";
+ },
+ else => unreachable,
+ }
+
+ const llvm_fn = self.dg.object.llvm_module.getNamedFunction(fn_name) orelse f: {
+ const param_types = [_]*const llvm.Type{param_llvm_ty};
+ const fn_type = llvm.functionType(ret_llvm_ty, &param_types, param_types.len, .False);
+ break :f self.dg.object.llvm_module.addFunction(fn_name, fn_type);
+ };
+
+ var args: [1]*const llvm.Value = .{arg};
+ const result = self.builder.buildCall(llvm_fn, &args, args.len, .C, .Auto, "");
+ const final_cast_llvm_ty = final_cast orelse return result;
+ return self.builder.buildBitCast(result, final_cast_llvm_ty, "");
+ }
+
fn getErrorNameTable(self: *FuncGen) !*const llvm.Value {
if (self.dg.object.error_name_table) |table| {
return table;
src/Air.zig
@@ -580,7 +580,8 @@ pub const Inst = struct {
prefetch,
/// Computes `(a * b) + c`, but only rounds once.
- /// Uses the `ty_pl` field.
+ /// Uses the `pl_op` field with payload `Bin`.
+ /// The operand is the addend. The mulends are lhs and rhs.
mul_add,
/// Implements @fieldParentPtr builtin.
@@ -728,12 +729,6 @@ pub const Bin = struct {
rhs: Inst.Ref,
};
-pub const MulAdd = struct {
- mulend1: Inst.Ref,
- mulend2: Inst.Ref,
- addend: Inst.Ref,
-};
-
pub const FieldParentPtr = struct {
field_ptr: Inst.Ref,
field_index: u32,
@@ -899,7 +894,6 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
.aggregate_init,
.union_init,
.field_parent_ptr,
- .mul_add,
=> return air.getRefType(datas[inst].ty_pl.ty),
.not,
@@ -997,6 +991,8 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
return ptr_ty.elemType();
},
+ .mul_add => return air.typeOf(datas[inst].pl_op.operand),
+
.add_with_overflow,
.sub_with_overflow,
.mul_with_overflow,
src/AstGen.zig
@@ -7309,8 +7309,8 @@ fn builtinCall(
},
.mul_add => {
const float_type = try typeExpr(gz, scope, params[0]);
- const mulend1 = try expr(gz, scope, .{ .ty = float_type }, params[1]);
- const mulend2 = try expr(gz, scope, .{ .ty = float_type }, params[2]);
+ const mulend1 = try expr(gz, scope, .{ .coerced_ty = float_type }, params[1]);
+ const mulend2 = try expr(gz, scope, .{ .coerced_ty = float_type }, params[2]);
const addend = try expr(gz, scope, .{ .ty = float_type }, params[3]);
const result = try gz.addPlNode(.mul_add, node, Zir.Inst.MulAdd{
.mulend1 = mulend1,
src/Liveness.zig
@@ -465,8 +465,9 @@ fn analyzeInst(
return trackOperands(a, new_set, inst, main_tomb, .{ extra.ptr, extra.expected_value, extra.new_value });
},
.mul_add => {
- const extra = a.air.extraData(Air.MulAdd, inst_datas[inst].ty_pl.payload).data;
- return trackOperands(a, new_set, inst, main_tomb, .{ extra.mulend1, extra.mulend2, extra.addend });
+ const pl_op = inst_datas[inst].pl_op;
+ const extra = a.air.extraData(Air.Bin, pl_op.payload).data;
+ return trackOperands(a, new_set, inst, main_tomb, .{ extra.lhs, extra.rhs, pl_op.operand });
},
.atomic_load => {
const ptr = inst_datas[inst].atomic_load.ptr;
src/print_air.zig
@@ -360,14 +360,14 @@ const Writer = struct {
}
fn writeMulAdd(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
- const ty_pl = w.air.instructions.items(.data)[inst].ty_pl;
- const extra = w.air.extraData(Air.MulAdd, ty_pl.payload).data;
+ const pl_op = w.air.instructions.items(.data)[inst].pl_op;
+ const extra = w.air.extraData(Air.Bin, pl_op.payload).data;
- try w.writeOperand(s, inst, 0, extra.mulend1);
+ try w.writeOperand(s, inst, 0, extra.lhs);
try s.writeAll(", ");
- try w.writeOperand(s, inst, 1, extra.mulend2);
+ try w.writeOperand(s, inst, 1, extra.rhs);
try s.writeAll(", ");
- try w.writeOperand(s, inst, 2, extra.addend);
+ try w.writeOperand(s, inst, 2, pl_op.operand);
}
fn writeFence(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
src/Sema.zig
@@ -13525,48 +13525,26 @@ fn zirMulAdd(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
const mulend2_src: LazySrcLoc = .{ .node_offset_builtin_call_arg2 = inst_data.src_node };
const addend_src: LazySrcLoc = .{ .node_offset_builtin_call_arg3 = inst_data.src_node };
- const mulend1 = sema.resolveInst(extra.mulend1);
- const mulend2 = sema.resolveInst(extra.mulend2);
const addend = sema.resolveInst(extra.addend);
- // All args have the same type
- const ty = sema.typeOf(mulend1);
- switch (ty.zigTypeTag()) {
- .ComptimeFloat, .Float => {},
- .Vector => {
- const scalar_ty = ty.scalarType();
- switch (scalar_ty.zigTypeTag()) {
- .ComptimeFloat, .Float => {},
- else => return sema.fail(block, src, "expected vector of floats or float type, found '{}'", .{scalar_ty}),
- }
- },
- else => return sema.fail(block, src, "expected vector of floats or float type, found '{}'", .{ty}),
- }
+ const ty = sema.typeOf(addend);
+ const mulend1 = try sema.coerce(block, ty, sema.resolveInst(extra.mulend1), mulend1_src);
+ const mulend2 = try sema.coerce(block, ty, sema.resolveInst(extra.mulend2), mulend2_src);
const target = sema.mod.getTarget();
+
switch (ty.zigTypeTag()) {
.ComptimeFloat, .Float => {
const maybe_mulend1 = try sema.resolveMaybeUndefVal(block, mulend1_src, mulend1);
const maybe_mulend2 = try sema.resolveMaybeUndefVal(block, mulend2_src, mulend2);
const maybe_addend = try sema.resolveMaybeUndefVal(block, addend_src, addend);
- if (maybe_mulend1) |mulend1_val| {
- if (mulend1_val.isUndef())
- return sema.addConstUndef(ty);
- }
-
- if (maybe_mulend2) |mulend2_val| {
- if (mulend2_val.isUndef())
- return sema.addConstUndef(ty);
- }
-
- if (maybe_addend) |addend_val| {
- if (addend_val.isUndef())
- return sema.addConstUndef(ty);
- }
-
- if (maybe_mulend1) |mulend1_val| {
+ const runtime_src = if (maybe_mulend1) |mulend1_val| rs: {
if (maybe_mulend2) |mulend2_val| {
+ if (mulend2_val.isUndef()) return sema.addConstUndef(ty);
+
if (maybe_addend) |addend_val| {
+ if (addend_val.isUndef()) return sema.addConstUndef(ty);
+
const result_val = try Value.mulAdd(
ty,
mulend1_val,
@@ -13576,25 +13554,46 @@ fn zirMulAdd(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
target,
);
return sema.addConstant(ty, result_val);
+ } else {
+ break :rs addend_src;
}
+ } else {
+ if (maybe_addend) |addend_val| {
+ if (addend_val.isUndef()) return sema.addConstUndef(ty);
+ }
+ break :rs mulend2_src;
}
- }
+ } else rs: {
+ if (maybe_mulend2) |mulend2_val| {
+ if (mulend2_val.isUndef()) return sema.addConstUndef(ty);
+ }
+ if (maybe_addend) |addend_val| {
+ if (addend_val.isUndef()) return sema.addConstUndef(ty);
+ }
+ break :rs mulend1_src;
+ };
- try sema.requireRuntimeBlock(block, src);
+ try sema.requireRuntimeBlock(block, runtime_src);
return block.addInst(.{
.tag = .mul_add,
- .data = .{ .ty_pl = .{
- .ty = try sema.addType(ty),
- .payload = try sema.addExtra(Air.MulAdd{
- .mulend1 = mulend1,
- .mulend2 = mulend2,
- .addend = addend,
+ .data = .{ .pl_op = .{
+ .operand = addend,
+ .payload = try sema.addExtra(Air.Bin{
+ .lhs = mulend1,
+ .rhs = mulend2,
}),
} },
});
},
- .Vector => return sema.fail(block, src, "TODO: implement @mulAdd for vectors", .{}),
- else => unreachable,
+ .Vector => {
+ const scalar_ty = ty.scalarType();
+ switch (scalar_ty.zigTypeTag()) {
+ .ComptimeFloat, .Float => {},
+ else => return sema.fail(block, src, "expected vector of floats or float type, found '{}'", .{scalar_ty}),
+ }
+ return sema.fail(block, src, "TODO: implement @mulAdd for vectors", .{});
+ },
+ else => return sema.fail(block, src, "expected vector of floats or float type, found '{}'", .{ty}),
}
}
src/type.zig
@@ -5436,33 +5436,36 @@ pub const CType = enum {
switch (target.os.tag) {
.freestanding, .other => switch (target.cpu.arch) {
.msp430 => switch (self) {
- .short,
- .ushort,
- .int,
- .uint,
- => return 16,
- .long,
- .ulong,
- => return 32,
- .longlong,
- .ulonglong,
- => return 64,
- .longdouble => @panic("TODO figure out what kind of float `long double` is on this target"),
+ .short, .ushort, .int, .uint => return 16,
+ .long, .ulong => return 32,
+ .longlong, .ulonglong, .longdouble => return 64,
},
else => switch (self) {
- .short,
- .ushort,
- => return 16,
- .int,
- .uint,
- => return 32,
- .long,
- .ulong,
- => return target.cpu.arch.ptrBitWidth(),
- .longlong,
- .ulonglong,
- => return 64,
- .longdouble => @panic("TODO figure out what kind of float `long double` is on this target"),
+ .short, .ushort => return 16,
+ .int, .uint => return 32,
+ .long, .ulong => return target.cpu.arch.ptrBitWidth(),
+ .longlong, .ulonglong => return 64,
+ .longdouble => switch (target.cpu.arch) {
+ .i386, .x86_64 => return 80,
+
+ .riscv64,
+ .aarch64,
+ .aarch64_be,
+ .aarch64_32,
+ .s390x,
+ .mips64,
+ .mips64el,
+ .sparc,
+ .sparcv9,
+ .sparcel,
+ .powerpc,
+ .powerpcle,
+ .powerpc64,
+ .powerpc64le,
+ => return 128,
+
+ else => return 64,
+ },
},
},
@@ -5477,19 +5480,13 @@ pub const CType = enum {
.plan9,
.solaris,
=> switch (self) {
- .short,
- .ushort,
- => return 16,
- .int,
- .uint,
- => return 32,
- .long,
- .ulong,
- => return target.cpu.arch.ptrBitWidth(),
- .longlong,
- .ulonglong,
- => return 64,
+ .short, .ushort => return 16,
+ .int, .uint => return 32,
+ .long, .ulong => return target.cpu.arch.ptrBitWidth(),
+ .longlong, .ulonglong => return 64,
.longdouble => switch (target.cpu.arch) {
+ .i386, .x86_64 => return 80,
+
.riscv64,
.aarch64,
.aarch64_be,
@@ -5497,40 +5494,33 @@ pub const CType = enum {
.s390x,
.mips64,
.mips64el,
+ .sparc,
+ .sparcv9,
+ .sparcel,
+ .powerpc,
+ .powerpcle,
+ .powerpc64,
+ .powerpc64le,
=> return 128,
- else => return 80,
+ else => return 64,
},
},
.windows, .uefi => switch (self) {
- .short,
- .ushort,
- => return 16,
- .int,
- .uint,
- .long,
- .ulong,
- => return 32,
- .longlong,
- .ulonglong,
- => return 64,
- .longdouble => @panic("TODO figure out what kind of float `long double` is on this target"),
- },
-
- .ios => switch (self) {
- .short,
- .ushort,
- => return 16,
- .int,
- .uint,
- => return 32,
- .long,
- .ulong,
- .longlong,
- .ulonglong,
- => return 64,
- .longdouble => @panic("TODO figure out what kind of float `long double` is on this target"),
+ .short, .ushort => return 16,
+ .int, .uint, .long, .ulong => return 32,
+ .longlong, .ulonglong, .longdouble => return 64,
+ },
+
+ .ios, .tvos, .watchos => switch (self) {
+ .short, .ushort => return 16,
+ .int, .uint => return 32,
+ .long, .ulong, .longlong, .ulonglong => return 64,
+ .longdouble => switch (target.cpu.arch) {
+ .i386, .x86_64 => return 80,
+ else => return 64,
+ },
},
.ananas,
@@ -5549,8 +5539,6 @@ pub const CType = enum {
.amdhsa,
.ps4,
.elfiamcu,
- .tvos,
- .watchos,
.mesa3d,
.contiki,
.amdpal,
src/value.zig
@@ -4020,37 +4020,44 @@ pub const Value = extern union {
}
}
- pub fn mulAdd(float_type: Type, mulend1: Value, mulend2: Value, addend: Value, arena: Allocator, target: Target) Allocator.Error!Value {
+ pub fn mulAdd(
+ float_type: Type,
+ mulend1: Value,
+ mulend2: Value,
+ addend: Value,
+ arena: Allocator,
+ target: Target,
+ ) Allocator.Error!Value {
switch (float_type.floatBits(target)) {
16 => {
- if (true) {
- // TODO: missing f16 implementation of FMA in `std.math.fma` or compiler-rt
- @panic("TODO implement mulAdd for f16");
- }
+ const m1 = mulend1.toFloat(f16);
+ const m2 = mulend2.toFloat(f16);
+ const a = addend.toFloat(f16);
+ return Value.Tag.float_16.create(arena, @mulAdd(f16, m1, m2, a));
},
32 => {
const m1 = mulend1.toFloat(f32);
const m2 = mulend2.toFloat(f32);
const a = addend.toFloat(f32);
- return Value.Tag.float_32.create(arena, std.math.fma(f32, m1, m2, a));
+ return Value.Tag.float_32.create(arena, @mulAdd(f32, m1, m2, a));
},
64 => {
const m1 = mulend1.toFloat(f64);
const m2 = mulend2.toFloat(f64);
const a = addend.toFloat(f64);
- return Value.Tag.float_64.create(arena, std.math.fma(f64, m1, m2, a));
+ return Value.Tag.float_64.create(arena, @mulAdd(f64, m1, m2, a));
},
80 => {
- if (true) {
- // TODO: missing f80 implementation of FMA in `std.math.fma` or compiler-rt
- @panic("TODO implement mulAdd for f80");
- }
+ const m1 = mulend1.toFloat(f80);
+ const m2 = mulend2.toFloat(f80);
+ const a = addend.toFloat(f80);
+ return Value.Tag.float_80.create(arena, @mulAdd(f80, m1, m2, a));
},
128 => {
const m1 = mulend1.toFloat(f128);
const m2 = mulend2.toFloat(f128);
const a = addend.toFloat(f128);
- return Value.Tag.float_128.create(arena, std.math.fma(f128, m1, m2, a));
+ return Value.Tag.float_128.create(arena, @mulAdd(f128, m1, m2, a));
},
else => unreachable,
}
src/Zir.zig
@@ -891,6 +891,8 @@ pub const Inst = struct {
atomic_store,
/// Implements the `@mulAdd` builtin.
/// Uses the `pl_node` union field with payload `MulAdd`.
+ /// The addend communicates the type of the builtin.
+ /// The mulends need to be coerced to the same type.
mul_add,
/// Implements the `@call` builtin.
/// Uses the `pl_node` union field with payload `BuiltinCall`.
test/behavior/muladd.zig
@@ -2,8 +2,8 @@ const builtin = @import("builtin");
const expect = @import("std").testing.expect;
test "@mulAdd" {
- if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
@@ -13,22 +13,22 @@ test "@mulAdd" {
}
fn testMulAdd() !void {
- if (builtin.zig_backend == .stage1) {
- const a: f16 = 5.5;
- const b: f16 = 2.5;
- const c: f16 = 6.25;
+ {
+ var a: f16 = 5.5;
+ var b: f16 = 2.5;
+ var c: f16 = 6.25;
try expect(@mulAdd(f16, a, b, c) == 20);
}
{
- const a: f32 = 5.5;
- const b: f32 = 2.5;
- const c: f32 = 6.25;
+ var a: f32 = 5.5;
+ var b: f32 = 2.5;
+ var c: f32 = 6.25;
try expect(@mulAdd(f32, a, b, c) == 20);
}
{
- const a: f64 = 5.5;
- const b: f64 = 2.5;
- const c: f64 = 6.25;
+ var a: f64 = 5.5;
+ var b: f64 = 2.5;
+ var c: f64 = 6.25;
try expect(@mulAdd(f64, a, b, c) == 20);
}
}
@@ -39,9 +39,7 @@ test "@mulAdd f80" {
return error.SkipZigTest;
}
- // TODO: missing f80 implementation of FMA in `std.math.fma` or compiler-rt
- // comptime try testMulAdd80();
-
+ comptime try testMulAdd80();
try testMulAdd80();
}
@@ -53,11 +51,12 @@ fn testMulAdd80() !void {
}
test "@mulAdd f128" {
- if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+
if (builtin.os.tag == .macos and builtin.cpu.arch == .aarch64) {
// https://github.com/ziglang/zig/issues/9900
return error.SkipZigTest;
@@ -68,8 +67,8 @@ test "@mulAdd f128" {
}
fn testMulAdd128() !void {
- const a: f16 = 5.5;
- const b: f128 = 2.5;
- const c: f128 = 6.25;
+ var a: f16 = 5.5;
+ var b: f128 = 2.5;
+ var c: f128 = 6.25;
try expect(@mulAdd(f128, a, b, c) == 20);
}