Commit 087aedfa38
Changed files (3)
lib/std/math/nan.zig
@@ -1,7 +1,7 @@
const math = @import("../math.zig");
/// Returns the nan representation for type T.
-pub fn nan(comptime T: type) T {
+pub inline fn nan(comptime T: type) T {
return switch (@typeInfo(T).Float.bits) {
16 => math.nan_f16,
32 => math.nan_f32,
@@ -13,15 +13,8 @@ pub fn nan(comptime T: type) T {
}
/// Returns the signalling nan representation for type T.
-pub fn snan(comptime T: type) T {
- // Note: A signalling nan is identical to a standard right now by may have a different bit
- // representation in the future when required.
- return switch (@typeInfo(T).Float.bits) {
- 16 => math.nan_u16,
- 32 => math.nan_u32,
- 64 => math.nan_u64,
- 80 => math.nan_u80,
- 128 => math.nan_u128,
- else => @compileError("unreachable"),
- };
+/// Note: A signalling nan is identical to a standard nan right now, but may have a
+/// different bit representation in the future when required.
+pub inline fn snan(comptime T: type) T {
+ return nan(T);
}
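With this change `snan` simply forwards to `nan`. A minimal sketch of the observable behavior under the current semantics (the bit patterns match for now, though a distinct signalling encoding may appear later):

    const std = @import("std");
    const math = std.math;

    test "snan currently shares nan's bit pattern" {
        // NaN never compares equal to itself, so compare the raw bits instead.
        const q = math.nan(f64);
        const s = math.snan(f64);
        try std.testing.expectEqual(@bitCast(u64, q), @bitCast(u64, s));
    }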
src/codegen/llvm/bindings.zig
@@ -295,9 +295,6 @@ pub const Type = opaque {
pub const countStructElementTypes = LLVMCountStructElementTypes;
extern fn LLVMCountStructElementTypes(StructTy: *const Type) c_uint;
-
- pub const getVectorSize = LLVMGetVectorSize;
- extern fn LLVMGetVectorSize(VectorTy: *const Type) c_uint;
};
pub const Module = opaque {
@@ -681,8 +678,8 @@ pub const Builder = opaque {
pub const buildVectorSplat = LLVMBuildVectorSplat;
extern fn LLVMBuildVectorSplat(
*const Builder,
- EltVal: *const Value,
ElementCount: c_uint,
+ EltVal: *const Value,
Name: [*:0]const u8,
) *const Value;
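The reorder matches the upstream C prototype, `LLVMValueRef LLVMBuildVectorSplat(LLVMBuilderRef B, unsigned ElementCount, LLVMValueRef EltVal, const char *Name)`, in which the element count precedes the value being splatted. Extern declarations are not checked against the C headers, so the old ordering silently passed the two arguments transposed across the ABI boundary.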
src/codegen/llvm.zig
@@ -3983,7 +3983,7 @@ pub const FuncGen = struct {
);
return phi_node;
},
- .Float => return self.buildFloatCmp(op, operand_ty, &.{ lhs, rhs }),
+ .Float => return self.buildFloatCmp(op, operand_ty, .{ lhs, rhs }),
else => unreachable,
};
const is_signed = int_ty.isSignedInt();
@@ -5211,7 +5211,7 @@ pub const FuncGen = struct {
const inst_ty = self.air.typeOfIndex(inst);
const scalar_ty = inst_ty.scalarType();
- if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.add, inst_ty, &.{ lhs, rhs });
+ if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.add, inst_ty, 2, .{ lhs, rhs });
if (scalar_ty.isSignedInt()) return self.builder.buildNSWAdd(lhs, rhs, "");
return self.builder.buildNUWAdd(lhs, rhs, "");
}
@@ -5250,7 +5250,7 @@ pub const FuncGen = struct {
const inst_ty = self.air.typeOfIndex(inst);
const scalar_ty = inst_ty.scalarType();
- if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.sub, inst_ty, &.{ lhs, rhs });
+ if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.sub, inst_ty, 2, .{ lhs, rhs });
if (scalar_ty.isSignedInt()) return self.builder.buildNSWSub(lhs, rhs, "");
return self.builder.buildNUWSub(lhs, rhs, "");
}
@@ -5288,7 +5288,7 @@ pub const FuncGen = struct {
const inst_ty = self.air.typeOfIndex(inst);
const scalar_ty = inst_ty.scalarType();
- if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.mul, inst_ty, &.{ lhs, rhs });
+ if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.mul, inst_ty, 2, .{ lhs, rhs });
if (scalar_ty.isSignedInt()) return self.builder.buildNSWMul(lhs, rhs, "");
return self.builder.buildNUWMul(lhs, rhs, "");
}
@@ -5325,7 +5325,7 @@ pub const FuncGen = struct {
const rhs = try self.resolveInst(bin_op.rhs);
const inst_ty = self.air.typeOfIndex(inst);
- return self.buildFloatOp(.div, inst_ty, &.{ lhs, rhs });
+ return self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs });
}
fn airDivTrunc(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
@@ -5338,8 +5338,8 @@ pub const FuncGen = struct {
const scalar_ty = inst_ty.scalarType();
if (scalar_ty.isRuntimeFloat()) {
- const result = try self.buildFloatOp(.div, inst_ty, &.{ lhs, rhs });
- return self.buildFloatOp(.trunc, inst_ty, &.{result});
+ const result = try self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs });
+ return self.buildFloatOp(.trunc, inst_ty, 1, .{result});
}
if (scalar_ty.isSignedInt()) return self.builder.buildSDiv(lhs, rhs, "");
return self.builder.buildUDiv(lhs, rhs, "");
@@ -5355,8 +5355,8 @@ pub const FuncGen = struct {
const scalar_ty = inst_ty.scalarType();
if (scalar_ty.isRuntimeFloat()) {
- const result = try self.buildFloatOp(.div, inst_ty, &.{ lhs, rhs });
- return self.buildFloatOp(.floor, inst_ty, &.{result});
+ const result = try self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs });
+ return self.buildFloatOp(.floor, inst_ty, 1, .{result});
}
if (scalar_ty.isSignedInt()) {
// const d = @divTrunc(a, b);
@@ -5386,7 +5386,7 @@ pub const FuncGen = struct {
const inst_ty = self.air.typeOfIndex(inst);
const scalar_ty = inst_ty.scalarType();
- if (scalar_ty.isRuntimeFloat()) return self.buildFloatOp(.div, inst_ty, &.{ lhs, rhs });
+ if (scalar_ty.isRuntimeFloat()) return self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs });
if (scalar_ty.isSignedInt()) return self.builder.buildExactSDiv(lhs, rhs, "");
return self.builder.buildExactUDiv(lhs, rhs, "");
}
@@ -5400,7 +5400,7 @@ pub const FuncGen = struct {
const inst_ty = self.air.typeOfIndex(inst);
const scalar_ty = inst_ty.scalarType();
- if (scalar_ty.isRuntimeFloat()) return self.buildFloatOp(.rem, inst_ty, &.{ lhs, rhs });
+ if (scalar_ty.isRuntimeFloat()) return self.buildFloatOp(.fmod, inst_ty, 2, .{ lhs, rhs });
if (scalar_ty.isSignedInt()) return self.builder.buildSRem(lhs, rhs, "");
return self.builder.buildURem(lhs, rhs, "");
}
@@ -5416,11 +5416,11 @@ pub const FuncGen = struct {
const scalar_ty = inst_ty.scalarType();
if (scalar_ty.isRuntimeFloat()) {
- const a = try self.buildFloatOp(.rem, inst_ty, &.{ lhs, rhs });
- const b = try self.buildFloatOp(.add, inst_ty, &.{ a, rhs });
- const c = try self.buildFloatOp(.rem, inst_ty, &.{ b, rhs });
+ const a = try self.buildFloatOp(.fmod, inst_ty, 2, .{ lhs, rhs });
+ const b = try self.buildFloatOp(.add, inst_ty, 2, .{ a, rhs });
+ const c = try self.buildFloatOp(.fmod, inst_ty, 2, .{ b, rhs });
const zero = inst_llvm_ty.constNull();
- const ltz = try self.buildFloatCmp(.lt, inst_ty, &.{ lhs, zero });
+ const ltz = try self.buildFloatCmp(.lt, inst_ty, .{ lhs, zero });
return self.builder.buildSelect(ltz, c, a, "");
}
if (scalar_ty.isSignedInt()) {
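A worked example of the float `@mod` lowering above: for lhs = -5.0 and rhs = 3.0, the first `fmod` yields -2.0, adding rhs gives 1.0, the second `fmod` leaves 1.0, and the select picks it because lhs < 0. A small sketch of the semantics being implemented:

    const std = @import("std");

    test "float @rem truncates, @mod floors" {
        try std.testing.expectEqual(@as(f64, -2.0), @rem(@as(f64, -5.0), 3.0));
        try std.testing.expectEqual(@as(f64, 1.0), @mod(@as(f64, -5.0), 3.0));
    }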
@@ -5508,18 +5508,18 @@ pub const FuncGen = struct {
) !*const llvm.Value {
const args_len = @intCast(c_uint, args_vectors.len);
const llvm_i32 = self.context.intType(32);
- assert(args_len <= 8);
+ assert(args_len <= 3);
var i: usize = 0;
var result = result_vector;
while (i < vector_len) : (i += 1) {
const index_i32 = llvm_i32.constInt(i, .False);
- var args: [8]*const llvm.Value = undefined;
+ var args: [3]*const llvm.Value = undefined;
for (args_vectors) |arg_vector, k| {
args[k] = self.builder.buildExtractElement(arg_vector, index_i32, "");
}
- const result_elem = self.builder.buildCall(llvm_fn, args[0..], args_len, .C, .Auto, "");
+ const result_elem = self.builder.buildCall(llvm_fn, &args, args_len, .C, .Auto, "");
result = self.builder.buildInsertElement(result, result_elem, index_i32, "");
}
return result;
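The scratch array shrinks from 8 to 3 entries because the widest operation scalarized this way is now `fma`, which takes three operands; passing `&args` alongside the separate `args_len` count means the call still reads only the populated prefix of the fixed-size array.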
@@ -5542,20 +5542,27 @@ pub const FuncGen = struct {
};
}
- fn getMathHTypeAbbrev(ty: Type) []const u8 {
- return switch (ty.tag()) {
- .f16 => "h", // Non-standard
- .f32 => "s",
- .f64 => "",
- .f80 => "x", // Non-standard
- .c_longdouble => "l",
- .f128 => "q", // Non-standard (mimics convention in GCC libquadmath)
+ fn libcFloatPrefix(float_bits: u16) []const u8 {
+ return switch (float_bits) {
+ 16, 80 => "__",
+ 32, 64, 128 => "",
else => unreachable,
};
}
- fn getCompilerRtTypeAbbrev(ty: Type, target: std.Target) []const u8 {
- return switch (ty.floatBits(target)) {
+ fn libcFloatSuffix(float_bits: u16) []const u8 {
+ return switch (float_bits) {
+ 16 => "h", // Non-standard
+ 32 => "s",
+ 64 => "",
+ 80 => "x", // Non-standard
+ 128 => "q", // Non-standard (mimics convention in GCC libquadmath)
+ else => unreachable,
+ };
+ }
+
+ fn compilerRtFloatAbbrev(float_bits: u16) []const u8 {
+ return switch (float_bits) {
16 => "h",
32 => "s",
64 => "d",
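Taken together, these helpers compose the external names used below. Assuming the usual libm, libquadmath, and compiler-rt conventions (the f16/f80 entries are the non-standard names Zig's compiler-rt exports):

    floor on f64  -> "floor"     (no prefix, no suffix)
    floor on f80  -> "__floorx"  (non-standard)
    floor on f128 -> "floorq"    (libquadmath convention)
    add   on f128 -> "__addtf3"  (compiler-rt)
    a < b on f128 -> "__lttf2"   (compiler-rt comparison; result tested against 0)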
@@ -5571,20 +5578,13 @@ pub const FuncGen = struct {
self: *FuncGen,
pred: math.CompareOperator,
ty: Type,
- params: []const *const llvm.Value,
+ params: [2]*const llvm.Value,
) !*const llvm.Value {
const target = self.dg.module.getTarget();
const scalar_ty = ty.scalarType();
const scalar_llvm_ty = try self.dg.llvmType(scalar_ty);
- // LLVM does not support all floating point comparisons for all targets, so we
- // may need to manually generate a libc call
- const intrinsics_allowed = switch (scalar_ty.tag()) {
- .f80 => target.longDoubleIs(f80) and backendSupportsF80(target),
- .f128 => target.longDoubleIs(f128),
- else => true,
- };
- if (intrinsics_allowed) {
+ if (intrinsicsAllowed(scalar_ty, target)) {
const llvm_predicate: llvm.RealPredicate = switch (pred) {
.eq => .OEQ,
.neq => .UNE,
@@ -5596,7 +5596,8 @@ pub const FuncGen = struct {
return self.builder.buildFCmp(llvm_predicate, params[0], params[1], "");
}
- const compiler_rt_type_abbrev = getCompilerRtTypeAbbrev(scalar_ty, target);
+ const float_bits = scalar_ty.floatBits(target);
+ const compiler_rt_float_abbrev = compilerRtFloatAbbrev(float_bits);
var fn_name_buf: [64]u8 = undefined;
const fn_base_name = switch (pred) {
.neq => "ne",
@@ -5606,9 +5607,10 @@ pub const FuncGen = struct {
.gt => "gt",
.gte => "ge",
};
- const fn_name = std.fmt.bufPrintZ(&fn_name_buf, "__{s}{s}f2", .{ fn_base_name, compiler_rt_type_abbrev }) catch unreachable;
+ const fn_name = std.fmt.bufPrintZ(&fn_name_buf, "__{s}{s}f2", .{
+ fn_base_name, compiler_rt_float_abbrev,
+ }) catch unreachable;
- assert(params.len == 2);
const param_types = [2]*const llvm.Type{ scalar_llvm_ty, scalar_llvm_ty };
const llvm_i32 = self.context.intType(32);
const libc_fn = self.getLibcFunction(fn_name, param_types[0..], llvm_i32);
@@ -5628,110 +5630,119 @@ pub const FuncGen = struct {
const vector_result_ty = llvm_i32.vectorType(vec_len);
var result = vector_result_ty.getUndef();
- result = try self.buildElementwiseCall(libc_fn, params[0..], result, vec_len);
+ result = try self.buildElementwiseCall(libc_fn, &params, result, vec_len);
- const zero_vector = self.builder.buildVectorSplat(zero, vec_len, "");
+ const zero_vector = self.builder.buildVectorSplat(vec_len, zero, "");
return self.builder.buildICmp(int_pred, result, zero_vector, "");
}
- const result = self.builder.buildCall(libc_fn, params.ptr, 2, .C, .Auto, "");
+ const result = self.builder.buildCall(libc_fn, &params, params.len, .C, .Auto, "");
return self.builder.buildICmp(int_pred, result, zero, "");
}
+ const FloatOp = enum {
+ add,
+ ceil,
+ cos,
+ div,
+ exp,
+ exp2,
+ fabs,
+ floor,
+ fma,
+ log,
+ log10,
+ log2,
+ fmax,
+ fmin,
+ mul,
+ fmod,
+ round,
+ sin,
+ sqrt,
+ sub,
+ trunc,
+ };
+
+ const FloatOpStrat = union(enum) {
+ intrinsic: []const u8,
+ libc: [:0]const u8,
+ };
+
/// Creates a floating point operation (add, sub, fma, sqrt, exp, etc.)
/// by lowering to the appropriate hardware instruction or softfloat
/// routine for the target
fn buildFloatOp(
self: *FuncGen,
- comptime op: @TypeOf(.EnumLiteral),
+ comptime op: FloatOp,
ty: Type,
- params: []const *const llvm.Value,
+ comptime params_len: usize,
+ params: [params_len]*const llvm.Value,
) !*const llvm.Value {
const target = self.dg.module.getTarget();
const scalar_ty = ty.scalarType();
const llvm_ty = try self.dg.llvmType(ty);
const scalar_llvm_ty = try self.dg.llvmType(scalar_ty);
- const Strat = union(enum) {
- intrinsic: []const u8,
- libc: [:0]const u8,
- };
-
- // LLVM does not support all relevant intrinsics for all targets, so we
- // may need to manually generate a libc call
- const intrinsics_allowed = switch (scalar_ty.tag()) {
- .f80 => target.longDoubleIs(f80) and backendSupportsF80(target),
- .f128 => target.longDoubleIs(f128),
- else => true,
- };
- const strat: Strat = if (intrinsics_allowed) b: {
+ const intrinsics_allowed = intrinsicsAllowed(scalar_ty, target);
+ var fn_name_buf: [64]u8 = undefined;
+ const strat: FloatOpStrat = if (intrinsics_allowed) switch (op) {
// Some operations are dedicated LLVM instructions, not available as intrinsics
- switch (op) {
- .add => return self.builder.buildFAdd(params[0], params[1], ""),
- .sub => return self.builder.buildFSub(params[0], params[1], ""),
- .mul => return self.builder.buildFMul(params[0], params[1], ""),
- .div => return self.builder.buildFDiv(params[0], params[1], ""),
- .rem => return self.builder.buildFRem(params[0], params[1], ""),
- else => {},
- }
- // All other operations are available as intrinsics
- break :b .{
- .intrinsic = "llvm." ++ switch (op) {
- .max => "maximum",
- .min => "minimum",
- .fma, .sqrt, .sin, .cos, .exp, .exp2, .log, .log2, .log10, .fabs, .floor, .ceil, .round, .trunc => @tagName(op),
- .add, .sub, .mul, .div, .rem => unreachable,
- else => unreachable,
- },
- };
+ .add => return self.builder.buildFAdd(params[0], params[1], ""),
+ .sub => return self.builder.buildFSub(params[0], params[1], ""),
+ .mul => return self.builder.buildFMul(params[0], params[1], ""),
+ .div => return self.builder.buildFDiv(params[0], params[1], ""),
+ .fmod => return self.builder.buildFRem(params[0], params[1], ""),
+ .fmax => return self.builder.buildMaxNum(params[0], params[1], ""),
+ .fmin => return self.builder.buildMinNum(params[0], params[1], ""),
+ else => .{ .intrinsic = "llvm." ++ @tagName(op) },
} else b: {
- const math_h_type_abbrev = getMathHTypeAbbrev(scalar_ty);
- const compiler_rt_type_abbrev = getCompilerRtTypeAbbrev(scalar_ty, target);
- var fn_name_buf: [64]u8 = undefined;
+ const float_bits = scalar_ty.floatBits(target);
break :b switch (op) {
- .fma => Strat{
- .libc = switch (scalar_ty.floatBits(target)) {
- 80 => "__fmax",
- else => std.fmt.bufPrintZ(&fn_name_buf, "fma{s}", .{math_h_type_abbrev}) catch unreachable,
- },
- },
- .add, .sub, .div, .mul => Strat{
- .libc = std.fmt.bufPrintZ(&fn_name_buf, "__{s}{s}f3", .{ @tagName(op), compiler_rt_type_abbrev }) catch unreachable,
- },
- .rem => Strat{
- .libc = std.fmt.bufPrintZ(&fn_name_buf, "fmod{s}", .{math_h_type_abbrev}) catch unreachable,
- },
- .max, .min => Strat{
- .libc = std.fmt.bufPrintZ(&fn_name_buf, "f{s}{s}", .{ @tagName(op), math_h_type_abbrev }) catch unreachable,
+ .add, .sub, .div, .mul => FloatOpStrat{
+ .libc = std.fmt.bufPrintZ(&fn_name_buf, "__{s}{s}f3", .{
+ @tagName(op), compilerRtFloatAbbrev(float_bits),
+ }) catch unreachable,
},
- .sqrt, .sin, .cos, .exp, .exp2, .log, .log2, .log10, .fabs, .floor, .ceil, .round, .trunc => Strat{
- .libc = std.fmt.bufPrintZ(&fn_name_buf, "{s}{s}", .{ @tagName(op), math_h_type_abbrev }) catch unreachable,
+ .ceil,
+ .cos,
+ .exp,
+ .exp2,
+ .fabs,
+ .floor,
+ .fma,
+ .fmax,
+ .fmin,
+ .fmod,
+ .log,
+ .log10,
+ .log2,
+ .round,
+ .sin,
+ .sqrt,
+ .trunc,
+ => FloatOpStrat{
+ .libc = std.fmt.bufPrintZ(&fn_name_buf, "{s}{s}{s}", .{
+ libcFloatPrefix(float_bits), @tagName(op), libcFloatSuffix(float_bits),
+ }) catch unreachable,
},
- else => unreachable,
};
};
- var llvm_fn: *const llvm.Value = switch (strat) {
+ const llvm_fn: *const llvm.Value = switch (strat) {
.intrinsic => |fn_name| self.getIntrinsic(fn_name, &.{llvm_ty}),
.libc => |fn_name| b: {
- assert(params.len == switch (op) {
- .fma => 3,
- .add, .sub, .div, .mul, .rem, .max, .min => 2,
- .sqrt, .sin, .cos, .exp, .exp2, .log, .log2, .log10, .fabs, .floor, .ceil, .round, .trunc => 1,
- else => unreachable,
- });
const param_types = [3]*const llvm.Type{ scalar_llvm_ty, scalar_llvm_ty, scalar_llvm_ty };
const libc_fn = self.getLibcFunction(fn_name, param_types[0..params.len], scalar_llvm_ty);
if (ty.zigTypeTag() == .Vector) {
const result = llvm_ty.getUndef();
- return self.buildElementwiseCall(libc_fn, params[0..], result, ty.vectorLen());
+ return self.buildElementwiseCall(libc_fn, &params, result, ty.vectorLen());
}
break :b libc_fn;
},
};
- const params_len = @intCast(c_uint, params.len);
- return self.builder.buildCall(llvm_fn, params.ptr, params_len, .C, .Auto, "");
+ return self.builder.buildCall(llvm_fn, &params, params_len, .C, .Auto, "");
}
fn airMulAdd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
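The rewrite makes the lowering strategy explicit: with intrinsics allowed, `add`/`sub`/`mul`/`div`/`fmod` become dedicated LLVM instructions, `fmax`/`fmin` go through `buildMaxNum`/`buildMinNum`, and everything else becomes an `llvm.<op>` intrinsic; otherwise arithmetic falls back to compiler-rt (`__addtf3` and friends) and the remaining operations to libm-style calls, with vector operands scalarized through buildElementwiseCall. For instance, `.sqrt` on f64 selects the `llvm.sqrt` intrinsic, while `.sqrt` on f128 on a target whose long double is not f128 calls `sqrtq`.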
@@ -5745,7 +5756,7 @@ pub const FuncGen = struct {
const addend = try self.resolveInst(pl_op.operand);
const ty = self.air.typeOfIndex(inst);
- return self.buildFloatOp(.fma, ty, &.{ mulend1, mulend2, addend });
+ return self.buildFloatOp(.fma, ty, 3, .{ mulend1, mulend2, addend });
}
fn airShlWithOverflow(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
@@ -6550,15 +6561,14 @@ pub const FuncGen = struct {
}
}
- fn airUnaryOp(self: *FuncGen, inst: Air.Inst.Index, comptime op: @TypeOf(.EnumLiteral)) !?*const llvm.Value {
+ fn airUnaryOp(self: *FuncGen, inst: Air.Inst.Index, comptime op: FloatOp) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null;
const un_op = self.air.instructions.items(.data)[inst].un_op;
const operand = try self.resolveInst(un_op);
const operand_ty = self.air.typeOf(un_op);
- const params = [_]*const llvm.Value{operand};
- return self.buildFloatOp(op, operand_ty, &params);
+ return self.buildFloatOp(op, operand_ty, 1, .{operand});
}
fn airClzCtz(self: *FuncGen, inst: Air.Inst.Index, llvm_fn_name: []const u8) !?*const llvm.Value {
@@ -6822,17 +6832,9 @@ pub const FuncGen = struct {
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
const scalar = try self.resolveInst(ty_op.operand);
- const scalar_ty = self.air.typeOf(ty_op.operand);
const vector_ty = self.air.typeOfIndex(inst);
const len = vector_ty.vectorLen();
- const scalar_llvm_ty = try self.dg.llvmType(scalar_ty);
- const op_llvm_ty = scalar_llvm_ty.vectorType(1);
- const u32_llvm_ty = self.context.intType(32);
- const mask_llvm_ty = u32_llvm_ty.vectorType(len);
- const undef_vector = op_llvm_ty.getUndef();
- const u32_zero = u32_llvm_ty.constNull();
- const op_vector = self.builder.buildInsertElement(undef_vector, scalar, u32_zero, "");
- return self.builder.buildShuffleVector(op_vector, undef_vector, mask_llvm_ty.constNull(), "");
+ return self.builder.buildVectorSplat(len, scalar, "");
}
fn airSelect(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
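The deleted insertelement-plus-shufflevector sequence is essentially what LLVM's CreateVectorSplat emits internally, so delegating to the (now correctly declared) buildVectorSplat binding drops the hand-rolled duplication without changing the generated IR.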
@@ -8183,6 +8185,26 @@ fn backendSupportsF80(target: std.Target) bool {
};
}
+/// This function returns true if we expect LLVM to lower f16 correctly
+/// and false if we expect LLVM to crash if it encounters an f16 type or
+/// if it produces miscompilations.
+fn backendSupportsF16(target: std.Target) bool {
+ return switch (target.cpu.arch) {
+ else => true,
+ };
+}
+
+/// LLVM does not support all relevant intrinsics for all targets, so we
+/// may need to manually generate a libc call
+fn intrinsicsAllowed(scalar_ty: Type, target: std.Target) bool {
+ return switch (scalar_ty.tag()) {
+ .f16 => backendSupportsF16(target),
+ .f80 => target.longDoubleIs(f80) and backendSupportsF80(target),
+ .f128 => target.longDoubleIs(f128),
+ else => true,
+ };
+}
+
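backendSupportsF16 is a placeholder for now (every architecture is assumed to handle f16), while intrinsicsAllowed centralizes the f80/f128 checks previously duplicated in buildFloatCmp and buildFloatOp and adds the f16 hook alongside them.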
/// We need to insert extra padding if LLVM's isn't enough.
/// However we don't want to ever call LLVMABIAlignmentOfType or
/// LLVMABISizeOfType because these functions will trip assertions