Commit 7233a3324a
Changed files (14), under directories: src/, src/arch/, src/codegen/, test/behavior/
src/arch/aarch64/CodeGen.zig
@@ -640,6 +640,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.error_name => try self.airErrorName(inst),
.splat => try self.airSplat(inst),
.shuffle => try self.airShuffle(inst),
+ .reduce => try self.airReduce(inst),
.aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst),
.prefetch => try self.airPrefetch(inst),
@@ -3727,6 +3728,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
+fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
+ const reduce = self.air.instructions.items(.data)[inst].reduce;
+ const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airReduce for aarch64", .{});
+ return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
+}
+
fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
const vector_ty = self.air.typeOfIndex(inst);
const len = vector_ty.vectorLen();
src/arch/arm/CodeGen.zig
@@ -637,6 +637,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.error_name => try self.airErrorName(inst),
.splat => try self.airSplat(inst),
.shuffle => try self.airShuffle(inst),
+ .reduce => try self.airReduce(inst),
.aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst),
.prefetch => try self.airPrefetch(inst),
@@ -4204,6 +4205,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
+fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
+ const reduce = self.air.instructions.items(.data)[inst].reduce;
+ const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airReduce for arm", .{});
+ return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
+}
+
fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
const vector_ty = self.air.typeOfIndex(inst);
const len = vector_ty.vectorLen();
src/arch/riscv64/CodeGen.zig
@@ -604,6 +604,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.error_name => try self.airErrorName(inst),
.splat => try self.airSplat(inst),
.shuffle => try self.airShuffle(inst),
+ .reduce => try self.airReduce(inst),
.aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst),
.prefetch => try self.airPrefetch(inst),
@@ -2213,6 +2214,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
+fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
+ const reduce = self.air.instructions.items(.data)[inst].reduce;
+ const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airReduce for riscv64", .{});
+ return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
+}
+
fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
const vector_ty = self.air.typeOfIndex(inst);
const len = vector_ty.vectorLen();
src/arch/wasm/CodeGen.zig
@@ -1263,6 +1263,7 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
.ret_load => self.airRetLoad(inst),
.splat => self.airSplat(inst),
.shuffle => self.airShuffle(inst),
+ .reduce => self.airReduce(inst),
.aggregate_init => self.airAggregateInit(inst),
.union_init => self.airUnionInit(inst),
.prefetch => self.airPrefetch(inst),
@@ -2988,7 +2989,6 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
const operand = try self.resolveInst(ty_op.operand);
- _ = ty_op;
_ = operand;
return self.fail("TODO: Implement wasm airSplat", .{});
}
@@ -2999,11 +2999,20 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
const operand = try self.resolveInst(ty_op.operand);
- _ = ty_op;
_ = operand;
return self.fail("TODO: Implement wasm airShuffle", .{});
}
+fn airReduce(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
+ if (self.liveness.isUnused(inst)) return WValue{ .none = {} };
+
+ const reduce = self.air.instructions.items(.data)[inst].reduce;
+ const operand = try self.resolveInst(reduce.operand);
+
+ _ = operand;
+ return self.fail("TODO: Implement wasm airReduce", .{});
+}
+
fn airAggregateInit(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
if (self.liveness.isUnused(inst)) return WValue{ .none = {} };
src/arch/x86_64/CodeGen.zig
@@ -721,6 +721,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.error_name => try self.airErrorName(inst),
.splat => try self.airSplat(inst),
.shuffle => try self.airShuffle(inst),
+ .reduce => try self.airReduce(inst),
.aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst),
.prefetch => try self.airPrefetch(inst),
@@ -5567,6 +5568,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
+fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
+ const reduce = self.air.instructions.items(.data)[inst].reduce;
+ const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airReduce for x86_64", .{});
+ return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
+}
+
fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
const vector_ty = self.air.typeOfIndex(inst);
const len = vector_ty.vectorLen();
src/codegen/llvm/bindings.zig
@@ -853,6 +853,39 @@ pub const Builder = opaque {
pub const buildShuffleVector = LLVMBuildShuffleVector;
extern fn LLVMBuildShuffleVector(*const Builder, V1: *const Value, V2: *const Value, Mask: *const Value, Name: [*:0]const u8) *const Value;
+
+ pub const buildAndReduce = ZigLLVMBuildAndReduce;
+ extern fn ZigLLVMBuildAndReduce(B: *const Builder, Val: *const Value) *const Value;
+
+ pub const buildOrReduce = ZigLLVMBuildOrReduce;
+ extern fn ZigLLVMBuildOrReduce(B: *const Builder, Val: *const Value) *const Value;
+
+ pub const buildXorReduce = ZigLLVMBuildXorReduce;
+ extern fn ZigLLVMBuildXorReduce(B: *const Builder, Val: *const Value) *const Value;
+
+ pub const buildIntMaxReduce = ZigLLVMBuildIntMaxReduce;
+ extern fn ZigLLVMBuildIntMaxReduce(B: *const Builder, Val: *const Value, is_signed: bool) *const Value;
+
+ pub const buildIntMinReduce = ZigLLVMBuildIntMinReduce;
+ extern fn ZigLLVMBuildIntMinReduce(B: *const Builder, Val: *const Value, is_signed: bool) *const Value;
+
+ pub const buildFPMaxReduce = ZigLLVMBuildFPMaxReduce;
+ extern fn ZigLLVMBuildFPMaxReduce(B: *const Builder, Val: *const Value) *const Value;
+
+ pub const buildFPMinReduce = ZigLLVMBuildFPMinReduce;
+ extern fn ZigLLVMBuildFPMinReduce(B: *const Builder, Val: *const Value) *const Value;
+
+ pub const buildAddReduce = ZigLLVMBuildAddReduce;
+ extern fn ZigLLVMBuildAddReduce(B: *const Builder, Val: *const Value) *const Value;
+
+ pub const buildMulReduce = ZigLLVMBuildMulReduce;
+ extern fn ZigLLVMBuildMulReduce(B: *const Builder, Val: *const Value) *const Value;
+
+ pub const buildFPAddReduce = ZigLLVMBuildFPAddReduce;
+ extern fn ZigLLVMBuildFPAddReduce(B: *const Builder, Acc: *const Value, Val: *const Value) *const Value;
+
+ pub const buildFPMulReduce = ZigLLVMBuildFPMulReduce;
+ extern fn ZigLLVMBuildFPMulReduce(B: *const Builder, Acc: *const Value, Val: *const Value) *const Value;
};
pub const MDString = opaque {
src/codegen/c.zig
@@ -1731,6 +1731,7 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
.error_name => try airErrorName(f, inst),
.splat => try airSplat(f, inst),
.shuffle => try airShuffle(f, inst),
+ .reduce => try airReduce(f, inst),
.aggregate_init => try airAggregateInit(f, inst),
.union_init => try airUnionInit(f, inst),
.prefetch => try airPrefetch(f, inst),
@@ -3625,6 +3626,21 @@ fn airShuffle(f: *Function, inst: Air.Inst.Index) !CValue {
return f.fail("TODO: C backend: implement airShuffle", .{});
}
+fn airReduce(f: *Function, inst: Air.Inst.Index) !CValue {
+ if (f.liveness.isUnused(inst)) return CValue.none;
+
+ const inst_ty = f.air.typeOfIndex(inst);
+ const reduce = f.air.instructions.items(.data)[inst].reduce;
+ const operand = try f.resolveInst(reduce.operand);
+ const writer = f.object.writer();
+ const local = try f.allocLocal(inst_ty, .Const);
+ try writer.writeAll(" = ");
+
+ _ = operand;
+ _ = local;
+ return f.fail("TODO: C backend: implement airReduce", .{});
+}
+
fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue {
if (f.liveness.isUnused(inst)) return CValue.none;
src/codegen/llvm.zig
@@ -3426,6 +3426,7 @@ pub const FuncGen = struct {
.error_name => try self.airErrorName(inst),
.splat => try self.airSplat(inst),
.shuffle => try self.airShuffle(inst),
+ .reduce => try self.airReduce(inst),
.aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst),
.prefetch => try self.airPrefetch(inst),
@@ -6281,6 +6282,50 @@ pub const FuncGen = struct {
return self.builder.buildShuffleVector(a, b, llvm_mask_value, "");
}
+ fn airReduce(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+ if (self.liveness.isUnused(inst)) return null;
+
+ const reduce = self.air.instructions.items(.data)[inst].reduce;
+ const operand = try self.resolveInst(reduce.operand);
+ const scalar_ty = self.air.typeOfIndex(inst);
+
+ // TODO handle the fast math setting
+
+ switch (reduce.operation) {
+ .And => return self.builder.buildAndReduce(operand),
+ .Or => return self.builder.buildOrReduce(operand),
+ .Xor => return self.builder.buildXorReduce(operand),
+ .Min => switch (scalar_ty.zigTypeTag()) {
+ .Int => return self.builder.buildIntMinReduce(operand, scalar_ty.isSignedInt()),
+ .Float => return self.builder.buildFPMinReduce(operand),
+ else => unreachable,
+ },
+ .Max => switch (scalar_ty.zigTypeTag()) {
+ .Int => return self.builder.buildIntMaxReduce(operand, scalar_ty.isSignedInt()),
+ .Float => return self.builder.buildFPMaxReduce(operand),
+ else => unreachable,
+ },
+ .Add => switch (scalar_ty.zigTypeTag()) {
+ .Int => return self.builder.buildAddReduce(operand),
+ .Float => {
+ const scalar_llvm_ty = try self.dg.llvmType(scalar_ty);
+ const neutral_value = scalar_llvm_ty.constReal(-0.0);
+ return self.builder.buildFPAddReduce(neutral_value, operand);
+ },
+ else => unreachable,
+ },
+ .Mul => switch (scalar_ty.zigTypeTag()) {
+ .Int => return self.builder.buildMulReduce(operand),
+ .Float => {
+ const scalar_llvm_ty = try self.dg.llvmType(scalar_ty);
+ const neutral_value = scalar_llvm_ty.constReal(1.0);
+ return self.builder.buildFPMulReduce(neutral_value, operand);
+ },
+ else => unreachable,
+ },
+ }
+ }
+
fn airAggregateInit(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null;
src/Air.zig
@@ -530,6 +530,14 @@ pub const Inst = struct {
/// Given an integer operand, return the float with the closest mathematical meaning.
/// Uses the `ty_op` field.
int_to_float,
+
+ /// Transforms a vector into a scalar value by performing a sequential
+ /// horizontal reduction of its elements using the specified operator.
+ /// The vector element type (and hence result type) will be:
+ /// * and, or, xor => integer or boolean
+ /// * min, max, add, mul => integer or float
+ /// Uses the `reduce` field.
+ reduce,
/// Given an integer, bool, float, or pointer operand, return a vector with all elements
/// equal to the scalar value.
/// Uses the `ty_op` field.
@@ -695,6 +703,10 @@ pub const Inst = struct {
locality: u2,
cache: std.builtin.PrefetchOptions.Cache,
},
+ reduce: struct {
+ operand: Ref,
+ operation: std.builtin.ReduceOp,
+ },
// Make sure we don't accidentally add a field to make this union
// bigger than expected. Note that in Debug builds, Zig is allowed
@@ -1027,6 +1039,8 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
return ptr_ty.elemType();
},
+ .reduce => return air.typeOf(datas[inst].reduce.operand).childType(),
+
.mul_add => return air.typeOf(datas[inst].pl_op.operand),
.add_with_overflow,
src/Liveness.zig
@@ -435,6 +435,10 @@ fn analyzeInst(
const extra = a.air.extraData(Air.Shuffle, inst_datas[inst].ty_pl.payload).data;
return trackOperands(a, new_set, inst, main_tomb, .{ extra.a, extra.b, .none });
},
+ .reduce => {
+ const reduce = inst_datas[inst].reduce;
+ return trackOperands(a, new_set, inst, main_tomb, .{ reduce.operand, .none, .none });
+ },
.aggregate_init => {
const ty_pl = inst_datas[inst].ty_pl;
const aggregate_ty = a.air.getRefType(ty_pl.ty);
src/print_air.zig
@@ -265,6 +265,7 @@ const Writer = struct {
.wasm_memory_grow => try w.writeWasmMemoryGrow(s, inst),
.mul_add => try w.writeMulAdd(s, inst),
.shuffle => try w.writeShuffle(s, inst),
+ .reduce => try w.writeReduce(s, inst),
.add_with_overflow,
.sub_with_overflow,
@@ -392,6 +393,13 @@ const Writer = struct {
try s.print(", mask {d}, len {d}", .{ extra.mask, extra.mask_len });
}
+ fn writeReduce(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
+ const reduce = w.air.instructions.items(.data)[inst].reduce;
+
+ try w.writeOperand(s, inst, 0, reduce.operand);
+ try s.print(", {s}", .{@tagName(reduce.operation)});
+ }
+
fn writeFence(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
const atomic_order = w.air.instructions.items(.data)[inst].fence;
src/Sema.zig
@@ -13973,17 +13973,27 @@ fn resolveExportOptions(
};
}
-fn resolveAtomicOrder(
+fn resolveBuiltinEnum(
sema: *Sema,
block: *Block,
src: LazySrcLoc,
zir_ref: Zir.Inst.Ref,
-) CompileError!std.builtin.AtomicOrder {
- const atomic_order_ty = try sema.getBuiltinType(block, src, "AtomicOrder");
+ comptime name: []const u8,
+) CompileError!@field(std.builtin, name) {
+ const ty = try sema.getBuiltinType(block, src, name);
const air_ref = sema.resolveInst(zir_ref);
- const coerced = try sema.coerce(block, atomic_order_ty, air_ref, src);
+ const coerced = try sema.coerce(block, ty, air_ref, src);
const val = try sema.resolveConstValue(block, src, coerced);
- return val.toEnum(std.builtin.AtomicOrder);
+ return val.toEnum(@field(std.builtin, name));
+}
+
+fn resolveAtomicOrder(
+ sema: *Sema,
+ block: *Block,
+ src: LazySrcLoc,
+ zir_ref: Zir.Inst.Ref,
+) CompileError!std.builtin.AtomicOrder {
+ return resolveBuiltinEnum(sema, block, src, zir_ref, "AtomicOrder");
}
fn resolveAtomicRmwOp(
@@ -13992,11 +14002,7 @@ fn resolveAtomicRmwOp(
src: LazySrcLoc,
zir_ref: Zir.Inst.Ref,
) CompileError!std.builtin.AtomicRmwOp {
- const atomic_rmw_op_ty = try sema.getBuiltinType(block, src, "AtomicRmwOp");
- const air_ref = sema.resolveInst(zir_ref);
- const coerced = try sema.coerce(block, atomic_rmw_op_ty, air_ref, src);
- const val = try sema.resolveConstValue(block, src, coerced);
- return val.toEnum(std.builtin.AtomicRmwOp);
+ return resolveBuiltinEnum(sema, block, src, zir_ref, "AtomicRmwOp");
}
fn zirCmpxchg(
@@ -14118,8 +14124,72 @@ fn zirSplat(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.I
fn zirReduce(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
const inst_data = sema.code.instructions.items(.data)[inst].pl_node;
- const src = inst_data.src();
- return sema.fail(block, src, "TODO: Sema.zirReduce", .{});
+ const extra = sema.code.extraData(Zir.Inst.Bin, inst_data.payload_index).data;
+ const op_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node };
+ const operand_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node };
+ const operation = try sema.resolveBuiltinEnum(block, op_src, extra.lhs, "ReduceOp");
+ const operand = sema.resolveInst(extra.rhs);
+ const operand_ty = sema.typeOf(operand);
+
+ if (operand_ty.zigTypeTag() != .Vector) {
+ return sema.fail(block, operand_src, "expected vector, found {}", .{operand_ty});
+ }
+
+ const scalar_ty = operand_ty.childType();
+
+ // Type-check depending on operation.
+ switch (operation) {
+ .And, .Or, .Xor => switch (scalar_ty.zigTypeTag()) {
+ .Int, .Bool => {},
+ else => return sema.fail(block, operand_src, "@reduce operation '{s}' requires integer or boolean operand; found {}", .{
+ @tagName(operation), operand_ty,
+ }),
+ },
+ .Min, .Max, .Add, .Mul => switch (scalar_ty.zigTypeTag()) {
+ .Int, .Float => {},
+ else => return sema.fail(block, operand_src, "@reduce operation '{s}' requires integer or float operand; found {}", .{
+ @tagName(operation), operand_ty,
+ }),
+ },
+ }
+
+ const vec_len = operand_ty.vectorLen();
+ if (vec_len == 0) {
+ // TODO re-evaluate if we should introduce a "neutral value" for some operations,
+ // e.g. zero for add and one for mul.
+ return sema.fail(block, operand_src, "@reduce operation requires a vector with nonzero length", .{});
+ }
+
+ if (try sema.resolveMaybeUndefVal(block, operand_src, operand)) |operand_val| {
+ if (operand_val.isUndef()) return sema.addConstUndef(scalar_ty);
+
+ const target = sema.mod.getTarget();
+ var accum: Value = try operand_val.elemValue(sema.arena, 0);
+ var elem_buf: Value.ElemValueBuffer = undefined;
+ var i: u32 = 1;
+ while (i < vec_len) : (i += 1) {
+ const elem_val = operand_val.elemValueBuffer(i, &elem_buf);
+ switch (operation) {
+ .And => accum = try accum.bitwiseAnd(elem_val, sema.arena),
+ .Or => accum = try accum.bitwiseOr(elem_val, sema.arena),
+ .Xor => accum = try accum.bitwiseXor(elem_val, sema.arena),
+ .Min => accum = accum.numberMin(elem_val),
+ .Max => accum = accum.numberMax(elem_val),
+ .Add => accum = try accum.numberAddWrap(elem_val, scalar_ty, sema.arena, target),
+ .Mul => accum = try accum.numberMulWrap(elem_val, scalar_ty, sema.arena, target),
+ }
+ }
+ return sema.addConstant(scalar_ty, accum);
+ }
+
+ try sema.requireRuntimeBlock(block, operand_src);
+ return block.addInst(.{
+ .tag = .reduce,
+ .data = .{ .reduce = .{
+ .operand = operand,
+ .operation = operation,
+ } },
+ });
}
fn zirShuffle(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
@@ -14425,8 +14495,8 @@ fn zirAtomicRmw(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A
.Nand => try stored_val.bitwiseNand (operand_val, operand_ty, sema.arena, target),
.Or => try stored_val.bitwiseOr (operand_val, sema.arena),
.Xor => try stored_val.bitwiseXor (operand_val, sema.arena),
- .Max => try stored_val.numberMax (operand_val),
- .Min => try stored_val.numberMin (operand_val),
+ .Max => stored_val.numberMax (operand_val),
+ .Min => stored_val.numberMin (operand_val),
// zig fmt: on
};
try sema.storePtrVal(block, src, ptr_val, new_val, operand_ty);
@@ -14760,7 +14830,7 @@ fn analyzeMinMax(
else => unreachable,
};
const vec_len = simd_op.len orelse {
- const result_val = try opFunc(lhs_val, rhs_val);
+ const result_val = opFunc(lhs_val, rhs_val);
return sema.addConstant(simd_op.result_ty, result_val);
};
var lhs_buf: Value.ElemValueBuffer = undefined;
@@ -14769,7 +14839,7 @@ fn analyzeMinMax(
for (elems) |*elem, i| {
const lhs_elem_val = lhs_val.elemValueBuffer(i, &lhs_buf);
const rhs_elem_val = rhs_val.elemValueBuffer(i, &rhs_buf);
- elem.* = try opFunc(lhs_elem_val, rhs_elem_val);
+ elem.* = opFunc(lhs_elem_val, rhs_elem_val);
}
return sema.addConstant(
simd_op.result_ty,
@@ -19246,9 +19316,9 @@ fn cmpNumeric(
const rhs_ty_tag = rhs_ty.zigTypeTag();
if (lhs_ty_tag == .Vector and rhs_ty_tag == .Vector) {
- if (lhs_ty.arrayLen() != rhs_ty.arrayLen()) {
+ if (lhs_ty.vectorLen() != rhs_ty.vectorLen()) {
return sema.fail(block, src, "vector length mismatch: {d} and {d}", .{
- lhs_ty.arrayLen(), rhs_ty.arrayLen(),
+ lhs_ty.vectorLen(), rhs_ty.vectorLen(),
});
}
return sema.fail(block, src, "TODO implement support for vectors in cmpNumeric", .{});
src/value.zig
@@ -1841,6 +1841,8 @@ pub const Value = extern union {
return orderAgainstZero(lhs).compare(op);
}
+ /// This function is used by hash maps and so treats floating-point NaNs as equal
+ /// to each other, and not equal to other floating-point values.
pub fn eql(a: Value, b: Value, ty: Type) bool {
const a_tag = a.tag();
const b_tag = b.tag();
@@ -2006,10 +2008,20 @@ pub const Value = extern union {
// end up here and the values are equal if the type has zero fields.
return ty.structFieldCount() != 0;
},
+ .Float => {
+ const a_nan = a.isNan();
+ const b_nan = b.isNan();
+ if (a_nan or b_nan) {
+ return a_nan and b_nan;
+ }
+ return order(a, b).compare(.eq);
+ },
else => return order(a, b).compare(.eq),
}
}
+ /// This function is used by hash maps and so treats floating-point NaNs as equal
+ /// to each other, and not equal to other floating-point values.
pub fn hash(val: Value, ty: Type, hasher: *std.hash.Wyhash) void {
const zig_ty_tag = ty.zigTypeTag();
std.hash.autoHash(hasher, zig_ty_tag);
@@ -2030,10 +2042,18 @@ pub const Value = extern union {
return val.toType(&buf).hashWithHasher(hasher);
},
.Float, .ComptimeFloat => {
- // TODO double check the lang spec. should we to bitwise hashing here,
- // or a hash that normalizes the float value?
+ // Normalize the float here because this hash must match eql semantics.
+ // These functions are used for hash maps so we want NaN to equal itself,
+ // and -0.0 to equal +0.0.
const float = val.toFloat(f128);
- std.hash.autoHash(hasher, @bitCast(u128, float));
+ if (std.math.isNan(float)) {
+ std.hash.autoHash(hasher, std.math.nan_u128);
+ } else if (float == 0.0) {
+ var normalized_zero: f128 = 0.0;
+ std.hash.autoHash(hasher, @bitCast(u128, normalized_zero));
+ } else {
+ std.hash.autoHash(hasher, @bitCast(u128, float));
+ }
},
.Bool, .Int, .ComptimeInt, .Pointer => switch (val.tag()) {
.slice => {
@@ -2948,7 +2968,7 @@ pub const Value = extern union {
}
/// Supports both floats and ints; handles undefined.
- pub fn numberMax(lhs: Value, rhs: Value) !Value {
+ pub fn numberMax(lhs: Value, rhs: Value) Value {
if (lhs.isUndef() or rhs.isUndef()) return undef;
if (lhs.isNan()) return rhs;
if (rhs.isNan()) return lhs;
@@ -2960,7 +2980,7 @@ pub const Value = extern union {
}
/// Supports both floats and ints; handles undefined.
- pub fn numberMin(lhs: Value, rhs: Value) !Value {
+ pub fn numberMin(lhs: Value, rhs: Value) Value {
if (lhs.isUndef() or rhs.isUndef()) return undef;
if (lhs.isNan()) return rhs;
if (rhs.isNan()) return lhs;
test/behavior/vector.zig
@@ -520,15 +520,20 @@ test "vector shift operators" {
}
test "vector reduce operation" {
- if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+
const S = struct {
- fn doTheTestReduce(comptime op: std.builtin.ReduceOp, x: anytype, expected: anytype) !void {
+ fn testReduce(comptime op: std.builtin.ReduceOp, x: anytype, expected: anytype) !void {
const N = @typeInfo(@TypeOf(x)).Array.len;
const TX = @typeInfo(@TypeOf(x)).Array.child;
- var r = @reduce(op, @as(Vector(N, TX), x));
+ var r = @reduce(op, @as(@Vector(N, TX), x));
switch (@typeInfo(TX)) {
- .Int, .Bool => try expectEqual(expected, r),
+ .Int, .Bool => try expect(expected == r),
.Float => {
const expected_nan = math.isNan(expected);
const got_nan = math.isNan(r);
@@ -537,117 +542,119 @@ test "vector reduce operation" {
// Do this check explicitly as two NaN values are never
// equal.
} else {
- try expectApproxEqRel(expected, r, math.sqrt(math.epsilon(TX)));
+ const F = @TypeOf(expected);
+ const tolerance = @sqrt(math.epsilon(TX));
+ try expect(std.math.approxEqRel(F, expected, r, tolerance));
}
},
else => unreachable,
}
}
fn doTheTest() !void {
- try doTheTestReduce(.Add, [4]i16{ -9, -99, -999, -9999 }, @as(i32, -11106));
- try doTheTestReduce(.Add, [4]u16{ 9, 99, 999, 9999 }, @as(u32, 11106));
- try doTheTestReduce(.Add, [4]i32{ -9, -99, -999, -9999 }, @as(i32, -11106));
- try doTheTestReduce(.Add, [4]u32{ 9, 99, 999, 9999 }, @as(u32, 11106));
- try doTheTestReduce(.Add, [4]i64{ -9, -99, -999, -9999 }, @as(i64, -11106));
- try doTheTestReduce(.Add, [4]u64{ 9, 99, 999, 9999 }, @as(u64, 11106));
- try doTheTestReduce(.Add, [4]i128{ -9, -99, -999, -9999 }, @as(i128, -11106));
- try doTheTestReduce(.Add, [4]u128{ 9, 99, 999, 9999 }, @as(u128, 11106));
- try doTheTestReduce(.Add, [4]f16{ -1.9, 5.1, -60.3, 100.0 }, @as(f16, 42.9));
- try doTheTestReduce(.Add, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 42.9));
- try doTheTestReduce(.Add, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 42.9));
-
- try doTheTestReduce(.And, [4]bool{ true, false, true, true }, @as(bool, false));
- try doTheTestReduce(.And, [4]u1{ 1, 0, 1, 1 }, @as(u1, 0));
- try doTheTestReduce(.And, [4]u16{ 0xffff, 0xff55, 0xaaff, 0x1010 }, @as(u16, 0x10));
- try doTheTestReduce(.And, [4]u32{ 0xffffffff, 0xffff5555, 0xaaaaffff, 0x10101010 }, @as(u32, 0x1010));
- try doTheTestReduce(.And, [4]u64{ 0xffffffff, 0xffff5555, 0xaaaaffff, 0x10101010 }, @as(u64, 0x1010));
-
- try doTheTestReduce(.Min, [4]i16{ -1, 2, 3, 4 }, @as(i16, -1));
- try doTheTestReduce(.Min, [4]u16{ 1, 2, 3, 4 }, @as(u16, 1));
- try doTheTestReduce(.Min, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, -386));
- try doTheTestReduce(.Min, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 9));
+ try testReduce(.Add, [4]i16{ -9, -99, -999, -9999 }, @as(i32, -11106));
+ try testReduce(.Add, [4]u16{ 9, 99, 999, 9999 }, @as(u32, 11106));
+ try testReduce(.Add, [4]i32{ -9, -99, -999, -9999 }, @as(i32, -11106));
+ try testReduce(.Add, [4]u32{ 9, 99, 999, 9999 }, @as(u32, 11106));
+ try testReduce(.Add, [4]i64{ -9, -99, -999, -9999 }, @as(i64, -11106));
+ try testReduce(.Add, [4]u64{ 9, 99, 999, 9999 }, @as(u64, 11106));
+ try testReduce(.Add, [4]i128{ -9, -99, -999, -9999 }, @as(i128, -11106));
+ try testReduce(.Add, [4]u128{ 9, 99, 999, 9999 }, @as(u128, 11106));
+ try testReduce(.Add, [4]f16{ -1.9, 5.1, -60.3, 100.0 }, @as(f16, 42.9));
+ try testReduce(.Add, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 42.9));
+ try testReduce(.Add, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 42.9));
+
+ try testReduce(.And, [4]bool{ true, false, true, true }, @as(bool, false));
+ try testReduce(.And, [4]u1{ 1, 0, 1, 1 }, @as(u1, 0));
+ try testReduce(.And, [4]u16{ 0xffff, 0xff55, 0xaaff, 0x1010 }, @as(u16, 0x10));
+ try testReduce(.And, [4]u32{ 0xffffffff, 0xffff5555, 0xaaaaffff, 0x10101010 }, @as(u32, 0x1010));
+ try testReduce(.And, [4]u64{ 0xffffffff, 0xffff5555, 0xaaaaffff, 0x10101010 }, @as(u64, 0x1010));
+
+ try testReduce(.Min, [4]i16{ -1, 2, 3, 4 }, @as(i16, -1));
+ try testReduce(.Min, [4]u16{ 1, 2, 3, 4 }, @as(u16, 1));
+ try testReduce(.Min, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, -386));
+ try testReduce(.Min, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 9));
// LLVM 11 ERROR: Cannot select type
// https://github.com/ziglang/zig/issues/7138
if (builtin.target.cpu.arch != .aarch64) {
- try doTheTestReduce(.Min, [4]i64{ 1234567, -386, 0, 3 }, @as(i64, -386));
- try doTheTestReduce(.Min, [4]u64{ 99, 9999, 9, 99999 }, @as(u64, 9));
+ try testReduce(.Min, [4]i64{ 1234567, -386, 0, 3 }, @as(i64, -386));
+ try testReduce(.Min, [4]u64{ 99, 9999, 9, 99999 }, @as(u64, 9));
}
- try doTheTestReduce(.Min, [4]i128{ 1234567, -386, 0, 3 }, @as(i128, -386));
- try doTheTestReduce(.Min, [4]u128{ 99, 9999, 9, 99999 }, @as(u128, 9));
- try doTheTestReduce(.Min, [4]f16{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f16, -100.0));
- try doTheTestReduce(.Min, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, -100.0));
- try doTheTestReduce(.Min, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, -100.0));
+ try testReduce(.Min, [4]i128{ 1234567, -386, 0, 3 }, @as(i128, -386));
+ try testReduce(.Min, [4]u128{ 99, 9999, 9, 99999 }, @as(u128, 9));
+ try testReduce(.Min, [4]f16{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f16, -100.0));
+ try testReduce(.Min, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, -100.0));
+ try testReduce(.Min, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, -100.0));
- try doTheTestReduce(.Max, [4]i16{ -1, 2, 3, 4 }, @as(i16, 4));
- try doTheTestReduce(.Max, [4]u16{ 1, 2, 3, 4 }, @as(u16, 4));
- try doTheTestReduce(.Max, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, 1234567));
- try doTheTestReduce(.Max, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 99999));
+ try testReduce(.Max, [4]i16{ -1, 2, 3, 4 }, @as(i16, 4));
+ try testReduce(.Max, [4]u16{ 1, 2, 3, 4 }, @as(u16, 4));
+ try testReduce(.Max, [4]i32{ 1234567, -386, 0, 3 }, @as(i32, 1234567));
+ try testReduce(.Max, [4]u32{ 99, 9999, 9, 99999 }, @as(u32, 99999));
// LLVM 11 ERROR: Cannot select type
// https://github.com/ziglang/zig/issues/7138
if (builtin.target.cpu.arch != .aarch64) {
- try doTheTestReduce(.Max, [4]i64{ 1234567, -386, 0, 3 }, @as(i64, 1234567));
- try doTheTestReduce(.Max, [4]u64{ 99, 9999, 9, 99999 }, @as(u64, 99999));
+ try testReduce(.Max, [4]i64{ 1234567, -386, 0, 3 }, @as(i64, 1234567));
+ try testReduce(.Max, [4]u64{ 99, 9999, 9, 99999 }, @as(u64, 99999));
}
- try doTheTestReduce(.Max, [4]i128{ 1234567, -386, 0, 3 }, @as(i128, 1234567));
- try doTheTestReduce(.Max, [4]u128{ 99, 9999, 9, 99999 }, @as(u128, 99999));
- try doTheTestReduce(.Max, [4]f16{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f16, 10.0e9));
- try doTheTestReduce(.Max, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, 10.0e9));
- try doTheTestReduce(.Max, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, 10.0e9));
-
- try doTheTestReduce(.Mul, [4]i16{ -1, 2, 3, 4 }, @as(i16, -24));
- try doTheTestReduce(.Mul, [4]u16{ 1, 2, 3, 4 }, @as(u16, 24));
- try doTheTestReduce(.Mul, [4]i32{ -9, -99, -999, 999 }, @as(i32, -889218891));
- try doTheTestReduce(.Mul, [4]u32{ 1, 2, 3, 4 }, @as(u32, 24));
- try doTheTestReduce(.Mul, [4]i64{ 9, 99, 999, 9999 }, @as(i64, 8900199891));
- try doTheTestReduce(.Mul, [4]u64{ 9, 99, 999, 9999 }, @as(u64, 8900199891));
- try doTheTestReduce(.Mul, [4]i128{ -9, -99, -999, 9999 }, @as(i128, -8900199891));
- try doTheTestReduce(.Mul, [4]u128{ 9, 99, 999, 9999 }, @as(u128, 8900199891));
- try doTheTestReduce(.Mul, [4]f16{ -1.9, 5.1, -60.3, 100.0 }, @as(f16, 58430.7));
- try doTheTestReduce(.Mul, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 58430.7));
- try doTheTestReduce(.Mul, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 58430.7));
-
- try doTheTestReduce(.Or, [4]bool{ false, true, false, false }, @as(bool, true));
- try doTheTestReduce(.Or, [4]u1{ 0, 1, 0, 0 }, @as(u1, 1));
- try doTheTestReduce(.Or, [4]u16{ 0xff00, 0xff00, 0xf0, 0xf }, ~@as(u16, 0));
- try doTheTestReduce(.Or, [4]u32{ 0xffff0000, 0xff00, 0xf0, 0xf }, ~@as(u32, 0));
- try doTheTestReduce(.Or, [4]u64{ 0xffff0000, 0xff00, 0xf0, 0xf }, @as(u64, 0xffffffff));
- try doTheTestReduce(.Or, [4]u128{ 0xffff0000, 0xff00, 0xf0, 0xf }, @as(u128, 0xffffffff));
-
- try doTheTestReduce(.Xor, [4]bool{ true, true, true, false }, @as(bool, true));
- try doTheTestReduce(.Xor, [4]u1{ 1, 1, 1, 0 }, @as(u1, 1));
- try doTheTestReduce(.Xor, [4]u16{ 0x0000, 0x3333, 0x8888, 0x4444 }, ~@as(u16, 0));
- try doTheTestReduce(.Xor, [4]u32{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, ~@as(u32, 0));
- try doTheTestReduce(.Xor, [4]u64{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, @as(u64, 0xffffffff));
- try doTheTestReduce(.Xor, [4]u128{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, @as(u128, 0xffffffff));
+ try testReduce(.Max, [4]i128{ 1234567, -386, 0, 3 }, @as(i128, 1234567));
+ try testReduce(.Max, [4]u128{ 99, 9999, 9, 99999 }, @as(u128, 99999));
+ try testReduce(.Max, [4]f16{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f16, 10.0e9));
+ try testReduce(.Max, [4]f32{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f32, 10.0e9));
+ try testReduce(.Max, [4]f64{ -10.3, 10.0e9, 13.0, -100.0 }, @as(f64, 10.0e9));
+
+ try testReduce(.Mul, [4]i16{ -1, 2, 3, 4 }, @as(i16, -24));
+ try testReduce(.Mul, [4]u16{ 1, 2, 3, 4 }, @as(u16, 24));
+ try testReduce(.Mul, [4]i32{ -9, -99, -999, 999 }, @as(i32, -889218891));
+ try testReduce(.Mul, [4]u32{ 1, 2, 3, 4 }, @as(u32, 24));
+ try testReduce(.Mul, [4]i64{ 9, 99, 999, 9999 }, @as(i64, 8900199891));
+ try testReduce(.Mul, [4]u64{ 9, 99, 999, 9999 }, @as(u64, 8900199891));
+ try testReduce(.Mul, [4]i128{ -9, -99, -999, 9999 }, @as(i128, -8900199891));
+ try testReduce(.Mul, [4]u128{ 9, 99, 999, 9999 }, @as(u128, 8900199891));
+ try testReduce(.Mul, [4]f16{ -1.9, 5.1, -60.3, 100.0 }, @as(f16, 58430.7));
+ try testReduce(.Mul, [4]f32{ -1.9, 5.1, -60.3, 100.0 }, @as(f32, 58430.7));
+ try testReduce(.Mul, [4]f64{ -1.9, 5.1, -60.3, 100.0 }, @as(f64, 58430.7));
+
+ try testReduce(.Or, [4]bool{ false, true, false, false }, @as(bool, true));
+ try testReduce(.Or, [4]u1{ 0, 1, 0, 0 }, @as(u1, 1));
+ try testReduce(.Or, [4]u16{ 0xff00, 0xff00, 0xf0, 0xf }, ~@as(u16, 0));
+ try testReduce(.Or, [4]u32{ 0xffff0000, 0xff00, 0xf0, 0xf }, ~@as(u32, 0));
+ try testReduce(.Or, [4]u64{ 0xffff0000, 0xff00, 0xf0, 0xf }, @as(u64, 0xffffffff));
+ try testReduce(.Or, [4]u128{ 0xffff0000, 0xff00, 0xf0, 0xf }, @as(u128, 0xffffffff));
+
+ try testReduce(.Xor, [4]bool{ true, true, true, false }, @as(bool, true));
+ try testReduce(.Xor, [4]u1{ 1, 1, 1, 0 }, @as(u1, 1));
+ try testReduce(.Xor, [4]u16{ 0x0000, 0x3333, 0x8888, 0x4444 }, ~@as(u16, 0));
+ try testReduce(.Xor, [4]u32{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, ~@as(u32, 0));
+ try testReduce(.Xor, [4]u64{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, @as(u64, 0xffffffff));
+ try testReduce(.Xor, [4]u128{ 0x00000000, 0x33333333, 0x88888888, 0x44444444 }, @as(u128, 0xffffffff));
// Test the reduction on vectors containing NaNs.
const f16_nan = math.nan(f16);
const f32_nan = math.nan(f32);
const f64_nan = math.nan(f64);
- try doTheTestReduce(.Add, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
- try doTheTestReduce(.Add, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
- try doTheTestReduce(.Add, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
+ try testReduce(.Add, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
+ try testReduce(.Add, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
+ try testReduce(.Add, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
// LLVM 11 ERROR: Cannot select type
// https://github.com/ziglang/zig/issues/7138
if (false) {
- try doTheTestReduce(.Min, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
- try doTheTestReduce(.Min, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
- try doTheTestReduce(.Min, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
+ try testReduce(.Min, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
+ try testReduce(.Min, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
+ try testReduce(.Min, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
- try doTheTestReduce(.Max, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
- try doTheTestReduce(.Max, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
- try doTheTestReduce(.Max, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
+ try testReduce(.Max, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
+ try testReduce(.Max, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
+ try testReduce(.Max, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
}
- try doTheTestReduce(.Mul, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
- try doTheTestReduce(.Mul, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
- try doTheTestReduce(.Mul, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
+ try testReduce(.Mul, [4]f16{ -1.9, 5.1, f16_nan, 100.0 }, f16_nan);
+ try testReduce(.Mul, [4]f32{ -1.9, 5.1, f32_nan, 100.0 }, f32_nan);
+ try testReduce(.Mul, [4]f64{ -1.9, 5.1, f64_nan, 100.0 }, f64_nan);
}
};