Commit a3b1ba82f5

Robin Voetter <robin@voetter.nl>
2024-06-03 00:44:08
spirv: new vectorization helper
The old vectorization helper (WipElementWise) was clunky to use and not flexible enough. This introduces a new vectorization helper, which uses the Temporary type together with the operation being performed to deduce a Vectorization, and performs the operation in a reasonably efficient manner. It removes the outer loop that WipElementWise required, so implementations of AIR instructions are cleaner. This keeps things sane when we start to introduce support for composite integers. airShift, convertToDirect, convertToIndirect, and normalize are initially implemented using this new method.
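
As a minimal sketch of the new pattern, this is the airBinOpSimple
implementation from this diff (comments added for illustration):

    fn airBinOpSimple(self: *DeclGen, inst: Air.Inst.Index, op: BinaryOp) !?IdRef {
        const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
        // Wrap the AIR operands in Temporaries; no outer element-wise loop is needed.
        const lhs = try self.temporary(bin_op.lhs);
        const rhs = try self.temporary(bin_op.rhs);
        // buildBinary deduces the Vectorization and emits one or more instructions.
        const result = try self.buildBinary(op, lhs, rhs);
        // Materialize the virtual result into a single SPIR-V value.
        return try result.materialize(self);
    }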
1 parent 4e7159a
Changed files (7)
src/codegen/spirv/Module.zig
@@ -155,6 +155,9 @@ cache: struct {
     void_type: ?IdRef = null,
     int_types: std.AutoHashMapUnmanaged(std.builtin.Type.Int, IdRef) = .{},
     float_types: std.AutoHashMapUnmanaged(std.builtin.Type.Float, IdRef) = .{},
+    // This cache is required so that @Vector(X, u1) in direct representation has the
+    // same ID as @Vector(X, bool) in indirect representation.
+    vector_types: std.AutoHashMapUnmanaged(struct { IdRef, u32 }, IdRef) = .{},
 } = .{},
 
 /// Set of Decls, referred to by Decl.Index.
@@ -194,6 +197,7 @@ pub fn deinit(self: *Module) void {
 
     self.cache.int_types.deinit(self.gpa);
     self.cache.float_types.deinit(self.gpa);
+    self.cache.vector_types.deinit(self.gpa);
 
     self.decls.deinit(self.gpa);
     self.decl_deps.deinit(self.gpa);
@@ -474,13 +478,17 @@ pub fn floatType(self: *Module, bits: u16) !IdRef {
 }
 
 pub fn vectorType(self: *Module, len: u32, child_id: IdRef) !IdRef {
-    const result_id = self.allocId();
-    try self.sections.types_globals_constants.emit(self.gpa, .OpTypeVector, .{
-        .id_result = result_id,
-        .component_type = child_id,
-        .component_count = len,
-    });
-    return result_id;
+    const entry = try self.cache.vector_types.getOrPut(self.gpa, .{ child_id, len });
+    if (!entry.found_existing) {
+        const result_id = self.allocId();
+        entry.value_ptr.* = result_id;
+        try self.sections.types_globals_constants.emit(self.gpa, .OpTypeVector, .{
+            .id_result = result_id,
+            .component_type = child_id,
+            .component_count = len,
+        });
+    }
+    return entry.value_ptr.*;
 }
 
 pub fn constUndef(self: *Module, ty_id: IdRef) !IdRef {
src/codegen/spirv.zig
@@ -3,6 +3,7 @@ const Allocator = std.mem.Allocator;
 const Target = std.Target;
 const log = std.log.scoped(.codegen);
 const assert = std.debug.assert;
+const Signedness = std.builtin.Signedness;
 
 const Module = @import("../Module.zig");
 const Decl = Module.Decl;
@@ -423,6 +424,17 @@ const DeclGen = struct {
         return self.fail("TODO (SPIR-V): " ++ format, args);
     }
 
+    /// This imports the "default" extended instruction set for the target.
+    /// For OpenCL, this is OpenCL.std; for Vulkan, GLSL.std.450.
+    fn importExtendedSet(self: *DeclGen) !IdResult {
+        const target = self.getTarget();
+        return switch (target.os.tag) {
+            .opencl => try self.spv.importInstructionSet(.@"OpenCL.std"),
+            .vulkan => try self.spv.importInstructionSet(.@"GLSL.std.450"),
+            else => unreachable,
+        };
+    }
+
     /// Fetch the result-id for a previously generated instruction or constant.
     fn resolve(self: *DeclGen, inst: Air.Inst.Ref) !IdRef {
         const mod = self.module;
@@ -631,6 +643,19 @@ const DeclGen = struct {
         const mod = self.module;
         const target = self.getTarget();
         if (ty.zigTypeTag(mod) != .Vector) return false;
+
+        // TODO: This check must be expanded for types that can be represented
+        // as integers (enums / packed structs?) and types that are represented
+        // by multiple SPIR-V values.
+        const scalar_ty = ty.scalarType(mod);
+        switch (scalar_ty.zigTypeTag(mod)) {
+            .Bool,
+            .Int,
+            .Float,
+            => {},
+            else => return false,
+        }
+
         const elem_ty = ty.childType(mod);
 
         const len = ty.vectorLen(mod);
@@ -723,9 +748,13 @@ const DeclGen = struct {
         // Use backing bits so that negatives are sign extended
         const backing_bits = self.backingIntBits(int_info.bits).?; // Assertion failure means big int
 
-        const bits: u64 = switch (int_info.signedness) {
-            // Intcast needed to silence compile errors for when the wrong path is compiled.
-            // Lazy fix.
+        const signedness: Signedness = switch (@typeInfo(@TypeOf(value))) {
+            .Int => |int| int.signedness,
+            .ComptimeInt => if (value < 0) .signed else .unsigned,
+            else => unreachable,
+        };
+
+        const bits: u64 = switch (signedness) {
             .signed => @bitCast(@as(i64, @intCast(value))),
             .unsigned => @as(u64, @intCast(value)),
         };
@@ -1392,6 +1421,19 @@ const DeclGen = struct {
         return ty_id;
     }
 
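+    /// Constructs a Zig type with the same shape as `base_ty` (scalar, or vector of
+    /// the same length), but with the scalar type of `new_ty` as the element type.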
+    fn zigScalarOrVectorTypeLike(self: *DeclGen, new_ty: Type, base_ty: Type) !Type {
+        const mod = self.module;
+        const new_scalar_ty = new_ty.scalarType(mod);
+        if (!base_ty.isVector(mod)) {
+            return new_scalar_ty;
+        }
+
+        return try mod.vectorType(.{
+            .len = base_ty.vectorLen(mod),
+            .child = new_scalar_ty.toIntern(),
+        });
+    }
+
     /// Generate a union type. Union types are always generated with the
     /// most aligned field active. If the tag alignment is greater
     /// than that of the payload, a regular union (non-packed, with both tag and
@@ -1928,77 +1970,897 @@ const DeclGen = struct {
         return union_layout;
     }
 
-    /// This structure is used as helper for element-wise operations. It is intended
-    /// to be used with vectors, fake vectors (arrays) and single elements.
-    const WipElementWise = struct {
-        dg: *DeclGen,
-        result_ty: Type,
+    /// This structure represents a "temporary" value: something we are currently
+    /// operating on. It typically lives no longer than the function that
+    /// implements a particular AIR operation. These are used to more easily
+    /// implement vectorizable operations (see Vectorization and the build*
+    /// functions), and typically are only used for vectors of primitive types.
+    const Temporary = struct {
+        /// The type of the temporary. This is here mainly
+        /// for easier bookkeeping. Temporaries are never really
+        /// stored, so they only cost a bit of stack space and
+        /// no real storage is wasted.
         ty: Type,
-        /// Always in direct representation.
-        ty_id: IdRef,
-        /// True if the input is an array type.
-        is_array: bool,
-        /// The element-wise operation should fill these results before calling finalize().
-        /// These should all be in **direct** representation! `finalize()` will convert
-        /// them to indirect if required.
-        results: []IdRef,
-
-        fn deinit(wip: *WipElementWise) void {
-            wip.dg.gpa.free(wip.results);
-        }
-
-        /// Utility function to extract the element at a particular index in an
-        /// input array. This type is expected to be a fake vector (array) if `wip.is_array`, and
-        /// a vector or scalar otherwise.
-        fn elementAt(wip: WipElementWise, ty: Type, value: IdRef, index: usize) !IdRef {
-            const mod = wip.dg.module;
-            if (wip.is_array) {
-                assert(ty.isVector(mod));
-                return try wip.dg.extractVectorComponent(ty.childType(mod), value, @intCast(index));
+        /// The value that this temporary holds. This is not necessarily
+        /// a value that is actually usable, or a single value: It is virtual
+        /// until materialize() is called, at which point it is turned into
+        /// the usual SPIR-V representation of `self.ty`.
+        value: Temporary.Value,
+
+        const Value = union(enum) {
+            singleton: IdResult,
+            exploded_vector: IdRange,
+        };
+
+        fn init(ty: Type, singleton: IdResult) Temporary {
+            return .{ .ty = ty, .value = .{ .singleton = singleton } };
+        }
+
+        fn materialize(self: Temporary, dg: *DeclGen) !IdResult {
+            const mod = dg.module;
+            switch (self.value) {
+                .singleton => |id| return id,
+                .exploded_vector => |range| {
+                    assert(self.ty.isVector(mod));
+                    assert(self.ty.vectorLen(mod) == range.len);
+                    const constituents = try dg.gpa.alloc(IdRef, range.len);
+                    defer dg.gpa.free(constituents);
+                    for (constituents, 0..range.len) |*id, i| {
+                        id.* = range.at(i);
+                    }
+                    return dg.constructVector(self.ty, constituents);
+                },
+            }
+        }
+
+        fn vectorization(self: Temporary, dg: *DeclGen) Vectorization {
+            return Vectorization.fromType(self.ty, dg);
+        }
+
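+        /// Reinterpret this temporary as having a different type without changing
+        /// the underlying value. The two types must have the same SPIR-V
+        /// representation.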
+        fn pun(self: Temporary, new_ty: Type) Temporary {
+            return .{
+                .ty = new_ty,
+                .value = self.value,
+            };
+        }
+
+        /// 'Explode' a temporary into separate elements. This turns a vector
+        /// into a bag of elements.
+        fn explode(self: Temporary, dg: *DeclGen) !IdRange {
+            const mod = dg.module;
+
+            // If the value is a scalar, then this is a no-op.
+            if (!self.ty.isVector(mod)) {
+                return switch (self.value) {
+                    .singleton => |id| IdRange{ .base = @intFromEnum(id), .len = 1 },
+                    .exploded_vector => |range| range,
+                };
+            }
+
+            const ty_id = try dg.resolveType(self.ty.scalarType(mod), .direct);
+            const n = self.ty.vectorLen(mod);
+            const results = dg.spv.allocIds(n);
+
+            const id = switch (self.value) {
+                .singleton => |id| id,
+                .exploded_vector => |range| return range,
+            };
+
+            for (0..n) |i| {
+                const indexes = [_]u32{@intCast(i)};
+                try dg.func.body.emit(dg.spv.gpa, .OpCompositeExtract, .{
+                    .id_result_type = ty_id,
+                    .id_result = results.at(i),
+                    .composite = id,
+                    .indexes = &indexes,
+                });
+            }
+
+            return results;
+        }
+    };
+
+    /// Initialize a `Temporary` from an AIR value.
+    fn temporary(self: *DeclGen, inst: Air.Inst.Ref) !Temporary {
+        return .{
+            .ty = self.typeOf(inst),
+            .value = .{ .singleton = try self.resolve(inst) },
+        };
+    }
+
+    /// This union describes how a particular operation should be vectorized.
+    /// That depends on the operation and number of components of the inputs.
+    const Vectorization = union(enum) {
+        /// This is an operation between scalars.
+        scalar,
+        /// This is an operation between SPIR-V vectors.
+        /// Value is number of components.
+        spv_vectorized: u32,
+        /// This operation is unrolled into separate operations.
+        /// Inputs may still be SPIR-V vectors, for example,
+        /// when the operation can't be vectorized in SPIR-V.
+        /// Value is number of components.
+        unrolled: u32,
+
+        /// Derive a vectorization from a particular type. This usually
+        /// only depends on the vector length, but the source of truth
+        /// is `isSpvVector()`.
+        fn fromType(ty: Type, dg: *DeclGen) Vectorization {
+            const mod = dg.module;
+            if (!ty.isVector(mod)) {
+                return .scalar;
+            } else if (dg.isSpvVector(ty)) {
+                return .{ .spv_vectorized = ty.vectorLen(mod) };
             } else {
-                assert(index == 0);
-                return value;
+                return .{ .unrolled = ty.vectorLen(mod) };
             }
         }
 
-        /// Turns the results of this WipElementWise into a result. This can be
-        /// vectors, fake vectors (arrays) and single elements, depending on `result_ty`.
-        /// After calling this function, this WIP is no longer usable.
-        /// Results is in `direct` representation.
-        fn finalize(wip: *WipElementWise) !IdRef {
-            if (wip.is_array) {
-                return try wip.dg.constructVector(wip.result_ty, wip.results);
+        /// Given two vectorization methods, compute a "unification": a fallback
+        /// that works for both, according to the following rules:
+        /// - Scalars may broadcast
+        /// - SPIR-V vectorized operations may unroll
+        /// - Prefer scalar > SPIR-V vectorized > unrolled
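+        /// For example, unifying `scalar` with `unrolled(4)` gives `unrolled(4)`
+        /// (the scalar is broadcast), and `spv_vectorized(4)` with `unrolled(4)`
+        /// gives `unrolled(4)`.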
+        fn unify(a: Vectorization, b: Vectorization) Vectorization {
+            if (a == .scalar and b == .scalar) {
+                return .scalar;
+            } else if (a == .spv_vectorized and b == .spv_vectorized) {
+                assert(a.components() == b.components());
+                return .{ .spv_vectorized = a.components() };
+            } else if (a == .unrolled or b == .unrolled) {
+                if (a == .unrolled and b == .unrolled) {
+                    assert(a.components() == b.components());
+                    return .{ .unrolled = a.components() };
+                } else if (a == .unrolled) {
+                    return .{ .unrolled = a.components() };
+                } else if (b == .unrolled) {
+                    return .{ .unrolled = b.components() };
+                } else {
+                    unreachable;
+                }
             } else {
-                return wip.results[0];
+                if (a == .spv_vectorized) {
+                    return .{ .spv_vectorized = a.components() };
+                } else if (b == .spv_vectorized) {
+                    return .{ .spv_vectorized = b.components() };
+                } else {
+                    unreachable;
+                }
             }
         }
 
-        /// Allocate a result id at a particular index, and return it.
-        fn allocId(wip: *WipElementWise, index: usize) IdRef {
-            assert(wip.is_array or index == 0);
-            wip.results[index] = wip.dg.spv.allocId();
-            return wip.results[index];
+        /// Force this vectorization to be unrolled, if it is
+        /// an operation involving vectors.
+        fn unroll(self: Vectorization) Vectorization {
+            return switch (self) {
+                .scalar, .unrolled => self,
+                .spv_vectorized => |n| .{ .unrolled = n },
+            };
+        }
+
+        /// Query the number of components that inputs of this operation have.
+        /// Note: for broadcasting scalars, this returns the number of elements
+        /// that the broadcasted vector would have.
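+        /// For example, unifying a scalar with a 4-element vector yields a
+        /// vectorization with 4 components.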
+        fn components(self: Vectorization) u32 {
+            return switch (self) {
+                .scalar => 1,
+                .spv_vectorized => |n| n,
+                .unrolled => |n| n,
+            };
+        }
+
+        /// Query the number of operations involving this vectorization.
+        /// This is basically the number of components, except that SPIR-V vectorized
+        /// operations only need a single SPIR-V instruction.
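+        /// For example, `unrolled(4)` requires 4 operations, while
+        /// `spv_vectorized(4)` requires just 1.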
+        fn operations(self: Vectorization) u32 {
+            return switch (self) {
+                .scalar, .spv_vectorized => 1,
+                .unrolled => |n| n,
+            };
+        }
+
+        /// Turns `ty` into the result-type of an individual vector operation.
+        /// `ty` may be a scalar or vector; it doesn't matter.
+        fn operationType(self: Vectorization, dg: *DeclGen, ty: Type) !Type {
+            const mod = dg.module;
+            const scalar_ty = ty.scalarType(mod);
+            return switch (self) {
+                .scalar, .unrolled => scalar_ty,
+                .spv_vectorized => |n| try mod.vectorType(.{
+                    .len = n,
+                    .child = scalar_ty.toIntern(),
+                }),
+            };
+        }
+
+        /// Turns `ty` into the result-type of the entire operation.
+        /// `ty` may be a scalar or vector; it doesn't matter.
+        fn resultType(self: Vectorization, dg: *DeclGen, ty: Type) !Type {
+            const mod = dg.module;
+            const scalar_ty = ty.scalarType(mod);
+            return switch (self) {
+                .scalar => scalar_ty,
+                .unrolled, .spv_vectorized => |n| try mod.vectorType(.{
+                    .len = n,
+                    .child = scalar_ty.toIntern(),
+                }),
+            };
+        }
+
+        /// Before a temporary can be used, some setup may need to be done. This function implements
+        /// this setup, and returns a prepared operand that holds the relevant information on how to
+        /// access elements of the input.
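+        /// For example, under an `unrolled` vectorization, a scalar singleton becomes
+        /// a broadcast and a vector singleton is exploded into per-element IDs.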
+        fn prepare(self: Vectorization, dg: *DeclGen, tmp: Temporary) !PreparedOperand {
+            const mod = dg.module;
+            const is_vector = tmp.ty.isVector(mod);
+            const is_spv_vector = dg.isSpvVector(tmp.ty);
+            const value: PreparedOperand.Value = switch (tmp.value) {
+                .singleton => |id| switch (self) {
+                    .scalar => blk: {
+                        assert(!is_vector);
+                        break :blk .{ .scalar = id };
+                    },
+                    .spv_vectorized => blk: {
+                        if (is_vector) {
+                            assert(is_spv_vector);
+                            break :blk .{ .spv_vectorwise = id };
+                        }
+
+                        // Broadcast scalar into vector.
+                        const vector_ty = try mod.vectorType(.{
+                            .len = self.components(),
+                            .child = tmp.ty.toIntern(),
+                        });
+
+                        const vector = try dg.constructVectorSplat(vector_ty, id);
+                        return .{
+                            .ty = vector_ty,
+                            .value = .{ .spv_vectorwise = vector },
+                        };
+                    },
+                    .unrolled => blk: {
+                        if (is_vector) {
+                            break :blk .{ .vector_exploded = try tmp.explode(dg) };
+                        } else {
+                            break :blk .{ .scalar_broadcast = id };
+                        }
+                    },
+                },
+                .exploded_vector => |range| switch (self) {
+                    .scalar => unreachable,
+                    .spv_vectorized => |n| blk: {
+                        // We can vectorize this operation, but we have an exploded vector. This can happen
+                        // when a vectorizable operation follows a non-vectorizable one. In that case,
+                        // pack the IDs back up into a SPIR-V vector. This path should be unreachable for
+                        // types that cannot be represented as SPIR-V vectors.
+                        assert(is_spv_vector);
+                        assert(range.len == n);
+                        const vec = try tmp.materialize(dg);
+                        break :blk .{ .spv_vectorwise = vec };
+                    },
+                    .unrolled => |n| blk: {
+                        assert(range.len == n);
+                        break :blk .{ .vector_exploded = range };
+                    },
+                },
+            };
+
+            return .{
+                .ty = tmp.ty,
+                .value = value,
+            };
         }
+
+        /// Finalize the results of an operation back into a temporary. `results` is
+        /// a list of result-ids of the operation.
+        fn finalize(self: Vectorization, ty: Type, results: IdRange) Temporary {
+            assert(self.operations() == results.len);
+            const value: Temporary.Value = switch (self) {
+                .scalar, .spv_vectorized => .{ .singleton = results.at(0) },
+                .unrolled => .{ .exploded_vector = results },
+            };
+
+            return .{ .ty = ty, .value = value };
+        }
+
+        /// This struct represents an operand that has gone through some setup, and is
+        /// ready to be used as part of an operation.
+        const PreparedOperand = struct {
+            ty: Type,
+            value: PreparedOperand.Value,
+
+            /// The types of value that a prepared operand can hold internally. Depends
+            /// on the operation and input value.
+            const Value = union(enum) {
+                /// A single scalar value that is used by a scalar operation.
+                scalar: IdResult,
+                /// A single scalar that is broadcasted in an unrolled operation.
+                scalar_broadcast: IdResult,
+                /// A SPIR-V vector that is used in a SPIR-V vectorized operation.
+                spv_vectorwise: IdResult,
+                /// A vector represented by a consecutive list of IDs that is used in an unrolled operation.
+                vector_exploded: IdRange,
+            };
+
+            /// Query the value at a particular index of the operation. Note that
+            /// the index is *not* the component/lane, but the index of the *operation*. When
+            /// this operation is vectorized, the return value of this function is a SPIR-V vector.
+            /// See also `Vectorization.operations()`.
+            fn at(self: PreparedOperand, i: usize) IdResult {
+                switch (self.value) {
+                    .scalar => |id| {
+                        assert(i == 0);
+                        return id;
+                    },
+                    .scalar_broadcast => |id| {
+                        return id;
+                    },
+                    .spv_vectorwise => |id| {
+                        assert(i == 0);
+                        return id;
+                    },
+                    .vector_exploded => |range| {
+                        return range.at(i);
+                    },
+                }
+            }
+        };
     };
 
-    /// Create a new element-wise operation.
-    fn elementWise(self: *DeclGen, result_ty: Type, force_element_wise: bool) !WipElementWise {
+    /// A utility function to compute the vectorization style of
+    /// a list of values. These values may be any of the following:
+    /// - A `Vectorization` instance
+    /// - A Type, in which case the vectorization is computed via `Vectorization.fromType`.
+    /// - A Temporary, in which case the vectorization is computed via `Temporary.vectorization`.
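+    /// For example, `const v = self.vectorization(.{ lhs, rhs });` computes the
+    /// unified vectorization of two temporaries.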
+    fn vectorization(self: *DeclGen, args: anytype) Vectorization {
+        var v: Vectorization = undefined;
+        assert(args.len >= 1);
+        inline for (args, 0..) |arg, i| {
+            const iv: Vectorization = switch (@TypeOf(arg)) {
+                Vectorization => arg,
+                Type => Vectorization.fromType(arg, self),
+                Temporary => arg.vectorization(self),
+                else => @compileError("invalid type"),
+            };
+            if (i == 0) {
+                v = iv;
+            } else {
+                v = v.unify(iv);
+            }
+        }
+        return v;
+    }
+
+    /// This function builds an OpSConvert or OpUConvert depending on the
+    /// signedness of the destination type.
+    fn buildIntConvert(self: *DeclGen, dst_ty: Type, src: Temporary) !Temporary {
         const mod = self.module;
-        const is_array = result_ty.isVector(mod) and (!self.isSpvVector(result_ty) or force_element_wise);
-        const num_results = if (is_array) result_ty.vectorLen(mod) else 1;
-        const results = try self.gpa.alloc(IdRef, num_results);
-        @memset(results, undefined);
 
-        const ty = if (is_array) result_ty.scalarType(mod) else result_ty;
-        const ty_id = try self.resolveType(ty, .direct);
+        const dst_ty_id = try self.resolveType(dst_ty.scalarType(mod), .direct);
+        const src_ty_id = try self.resolveType(src.ty.scalarType(mod), .direct);
+
+        const v = self.vectorization(.{ dst_ty, src });
+        const result_ty = try v.resultType(self, dst_ty);
+
+        // We can compare the type IDs directly, because integer type IDs are cached.
+        if (dst_ty_id == src_ty_id) {
+            // Nothing to do; type-pun to the right value.
+            // Note: the caller guarantees that the types fit (or the caller will normalize
+            // afterwards), so we don't have to normalize here.
+            // Note: dst_ty may be a scalar type even if we expect a vector, so we have to
+            // pun to the right (possibly vectorized) type here.
+            return src.pun(result_ty);
+        }
+
+        const ops = v.operations();
+        const results = self.spv.allocIds(ops);
+
+        const op_result_ty = try v.operationType(self, dst_ty);
+        const op_result_ty_id = try self.resolveType(op_result_ty, .direct);
+
+        const opcode: Opcode = if (dst_ty.isSignedInt(mod)) .OpSConvert else .OpUConvert;
+
+        const op_src = try v.prepare(self, src);
+
+        for (0..ops) |i| {
+            try self.func.body.emitRaw(self.spv.gpa, opcode, 3);
+            self.func.body.writeOperand(spec.IdResultType, op_result_ty_id);
+            self.func.body.writeOperand(IdResult, results.at(i));
+            self.func.body.writeOperand(IdResult, op_src.at(i));
+        }
+
+        return v.finalize(result_ty, results);
+    }
+
+    fn buildFma(self: *DeclGen, a: Temporary, b: Temporary, c: Temporary) !Temporary {
+        const target = self.getTarget();
+
+        const v = self.vectorization(.{ a, b, c });
+        const ops = v.operations();
+        const results = self.spv.allocIds(ops);
+
+        const op_result_ty = try v.operationType(self, a.ty);
+        const op_result_ty_id = try self.resolveType(op_result_ty, .direct);
+        const result_ty = try v.resultType(self, a.ty);
+
+        const op_a = try v.prepare(self, a);
+        const op_b = try v.prepare(self, b);
+        const op_c = try v.prepare(self, c);
+
+        const set = try self.importExtendedSet();
+
+        // TODO: Put these numbers in some definition
+        const instruction: u32 = switch (target.os.tag) {
+            .opencl => 26, // fma
+            // NOTE: Vulkan's FMA instruction does *NOT* produce the right values!
+            //   Its precision guarantees do NOT match Zig's, and it does NOT match OpenCL's!
+            //   It needs to be emulated!
+            .vulkan => unreachable, // TODO: See above
+            else => unreachable,
+        };
+
+        for (0..ops) |i| {
+            try self.func.body.emit(self.spv.gpa, .OpExtInst, .{
+                .id_result_type = op_result_ty_id,
+                .id_result = results.at(i),
+                .set = set,
+                .instruction = .{ .inst = instruction },
+                .id_ref_4 = &.{ op_a.at(i), op_b.at(i), op_c.at(i) },
+            });
+        }
+
+        return v.finalize(result_ty, results);
+    }
+
+    fn buildSelect(self: *DeclGen, condition: Temporary, lhs: Temporary, rhs: Temporary) !Temporary {
+        const mod = self.module;
+
+        const v = self.vectorization(.{ condition, lhs, rhs });
+        const ops = v.operations();
+        const results = self.spv.allocIds(ops);
+
+        const op_result_ty = try v.operationType(self, lhs.ty);
+        const op_result_ty_id = try self.resolveType(op_result_ty, .direct);
+        const result_ty = try v.resultType(self, lhs.ty);
+
+        assert(condition.ty.scalarType(mod).zigTypeTag(mod) == .Bool);
+
+        const cond = try v.prepare(self, condition);
+        const object_1 = try v.prepare(self, lhs);
+        const object_2 = try v.prepare(self, rhs);
+
+        for (0..ops) |i| {
+            try self.func.body.emit(self.spv.gpa, .OpSelect, .{
+                .id_result_type = op_result_ty_id,
+                .id_result = results.at(i),
+                .condition = cond.at(i),
+                .object_1 = object_1.at(i),
+                .object_2 = object_2.at(i),
+            });
+        }
 
+        return v.finalize(result_ty, results);
+    }
+
+    const CmpPredicate = enum {
+        l_eq,
+        l_ne,
+        i_ne,
+        i_eq,
+        s_lt,
+        s_gt,
+        s_le,
+        s_ge,
+        u_lt,
+        u_gt,
+        u_le,
+        u_ge,
+        f_oeq,
+        f_une,
+        f_olt,
+        f_ole,
+        f_ogt,
+        f_oge,
+    };
+
+    fn buildCmp(self: *DeclGen, pred: CmpPredicate, lhs: Temporary, rhs: Temporary) !Temporary {
+        const v = self.vectorization(.{ lhs, rhs });
+        const ops = v.operations();
+        const results = self.spv.allocIds(ops);
+
+        const op_result_ty = try v.operationType(self, Type.bool);
+        const op_result_ty_id = try self.resolveType(op_result_ty, .direct);
+        const result_ty = try v.resultType(self, Type.bool);
+
+        const op_lhs = try v.prepare(self, lhs);
+        const op_rhs = try v.prepare(self, rhs);
+
+        const opcode: Opcode = switch (pred) {
+            .l_eq => .OpLogicalEqual,
+            .l_ne => .OpLogicalNotEqual,
+            .i_eq => .OpIEqual,
+            .i_ne => .OpINotEqual,
+            .s_lt => .OpSLessThan,
+            .s_gt => .OpSGreaterThan,
+            .s_le => .OpSLessThanEqual,
+            .s_ge => .OpSGreaterThanEqual,
+            .u_lt => .OpULessThan,
+            .u_gt => .OpUGreaterThan,
+            .u_le => .OpULessThanEqual,
+            .u_ge => .OpUGreaterThanEqual,
+            .f_oeq => .OpFOrdEqual,
+            .f_une => .OpFUnordNotEqual,
+            .f_olt => .OpFOrdLessThan,
+            .f_ole => .OpFOrdLessThanEqual,
+            .f_ogt => .OpFOrdGreaterThan,
+            .f_oge => .OpFOrdGreaterThanEqual,
+        };
+
+        for (0..ops) |i| {
+            try self.func.body.emitRaw(self.spv.gpa, opcode, 4);
+            self.func.body.writeOperand(spec.IdResultType, op_result_ty_id);
+            self.func.body.writeOperand(IdResult, results.at(i));
+            self.func.body.writeOperand(IdResult, op_lhs.at(i));
+            self.func.body.writeOperand(IdResult, op_rhs.at(i));
+        }
+
+        return v.finalize(result_ty, results);
+    }
+
+    const UnaryOp = enum {
+        l_not,
+        bit_not,
+        i_neg,
+        f_neg,
+        i_abs,
+        f_abs,
+        clz,
+        ctz,
+        floor,
+        ceil,
+        trunc,
+        round,
+        sqrt,
+        sin,
+        cos,
+        tan,
+        exp,
+        exp2,
+        log,
+        log2,
+        log10,
+    };
+
+    fn buildUnary(self: *DeclGen, op: UnaryOp, operand: Temporary) !Temporary {
+        const target = self.getTarget();
+        const v = blk: {
+            const v = self.vectorization(.{operand});
+            break :blk switch (op) {
+                // TODO: These instructions don't seem to be working
+                // properly for LLVM-based backends on OpenCL for 8- and
+                // 16-component vectors.
+                .i_abs => if (target.os.tag == .opencl and v.components() >= 8) v.unroll() else v,
+                else => v,
+            };
+        };
+
+        const ops = v.operations();
+        const results = self.spv.allocIds(ops);
+
+        const op_result_ty = try v.operationType(self, operand.ty);
+        const op_result_ty_id = try self.resolveType(op_result_ty, .direct);
+        const result_ty = try v.resultType(self, operand.ty);
+
+        const op_operand = try v.prepare(self, operand);
+
+        if (switch (op) {
+            .l_not => .OpLogicalNot,
+            .bit_not => .OpNot,
+            .i_neg => .OpSNegate,
+            .f_neg => .OpFNegate,
+            else => @as(?Opcode, null),
+        }) |opcode| {
+            for (0..ops) |i| {
+                try self.func.body.emitRaw(self.spv.gpa, opcode, 3);
+                self.func.body.writeOperand(spec.IdResultType, op_result_ty_id);
+                self.func.body.writeOperand(IdResult, results.at(i));
+                self.func.body.writeOperand(IdResult, op_operand.at(i));
+            }
+        } else {
+            const set = try self.importExtendedSet();
+            const extinst: u32 = switch (target.os.tag) {
+                .opencl => switch (op) {
+                    .i_abs => 141, // s_abs
+                    .f_abs => 23, // fabs
+                    .clz => 151, // clz
+                    .ctz => 152, // ctz
+                    .floor => 25, // floor
+                    .ceil => 12, // ceil
+                    .trunc => 66, // trunc
+                    .round => 55, // round
+                    .sqrt => 61, // sqrt
+                    .sin => 57, // sin
+                    .cos => 14, // cos
+                    .tan => 62, // tan
+                    .exp => 19, // exp
+                    .exp2 => 20, // exp2
+                    .log => 37, // log
+                    .log2 => 38, // log2
+                    .log10 => 39, // log10
+                    else => unreachable,
+                },
+                // Note: We'll need to check these for floating-point accuracy.
+                // Vulkan does not put tight requirements on these; for correctness
+                // we might want to emulate them at some point.
+                .vulkan => switch (op) {
+                    .i_abs => 5, // SAbs
+                    .f_abs => 4, // FAbs
+                    .clz => unreachable, // TODO
+                    .ctz => unreachable, // TODO
+                    .floor => 8, // Floor
+                    .ceil => 9, // Ceil
+                    .trunc => 3, // Trunc
+                    .round => 1, // Round
+                    .sqrt,
+                    .sin,
+                    .cos,
+                    .tan,
+                    .exp,
+                    .exp2,
+                    .log,
+                    .log2,
+                    .log10,
+                    => unreachable, // TODO
+                    else => unreachable,
+                },
+                else => unreachable,
+            };
+
+            for (0..ops) |i| {
+                try self.func.body.emit(self.spv.gpa, .OpExtInst, .{
+                    .id_result_type = op_result_ty_id,
+                    .id_result = results.at(i),
+                    .set = set,
+                    .instruction = .{ .inst = extinst },
+                    .id_ref_4 = &.{op_operand.at(i)},
+                });
+            }
+        }
+
+        return v.finalize(result_ty, results);
+    }
+
+    const BinaryOp = enum {
+        i_add,
+        f_add,
+        i_sub,
+        f_sub,
+        i_mul,
+        f_mul,
+        s_div,
+        u_div,
+        f_div,
+        s_rem,
+        f_rem,
+        s_mod,
+        u_mod,
+        f_mod,
+        srl,
+        sra,
+        sll,
+        bit_and,
+        bit_or,
+        bit_xor,
+        f_max,
+        s_max,
+        u_max,
+        f_min,
+        s_min,
+        u_min,
+        l_and,
+        l_or,
+    };
+
+    fn buildBinary(self: *DeclGen, op: BinaryOp, lhs: Temporary, rhs: Temporary) !Temporary {
+        const target = self.getTarget();
+
+        const v = self.vectorization(.{ lhs, rhs });
+        const ops = v.operations();
+        const results = self.spv.allocIds(ops);
+
+        const op_result_ty = try v.operationType(self, lhs.ty);
+        const op_result_ty_id = try self.resolveType(op_result_ty, .direct);
+        const result_ty = try v.resultType(self, lhs.ty);
+
+        const op_lhs = try v.prepare(self, lhs);
+        const op_rhs = try v.prepare(self, rhs);
+
+        if (switch (op) {
+            .i_add => .OpIAdd,
+            .f_add => .OpFAdd,
+            .i_sub => .OpISub,
+            .f_sub => .OpFSub,
+            .i_mul => .OpIMul,
+            .f_mul => .OpFMul,
+            .s_div => .OpSDiv,
+            .u_div => .OpUDiv,
+            .f_div => .OpFDiv,
+            .s_rem => .OpSRem,
+            .f_rem => .OpFRem,
+            .s_mod => .OpSMod,
+            .u_mod => .OpUMod,
+            .f_mod => .OpFMod,
+            .srl => .OpShiftRightLogical,
+            .sra => .OpShiftRightArithmetic,
+            .sll => .OpShiftLeftLogical,
+            .bit_and => .OpBitwiseAnd,
+            .bit_or => .OpBitwiseOr,
+            .bit_xor => .OpBitwiseXor,
+            .l_and => .OpLogicalAnd,
+            .l_or => .OpLogicalOr,
+            else => @as(?Opcode, null),
+        }) |opcode| {
+            for (0..ops) |i| {
+                try self.func.body.emitRaw(self.spv.gpa, opcode, 4);
+                self.func.body.writeOperand(spec.IdResultType, op_result_ty_id);
+                self.func.body.writeOperand(IdResult, results.at(i));
+                self.func.body.writeOperand(IdResult, op_lhs.at(i));
+                self.func.body.writeOperand(IdResult, op_rhs.at(i));
+            }
+        } else {
+            const set = try self.importExtendedSet();
+
+            // TODO: Put these numbers in some definition
+            const extinst: u32 = switch (target.os.tag) {
+                .opencl => switch (op) {
+                    .f_max => 27, // fmax
+                    .s_max => 156, // s_max
+                    .u_max => 157, // u_max
+                    .f_min => 28, // fmin
+                    .s_min => 158, // s_min
+                    .u_min => 159, // u_min
+                    else => unreachable,
+                },
+                .vulkan => switch (op) {
+                    .f_max => 40, // FMax
+                    .s_max => 42, // SMax
+                    .u_max => 41, // UMax
+                    .f_min => 37, // FMin
+                    .s_min => 39, // SMin
+                    .u_min => 38, // UMin
+                    else => unreachable,
+                },
+                else => unreachable,
+            };
+
+            for (0..ops) |i| {
+                try self.func.body.emit(self.spv.gpa, .OpExtInst, .{
+                    .id_result_type = op_result_ty_id,
+                    .id_result = results.at(i),
+                    .set = set,
+                    .instruction = .{ .inst = extinst },
+                    .id_ref_4 = &.{ op_lhs.at(i), op_rhs.at(i) },
+                });
+            }
+        }
+
+        return v.finalize(result_ty, results);
+    }
+
+    /// This function builds an extended multiplication, either OpSMulExtended or OpUMulExtended on Vulkan,
+    /// or OpIMul and s_mul_hi or u_mul_hi on OpenCL.
+    fn buildWideMul(
+        self: *DeclGen,
+        op: enum {
+            s_mul_extended,
+            u_mul_extended,
+        },
+        lhs: Temporary,
+        rhs: Temporary,
+    ) !struct { Temporary, Temporary } {
+        const mod = self.module;
+        const target = self.getTarget();
+        const ip = &mod.intern_pool;
+
+        const v = lhs.vectorization(self).unify(rhs.vectorization(self));
+        const ops = v.operations();
+
+        const arith_op_ty = try v.operationType(self, lhs.ty);
+        const arith_op_ty_id = try self.resolveType(arith_op_ty, .direct);
+
+        const lhs_op = try v.prepare(self, lhs);
+        const rhs_op = try v.prepare(self, rhs);
+
+        const value_results = self.spv.allocIds(ops);
+        const overflow_results = self.spv.allocIds(ops);
+
+        switch (target.os.tag) {
+            .opencl => {
+                // Currently, SPIRV-LLVM-Translator based backends cannot deal with OpSMulExtended and
+                // OpUMulExtended. For these targets we use the OpenCL s_mul_hi/u_mul_hi instructions to
+                // compute the high-order bits instead.
+                const set = try self.importExtendedSet();
+                const overflow_inst: u32 = switch (op) {
+                    .s_mul_extended => 160, // s_mul_hi
+                    .u_mul_extended => 203, // u_mul_hi
+                };
+
+                for (0..ops) |i| {
+                    try self.func.body.emit(self.spv.gpa, .OpIMul, .{
+                        .id_result_type = arith_op_ty_id,
+                        .id_result = value_results.at(i),
+                        .operand_1 = lhs_op.at(i),
+                        .operand_2 = rhs_op.at(i),
+                    });
+
+                    try self.func.body.emit(self.spv.gpa, .OpExtInst, .{
+                        .id_result_type = arith_op_ty_id,
+                        .id_result = overflow_results.at(i),
+                        .set = set,
+                        .instruction = .{ .inst = overflow_inst },
+                        .id_ref_4 = &.{ lhs_op.at(i), rhs_op.at(i) },
+                    });
+                }
+            },
+            .vulkan => {
+                const op_result_ty = blk: {
+                    // Operations return a struct{T, T}
+                    // where T is maybe vectorized.
+                    const types = [2]InternPool.Index{ arith_op_ty.toIntern(), arith_op_ty.toIntern() };
+                    const values = [2]InternPool.Index{ .none, .none };
+                    const index = try ip.getAnonStructType(mod.gpa, .{
+                        .types = &types,
+                        .values = &values,
+                        .names = &.{},
+                    });
+                    break :blk Type.fromInterned(index);
+                };
+                const op_result_ty_id = try self.resolveType(op_result_ty, .direct);
+
+                const opcode: Opcode = switch (op) {
+                    .s_mul_extended => .OpSMulExtended,
+                    .u_mul_extended => .OpUMulExtended,
+                };
+
+                for (0..ops) |i| {
+                    const op_result = self.spv.allocId();
+
+                    try self.func.body.emitRaw(self.spv.gpa, opcode, 4);
+                    self.func.body.writeOperand(spec.IdResultType, op_result_ty_id);
+                    self.func.body.writeOperand(IdResult, op_result);
+                    self.func.body.writeOperand(IdResult, lhs_op.at(i));
+                    self.func.body.writeOperand(IdResult, rhs_op.at(i));
+
+                    // The above operation returns a struct. We might eventually want to expand
+                    // Temporary to deal with struct values, but for now, take the struct apart
+                    // and return two separate values.
+
+                    try self.func.body.emit(self.spv.gpa, .OpCompositeExtract, .{
+                        .id_result_type = arith_op_ty_id,
+                        .id_result = value_results.at(i),
+                        .composite = op_result,
+                        .indexes = &.{0},
+                    });
+
+                    try self.func.body.emit(self.spv.gpa, .OpCompositeExtract, .{
+                        .id_result_type = arith_op_ty_id,
+                        .id_result = overflow_results.at(i),
+                        .composite = op_result,
+                        .indexes = &.{1},
+                    });
+                }
+            },
+            else => unreachable,
+        }
+
+        const result_ty = try v.resultType(self, lhs.ty);
         return .{
-            .dg = self,
-            .result_ty = result_ty,
-            .ty = ty,
-            .ty_id = ty_id,
-            .is_array = is_array,
-            .results = results,
+            v.finalize(result_ty, value_results),
+            v.finalize(result_ty, overflow_results),
         };
     }
 
@@ -2237,59 +3099,42 @@ const DeclGen = struct {
         }
     }
 
-    fn intFromBool(self: *DeclGen, ty: Type, condition_id: IdRef) !IdRef {
-        const zero_id = try self.constInt(ty, 0, .direct);
-        const one_id = try self.constInt(ty, 1, .direct);
-        const result_id = self.spv.allocId();
-        try self.func.body.emit(self.spv.gpa, .OpSelect, .{
-            .id_result_type = try self.resolveType(ty, .direct),
-            .id_result = result_id,
-            .condition = condition_id,
-            .object_1 = one_id,
-            .object_2 = zero_id,
-        });
-        return result_id;
+    fn intFromBool(self: *DeclGen, value: Temporary) !Temporary {
+        return try self.intFromBool2(value, Type.u1);
+    }
+
+    fn intFromBool2(self: *DeclGen, value: Temporary, result_ty: Type) !Temporary {
+        const zero_id = try self.constInt(result_ty, 0, .direct);
+        const one_id = try self.constInt(result_ty, 1, .direct);
+
+        return try self.buildSelect(
+            value,
+            Temporary.init(result_ty, one_id),
+            Temporary.init(result_ty, zero_id),
+        );
     }
 
     /// Convert representation from indirect (in memory) to direct (in 'register')
     /// This converts the argument type from resolveType(ty, .indirect) to resolveType(ty, .direct).
     fn convertToDirect(self: *DeclGen, ty: Type, operand_id: IdRef) !IdRef {
         const mod = self.module;
-        const scalar_ty = ty.scalarType(mod);
-        const is_spv_vector = self.isSpvVector(ty);
-        switch (scalar_ty.zigTypeTag(mod)) {
+        switch (ty.scalarType(mod).zigTypeTag(mod)) {
             .Bool => {
-                // TODO: We may want to use something like elementWise in this function.
-                // First we need to audit whether this would recursively call into itself.
-                if (!ty.isVector(mod) or is_spv_vector) {
-                    const result_id = self.spv.allocId();
-                    const scalar_false_id = try self.constBool(false, .indirect);
-                    const false_id = if (is_spv_vector) blk: {
-                        const index = try mod.intern_pool.get(mod.gpa, .{
-                            .vector_type = .{
-                                .len = ty.vectorLen(mod),
-                                .child = Type.u1.toIntern(),
-                            },
-                        });
-                        const vec_ty = Type.fromInterned(index);
-                        break :blk try self.constructVectorSplat(vec_ty, scalar_false_id);
-                    } else scalar_false_id;
-
-                    try self.func.body.emit(self.spv.gpa, .OpINotEqual, .{
-                        .id_result_type = try self.resolveType(ty, .direct),
-                        .id_result = result_id,
-                        .operand_1 = operand_id,
-                        .operand_2 = false_id,
-                    });
-                    return result_id;
-                }
-
-                const constituents = try self.gpa.alloc(IdRef, ty.vectorLen(mod));
-                for (constituents, 0..) |*id, i| {
-                    const element = try self.extractVectorComponent(scalar_ty, operand_id, @intCast(i));
-                    id.* = try self.convertToDirect(scalar_ty, element);
-                }
-                return try self.constructVector(ty, constituents);
+                const false_id = try self.constBool(false, .indirect);
+                // The operation below requires inputs in direct representation, but the operand
+                // is actually in indirect representation.
+                // Cheekily swap out the type for the direct equivalent of the indirect type here;
+                // they have the same representation when converted to SPIR-V.
+                const operand_ty = try self.zigScalarOrVectorTypeLike(Type.u1, ty);
+                // Note: We can guarantee that these are the same ID due to the SPIR-V Module's `vector_types` cache!
+                assert(try self.resolveType(operand_ty, .direct) == try self.resolveType(ty, .indirect));
+
+                const result = try self.buildCmp(
+                    .i_ne,
+                    Temporary.init(operand_ty, operand_id),
+                    Temporary.init(Type.u1, false_id),
+                );
+                return try result.materialize(self);
             },
             else => return operand_id,
         }
@@ -2299,55 +3144,10 @@ const DeclGen = struct {
     /// This converts the argument type from resolveType(ty, .direct) to resolveType(ty, .indirect).
     fn convertToIndirect(self: *DeclGen, ty: Type, operand_id: IdRef) !IdRef {
         const mod = self.module;
-        const scalar_ty = ty.scalarType(mod);
-        const is_spv_vector = self.isSpvVector(ty);
-        switch (scalar_ty.zigTypeTag(mod)) {
+        switch (ty.scalarType(mod).zigTypeTag(mod)) {
             .Bool => {
-                const result_ty = if (is_spv_vector) blk: {
-                    const index = try mod.intern_pool.get(mod.gpa, .{
-                        .vector_type = .{
-                            .len = ty.vectorLen(mod),
-                            .child = Type.u1.toIntern(),
-                        },
-                    });
-                    break :blk Type.fromInterned(index);
-                } else Type.u1;
-
-                if (!ty.isVector(mod) or is_spv_vector) {
-                    // TODO: We may want to use something like elementWise in this function.
-                    // First we need to audit whether this would recursively call into itself.
-                    // Also unify it with intFromBool
-
-                    const scalar_zero_id = try self.constInt(Type.u1, 0, .direct);
-                    const scalar_one_id = try self.constInt(Type.u1, 1, .direct);
-
-                    const zero_id = if (is_spv_vector)
-                        try self.constructVectorSplat(result_ty, scalar_zero_id)
-                    else
-                        scalar_zero_id;
-
-                    const one_id = if (is_spv_vector)
-                        try self.constructVectorSplat(result_ty, scalar_one_id)
-                    else
-                        scalar_one_id;
-
-                    const result_id = self.spv.allocId();
-                    try self.func.body.emit(self.spv.gpa, .OpSelect, .{
-                        .id_result_type = try self.resolveType(result_ty, .direct),
-                        .id_result = result_id,
-                        .condition = operand_id,
-                        .object_1 = one_id,
-                        .object_2 = zero_id,
-                    });
-                    return result_id;
-                }
-
-                const constituents = try self.gpa.alloc(IdRef, ty.vectorLen(mod));
-                for (constituents, 0..) |*id, i| {
-                    const element = try self.extractVectorComponent(scalar_ty, operand_id, @intCast(i));
-                    id.* = try self.convertToIndirect(scalar_ty, element);
-                }
-                return try self.constructVector(result_ty, constituents);
+                const result = try self.intFromBool(Temporary.init(ty, operand_id));
+                return try result.materialize(self);
             },
             else => return operand_id,
         }
@@ -2428,26 +3228,35 @@ const DeclGen = struct {
         const air_tags = self.air.instructions.items(.tag);
         const maybe_result_id: ?IdRef = switch (air_tags[@intFromEnum(inst)]) {
             // zig fmt: off
-            .add, .add_wrap, .add_optimized => try self.airArithOp(inst, .OpFAdd, .OpIAdd, .OpIAdd),
-            .sub, .sub_wrap, .sub_optimized => try self.airArithOp(inst, .OpFSub, .OpISub, .OpISub),
-            .mul, .mul_wrap, .mul_optimized => try self.airArithOp(inst, .OpFMul, .OpIMul, .OpIMul),
-
-
+            .add, .add_wrap, .add_optimized => try self.airArithOp(inst, .f_add, .i_add, .i_add),
+            .sub, .sub_wrap, .sub_optimized => try self.airArithOp(inst, .f_sub, .i_sub, .i_sub),
+            .mul, .mul_wrap, .mul_optimized => try self.airArithOp(inst, .f_mul, .i_mul, .i_mul),
+
+            .sqrt => try self.airUnOpSimple(inst, .sqrt),
+            .sin => try self.airUnOpSimple(inst, .sin),
+            .cos => try self.airUnOpSimple(inst, .cos),
+            .tan => try self.airUnOpSimple(inst, .tan),
+            .exp => try self.airUnOpSimple(inst, .exp),
+            .exp2 => try self.airUnOpSimple(inst, .exp2),
+            .log => try self.airUnOpSimple(inst, .log),
+            .log2 => try self.airUnOpSimple(inst, .log2),
+            .log10 => try self.airUnOpSimple(inst, .log10),
             .abs => try self.airAbs(inst),
-            .floor => try self.airFloor(inst),
+            .floor => try self.airUnOpSimple(inst, .floor),
+            .ceil => try self.airUnOpSimple(inst, .ceil),
+            .round => try self.airUnOpSimple(inst, .round),
+            .trunc_float => try self.airUnOpSimple(inst, .trunc),
+            .neg, .neg_optimized => try self.airUnOpSimple(inst, .f_neg),
 
-            .div_floor => try self.airDivFloor(inst),
+            .div_float, .div_float_optimized => try self.airArithOp(inst, .f_div, .s_div, .u_div),
+            .div_floor, .div_floor_optimized => try self.airDivFloor(inst),
+            .div_trunc, .div_trunc_optimized => try self.airDivTrunc(inst),
 
-            .div_float,
-            .div_float_optimized,
-            .div_trunc,
-            .div_trunc_optimized => try self.airArithOp(inst, .OpFDiv, .OpSDiv, .OpUDiv),
-            .rem, .rem_optimized => try self.airArithOp(inst, .OpFRem, .OpSRem, .OpSRem),
-            .mod, .mod_optimized => try self.airArithOp(inst, .OpFMod, .OpSMod, .OpSMod),
+            .rem, .rem_optimized => try self.airArithOp(inst, .f_rem, .s_rem, .u_mod),
+            .mod, .mod_optimized => try self.airArithOp(inst, .f_mod, .s_mod, .u_mod),
 
-
-            .add_with_overflow => try self.airAddSubOverflow(inst, .OpIAdd, .OpULessThan, .OpSLessThan),
-            .sub_with_overflow => try self.airAddSubOverflow(inst, .OpISub, .OpUGreaterThan, .OpSGreaterThan),
+            .add_with_overflow => try self.airAddSubOverflow(inst, .i_add, .u_lt, .s_lt),
+            .sub_with_overflow => try self.airAddSubOverflow(inst, .i_sub, .u_gt, .s_gt),
             .mul_with_overflow => try self.airMulOverflow(inst),
             .shl_with_overflow => try self.airShlOverflow(inst),
 
@@ -2456,6 +3265,8 @@ const DeclGen = struct {
             .ctz => try self.airClzCtz(inst, .ctz),
             .clz => try self.airClzCtz(inst, .clz),
 
+            .select => try self.airSelect(inst),
+
             .splat => try self.airSplat(inst),
             .reduce, .reduce_optimized => try self.airReduce(inst),
             .shuffle                   => try self.airShuffle(inst),
@@ -2463,17 +3274,17 @@ const DeclGen = struct {
             .ptr_add => try self.airPtrAdd(inst),
             .ptr_sub => try self.airPtrSub(inst),
 
-            .bit_and  => try self.airBinOpSimple(inst, .OpBitwiseAnd),
-            .bit_or   => try self.airBinOpSimple(inst, .OpBitwiseOr),
-            .xor      => try self.airBinOpSimple(inst, .OpBitwiseXor),
-            .bool_and => try self.airBinOpSimple(inst, .OpLogicalAnd),
-            .bool_or  => try self.airBinOpSimple(inst, .OpLogicalOr),
+            .bit_and  => try self.airBinOpSimple(inst, .bit_and),
+            .bit_or   => try self.airBinOpSimple(inst, .bit_or),
+            .xor      => try self.airBinOpSimple(inst, .bit_xor),
+            .bool_and => try self.airBinOpSimple(inst, .l_and),
+            .bool_or  => try self.airBinOpSimple(inst, .l_or),
 
-            .shl, .shl_exact => try self.airShift(inst, .OpShiftLeftLogical, .OpShiftLeftLogical),
-            .shr, .shr_exact => try self.airShift(inst, .OpShiftRightLogical, .OpShiftRightArithmetic),
+            .shl, .shl_exact => try self.airShift(inst, .sll, .sll),
+            .shr, .shr_exact => try self.airShift(inst, .srl, .sra),
 
-            .min => try self.airMinMax(inst, .lt),
-            .max => try self.airMinMax(inst, .gt),
+            .min => try self.airMinMax(inst, .min),
+            .max => try self.airMinMax(inst, .max),
 
             .bitcast         => try self.airBitCast(inst),
             .intcast, .trunc => try self.airIntCast(inst),
@@ -2574,39 +3385,23 @@ const DeclGen = struct {
         try self.inst_results.putNoClobber(self.gpa, inst, result_id);
     }
 
-    fn binOpSimple(self: *DeclGen, ty: Type, lhs_id: IdRef, rhs_id: IdRef, comptime opcode: Opcode) !IdRef {
-        var wip = try self.elementWise(ty, false);
-        defer wip.deinit();
-        for (0..wip.results.len) |i| {
-            try self.func.body.emit(self.spv.gpa, opcode, .{
-                .id_result_type = wip.ty_id,
-                .id_result = wip.allocId(i),
-                .operand_1 = try wip.elementAt(ty, lhs_id, i),
-                .operand_2 = try wip.elementAt(ty, rhs_id, i),
-            });
-        }
-        return try wip.finalize();
-    }
-
-    fn airBinOpSimple(self: *DeclGen, inst: Air.Inst.Index, comptime opcode: Opcode) !?IdRef {
+    fn airBinOpSimple(self: *DeclGen, inst: Air.Inst.Index, op: BinaryOp) !?IdRef {
         const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-        const lhs_id = try self.resolve(bin_op.lhs);
-        const rhs_id = try self.resolve(bin_op.rhs);
-        const ty = self.typeOf(bin_op.lhs);
+        const lhs = try self.temporary(bin_op.lhs);
+        const rhs = try self.temporary(bin_op.rhs);
 
-        return try self.binOpSimple(ty, lhs_id, rhs_id, opcode);
+        const result = try self.buildBinary(op, lhs, rhs);
+        return try result.materialize(self);
     }
 
-    fn airShift(self: *DeclGen, inst: Air.Inst.Index, comptime unsigned: Opcode, comptime signed: Opcode) !?IdRef {
+    fn airShift(self: *DeclGen, inst: Air.Inst.Index, unsigned: BinaryOp, signed: BinaryOp) !?IdRef {
         const mod = self.module;
         const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-        const lhs_id = try self.resolve(bin_op.lhs);
-        const rhs_id = try self.resolve(bin_op.rhs);
+
+        const base = try self.temporary(bin_op.lhs);
+        const shift = try self.temporary(bin_op.rhs);
 
         const result_ty = self.typeOfIndex(inst);
-        const shift_ty = self.typeOf(bin_op.rhs);
-        const scalar_result_ty_id = try self.resolveType(result_ty.scalarType(mod), .direct);
-        const scalar_shift_ty_id = try self.resolveType(shift_ty.scalarType(mod), .direct);
 
         const info = self.arithmeticTypeInfo(result_ty);
         switch (info.class) {
@@ -2615,121 +3410,58 @@ const DeclGen = struct {
             .float, .bool => unreachable,
         }
 
-        var wip = try self.elementWise(result_ty, false);
-        defer wip.deinit();
-        for (wip.results, 0..) |*result_id, i| {
-            const lhs_elem_id = try wip.elementAt(result_ty, lhs_id, i);
-            const rhs_elem_id = try wip.elementAt(shift_ty, rhs_id, i);
-
-            // Sometimes Zig doesn't make both of the arguments the same types here. SPIR-V expects that,
-            // so just manually upcast it if required.
-            const shift_id = if (scalar_shift_ty_id != scalar_result_ty_id) blk: {
-                const shift_id = self.spv.allocId();
-                try self.func.body.emit(self.spv.gpa, .OpUConvert, .{
-                    .id_result_type = wip.ty_id,
-                    .id_result = shift_id,
-                    .unsigned_value = rhs_elem_id,
-                });
-                break :blk shift_id;
-            } else rhs_elem_id;
-
-            const value_id = self.spv.allocId();
-            const args = .{
-                .id_result_type = wip.ty_id,
-                .id_result = value_id,
-                .base = lhs_elem_id,
-                .shift = shift_id,
-            };
+        // Sometimes Zig doesn't make both of the arguments the same type here. SPIR-V expects that,
+        // so just manually upcast the shift amount if required.
 
-            if (result_ty.isSignedInt(mod)) {
-                try self.func.body.emit(self.spv.gpa, signed, args);
-            } else {
-                try self.func.body.emit(self.spv.gpa, unsigned, args);
-            }
+        // Note: The signedness may differ here between the shift and the base type, in the case
+        // of an arithmetic right shift. SPIR-V still expects both operands to have the same type,
+        // so in that case we have to convert the shift amount to signed.
+        const casted_shift = try self.buildIntConvert(base.ty.scalarType(mod), shift);
 
-            result_id.* = try self.normalize(wip.ty, value_id, info);
-        }
-        return try wip.finalize();
+        const shifted = switch (info.signedness) {
+            .unsigned => try self.buildBinary(unsigned, base, casted_shift),
+            .signed => try self.buildBinary(signed, base, casted_shift),
+        };
+
+        const result = try self.normalize(shifted, info);
+        return try result.materialize(self);
     }
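
A minimal illustration of the mismatch that casted_shift handles (a sketch; the function name is made up, not part of this commit): Zig types shift amounts as Log2Int of the base type, so the two AIR operands differ in width, while the shift is emitted with both operands in the base type.

    // `amt` is u5 (Log2Int(u32)), but the emitted shift wants a u32-typed
    // shift operand, so the backend widens `amt` first.
    export fn shiftLeft(base: u32, amt: u5) u32 {
        return base << amt;
    }
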
 
-    fn airMinMax(self: *DeclGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !?IdRef {
+    const MinMax = enum { min, max };
+
+    fn airMinMax(self: *DeclGen, inst: Air.Inst.Index, op: MinMax) !?IdRef {
         const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-        const lhs_id = try self.resolve(bin_op.lhs);
-        const rhs_id = try self.resolve(bin_op.rhs);
-        const result_ty = self.typeOfIndex(inst);
 
-        return try self.minMax(result_ty, op, lhs_id, rhs_id);
-    }
+        const lhs = try self.temporary(bin_op.lhs);
+        const rhs = try self.temporary(bin_op.rhs);
 
-    fn minMax(self: *DeclGen, result_ty: Type, op: std.math.CompareOperator, lhs_id: IdRef, rhs_id: IdRef) !IdRef {
-        const info = self.arithmeticTypeInfo(result_ty);
-        const target = self.getTarget();
+        const result = try self.minMax(lhs, rhs, op);
+        return try result.materialize(self);
+    }
 
-        const use_backup_codegen = target.os.tag == .opencl and info.class != .float;
-        var wip = try self.elementWise(result_ty, use_backup_codegen);
-        defer wip.deinit();
+    fn minMax(self: *DeclGen, lhs: Temporary, rhs: Temporary, op: MinMax) !Temporary {
+        const info = self.arithmeticTypeInfo(lhs.ty);
 
-        for (wip.results, 0..) |*result_id, i| {
-            const lhs_elem_id = try wip.elementAt(result_ty, lhs_id, i);
-            const rhs_elem_id = try wip.elementAt(result_ty, rhs_id, i);
-
-            if (use_backup_codegen) {
-                const cmp_id = try self.cmp(op, Type.bool, wip.ty, lhs_elem_id, rhs_elem_id);
-                result_id.* = self.spv.allocId();
-                try self.func.body.emit(self.spv.gpa, .OpSelect, .{
-                    .id_result_type = wip.ty_id,
-                    .id_result = result_id.*,
-                    .condition = cmp_id,
-                    .object_1 = lhs_elem_id,
-                    .object_2 = rhs_elem_id,
-                });
-            } else {
-                const ext_inst: Word = switch (target.os.tag) {
-                    .opencl => switch (op) {
-                        .lt => 28, // fmin
-                        .gt => 27, // fmax
-                        else => unreachable,
-                    },
-                    .vulkan => switch (info.class) {
-                        .float => switch (op) {
-                            .lt => 37, // FMin
-                            .gt => 40, // FMax
-                            else => unreachable,
-                        },
-                        .integer, .strange_integer => switch (info.signedness) {
-                            .signed => switch (op) {
-                                .lt => 39, // SMin
-                                .gt => 42, // SMax
-                                else => unreachable,
-                            },
-                            .unsigned => switch (op) {
-                                .lt => 38, // UMin
-                                .gt => 41, // UMax
-                                else => unreachable,
-                            },
-                        },
-                        .composite_integer => unreachable, // TODO
-                        .bool => unreachable,
-                    },
-                    else => unreachable,
-                };
-                const set_id = switch (target.os.tag) {
-                    .opencl => try self.spv.importInstructionSet(.@"OpenCL.std"),
-                    .vulkan => try self.spv.importInstructionSet(.@"GLSL.std.450"),
-                    else => unreachable,
-                };
+        const binop: BinaryOp = switch (info.class) {
+            .float => switch (op) {
+                .min => .f_min,
+                .max => .f_max,
+            },
+            .integer, .strange_integer => switch (info.signedness) {
+                .signed => switch (op) {
+                    .min => .s_min,
+                    .max => .s_max,
+                },
+                .unsigned => switch (op) {
+                    .min => .u_min,
+                    .max => .u_max,
+                },
+            },
+            .composite_integer => unreachable, // TODO
+            .bool => unreachable,
+        };
 
-                result_id.* = self.spv.allocId();
-                try self.func.body.emit(self.spv.gpa, .OpExtInst, .{
-                    .id_result_type = wip.ty_id,
-                    .id_result = result_id.*,
-                    .set = set_id,
-                    .instruction = .{ .inst = ext_inst },
-                    .id_ref_4 = &.{ lhs_elem_id, rhs_elem_id },
-                });
-            }
-        }
-        return wip.finalize();
+        return try self.buildBinary(binop, lhs, rhs);
     }
 
     /// This function normalizes values to a canonical representation
@@ -2740,41 +3472,24 @@ const DeclGen = struct {
     /// - Signed integers are also sign extended if they are negative.
     /// All other values are returned unmodified (this makes strange integer
     /// wrapping easier to use in generic operations).
-    fn normalize(self: *DeclGen, ty: Type, value_id: IdRef, info: ArithmeticTypeInfo) !IdRef {
+    fn normalize(self: *DeclGen, value: Temporary, info: ArithmeticTypeInfo) !Temporary {
+        const mod = self.module;
+        const ty = value.ty;
         switch (info.class) {
-            .integer, .bool, .float => return value_id,
+            .integer, .bool, .float => return value,
             .composite_integer => unreachable, // TODO
             .strange_integer => switch (info.signedness) {
                 .unsigned => {
                     const mask_value = if (info.bits == 64) 0xFFFF_FFFF_FFFF_FFFF else (@as(u64, 1) << @as(u6, @intCast(info.bits))) - 1;
-                    const result_id = self.spv.allocId();
-                    const mask_id = try self.constInt(ty, mask_value, .direct);
-                    try self.func.body.emit(self.spv.gpa, .OpBitwiseAnd, .{
-                        .id_result_type = try self.resolveType(ty, .direct),
-                        .id_result = result_id,
-                        .operand_1 = value_id,
-                        .operand_2 = mask_id,
-                    });
-                    return result_id;
+                    const mask_id = try self.constInt(ty.scalarType(mod), mask_value, .direct);
+                    return try self.buildBinary(.bit_and, value, Temporary.init(ty.scalarType(mod), mask_id));
                 },
                 .signed => {
                     // Shift left and right so that we can copy the sign bit that way.
-                    const shift_amt_id = try self.constInt(ty, info.backing_bits - info.bits, .direct);
-                    const left_id = self.spv.allocId();
-                    try self.func.body.emit(self.spv.gpa, .OpShiftLeftLogical, .{
-                        .id_result_type = try self.resolveType(ty, .direct),
-                        .id_result = left_id,
-                        .base = value_id,
-                        .shift = shift_amt_id,
-                    });
-                    const right_id = self.spv.allocId();
-                    try self.func.body.emit(self.spv.gpa, .OpShiftRightArithmetic, .{
-                        .id_result_type = try self.resolveType(ty, .direct),
-                        .id_result = right_id,
-                        .base = left_id,
-                        .shift = shift_amt_id,
-                    });
-                    return right_id;
+                    const shift_amt_id = try self.constInt(ty.scalarType(mod), info.backing_bits - info.bits, .direct);
+                    const shift_amt = Temporary.init(ty.scalarType(mod), shift_amt_id);
+                    const left = try self.buildBinary(.sll, value, shift_amt);
+                    return try self.buildBinary(.sra, left, shift_amt);
                 },
             },
         }
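
A sketch of what the two strange-integer paths compute, written out for a 4-bit integer stored in a 32-bit backing word (the helper names are illustrative, not from this commit):

    // Unsigned: mask off all bits above the value's declared width.
    fn normalizeU4(backing: u32) u32 {
        return backing & 0xF; // (1 << bits) - 1
    }

    // Signed: shift the value's sign bit up into the backing type's sign bit,
    // then arithmetic-shift back down, copying the sign into the upper bits.
    fn normalizeI4(backing: i32) i32 {
        return (backing << 28) >> 28; // backing_bits - bits = 32 - 4
    }
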
@@ -2782,491 +3497,438 @@ const DeclGen = struct {
 
     fn airDivFloor(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
         const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-        const lhs_id = try self.resolve(bin_op.lhs);
-        const rhs_id = try self.resolve(bin_op.rhs);
-        const ty = self.typeOfIndex(inst);
-        const ty_id = try self.resolveType(ty, .direct);
-        const info = self.arithmeticTypeInfo(ty);
+
+        const lhs = try self.temporary(bin_op.lhs);
+        const rhs = try self.temporary(bin_op.rhs);
+
+        const info = self.arithmeticTypeInfo(lhs.ty);
         switch (info.class) {
             .composite_integer => unreachable, // TODO
             .integer, .strange_integer => {
-                const zero_id = try self.constInt(ty, 0, .direct);
-                const one_id = try self.constInt(ty, 1, .direct);
-
-                // (a ^ b) > 0
-                const bin_bitwise_id = try self.binOpSimple(ty, lhs_id, rhs_id, .OpBitwiseXor);
-                const is_positive_id = try self.cmp(.gt, Type.bool, ty, bin_bitwise_id, zero_id);
-
-                // a / b
-                const positive_div_id = try self.arithOp(ty, lhs_id, rhs_id, .OpFDiv, .OpSDiv, .OpUDiv);
-
-                // - (abs(a) + abs(b) - 1) / abs(b)
-                const lhs_abs = try self.abs(ty, ty, lhs_id);
-                const rhs_abs = try self.abs(ty, ty, rhs_id);
-                const negative_div_lhs = try self.arithOp(
-                    ty,
-                    try self.arithOp(ty, lhs_abs, rhs_abs, .OpFAdd, .OpIAdd, .OpIAdd),
-                    one_id,
-                    .OpFSub,
-                    .OpISub,
-                    .OpISub,
+                switch (info.signedness) {
+                    .unsigned => {
+                        const result = try self.buildBinary(.u_div, lhs, rhs);
+                        return try result.materialize(self);
+                    },
+                    .signed => {},
+                }
+
+                // For signed integers:
+                //   (a / b) - ((a % b != 0) && ((a < 0) != (b < 0)));
+                // There shouldn't be any overflow issues.
+
+                const div = try self.buildBinary(.s_div, lhs, rhs);
+                const rem = try self.buildBinary(.s_rem, lhs, rhs);
+
+                const zero = Temporary.init(lhs.ty, try self.constInt(lhs.ty, 0, .direct));
+
+                const rem_is_not_zero = try self.buildCmp(.i_ne, rem, zero);
+
+                const result_negative = try self.buildCmp(
+                    .l_ne,
+                    try self.buildCmp(.s_lt, lhs, zero),
+                    try self.buildCmp(.s_lt, rhs, zero),
+                );
+                const rem_is_not_zero_and_result_is_negative = try self.buildBinary(
+                    .l_and,
+                    rem_is_not_zero,
+                    result_negative,
                 );
-                const negative_div_id = try self.arithOp(ty, negative_div_lhs, rhs_abs, .OpFDiv, .OpSDiv, .OpUDiv);
-                const negated_negative_div_id = self.spv.allocId();
-                try self.func.body.emit(self.spv.gpa, .OpSNegate, .{
-                    .id_result_type = ty_id,
-                    .id_result = negated_negative_div_id,
-                    .operand = negative_div_id,
-                });
 
-                const result_id = self.spv.allocId();
-                try self.func.body.emit(self.spv.gpa, .OpSelect, .{
-                    .id_result_type = ty_id,
-                    .id_result = result_id,
-                    .condition = is_positive_id,
-                    .object_1 = positive_div_id,
-                    .object_2 = negated_negative_div_id,
-                });
-                return result_id;
+                const result = try self.buildBinary(
+                    .i_sub,
+                    div,
+                    try self.intFromBool2(rem_is_not_zero_and_result_is_negative, div.ty),
+                );
+
+                return try result.materialize(self);
             },
             .float => {
-                const div_id = try self.arithOp(ty, lhs_id, rhs_id, .OpFDiv, .OpSDiv, .OpUDiv);
-                return try self.floor(ty, div_id);
+                const div = try self.buildBinary(.f_div, lhs, rhs);
+                const result = try self.buildUnary(.floor, div);
+                return try result.materialize(self);
             },
             .bool => unreachable,
         }
     }
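
A worked instance of the signed formula: for a = -7, b = 2, truncating division gives a / b = -3 and a % b = -1; the remainder is nonzero and the signs differ, so 1 is subtracted, giving -4 = floor(-7 / 2). A plain-Zig sketch of the same computation (not the emitted code):

    fn divFloor(a: i32, b: i32) i32 {
        const q = @divTrunc(a, b);
        const r = @rem(a, b);
        const fixup = @intFromBool(r != 0 and (a < 0) != (b < 0));
        return q - fixup;
    }
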
 
-    fn airFloor(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
-        const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
-        const operand_id = try self.resolve(un_op);
-        const result_ty = self.typeOfIndex(inst);
-        return try self.floor(result_ty, operand_id);
-    }
+    fn airDivTrunc(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
+        const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
 
-    fn floor(self: *DeclGen, ty: Type, operand_id: IdRef) !IdRef {
-        const target = self.getTarget();
-        const ty_id = try self.resolveType(ty, .direct);
-        const ext_inst: Word = switch (target.os.tag) {
-            .opencl => 25,
-            .vulkan => 8,
-            else => unreachable,
-        };
-        const set_id = switch (target.os.tag) {
-            .opencl => try self.spv.importInstructionSet(.@"OpenCL.std"),
-            .vulkan => try self.spv.importInstructionSet(.@"GLSL.std.450"),
-            else => unreachable,
-        };
+        const lhs = try self.temporary(bin_op.lhs);
+        const rhs = try self.temporary(bin_op.rhs);
 
-        const result_id = self.spv.allocId();
-        try self.func.body.emit(self.spv.gpa, .OpExtInst, .{
-            .id_result_type = ty_id,
-            .id_result = result_id,
-            .set = set_id,
-            .instruction = .{ .inst = ext_inst },
-            .id_ref_4 = &.{operand_id},
-        });
-        return result_id;
+        const info = self.arithmeticTypeInfo(lhs.ty);
+        switch (info.class) {
+            .composite_integer => unreachable, // TODO
+            .integer, .strange_integer => switch (info.signedness) {
+                .unsigned => {
+                    const result = try self.buildBinary(.u_div, lhs, rhs);
+                    return try result.materialize(self);
+                },
+                .signed => {
+                    const result = try self.buildBinary(.s_div, lhs, rhs);
+                    return try result.materialize(self);
+                },
+            },
+            .float => {
+                const div = try self.buildBinary(.f_div, lhs, rhs);
+                const result = try self.buildUnary(.trunc, div);
+                return try result.materialize(self);
+            },
+            .bool => unreachable,
+        }
+    }
+
+    fn airUnOpSimple(self: *DeclGen, inst: Air.Inst.Index, op: UnaryOp) !?IdRef {
+        const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
+        const operand = try self.temporary(un_op);
+        const result = try self.buildUnary(op, operand);
+        return try result.materialize(self);
     }
 
     fn airArithOp(
         self: *DeclGen,
         inst: Air.Inst.Index,
-        comptime fop: Opcode,
-        comptime sop: Opcode,
-        comptime uop: Opcode,
+        comptime fop: BinaryOp,
+        comptime sop: BinaryOp,
+        comptime uop: BinaryOp,
     ) !?IdRef {
-        // LHS and RHS are guaranteed to have the same type, and AIR guarantees
-        // the result to be the same as the LHS and RHS, which matches SPIR-V.
-        const ty = self.typeOfIndex(inst);
         const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-        const lhs_id = try self.resolve(bin_op.lhs);
-        const rhs_id = try self.resolve(bin_op.rhs);
-
-        assert(self.typeOf(bin_op.lhs).eql(ty, self.module));
-        assert(self.typeOf(bin_op.rhs).eql(ty, self.module));
 
-        return try self.arithOp(ty, lhs_id, rhs_id, fop, sop, uop);
-    }
+        const lhs = try self.temporary(bin_op.lhs);
+        const rhs = try self.temporary(bin_op.rhs);
 
-    fn arithOp(
-        self: *DeclGen,
-        ty: Type,
-        lhs_id: IdRef,
-        rhs_id: IdRef,
-        comptime fop: Opcode,
-        comptime sop: Opcode,
-        comptime uop: Opcode,
-    ) !IdRef {
-        // Binary operations are generally applicable to both scalar and vector operations
-        // in SPIR-V, but int and float versions of operations require different opcodes.
-        const info = self.arithmeticTypeInfo(ty);
+        const info = self.arithmeticTypeInfo(lhs.ty);
 
-        const opcode_index: usize = switch (info.class) {
-            .composite_integer => {
-                return self.todo("binary operations for composite integers", .{});
-            },
+        const result = switch (info.class) {
+            .composite_integer => unreachable, // TODO
             .integer, .strange_integer => switch (info.signedness) {
-                .signed => 1,
-                .unsigned => 2,
+                .signed => try self.buildBinary(sop, lhs, rhs),
+                .unsigned => try self.buildBinary(uop, lhs, rhs),
             },
-            .float => 0,
+            .float => try self.buildBinary(fop, lhs, rhs),
             .bool => unreachable,
         };
 
-        var wip = try self.elementWise(ty, false);
-        defer wip.deinit();
-        for (wip.results, 0..) |*result_id, i| {
-            const lhs_elem_id = try wip.elementAt(ty, lhs_id, i);
-            const rhs_elem_id = try wip.elementAt(ty, rhs_id, i);
-
-            const value_id = self.spv.allocId();
-            const operands = .{
-                .id_result_type = wip.ty_id,
-                .id_result = value_id,
-                .operand_1 = lhs_elem_id,
-                .operand_2 = rhs_elem_id,
-            };
-
-            switch (opcode_index) {
-                0 => try self.func.body.emit(self.spv.gpa, fop, operands),
-                1 => try self.func.body.emit(self.spv.gpa, sop, operands),
-                2 => try self.func.body.emit(self.spv.gpa, uop, operands),
-                else => unreachable,
-            }
-
-            // TODO: Trap on overflow? Probably going to be annoying.
-            // TODO: Look into SPV_KHR_no_integer_wrap_decoration which provides NoSignedWrap/NoUnsignedWrap.
-            result_id.* = try self.normalize(wip.ty, value_id, info);
-        }
-
-        return try wip.finalize();
+        return try result.materialize(self);
     }
 
     fn airAbs(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
         const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-        const operand_id = try self.resolve(ty_op.operand);
+        const operand = try self.temporary(ty_op.operand);
         // Note: operand_ty may be signed, while ty is always unsigned!
-        const operand_ty = self.typeOf(ty_op.operand);
         const result_ty = self.typeOfIndex(inst);
-        return try self.abs(result_ty, operand_ty, operand_id);
+        const result = try self.abs(result_ty, operand);
+        return try result.materialize(self);
     }
 
-    fn abs(self: *DeclGen, result_ty: Type, operand_ty: Type, operand_id: IdRef) !IdRef {
+    fn abs(self: *DeclGen, result_ty: Type, value: Temporary) !Temporary {
         const target = self.getTarget();
-        const operand_info = self.arithmeticTypeInfo(operand_ty);
+        const operand_info = self.arithmeticTypeInfo(value.ty);
 
-        var wip = try self.elementWise(result_ty, false);
-        defer wip.deinit();
+        switch (operand_info.class) {
+            .float => return try self.buildUnary(.f_abs, value),
+            .integer, .strange_integer => {
+                const abs_value = try self.buildUnary(.i_abs, value);
 
-        for (wip.results, 0..) |*result_id, i| {
-            const elem_id = try wip.elementAt(operand_ty, operand_id, i);
-
-            const ext_inst: Word = switch (target.os.tag) {
-                .opencl => switch (operand_info.class) {
-                    .float => 23, // fabs
-                    .integer, .strange_integer => switch (operand_info.signedness) {
-                        .signed => 141, // s_abs
-                        .unsigned => 201, // u_abs
-                    },
-                    .composite_integer => unreachable, // TODO
-                    .bool => unreachable,
-                },
-                .vulkan => switch (operand_info.class) {
-                    .float => 4, // FAbs
-                    .integer, .strange_integer => 5, // SAbs
-                    .composite_integer => unreachable, // TODO
-                    .bool => unreachable,
-                },
-                else => unreachable,
-            };
-            const set_id = switch (target.os.tag) {
-                .opencl => try self.spv.importInstructionSet(.@"OpenCL.std"),
-                .vulkan => try self.spv.importInstructionSet(.@"GLSL.std.450"),
-                else => unreachable,
-            };
+                // TODO: We may need to bitcast the result to a uint
+                // depending on the result type. Do that when
+                // bitCast is implemented for vectors.
+                // This is only relevant for Vulkan.
+                assert(target.os.tag != .vulkan); // TODO
 
-            result_id.* = self.spv.allocId();
-            try self.func.body.emit(self.spv.gpa, .OpExtInst, .{
-                .id_result_type = wip.ty_id,
-                .id_result = result_id.*,
-                .set = set_id,
-                .instruction = .{ .inst = ext_inst },
-                .id_ref_4 = &.{elem_id},
-            });
+                return try self.normalize(abs_value, self.arithmeticTypeInfo(result_ty));
+            },
+            .composite_integer => unreachable, // TODO
+            .bool => unreachable,
         }
-        return try wip.finalize();
     }
 
     fn airAddSubOverflow(
         self: *DeclGen,
         inst: Air.Inst.Index,
-        comptime add: Opcode,
-        comptime ucmp: Opcode,
-        comptime scmp: Opcode,
+        comptime add: BinaryOp,
+        comptime ucmp: CmpPredicate,
+        comptime scmp: CmpPredicate,
     ) !?IdRef {
-        const mod = self.module;
+        // Note: OpIAddCarry and OpISubBorrow are not really useful here: for unsigned numbers,
+        // only one extra operation is required in either case. For signed operations,
+        // the overflow bit is set when going from 0x80.. to 0x00.., but this doesn't
+        // normally set a carry bit. So the SPIR-V overflow operations are not particularly
+        // useful here.
+
         const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
         const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
-        const lhs = try self.resolve(extra.lhs);
-        const rhs = try self.resolve(extra.rhs);
 
-        const result_ty = self.typeOfIndex(inst);
-        const operand_ty = self.typeOf(extra.lhs);
-        const ov_ty = result_ty.structFieldType(1, self.module);
+        const lhs = try self.temporary(extra.lhs);
+        const rhs = try self.temporary(extra.rhs);
 
-        const bool_ty_id = try self.resolveType(Type.bool, .direct);
-        const cmp_ty_id = if (self.isSpvVector(operand_ty))
-            // TODO: Resolving a vector type with .direct should return a SPIR-V vector
-            try self.spv.vectorType(operand_ty.vectorLen(mod), try self.resolveType(Type.bool, .direct))
-        else
-            bool_ty_id;
+        const result_ty = self.typeOfIndex(inst);
 
-        const info = self.arithmeticTypeInfo(operand_ty);
+        const info = self.arithmeticTypeInfo(lhs.ty);
         switch (info.class) {
-            .composite_integer => return self.todo("overflow ops for composite integers", .{}),
+            .composite_integer => unreachable, // TODO
             .strange_integer, .integer => {},
             .float, .bool => unreachable,
         }
 
-        var wip_result = try self.elementWise(operand_ty, false);
-        defer wip_result.deinit();
-        var wip_ov = try self.elementWise(ov_ty, false);
-        defer wip_ov.deinit();
-        for (wip_result.results, wip_ov.results, 0..) |*result_id, *ov_id, i| {
-            const lhs_elem_id = try wip_result.elementAt(operand_ty, lhs, i);
-            const rhs_elem_id = try wip_result.elementAt(operand_ty, rhs, i);
-
-            // Normalize both so that we can properly check for overflow
-            const value_id = self.spv.allocId();
-
-            try self.func.body.emit(self.spv.gpa, add, .{
-                .id_result_type = wip_result.ty_id,
-                .id_result = value_id,
-                .operand_1 = lhs_elem_id,
-                .operand_2 = rhs_elem_id,
-            });
-
-            // Normalize the result so that the comparisons go well
-            result_id.* = try self.normalize(wip_result.ty, value_id, info);
-
-            const overflowed_id = switch (info.signedness) {
-                .unsigned => blk: {
-                    // Overflow happened if the result is smaller than either of the operands. It doesn't matter which.
-                    // For subtraction the conditions need to be swapped.
-                    const overflowed_id = self.spv.allocId();
-                    try self.func.body.emit(self.spv.gpa, ucmp, .{
-                        .id_result_type = cmp_ty_id,
-                        .id_result = overflowed_id,
-                        .operand_1 = result_id.*,
-                        .operand_2 = lhs_elem_id,
-                    });
-                    break :blk overflowed_id;
-                },
-                .signed => blk: {
-                    // lhs - rhs
-                    // For addition, overflow happened if:
-                    // - rhs is negative and value > lhs
-                    // - rhs is positive and value < lhs
-                    // This can be shortened to:
-                    //   (rhs < 0 and value > lhs) or (rhs >= 0 and value <= lhs)
-                    // = (rhs < 0) == (value > lhs)
-                    // = (rhs < 0) == (lhs < value)
-                    // Note that signed overflow is also wrapping in spir-v.
-                    // For subtraction, overflow happened if:
-                    // - rhs is negative and value < lhs
-                    // - rhs is positive and value > lhs
-                    // This can be shortened to:
-                    //   (rhs < 0 and value < lhs) or (rhs >= 0 and value >= lhs)
-                    // = (rhs < 0) == (value < lhs)
-                    // = (rhs < 0) == (lhs > value)
-
-                    const rhs_lt_zero_id = self.spv.allocId();
-                    const zero_id = try self.constInt(wip_result.ty, 0, .direct);
-                    try self.func.body.emit(self.spv.gpa, .OpSLessThan, .{
-                        .id_result_type = cmp_ty_id,
-                        .id_result = rhs_lt_zero_id,
-                        .operand_1 = rhs_elem_id,
-                        .operand_2 = zero_id,
-                    });
-
-                    const value_gt_lhs_id = self.spv.allocId();
-                    try self.func.body.emit(self.spv.gpa, scmp, .{
-                        .id_result_type = cmp_ty_id,
-                        .id_result = value_gt_lhs_id,
-                        .operand_1 = lhs_elem_id,
-                        .operand_2 = result_id.*,
-                    });
-
-                    const overflowed_id = self.spv.allocId();
-                    try self.func.body.emit(self.spv.gpa, .OpLogicalEqual, .{
-                        .id_result_type = cmp_ty_id,
-                        .id_result = overflowed_id,
-                        .operand_1 = rhs_lt_zero_id,
-                        .operand_2 = value_gt_lhs_id,
-                    });
-                    break :blk overflowed_id;
-                },
-            };
+        const sum = try self.buildBinary(add, lhs, rhs);
+        const result = try self.normalize(sum, info);
+
+        const overflowed = switch (info.signedness) {
+            // Overflow happened if the result is smaller than either of the operands. It doesn't matter which.
+            // For subtraction the conditions need to be swapped.
+            .unsigned => try self.buildCmp(ucmp, result, lhs),
+            // For addition, overflow happened if:
+            // - rhs is negative and value > lhs
+            // - rhs is positive and value < lhs
+            // This can be shortened to:
+            //   (rhs < 0 and value > lhs) or (rhs >= 0 and value <= lhs)
+            // = (rhs < 0) == (value > lhs)
+            // = (rhs < 0) == (lhs < value)
+            // Note that signed overflow is also wrapping in spir-v.
+            // For subtraction, overflow happened if:
+            // - rhs is negative and value < lhs
+            // - rhs is positive and value > lhs
+            // This can be shortened to:
+            //   (rhs < 0 and value < lhs) or (rhs >= 0 and value >= lhs)
+            // = (rhs < 0) == (value < lhs)
+            // = (rhs < 0) == (lhs > value)
+            .signed => blk: {
+                const zero = Temporary.init(rhs.ty, try self.constInt(rhs.ty, 0, .direct));
+                const rhs_lt_zero = try self.buildCmp(.s_lt, rhs, zero);
+                const result_gt_lhs = try self.buildCmp(scmp, lhs, result);
+                break :blk try self.buildCmp(.l_eq, rhs_lt_zero, result_gt_lhs);
+            },
+        };
 
-            ov_id.* = try self.intFromBool(wip_ov.ty, overflowed_id);
-        }
+        const ov = try self.intFromBool(overflowed);
 
         return try self.constructStruct(
             result_ty,
-            &.{ operand_ty, ov_ty },
-            &.{ try wip_result.finalize(), try wip_ov.finalize() },
+            &.{ result.ty, ov.ty },
+            &.{ try result.materialize(self), try ov.materialize(self) },
         );
     }
 
     fn airMulOverflow(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
+        const target = self.getTarget();
+        const mod = self.module;
+
         const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
         const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
-        const lhs = try self.resolve(extra.lhs);
-        const rhs = try self.resolve(extra.rhs);
+
+        const lhs = try self.temporary(extra.lhs);
+        const rhs = try self.temporary(extra.rhs);
 
         const result_ty = self.typeOfIndex(inst);
-        const operand_ty = self.typeOf(extra.lhs);
-        const ov_ty = result_ty.structFieldType(1, self.module);
 
-        const info = self.arithmeticTypeInfo(operand_ty);
+        const info = self.arithmeticTypeInfo(lhs.ty);
         switch (info.class) {
-            .composite_integer => return self.todo("overflow ops for composite integers", .{}),
+            .composite_integer => unreachable, // TODO
             .strange_integer, .integer => {},
             .float, .bool => unreachable,
         }
 
-        var wip_result = try self.elementWise(operand_ty, true);
-        defer wip_result.deinit();
-        var wip_ov = try self.elementWise(ov_ty, true);
-        defer wip_ov.deinit();
+        // There are 3 cases which we have to deal with:
+        // - If info.bits is at most half the largest supported width, we upcast and check the higher bits.
+        // - Otherwise, we have to use extended (wide) multiplication.
+        // - Additionally, if info.bits != info.backing_bits, we'll have to check the high bits
+        //   of the low word of the result too.
+
+        const largest_int_bits: u16 = if (Target.spirv.featureSetHas(target.cpu.features, .Int64)) 64 else 32;
+        // If non-null, the number of bits that the multiplication should be performed in. If
+        // null, we have to use wide multiplication.
+        const maybe_op_ty_bits: ?u16 = switch (info.bits) {
+            0 => unreachable,
+            1...16 => 32,
+            17...32 => if (largest_int_bits > 32) 64 else null, // Upcast if we can.
+            33...64 => null, // Always use wide multiplication.
+            else => unreachable, // TODO: Composite integers
+        };
 
-        const zero_id = try self.constInt(wip_result.ty, 0, .direct);
-        const zero_ov_id = try self.constInt(wip_ov.ty, 0, .direct);
-        const one_ov_id = try self.constInt(wip_ov.ty, 1, .direct);
+        const result, const overflowed = switch (info.signedness) {
+            .unsigned => blk: {
+                if (maybe_op_ty_bits) |op_ty_bits| {
+                    const op_ty = try mod.intType(.unsigned, op_ty_bits);
+                    const casted_lhs = try self.buildIntConvert(op_ty, lhs);
+                    const casted_rhs = try self.buildIntConvert(op_ty, rhs);
 
-        for (wip_result.results, wip_ov.results, 0..) |*result_id, *ov_id, i| {
-            const lhs_elem_id = try wip_result.elementAt(operand_ty, lhs, i);
-            const rhs_elem_id = try wip_result.elementAt(operand_ty, rhs, i);
+                    const full_result = try self.buildBinary(.i_mul, casted_lhs, casted_rhs);
 
-            result_id.* = try self.arithOp(wip_result.ty, lhs_elem_id, rhs_elem_id, .OpFMul, .OpIMul, .OpIMul);
+                    const low_bits = try self.buildIntConvert(lhs.ty, full_result);
+                    const result = try self.normalize(low_bits, info);
 
-            // (a != 0) and (x / a != b)
-            const not_zero_id = try self.cmp(.neq, Type.bool, wip_result.ty, lhs_elem_id, zero_id);
-            const res_rhs_id = try self.arithOp(wip_result.ty, result_id.*, lhs_elem_id, .OpFDiv, .OpSDiv, .OpUDiv);
-            const res_rhs_not_rhs_id = try self.cmp(.neq, Type.bool, wip_result.ty, res_rhs_id, rhs_elem_id);
-            const cond_id = try self.binOpSimple(Type.bool, not_zero_id, res_rhs_not_rhs_id, .OpLogicalAnd);
+                    // Shift the result bits away to get the overflow bits.
+                    const shift = Temporary.init(full_result.ty, try self.constInt(full_result.ty, info.bits, .direct));
+                    const overflow = try self.buildBinary(.srl, full_result, shift);
 
-            ov_id.* = self.spv.allocId();
-            try self.func.body.emit(self.spv.gpa, .OpSelect, .{
-                .id_result_type = wip_ov.ty_id,
-                .id_result = ov_id.*,
-                .condition = cond_id,
-                .object_1 = one_ov_id,
-                .object_2 = zero_ov_id,
-            });
-        }
+                    // Directly check if it's zero in the op_ty without converting first.
+                    const zero = Temporary.init(full_result.ty, try self.constInt(full_result.ty, 0, .direct));
+                    const overflowed = try self.buildCmp(.i_ne, zero, overflow);
+
+                    break :blk .{ result, overflowed };
+                }
+
+                const low_bits, const high_bits = try self.buildWideMul(.u_mul_extended, lhs, rhs);
+
+                // Truncate the result, if required.
+                const result = try self.normalize(low_bits, info);
+
+                // Overflow happened if the high bits of the result are non-zero, OR if the
+                // high bits of the low word of the result (those outside the range of the
+                // int) are non-zero.
+                const zero = Temporary.init(lhs.ty, try self.constInt(lhs.ty, 0, .direct));
+                const high_overflowed = try self.buildCmp(.i_ne, zero, high_bits);
+
+                // If no overflow bits in low_bits, no extra work needs to be done.
+                if (info.backing_bits == info.bits) {
+                    break :blk .{ result, high_overflowed };
+                }
+
+                // Shift the result bits away to get the overflow bits.
+                const shift = Temporary.init(lhs.ty, try self.constInt(lhs.ty, info.bits, .direct));
+                const low_overflow = try self.buildBinary(.srl, low_bits, shift);
+                const low_overflowed = try self.buildCmp(.i_ne, zero, low_overflow);
+
+                const overflowed = try self.buildBinary(.l_or, low_overflowed, high_overflowed);
+
+                break :blk .{ result, overflowed };
+            },
+            .signed => blk: {
+                // - lhs >= 0, rhs >= 0: expect positive; overflow should be  0
+                // - lhs == 0          : expect positive; overflow should be  0
+                // -           rhs == 0: expect positive; overflow should be  0
+                // - lhs  > 0, rhs  < 0: expect negative; overflow should be -1
+                // - lhs  < 0, rhs  > 0: expect negative; overflow should be -1
+                // - lhs <= 0, rhs <= 0: expect positive; overflow should be  0
+                // ------
+                // overflow should be -1 when
+                //   (lhs > 0 && rhs < 0) || (lhs < 0 && rhs > 0)
+
+                const zero = Temporary.init(lhs.ty, try self.constInt(lhs.ty, 0, .direct));
+                const lhs_negative = try self.buildCmp(.s_lt, lhs, zero);
+                const rhs_negative = try self.buildCmp(.s_lt, rhs, zero);
+                const lhs_positive = try self.buildCmp(.s_gt, lhs, zero);
+                const rhs_positive = try self.buildCmp(.s_gt, rhs, zero);
+
+                // Set to `true` if we expect -1.
+                const expected_overflow_bit = try self.buildBinary(
+                    .l_or,
+                    try self.buildBinary(.l_and, lhs_positive, rhs_negative),
+                    try self.buildBinary(.l_and, lhs_negative, rhs_positive),
+                );
+
+                if (maybe_op_ty_bits) |op_ty_bits| {
+                    const op_ty = try mod.intType(.signed, op_ty_bits);
+                    // Assume normalized, so the sign bit is set correctly. We want a sign extend.
+                    const casted_lhs = try self.buildIntConvert(op_ty, lhs);
+                    const casted_rhs = try self.buildIntConvert(op_ty, rhs);
+
+                    const full_result = try self.buildBinary(.i_mul, casted_lhs, casted_rhs);
+
+                    // Truncate to the result type.
+                    const low_bits = try self.buildIntConvert(lhs.ty, full_result);
+                    const result = try self.normalize(low_bits, info);
+
+                    // Now, we need to check the overflow bits AND the sign
+                    // bit against the expected overflow bits.
+                    // To do that, shift out everything but the sign bit and
+                    // then check what remains.
+                    const shift = Temporary.init(full_result.ty, try self.constInt(full_result.ty, info.bits - 1, .direct));
+                    // Use SRA so that any sign bits are duplicated. Now we can just check if ALL bits are set
+                    // for negative cases.
+                    const overflow = try self.buildBinary(.sra, full_result, shift);
+
+                    const long_all_set = Temporary.init(full_result.ty, try self.constInt(full_result.ty, -1, .direct));
+                    const long_zero = Temporary.init(full_result.ty, try self.constInt(full_result.ty, 0, .direct));
+                    const mask = try self.buildSelect(expected_overflow_bit, long_all_set, long_zero);
+
+                    const overflowed = try self.buildCmp(.i_ne, mask, overflow);
+
+                    break :blk .{ result, overflowed };
+                }
+
+                const low_bits, const high_bits = try self.buildWideMul(.s_mul_extended, lhs, rhs);
+
+                // Truncate result if required.
+                const result = try self.normalize(low_bits, info);
+
+                const all_set = Temporary.init(lhs.ty, try self.constInt(lhs.ty, -1, .direct));
+                const mask = try self.buildSelect(expected_overflow_bit, all_set, zero);
+
+                // As with unsigned, overflow happened if the high bits are not the ones we
+                // expect, and we may also need to check some bits of the low word.
+
+                const high_overflowed = try self.buildCmp(.i_ne, mask, high_bits);
+
+                // If no overflow bits in low_bits, no extra work needs to be done.
+                // Careful: we still have to check the sign bit, so this branch
+                // only applies to i33 and such.
+                if (info.backing_bits == info.bits + 1) {
+                    break :blk .{ result, high_overflowed };
+                }
+
+                // Shift the result bits away to get the overflow bits.
+                const shift = Temporary.init(lhs.ty, try self.constInt(lhs.ty, info.bits - 1, .direct));
+                // Use SRA so that any sign bits are duplicated. Now we can just check if ALL bits are set
+                // for negative cases.
+                const low_overflow = try self.buildBinary(.sra, low_bits, shift);
+                const low_overflowed = try self.buildCmp(.i_ne, mask, low_overflow);
+
+                const overflowed = try self.buildBinary(.l_or, low_overflowed, high_overflowed);
+
+                break :blk .{ result, overflowed };
+            },
+        };
+
+        const ov = try self.intFromBool(overflowed);
 
         return try self.constructStruct(
             result_ty,
-            &.{ operand_ty, ov_ty },
-            &.{ try wip_result.finalize(), try wip_ov.finalize() },
+            &.{ result.ty, ov.ty },
+            &.{ try result.materialize(self), try ov.materialize(self) },
         );
     }
 
     fn airShlOverflow(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
         const mod = self.module;
+
         const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
         const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
-        const lhs = try self.resolve(extra.lhs);
-        const rhs = try self.resolve(extra.rhs);
-
-        const result_ty = self.typeOfIndex(inst);
-        const operand_ty = self.typeOf(extra.lhs);
-        const shift_ty = self.typeOf(extra.rhs);
-        const scalar_shift_ty_id = try self.resolveType(shift_ty.scalarType(mod), .direct);
-        const scalar_operand_ty_id = try self.resolveType(operand_ty.scalarType(mod), .direct);
 
-        const ov_ty = result_ty.structFieldType(1, self.module);
+        const base = try self.temporary(extra.lhs);
+        const shift = try self.temporary(extra.rhs);
 
-        const bool_ty_id = try self.resolveType(Type.bool, .direct);
-        const cmp_ty_id = if (self.isSpvVector(operand_ty))
-            // TODO: Resolving a vector type with .direct should return a SPIR-V vector
-            try self.spv.vectorType(operand_ty.vectorLen(mod), try self.resolveType(Type.bool, .direct))
-        else
-            bool_ty_id;
+        const result_ty = self.typeOfIndex(inst);
 
-        const info = self.arithmeticTypeInfo(operand_ty);
+        const info = self.arithmeticTypeInfo(base.ty);
         switch (info.class) {
-            .composite_integer => return self.todo("overflow shift for composite integers", .{}),
+            .composite_integer => unreachable, // TODO
             .integer, .strange_integer => {},
             .float, .bool => unreachable,
         }
 
-        var wip_result = try self.elementWise(operand_ty, false);
-        defer wip_result.deinit();
-        var wip_ov = try self.elementWise(ov_ty, false);
-        defer wip_ov.deinit();
-        for (wip_result.results, wip_ov.results, 0..) |*result_id, *ov_id, i| {
-            const lhs_elem_id = try wip_result.elementAt(operand_ty, lhs, i);
-            const rhs_elem_id = try wip_result.elementAt(shift_ty, rhs, i);
-
-            // Sometimes Zig doesn't make both of the arguments the same types here. SPIR-V expects that,
-            // so just manually upcast it if required.
-            const shift_id = if (scalar_shift_ty_id != scalar_operand_ty_id) blk: {
-                const shift_id = self.spv.allocId();
-                try self.func.body.emit(self.spv.gpa, .OpUConvert, .{
-                    .id_result_type = wip_result.ty_id,
-                    .id_result = shift_id,
-                    .unsigned_value = rhs_elem_id,
-                });
-                break :blk shift_id;
-            } else rhs_elem_id;
-
-            const value_id = self.spv.allocId();
-            try self.func.body.emit(self.spv.gpa, .OpShiftLeftLogical, .{
-                .id_result_type = wip_result.ty_id,
-                .id_result = value_id,
-                .base = lhs_elem_id,
-                .shift = shift_id,
-            });
-            result_id.* = try self.normalize(wip_result.ty, value_id, info);
+        // Sometimes Zig doesn't make both of the arguments the same type here. SPIR-V expects that,
+        // so just manually upcast the shift amount if required.
+        const casted_shift = try self.buildIntConvert(base.ty.scalarType(mod), shift);
 
-            const right_shift_id = self.spv.allocId();
-            switch (info.signedness) {
-                .signed => {
-                    try self.func.body.emit(self.spv.gpa, .OpShiftRightArithmetic, .{
-                        .id_result_type = wip_result.ty_id,
-                        .id_result = right_shift_id,
-                        .base = result_id.*,
-                        .shift = shift_id,
-                    });
-                },
-                .unsigned => {
-                    try self.func.body.emit(self.spv.gpa, .OpShiftRightLogical, .{
-                        .id_result_type = wip_result.ty_id,
-                        .id_result = right_shift_id,
-                        .base = result_id.*,
-                        .shift = shift_id,
-                    });
-                },
-            }
+        const left = try self.buildBinary(.sll, base, casted_shift);
+        const result = try self.normalize(left, info);
 
-            const overflowed_id = self.spv.allocId();
-            try self.func.body.emit(self.spv.gpa, .OpINotEqual, .{
-                .id_result_type = cmp_ty_id,
-                .id_result = overflowed_id,
-                .operand_1 = lhs_elem_id,
-                .operand_2 = right_shift_id,
-            });
+        const right = switch (info.signedness) {
+            .unsigned => try self.buildBinary(.srl, result, casted_shift),
+            .signed => try self.buildBinary(.sra, result, casted_shift),
+        };
 
-            ov_id.* = try self.intFromBool(wip_ov.ty, overflowed_id);
-        }
+        const overflowed = try self.buildCmp(.i_ne, base, right);
+        const ov = try self.intFromBool(overflowed);
 
         return try self.constructStruct(
             result_ty,
-            &.{ operand_ty, ov_ty },
-            &.{ try wip_result.finalize(), try wip_ov.finalize() },
+            &.{ result.ty, ov.ty },
+            &.{ try result.materialize(self), try ov.materialize(self) },
         );
     }
 
@@ -3274,122 +3936,67 @@ const DeclGen = struct {
         const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
         const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
 
-        const mulend1 = try self.resolve(extra.lhs);
-        const mulend2 = try self.resolve(extra.rhs);
-        const addend = try self.resolve(pl_op.operand);
-
-        const ty = self.typeOfIndex(inst);
+        const a = try self.temporary(extra.lhs);
+        const b = try self.temporary(extra.rhs);
+        const c = try self.temporary(pl_op.operand);
 
-        const info = self.arithmeticTypeInfo(ty);
+        const result_ty = self.typeOfIndex(inst);
+        const info = self.arithmeticTypeInfo(result_ty);
         assert(info.class == .float); // .mul_add is only emitted for floats
 
-        var wip = try self.elementWise(ty, false);
-        defer wip.deinit();
-        for (0..wip.results.len) |i| {
-            const mul_result = self.spv.allocId();
-            try self.func.body.emit(self.spv.gpa, .OpFMul, .{
-                .id_result_type = wip.ty_id,
-                .id_result = mul_result,
-                .operand_1 = try wip.elementAt(ty, mulend1, i),
-                .operand_2 = try wip.elementAt(ty, mulend2, i),
-            });
-
-            try self.func.body.emit(self.spv.gpa, .OpFAdd, .{
-                .id_result_type = wip.ty_id,
-                .id_result = wip.allocId(i),
-                .operand_1 = mul_result,
-                .operand_2 = try wip.elementAt(ty, addend, i),
-            });
-        }
-        return try wip.finalize();
+        const result = try self.buildFma(a, b, c);
+        return try result.materialize(self);
     }
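
On the Zig side, the .mul_add AIR instruction comes from the @mulAdd builtin; the old per-element OpFMul/OpFAdd pair is now a single buildFma over the whole (possibly vectorized) operation:

    fn fma(a: f32, b: f32, c: f32) f32 {
        return @mulAdd(f32, a, b, c); // a * b + c
    }
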
 
-    fn airClzCtz(self: *DeclGen, inst: Air.Inst.Index, op: enum { clz, ctz }) !?IdRef {
+    fn airClzCtz(self: *DeclGen, inst: Air.Inst.Index, op: UnaryOp) !?IdRef {
         if (self.liveness.isUnused(inst)) return null;
 
         const mod = self.module;
         const target = self.getTarget();
         const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-        const result_ty = self.typeOfIndex(inst);
-        const operand_ty = self.typeOf(ty_op.operand);
-        const operand = try self.resolve(ty_op.operand);
+        const operand = try self.temporary(ty_op.operand);
 
-        const info = self.arithmeticTypeInfo(operand_ty);
+        const scalar_result_ty = self.typeOfIndex(inst).scalarType(mod);
+
+        const info = self.arithmeticTypeInfo(operand.ty);
         switch (info.class) {
             .composite_integer => unreachable, // TODO
             .integer, .strange_integer => {},
             .float, .bool => unreachable,
         }
 
-        var wip = try self.elementWise(result_ty, false);
-        defer wip.deinit();
-
-        const elem_ty = if (wip.is_array) operand_ty.scalarType(mod) else operand_ty;
-        const elem_ty_id = try self.resolveType(elem_ty, .direct);
-
-        for (wip.results, 0..) |*result_id, i| {
-            const elem = try wip.elementAt(operand_ty, operand, i);
-
-            switch (target.os.tag) {
-                .opencl => {
-                    const set = try self.spv.importInstructionSet(.@"OpenCL.std");
-                    const ext_inst: u32 = switch (op) {
-                        .clz => 151, // clz
-                        .ctz => 152, // ctz
-                    };
+        switch (target.os.tag) {
+            .vulkan => unreachable, // TODO
+            else => {},
+        }
 
-                    // Note: result of OpenCL ctz/clz returns operand_ty, and we want result_ty.
-                    // result_ty is always large enough to hold the result, so we might have to down
-                    // cast it.
-                    const tmp = self.spv.allocId();
-                    try self.func.body.emit(self.spv.gpa, .OpExtInst, .{
-                        .id_result_type = elem_ty_id,
-                        .id_result = tmp,
-                        .set = set,
-                        .instruction = .{ .inst = ext_inst },
-                        .id_ref_4 = &.{elem},
-                    });
+        const count = try self.buildUnary(op, operand);
 
-                    // TODO: Comparison should be removed..
-                    // Its valid because SpvModule caches numeric types
-                    if (wip.ty_id == elem_ty_id) {
-                        result_id.* = tmp;
-                        continue;
-                    }
+        // The OpenCL ctz/clz instructions return the operand's type, while we want result_ty.
+        // result_ty is always large enough to hold the result, so we might have to
+        // down-cast it.
+        const result = try self.buildIntConvert(scalar_result_ty, count);
+        return try result.materialize(self);
+    }
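
The down-cast exists because Zig's @clz/@ctz result type is only wide enough to count the operand's bits, while (per the comment above) the OpenCL instructions return the operand's own type:

    fn leadingZeros(x: u64) u7 {
        return @clz(x); // u64 operand, u7 result: 0...64 fits in 7 bits
    }
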
 
-                    result_id.* = self.spv.allocId();
-                    if (result_ty.scalarType(mod).isSignedInt(mod)) {
-                        assert(elem_ty.scalarType(mod).isSignedInt(mod));
-                        try self.func.body.emit(self.spv.gpa, .OpSConvert, .{
-                            .id_result_type = wip.ty_id,
-                            .id_result = result_id.*,
-                            .signed_value = tmp,
-                        });
-                    } else {
-                        assert(elem_ty.scalarType(mod).isUnsignedInt(mod));
-                        try self.func.body.emit(self.spv.gpa, .OpUConvert, .{
-                            .id_result_type = wip.ty_id,
-                            .id_result = result_id.*,
-                            .unsigned_value = tmp,
-                        });
-                    }
-                },
-                .vulkan => unreachable, // TODO
-                else => unreachable,
-            }
-        }
+    fn airSelect(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
+        const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
+        const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
+        const pred = try self.temporary(pl_op.operand);
+        const a = try self.temporary(extra.lhs);
+        const b = try self.temporary(extra.rhs);
 
-        return try wip.finalize();
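+        // buildSelect is expected to deduce a Vectorization from the three
+        // temporaries and lower to OpSelect (componentwise for vector operands).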
+        const result = try self.buildSelect(pred, a, b);
+        return try result.materialize(self);
     }
 
     fn airSplat(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
         const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
+
         const operand_id = try self.resolve(ty_op.operand);
         const result_ty = self.typeOfIndex(inst);
-        var wip = try self.elementWise(result_ty, true);
-        defer wip.deinit();
-        @memset(wip.results, operand_id);
-        return try wip.finalize();
+
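+        // constructVectorSplat is assumed to repeat the scalar over every result
+        // component (e.g. via OpCompositeConstruct).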
+        return try self.constructVectorSplat(result_ty, operand_id);
     }
 
     fn airReduce(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
@@ -3402,23 +4009,33 @@ const DeclGen = struct {
 
         const info = self.arithmeticTypeInfo(operand_ty);
 
-        var result_id = try self.extractVectorComponent(scalar_ty, operand, 0);
         const len = operand_ty.vectorLen(mod);
 
+        const first = try self.extractVectorComponent(scalar_ty, operand, 0);
+
         switch (reduce.operation) {
             .Min, .Max => |op| {
-                const cmp_op: std.math.CompareOperator = if (op == .Max) .gt else .lt;
+                var result = Temporary.init(scalar_ty, first);
+                const cmp_op: MinMax = switch (op) {
+                    .Max => .max,
+                    .Min => .min,
+                    else => unreachable,
+                };
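+                // Scalarize the reduction: fold minMax over the extracted
+                // components, seeded with component 0.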
                 for (1..len) |i| {
-                    const lhs = result_id;
-                    const rhs = try self.extractVectorComponent(scalar_ty, operand, @intCast(i));
-                    result_id = try self.minMax(scalar_ty, cmp_op, lhs, rhs);
+                    const lhs = result;
+                    const rhs_id = try self.extractVectorComponent(scalar_ty, operand, @intCast(i));
+                    const rhs = Temporary.init(scalar_ty, rhs_id);
+
+                    result = try self.minMax(lhs, rhs, cmp_op);
                 }
 
-                return result_id;
+                return try result.materialize(self);
             },
             else => {},
         }
 
+        var result_id = first;
+
         const opcode: Opcode = switch (info.class) {
             .bool => switch (reduce.operation) {
                 .And => .OpLogicalAnd,
@@ -3602,50 +4219,66 @@ const DeclGen = struct {
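+    /// Compare lhs and rhs, given as Temporaries, yielding a bool (or vector of
+    /// bool) Temporary. A sketch of the general pattern of the new helpers:
+    ///   const lhs = try self.temporary(air_ref);       // wrap an AIR operand
+    ///   const res = try self.buildCmp(pred, lhs, rhs); // deduces a Vectorization
+    ///   return try res.materialize(self);              // force a single IdRef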
     fn cmp(
         self: *DeclGen,
         op: std.math.CompareOperator,
-        result_ty: Type,
-        ty: Type,
-        lhs_id: IdRef,
-        rhs_id: IdRef,
-    ) !IdRef {
+        lhs: Temporary,
+        rhs: Temporary,
+    ) !Temporary {
         const mod = self.module;
-        var cmp_lhs_id = lhs_id;
-        var cmp_rhs_id = rhs_id;
-        const bool_ty_id = try self.resolveType(Type.bool, .direct);
-        const op_ty = switch (ty.zigTypeTag(mod)) {
-            .Int, .Bool, .Float => ty,
-            .Enum => ty.intTagType(mod),
-            .ErrorSet => Type.u16,
-            .Pointer => blk: {
+        const scalar_ty = lhs.ty.scalarType(mod);
+        const is_vector = lhs.ty.isVector(mod);
+
+        switch (scalar_ty.zigTypeTag(mod)) {
+            .Int, .Bool, .Float => {},
+            .Enum => {
+                assert(!is_vector);
+                const ty = lhs.ty.intTagType(mod);
+                return try self.cmp(op, lhs.pun(ty), rhs.pun(ty));
+            },
+            .ErrorSet => {
+                assert(!is_vector);
+                return try self.cmp(op, lhs.pun(Type.u16), rhs.pun(Type.u16));
+            },
+            .Pointer => {
+                assert(!is_vector);
                 // Note that while SPIR-V offers OpPtrEqual and OpPtrNotEqual, they are
                 // currently not implemented in the SPIR-V LLVM translator. Thus, we emit these using
                 // OpConvertPtrToU...
-                cmp_lhs_id = self.spv.allocId();
-                cmp_rhs_id = self.spv.allocId();
 
                 const usize_ty_id = try self.resolveType(Type.usize, .direct);
 
+                const lhs_int_id = self.spv.allocId();
                 try self.func.body.emit(self.spv.gpa, .OpConvertPtrToU, .{
                     .id_result_type = usize_ty_id,
-                    .id_result = cmp_lhs_id,
-                    .pointer = lhs_id,
+                    .id_result = lhs_int_id,
+                    .pointer = try lhs.materialize(self),
                 });
 
+                const rhs_int_id = self.spv.allocId();
                 try self.func.body.emit(self.spv.gpa, .OpConvertPtrToU, .{
                     .id_result_type = usize_ty_id,
-                    .id_result = cmp_rhs_id,
-                    .pointer = rhs_id,
+                    .id_result = rhs_int_id,
+                    .pointer = try rhs.materialize(self),
                 });
 
-                break :blk Type.usize;
+                const lhs_int = Temporary.init(Type.usize, lhs_int_id);
+                const rhs_int = Temporary.init(Type.usize, rhs_int_id);
+                return try self.cmp(op, lhs_int, rhs_int);
             },
             .Optional => {
+                assert(!is_vector);
+
+                const ty = lhs.ty;
+
                 const payload_ty = ty.optionalChild(mod);
                 if (ty.optionalReprIsPayload(mod)) {
                     assert(payload_ty.hasRuntimeBitsIgnoreComptime(mod));
                     assert(!payload_ty.isSlice(mod));
-                    return self.cmp(op, Type.bool, payload_ty, lhs_id, rhs_id);
+
+                    return try self.cmp(op, lhs.pun(payload_ty), rhs.pun(payload_ty));
                 }
 
+                const lhs_id = try lhs.materialize(self);
+                const rhs_id = try rhs.materialize(self);
+
                 const lhs_valid_id = if (payload_ty.hasRuntimeBitsIgnoreComptime(mod))
                     try self.extractField(Type.bool, lhs_id, 1)
                 else
@@ -3656,8 +4289,11 @@ const DeclGen = struct {
                 else
                     try self.convertToDirect(Type.bool, rhs_id);
 
+                const lhs_valid = Temporary.init(Type.bool, lhs_valid_id);
+                const rhs_valid = Temporary.init(Type.bool, rhs_valid_id);
+
                 if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) {
-                    return try self.cmp(op, Type.bool, Type.bool, lhs_valid_id, rhs_valid_id);
+                    return try self.cmp(op, lhs_valid, rhs_valid);
                 }
 
                 // a = lhs_valid
@@ -3678,118 +4314,71 @@ const DeclGen = struct {
                 const lhs_pl_id = try self.extractField(payload_ty, lhs_id, 0);
                 const rhs_pl_id = try self.extractField(payload_ty, rhs_id, 0);
 
-                switch (op) {
-                    .eq => {
-                        const valid_eq_id = try self.cmp(.eq, Type.bool, Type.bool, lhs_valid_id, rhs_valid_id);
-                        const pl_eq_id = try self.cmp(op, Type.bool, payload_ty, lhs_pl_id, rhs_pl_id);
-                        const lhs_not_valid_id = self.spv.allocId();
-                        try self.func.body.emit(self.spv.gpa, .OpLogicalNot, .{
-                            .id_result_type = bool_ty_id,
-                            .id_result = lhs_not_valid_id,
-                            .operand = lhs_valid_id,
-                        });
-                        const impl_id = self.spv.allocId();
-                        try self.func.body.emit(self.spv.gpa, .OpLogicalOr, .{
-                            .id_result_type = bool_ty_id,
-                            .id_result = impl_id,
-                            .operand_1 = lhs_not_valid_id,
-                            .operand_2 = pl_eq_id,
-                        });
-                        const result_id = self.spv.allocId();
-                        try self.func.body.emit(self.spv.gpa, .OpLogicalAnd, .{
-                            .id_result_type = bool_ty_id,
-                            .id_result = result_id,
-                            .operand_1 = valid_eq_id,
-                            .operand_2 = impl_id,
-                        });
-                        return result_id;
-                    },
-                    .neq => {
-                        const valid_neq_id = try self.cmp(.neq, Type.bool, Type.bool, lhs_valid_id, rhs_valid_id);
-                        const pl_neq_id = try self.cmp(op, Type.bool, payload_ty, lhs_pl_id, rhs_pl_id);
-
-                        const impl_id = self.spv.allocId();
-                        try self.func.body.emit(self.spv.gpa, .OpLogicalAnd, .{
-                            .id_result_type = bool_ty_id,
-                            .id_result = impl_id,
-                            .operand_1 = lhs_valid_id,
-                            .operand_2 = pl_neq_id,
-                        });
-                        const result_id = self.spv.allocId();
-                        try self.func.body.emit(self.spv.gpa, .OpLogicalOr, .{
-                            .id_result_type = bool_ty_id,
-                            .id_result = result_id,
-                            .operand_1 = valid_neq_id,
-                            .operand_2 = impl_id,
-                        });
-                        return result_id;
-                    },
+                const lhs_pl = Temporary.init(payload_ty, lhs_pl_id);
+                const rhs_pl = Temporary.init(payload_ty, rhs_pl_id);
+
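+                // With a = lhs_valid, b = rhs_valid, and x, y the payloads:
+                //   eq:  (a == b) and (!a or x == y)
+                //   neq: (a != b) or  (a and x != y)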
+                return switch (op) {
+                    .eq => try self.buildBinary(
+                        .l_and,
+                        try self.cmp(.eq, lhs_valid, rhs_valid),
+                        try self.buildBinary(
+                            .l_or,
+                            try self.buildUnary(.l_not, lhs_valid),
+                            try self.cmp(.eq, lhs_pl, rhs_pl),
+                        ),
+                    ),
+                    .neq => try self.buildBinary(
+                        .l_or,
+                        try self.cmp(.neq, lhs_valid, rhs_valid),
+                        try self.buildBinary(
+                            .l_and,
+                            lhs_valid,
+                            try self.cmp(.neq, lhs_pl, rhs_pl),
+                        ),
+                    ),
                     else => unreachable,
-                }
-            },
-            .Vector => {
-                var wip = try self.elementWise(result_ty, true);
-                defer wip.deinit();
-                const scalar_ty = ty.scalarType(mod);
-                for (wip.results, 0..) |*result_id, i| {
-                    const lhs_elem_id = try wip.elementAt(ty, lhs_id, i);
-                    const rhs_elem_id = try wip.elementAt(ty, rhs_id, i);
-                    result_id.* = try self.cmp(op, Type.bool, scalar_ty, lhs_elem_id, rhs_elem_id);
-                }
-                return wip.finalize();
+                };
             },
             else => unreachable,
-        };
+        }
 
-        const opcode: Opcode = opcode: {
-            const info = self.arithmeticTypeInfo(op_ty);
-            const signedness = switch (info.class) {
-                .composite_integer => {
-                    return self.todo("binary operations for composite integers", .{});
-                },
-                .float => break :opcode switch (op) {
-                    .eq => .OpFOrdEqual,
-                    .neq => .OpFUnordNotEqual,
-                    .lt => .OpFOrdLessThan,
-                    .lte => .OpFOrdLessThanEqual,
-                    .gt => .OpFOrdGreaterThan,
-                    .gte => .OpFOrdGreaterThanEqual,
-                },
-                .bool => break :opcode switch (op) {
-                    .eq => .OpLogicalEqual,
-                    .neq => .OpLogicalNotEqual,
-                    else => unreachable,
+        const info = self.arithmeticTypeInfo(scalar_ty);
+        const pred: CmpPredicate = switch (info.class) {
+            .composite_integer => unreachable, // TODO
+            .float => switch (op) {
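+                // Only .neq is unordered (OpFUnordNotEqual) so that it is true when
+                // either operand is NaN; the rest are ordered and false for NaN.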
+                .eq => .f_oeq,
+                .neq => .f_une,
+                .lt => .f_olt,
+                .lte => .f_ole,
+                .gt => .f_ogt,
+                .gte => .f_oge,
+            },
+            .bool => switch (op) {
+                .eq => .l_eq,
+                .neq => .l_ne,
+                else => unreachable,
+            },
+            .integer, .strange_integer => switch (info.signedness) {
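+                // Equality is signedness-agnostic; only the ordering predicates
+                // distinguish signed from unsigned comparisons.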
+                .signed => switch (op) {
+                    .eq => .i_eq,
+                    .neq => .i_ne,
+                    .lt => .s_lt,
+                    .lte => .s_le,
+                    .gt => .s_gt,
+                    .gte => .s_ge,
                 },
-                .integer, .strange_integer => info.signedness,
-            };
-
-            break :opcode switch (signedness) {
                 .unsigned => switch (op) {
-                    .eq => .OpIEqual,
-                    .neq => .OpINotEqual,
-                    .lt => .OpULessThan,
-                    .lte => .OpULessThanEqual,
-                    .gt => .OpUGreaterThan,
-                    .gte => .OpUGreaterThanEqual,
-                },
-                .signed => switch (op) {
-                    .eq => .OpIEqual,
-                    .neq => .OpINotEqual,
-                    .lt => .OpSLessThan,
-                    .lte => .OpSLessThanEqual,
-                    .gt => .OpSGreaterThan,
-                    .gte => .OpSGreaterThanEqual,
+                    .eq => .i_eq,
+                    .neq => .i_ne,
+                    .lt => .u_lt,
+                    .lte => .u_le,
+                    .gt => .u_gt,
+                    .gte => .u_ge,
                 },
-            };
+            },
         };
 
-        const result_id = self.spv.allocId();
-        try self.func.body.emitRaw(self.spv.gpa, opcode, 4);
-        self.func.body.writeOperand(spec.IdResultType, bool_ty_id);
-        self.func.body.writeOperand(spec.IdResult, result_id);
-        self.func.body.writeOperand(spec.IdResultType, cmp_lhs_id);
-        self.func.body.writeOperand(spec.IdResultType, cmp_rhs_id);
-        return result_id;
+        return try self.buildCmp(pred, lhs, rhs);
     }
 
     fn airCmp(
@@ -3798,24 +4387,22 @@ const DeclGen = struct {
         comptime op: std.math.CompareOperator,
     ) !?IdRef {
         const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-        const lhs_id = try self.resolve(bin_op.lhs);
-        const rhs_id = try self.resolve(bin_op.rhs);
-        const ty = self.typeOf(bin_op.lhs);
-        const result_ty = self.typeOfIndex(inst);
+        const lhs = try self.temporary(bin_op.lhs);
+        const rhs = try self.temporary(bin_op.rhs);
 
-        return try self.cmp(op, result_ty, ty, lhs_id, rhs_id);
+        const result = try self.cmp(op, lhs, rhs);
+        return try result.materialize(self);
     }
 
     fn airVectorCmp(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
         const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
         const vec_cmp = self.air.extraData(Air.VectorCmp, ty_pl.payload).data;
-        const lhs_id = try self.resolve(vec_cmp.lhs);
-        const rhs_id = try self.resolve(vec_cmp.rhs);
+        const lhs = try self.temporary(vec_cmp.lhs);
+        const rhs = try self.temporary(vec_cmp.rhs);
         const op = vec_cmp.compareOperator();
-        const ty = self.typeOf(vec_cmp.lhs);
-        const result_ty = self.typeOfIndex(inst);
 
-        return try self.cmp(op, result_ty, ty, lhs_id, rhs_id);
+        const result = try self.cmp(op, lhs, rhs);
+        return try result.materialize(self);
     }
 
     /// Bitcast one type to another. Note: both the input and output types are expected in **direct** representation.
@@ -3881,7 +4468,8 @@ const DeclGen = struct {
         // should we change the representation of strange integers?
         if (dst_ty.zigTypeTag(mod) == .Int) {
             const info = self.arithmeticTypeInfo(dst_ty);
-            return try self.normalize(dst_ty, result_id, info);
+            const result = try self.normalize(Temporary.init(dst_ty, result_id), info);
+            return try result.materialize(self);
         }
 
         return result_id;
@@ -3897,46 +4485,28 @@ const DeclGen = struct {
 
     fn airIntCast(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
         const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-        const operand_id = try self.resolve(ty_op.operand);
-        const src_ty = self.typeOf(ty_op.operand);
+        const src = try self.temporary(ty_op.operand);
         const dst_ty = self.typeOfIndex(inst);
 
-        const src_info = self.arithmeticTypeInfo(src_ty);
+        const src_info = self.arithmeticTypeInfo(src.ty);
         const dst_info = self.arithmeticTypeInfo(dst_ty);
 
         if (src_info.backing_bits == dst_info.backing_bits) {
-            return operand_id;
+            return try src.materialize(self);
         }
 
-        var wip = try self.elementWise(dst_ty, false);
-        defer wip.deinit();
-        for (wip.results, 0..) |*result_id, i| {
-            const elem_id = try wip.elementAt(src_ty, operand_id, i);
-            const value_id = self.spv.allocId();
-            switch (dst_info.signedness) {
-                .signed => try self.func.body.emit(self.spv.gpa, .OpSConvert, .{
-                    .id_result_type = wip.ty_id,
-                    .id_result = value_id,
-                    .signed_value = elem_id,
-                }),
-                .unsigned => try self.func.body.emit(self.spv.gpa, .OpUConvert, .{
-                    .id_result_type = wip.ty_id,
-                    .id_result = value_id,
-                    .unsigned_value = elem_id,
-                }),
-            }
+        const converted = try self.buildIntConvert(dst_ty, src);
 
-            // Make sure to normalize the result if shrinking.
-            // Because strange ints are sign extended in their backing
-            // type, we don't need to normalize when growing the type. The
-            // representation is already the same.
-            if (dst_info.bits < src_info.bits) {
-                result_id.* = try self.normalize(wip.ty, value_id, dst_info);
-            } else {
-                result_id.* = value_id;
-            }
-        }
-        return try wip.finalize();
+        // Make sure to normalize the result if shrinking.
+        // Because strange ints are sign extended in their backing
+        // type, we don't need to normalize when growing the type. The
+        // representation is already the same.
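+        // (For example, u20 -> u9, assuming 32- and 16-bit backing types: after the
+        // convert, bits 9..15 may still be set and must be masked off.)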
+        const result = if (dst_info.bits < src_info.bits)
+            try self.normalize(converted, dst_info)
+        else
+            converted;
+
+        return try result.materialize(self);
     }
 
     fn intFromPtr(self: *DeclGen, operand_id: IdRef) !IdRef {
@@ -4011,16 +4581,9 @@ const DeclGen = struct {
 
     fn airIntFromBool(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
         const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
-        const operand_id = try self.resolve(un_op);
-        const result_ty = self.typeOfIndex(inst);
-
-        var wip = try self.elementWise(result_ty, false);
-        defer wip.deinit();
-        for (wip.results, 0..) |*result_id, i| {
-            const elem_id = try wip.elementAt(Type.bool, operand_id, i);
-            result_id.* = try self.intFromBool(wip.ty, elem_id);
-        }
-        return try wip.finalize();
+        const operand = try self.temporary(un_op);
+        const result = try self.intFromBool(operand);
+        return try result.materialize(self);
     }
 
     fn airFloatCast(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
@@ -4040,33 +4603,21 @@ const DeclGen = struct {
 
     fn airNot(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
         const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-        const operand_id = try self.resolve(ty_op.operand);
+        const operand = try self.temporary(ty_op.operand);
         const result_ty = self.typeOfIndex(inst);
         const info = self.arithmeticTypeInfo(result_ty);
 
-        var wip = try self.elementWise(result_ty, false);
-        defer wip.deinit();
-
-        for (0..wip.results.len) |i| {
-            const args = .{
-                .id_result_type = wip.ty_id,
-                .id_result = wip.allocId(i),
-                .operand = try wip.elementAt(result_ty, operand_id, i),
-            };
-            switch (info.class) {
-                .bool => {
-                    try self.func.body.emit(self.spv.gpa, .OpLogicalNot, args);
-                },
-                .float => unreachable,
-                .composite_integer => unreachable, // TODO
-                .strange_integer, .integer => {
-                    // Note: strange integer bits will be masked before operations that do not hold under modulo.
-                    try self.func.body.emit(self.spv.gpa, .OpNot, args);
-                },
-            }
-        }
+        const result = switch (info.class) {
+            .bool => try self.buildUnary(.l_not, operand),
+            .float => unreachable,
+            .composite_integer => unreachable, // TODO
+            .strange_integer, .integer => blk: {
+                const complement = try self.buildUnary(.bit_not, operand);
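+                // A bitwise complement also sets the padding bits of strange
+                // integers, so normalize the result back to canonical form.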
+                break :blk try self.normalize(complement, info);
+            },
+        };
 
-        return try wip.finalize();
+        return try result.materialize(self);
     }
 
     fn airArrayToSlice(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
@@ -4338,8 +4889,11 @@ const DeclGen = struct {
         // For now, just generate a temporary and use that.
         // TODO: This backend probably also should use isByRef from llvm...
 
+        const is_vector = array_ty.isVector(mod);
+
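+        // Vector elements are accessed in direct representation, while array
+        // elements are stored indirectly; the element load below must use the
+        // matching representation.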
+        const elem_repr: Repr = if (is_vector) .direct else .indirect;
         const ptr_array_ty_id = try self.ptrType2(array_ty, .Function, .direct);
-        const ptr_elem_ty_id = try self.ptrType2(elem_ty, .Function, .direct);
+        const ptr_elem_ty_id = try self.ptrType2(elem_ty, .Function, elem_repr);
 
         const tmp_id = self.spv.allocId();
         try self.func.prologue.emit(self.spv.gpa, .OpVariable, .{
@@ -4357,12 +4911,12 @@ const DeclGen = struct {
 
         const result_id = self.spv.allocId();
         try self.func.body.emit(self.spv.gpa, .OpLoad, .{
-            .id_result_type = try self.resolveType(elem_ty, .direct),
+            .id_result_type = try self.resolveType(elem_ty, elem_repr),
             .id_result = result_id,
             .pointer = elem_ptr_id,
         });
 
-        if (array_ty.isVector(mod)) {
+        if (is_vector) {
             // Result is already in direct representation
             return result_id;
         }
@@ -4585,7 +5139,10 @@ const DeclGen = struct {
             if (field_offset == 0) break :base_ptr_int field_ptr_int;
 
             const field_offset_id = try self.constInt(Type.usize, field_offset, .direct);
-            break :base_ptr_int try self.binOpSimple(Type.usize, field_ptr_int, field_offset_id, .OpISub);
+            const field_ptr_tmp = Temporary.init(Type.usize, field_ptr_int);
+            const field_offset_tmp = Temporary.init(Type.usize, field_offset_id);
+            const result = try self.buildBinary(.i_sub, field_ptr_tmp, field_offset_tmp);
+            break :base_ptr_int try result.materialize(self);
         };
 
         const base_ptr = self.spv.allocId();
@@ -5400,13 +5957,17 @@ const DeclGen = struct {
             else
                 loaded_id;
 
-            const payload_ty_id = try self.resolveType(ptr_ty, .direct);
-            const null_id = try self.spv.constNull(payload_ty_id);
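+            // This optional is represented as a pointer, where null is the null
+            // pointer constant; is_null thus reduces to a plain pointer comparison.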
+            const ptr_ty_id = try self.resolveType(ptr_ty, .direct);
+            const null_id = try self.spv.constNull(ptr_ty_id);
+            const null_tmp = Temporary.init(ptr_ty, null_id);
+            const ptr = Temporary.init(ptr_ty, ptr_id);
+
             const op: std.math.CompareOperator = switch (pred) {
                 .is_null => .eq,
                 .is_non_null => .neq,
             };
-            return try self.cmp(op, Type.bool, ptr_ty, ptr_id, null_id);
+            const result = try self.cmp(op, ptr, null_tmp);
+            return try result.materialize(self);
         }
 
         const is_non_null_id = blk: {
test/behavior/abs.zig
@@ -152,7 +152,6 @@ test "@abs int vectors" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
 
     try comptime testAbsIntVectors(1);
     try testAbsIntVectors(1);
test/behavior/floatop.zig
@@ -275,7 +275,6 @@ test "@sqrt f16" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -287,7 +286,6 @@ test "@sqrt f32/f64" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -389,7 +387,6 @@ test "@sqrt with vectors" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     try testSqrtWithVectors();
@@ -410,7 +407,6 @@ test "@sin f16" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -422,7 +418,6 @@ test "@sin f32/f64" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -464,7 +459,6 @@ test "@sin with vectors" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -486,7 +480,6 @@ test "@cos f16" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -498,7 +491,6 @@ test "@cos f32/f64" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -540,7 +532,6 @@ test "@cos with vectors" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -574,7 +565,6 @@ test "@tan f32/f64" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -616,7 +606,6 @@ test "@tan with vectors" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -638,7 +627,6 @@ test "@exp f16" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -650,7 +638,6 @@ test "@exp f32/f64" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -696,7 +683,6 @@ test "@exp with vectors" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -718,7 +704,6 @@ test "@exp2 f16" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -730,7 +715,6 @@ test "@exp2 f32/f64" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -771,7 +755,6 @@ test "@exp2 with @vectors" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -793,7 +776,6 @@ test "@log f16" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -805,7 +787,6 @@ test "@log f32/f64" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -847,7 +828,6 @@ test "@log with @vectors" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -866,7 +846,6 @@ test "@log2 f16" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -878,7 +857,6 @@ test "@log2 f32/f64" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -919,7 +897,6 @@ test "@log2 with vectors" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     // https://github.com/ziglang/zig/issues/13681
     if (builtin.zig_backend == .stage2_llvm and
@@ -945,7 +922,6 @@ test "@log10 f16" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -957,7 +933,6 @@ test "@log10 f32/f64" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -998,7 +973,6 @@ test "@log10 with vectors" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -1243,7 +1217,6 @@ test "@ceil f16" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     try testCeil(f16);
@@ -1255,7 +1228,6 @@ test "@ceil f32/f64" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     try testCeil(f32);
@@ -1320,7 +1292,6 @@ test "@ceil with vectors" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and
         !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest;
@@ -1344,7 +1315,6 @@ test "@trunc f16" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch.isMIPS()) {
@@ -1361,7 +1331,6 @@ test "@trunc f32/f64" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch.isMIPS()) {
@@ -1430,7 +1399,6 @@ fn testTrunc(comptime T: type) !void {
 test "@trunc with vectors" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and
@@ -1454,7 +1422,6 @@ test "neg f16" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
@@ -1472,7 +1439,6 @@ test "neg f32/f64" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
test/behavior/math.zig
@@ -440,7 +440,6 @@ test "division" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
@@ -530,7 +529,6 @@ test "division half-precision floats" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -622,7 +620,6 @@ test "negation wrapping" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
 
     try expectEqual(@as(u1, 1), negateWrap(u1, 1));
 }
@@ -1031,6 +1028,60 @@ test "@mulWithOverflow bitsize > 32" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
 
+    {
+        var a: u40 = 3;
+        var b: u40 = 0x55_5555_5555;
+        var ov = @mulWithOverflow(a, b);
+
+        try expect(ov[0] == 0xff_ffff_ffff);
+        try expect(ov[1] == 0);
+
+        // Check that overflow bits in the low-word of wide-multiplications are checked too.
+        // Intermediate result is less than 2**64.
+        b = 0x55_5555_5556;
+        ov = @mulWithOverflow(a, b);
+        try expect(ov[0] == 2);
+        try expect(ov[1] == 1);
+
+        // Check that overflow bits in the high-word of wide-multiplications are checked too.
+        // Intermediate result is more than 2**64 and bits 40..64 are not set.
+        a = 0x10_0000_0000;
+        b = 0x10_0000_0000;
+        ov = @mulWithOverflow(a, b);
+        try expect(ov[0] == 0);
+        try expect(ov[1] == 1);
+    }
+
+    {
+        var a: i40 = 3;
+        var b: i40 = -0x2a_aaaa_aaaa;
+        var ov = @mulWithOverflow(a, b);
+
+        try expect(ov[0] == -0x7f_ffff_fffe);
+        try expect(ov[1] == 0);
+
+        // Check that overflow into the sign bit is detected.
+        b = -0x2a_aaaa_aaab;
+        ov = @mulWithOverflow(a, b);
+        try expect(ov[0] == 0x7f_ffff_ffff);
+        try expect(ov[1] == 1);
+
+        // Check that the low-word bits above the sign bit are checked.
+        a = 6;
+        ov = @mulWithOverflow(a, b);
+        try expect(ov[0] == -2);
+        try expect(ov[1] == 1);
+
+        // Check that overflow bits in the high-word of wide-multiplications are checked too.
+        // The high parts and the sign bit of the low word are all 1.
+        a = 0x08_0000_0000;
+        b = -0x08_0000_0001;
+        ov = @mulWithOverflow(a, b);
+
+        try expect(ov[0] == -0x8_0000_0000);
+        try expect(ov[1] == 1);
+    }
+
     {
         var a: u62 = 3;
         _ = &a;
@@ -1580,7 +1631,6 @@ test "@round f16" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -1592,7 +1642,6 @@ test "@round f32/f64" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
test/behavior/select.zig
@@ -8,7 +8,6 @@ test "@select vectors" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     try comptime selectVectors();
@@ -39,7 +38,6 @@ test "@select arrays" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and
         !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .avx2)) return error.SkipZigTest;
test/behavior/vector.zig
@@ -548,7 +548,6 @@ test "vector division operators" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_llvm and comptime builtin.cpu.arch.isArmOrThumb()) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     const S = struct {