Commit 58b9200106

Ali Cheraghi <alichraghi@proton.me>
2025-08-03 09:44:10
spirv: use packed struct's backing int type for shift value
1 parent d15a7b1
Changed files (2)
src
src/arch/spirv/CodeGen.zig
@@ -431,15 +431,12 @@ fn resolveUav(cg: *CodeGen, val: InternPool.Index) !Id {
     const zcu = cg.module.zcu;
     const ty: Type = .fromInterned(zcu.intern_pool.typeOf(val));
     const ty_id = try cg.resolveType(ty, .indirect);
-    const decl_ptr_ty_id = try cg.module.ptrType(ty_id, cg.module.storageClass(.generic));
 
     const spv_decl_index = blk: {
         const entry = try cg.module.uav_link.getOrPut(cg.module.gpa, .{ val, .function });
         if (entry.found_existing) {
             try cg.addFunctionDep(entry.value_ptr.*, .function);
-
-            const result_id = cg.module.declPtr(entry.value_ptr.*).result_id;
-            return try cg.castToGeneric(decl_ptr_ty_id, result_id);
+            return cg.module.declPtr(entry.value_ptr.*).result_id;
         }
 
         const spv_decl_index = try cg.module.allocDecl(.invocation_global);
@@ -520,7 +517,7 @@ fn resolveUav(cg: *CodeGen, val: InternPool.Index) !Id {
         });
     }
 
-    return try cg.castToGeneric(decl_ptr_ty_id, result_id);
+    return result_id;
 }
 
 fn addFunctionDep(cg: *CodeGen, decl_index: Module.Decl.Index, storage_class: StorageClass) !void {
@@ -535,21 +532,6 @@ fn addFunctionDep(cg: *CodeGen, decl_index: Module.Decl.Index, storage_class: St
     }
 }
 
-fn castToGeneric(cg: *CodeGen, type_id: Id, ptr_id: Id) !Id {
-    const target = cg.module.zcu.getTarget();
-    if (target.cpu.has(.spirv, .generic_pointer)) {
-        const result_id = cg.module.allocId();
-        try cg.body.emit(cg.module.gpa, .OpPtrCastToGeneric, .{
-            .id_result_type = type_id,
-            .id_result = result_id,
-            .pointer = ptr_id,
-        });
-        return result_id;
-    }
-
-    return ptr_id;
-}
-
 /// Start a new SPIR-V block. Emits the label of the new block, and stores which
 /// block we are currently generating.
 /// Note that there is no such thing as nested blocks like in ZIR or AIR, so we don't need to
@@ -1209,11 +1191,7 @@ fn constantNavRef(cg: *CodeGen, ty: Type, nav_index: InternPool.Nav.Index) !Id {
 
     const spv_decl_index = try cg.module.resolveNav(ip, nav_index);
     const spv_decl = cg.module.declPtr(spv_decl_index);
-
-    const decl_id = switch (spv_decl.kind) {
-        .func => unreachable, // TODO: Is this possible?
-        .global, .invocation_global => spv_decl.result_id,
-    };
+    assert(spv_decl.kind != .func);
 
     const storage_class = cg.module.storageClass(nav.getAddrspace());
     try cg.addFunctionDep(spv_decl_index, storage_class);
@@ -1221,23 +1199,18 @@ fn constantNavRef(cg: *CodeGen, ty: Type, nav_index: InternPool.Nav.Index) !Id {
     const nav_ty_id = try cg.resolveType(nav_ty, .indirect);
     const decl_ptr_ty_id = try cg.module.ptrType(nav_ty_id, storage_class);
 
-    const ptr_id = switch (storage_class) {
-        .generic => try cg.castToGeneric(decl_ptr_ty_id, decl_id),
-        else => decl_id,
-    };
-
     if (decl_ptr_ty_id != ty_id) {
         // Differing pointer types, insert a cast.
         const casted_ptr_id = cg.module.allocId();
         try cg.body.emit(cg.module.gpa, .OpBitcast, .{
             .id_result_type = ty_id,
             .id_result = casted_ptr_id,
-            .operand = ptr_id,
+            .operand = spv_decl.result_id,
         });
         return casted_ptr_id;
-    } else {
-        return ptr_id;
     }
+
+    return spv_decl.result_id;
 }
 
 // Turn a Zig type's name into a cache reference.
@@ -2120,28 +2093,7 @@ fn buildSelect(cg: *CodeGen, condition: Temporary, lhs: Temporary, rhs: Temporar
     return v.finalize(result_ty, results);
 }
 
-const CmpPredicate = enum {
-    l_eq,
-    l_ne,
-    i_ne,
-    i_eq,
-    s_lt,
-    s_gt,
-    s_le,
-    s_ge,
-    u_lt,
-    u_gt,
-    u_le,
-    u_ge,
-    f_oeq,
-    f_une,
-    f_olt,
-    f_ole,
-    f_ogt,
-    f_oge,
-};
-
-fn buildCmp(cg: *CodeGen, pred: CmpPredicate, lhs: Temporary, rhs: Temporary) !Temporary {
+fn buildCmp(cg: *CodeGen, opcode: Opcode, lhs: Temporary, rhs: Temporary) !Temporary {
     const v = cg.vectorization(.{ lhs, rhs });
     const ops = v.components();
     const results = cg.module.allocIds(ops);
@@ -2153,27 +2105,6 @@ fn buildCmp(cg: *CodeGen, pred: CmpPredicate, lhs: Temporary, rhs: Temporary) !T
     const op_lhs = try v.prepare(cg, lhs);
     const op_rhs = try v.prepare(cg, rhs);
 
-    const opcode: Opcode = switch (pred) {
-        .l_eq => .OpLogicalEqual,
-        .l_ne => .OpLogicalNotEqual,
-        .i_eq => .OpIEqual,
-        .i_ne => .OpINotEqual,
-        .s_lt => .OpSLessThan,
-        .s_gt => .OpSGreaterThan,
-        .s_le => .OpSLessThanEqual,
-        .s_ge => .OpSGreaterThanEqual,
-        .u_lt => .OpULessThan,
-        .u_gt => .OpUGreaterThan,
-        .u_le => .OpULessThanEqual,
-        .u_ge => .OpUGreaterThanEqual,
-        .f_oeq => .OpFOrdEqual,
-        .f_une => .OpFUnordNotEqual,
-        .f_olt => .OpFOrdLessThan,
-        .f_ole => .OpFOrdLessThanEqual,
-        .f_ogt => .OpFOrdGreaterThan,
-        .f_oge => .OpFOrdGreaterThanEqual,
-    };
-
     for (0..ops) |i| {
         try cg.body.emitRaw(cg.module.gpa, opcode, 4);
         cg.body.writeOperand(Id, op_result_ty_id);
@@ -2278,7 +2209,10 @@ fn buildUnary(cg: *CodeGen, op: UnaryOp, operand: Temporary) !Temporary {
                 .log,
                 .log2,
                 .log10,
-                => return cg.todo("implement unary operation '{s}' for {s} os", .{ @tagName(op), @tagName(target.os.tag) }),
+                => return cg.todo(
+                    "implement unary operation '{s}' for {s} os",
+                    .{ @tagName(op), @tagName(target.os.tag) },
+                ),
                 else => unreachable,
             },
             else => unreachable,
@@ -2298,40 +2232,8 @@ fn buildUnary(cg: *CodeGen, op: UnaryOp, operand: Temporary) !Temporary {
     return v.finalize(result_ty, results);
 }
 
-const BinaryOp = enum {
-    i_add,
-    f_add,
-    i_sub,
-    f_sub,
-    i_mul,
-    f_mul,
-    s_div,
-    u_div,
-    f_div,
-    s_rem,
-    f_rem,
-    s_mod,
-    u_mod,
-    f_mod,
-    srl,
-    sra,
-    sll,
-    bit_and,
-    bit_or,
-    bit_xor,
-    f_max,
-    s_max,
-    u_max,
-    f_min,
-    s_min,
-    u_min,
-    l_and,
-    l_or,
-};
-
-fn buildBinary(cg: *CodeGen, op: BinaryOp, lhs: Temporary, rhs: Temporary) !Temporary {
+fn buildBinary(cg: *CodeGen, opcode: Opcode, lhs: Temporary, rhs: Temporary) !Temporary {
     const zcu = cg.module.zcu;
-    const target = cg.module.zcu.getTarget();
 
     const v = cg.vectorization(.{ lhs, rhs });
     const ops = v.components();
@@ -2344,73 +2246,12 @@ fn buildBinary(cg: *CodeGen, op: BinaryOp, lhs: Temporary, rhs: Temporary) !Temp
     const op_lhs = try v.prepare(cg, lhs);
     const op_rhs = try v.prepare(cg, rhs);
 
-    if (switch (op) {
-        .i_add => .OpIAdd,
-        .f_add => .OpFAdd,
-        .i_sub => .OpISub,
-        .f_sub => .OpFSub,
-        .i_mul => .OpIMul,
-        .f_mul => .OpFMul,
-        .s_div => .OpSDiv,
-        .u_div => .OpUDiv,
-        .f_div => .OpFDiv,
-        .s_rem => .OpSRem,
-        .f_rem => .OpFRem,
-        .s_mod => .OpSMod,
-        .u_mod => .OpUMod,
-        .f_mod => .OpFMod,
-        .srl => .OpShiftRightLogical,
-        .sra => .OpShiftRightArithmetic,
-        .sll => .OpShiftLeftLogical,
-        .bit_and => .OpBitwiseAnd,
-        .bit_or => .OpBitwiseOr,
-        .bit_xor => .OpBitwiseXor,
-        .l_and => .OpLogicalAnd,
-        .l_or => .OpLogicalOr,
-        else => @as(?Opcode, null),
-    }) |opcode| {
-        for (0..ops) |i| {
-            try cg.body.emitRaw(cg.module.gpa, opcode, 4);
-            cg.body.writeOperand(Id, op_result_ty_id);
-            cg.body.writeOperand(Id, results.at(i));
-            cg.body.writeOperand(Id, op_lhs.at(i));
-            cg.body.writeOperand(Id, op_rhs.at(i));
-        }
-    } else {
-        const set = try cg.importExtendedSet();
-
-        // TODO: Put these numbers in some definition
-        const extinst: u32 = switch (target.os.tag) {
-            .opencl => switch (op) {
-                .f_max => 27, // fmax
-                .s_max => 156, // s_max
-                .u_max => 157, // u_max
-                .f_min => 28, // fmin
-                .s_min => 158, // s_min
-                .u_min => 159, // u_min
-                else => unreachable,
-            },
-            .vulkan, .opengl => switch (op) {
-                .f_max => 40, // FMax
-                .s_max => 42, // SMax
-                .u_max => 41, // UMax
-                .f_min => 37, // FMin
-                .s_min => 39, // SMin
-                .u_min => 38, // UMin
-                else => unreachable,
-            },
-            else => unreachable,
-        };
-
-        for (0..ops) |i| {
-            try cg.body.emit(cg.module.gpa, .OpExtInst, .{
-                .id_result_type = op_result_ty_id,
-                .id_result = results.at(i),
-                .set = set,
-                .instruction = .{ .inst = extinst },
-                .id_ref_4 = &.{ op_lhs.at(i), op_rhs.at(i) },
-            });
-        }
+    for (0..ops) |i| {
+        try cg.body.emitRaw(cg.module.gpa, opcode, 4);
+        cg.body.writeOperand(Id, op_result_ty_id);
+        cg.body.writeOperand(Id, results.at(i));
+        cg.body.writeOperand(Id, op_lhs.at(i));
+        cg.body.writeOperand(Id, op_rhs.at(i));
     }
 
     return v.finalize(result_ty, results);
@@ -2420,10 +2261,7 @@ fn buildBinary(cg: *CodeGen, op: BinaryOp, lhs: Temporary, rhs: Temporary) !Temp
 /// or OpIMul and s_mul_hi or u_mul_hi on OpenCL.
 fn buildWideMul(
     cg: *CodeGen,
-    op: enum {
-        s_mul_extended,
-        u_mul_extended,
-    },
+    signedness: std.builtin.Signedness,
     lhs: Temporary,
     rhs: Temporary,
 ) !struct { Temporary, Temporary } {
@@ -2450,9 +2288,9 @@ fn buildWideMul(
             // OpUMulExtended. For these we will use the OpenCL s_mul_hi to compute the high-order bits
             // instead.
             const set = try cg.importExtendedSet();
-            const overflow_inst: u32 = switch (op) {
-                .s_mul_extended => 160, // s_mul_hi
-                .u_mul_extended => 203, // u_mul_hi
+            const overflow_inst: u32 = switch (signedness) {
+                .signed => 160, // s_mul_hi
+                .unsigned => 203, // u_mul_hi
             };
 
             for (0..ops) |i| {
@@ -2481,9 +2319,9 @@ fn buildWideMul(
             }));
             const op_result_ty_id = try cg.resolveType(op_result_ty, .direct);
 
-            const opcode: Opcode = switch (op) {
-                .s_mul_extended => .OpSMulExtended,
-                .u_mul_extended => .OpUMulExtended,
+            const opcode: Opcode = switch (signedness) {
+                .signed => .OpSMulExtended,
+                .unsigned => .OpUMulExtended,
             };
 
             for (0..ops) |i| {
@@ -2718,7 +2556,7 @@ fn convertToDirect(cg: *CodeGen, ty: Type, operand_id: Id) !Id {
             };
 
             const result = try cg.buildCmp(
-                .i_ne,
+                .OpINotEqual,
                 Temporary.init(operand_ty, operand_id),
                 Temporary.init(.u1, false_id),
             );
@@ -2817,9 +2655,9 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) Error!void {
     const air_tags = cg.air.instructions.items(.tag);
     const maybe_result_id: ?Id = switch (air_tags[@intFromEnum(inst)]) {
         // zig fmt: off
-            .add, .add_wrap, .add_optimized => try cg.airArithOp(inst, .f_add, .i_add, .i_add),
-            .sub, .sub_wrap, .sub_optimized => try cg.airArithOp(inst, .f_sub, .i_sub, .i_sub),
-            .mul, .mul_wrap, .mul_optimized => try cg.airArithOp(inst, .f_mul, .i_mul, .i_mul),
+            .add, .add_wrap, .add_optimized => try cg.airArithOp(inst, .OpFAdd, .OpIAdd, .OpIAdd),
+            .sub, .sub_wrap, .sub_optimized => try cg.airArithOp(inst, .OpFSub, .OpISub, .OpISub),
+            .mul, .mul_wrap, .mul_optimized => try cg.airArithOp(inst, .OpFMul, .OpIMul, .OpIMul),
 
             .sqrt => try cg.airUnOpSimple(inst, .sqrt),
             .sin => try cg.airUnOpSimple(inst, .sin),
@@ -2837,15 +2675,15 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) Error!void {
             .trunc_float => try cg.airUnOpSimple(inst, .trunc),
             .neg, .neg_optimized => try cg.airUnOpSimple(inst, .f_neg),
 
-            .div_float, .div_float_optimized => try cg.airArithOp(inst, .f_div, .s_div, .u_div),
+            .div_float, .div_float_optimized => try cg.airArithOp(inst, .OpFDiv, .OpSDiv, .OpUDiv),
             .div_floor, .div_floor_optimized => try cg.airDivFloor(inst),
             .div_trunc, .div_trunc_optimized => try cg.airDivTrunc(inst),
 
-            .rem, .rem_optimized => try cg.airArithOp(inst, .f_rem, .s_rem, .u_mod),
-            .mod, .mod_optimized => try cg.airArithOp(inst, .f_mod, .s_mod, .u_mod),
+            .rem, .rem_optimized => try cg.airArithOp(inst, .OpFRem, .OpSRem, .OpUMod),
+            .mod, .mod_optimized => try cg.airArithOp(inst, .OpFMod, .OpSMod, .OpUMod),
 
-            .add_with_overflow => try cg.airAddSubOverflow(inst, .i_add, .u_lt, .s_lt),
-            .sub_with_overflow => try cg.airAddSubOverflow(inst, .i_sub, .u_gt, .s_gt),
+            .add_with_overflow => try cg.airAddSubOverflow(inst, .OpIAdd, .OpULessThan, .OpSLessThan),
+            .sub_with_overflow => try cg.airAddSubOverflow(inst, .OpISub, .OpUGreaterThan, .OpSGreaterThan),
             .mul_with_overflow => try cg.airMulOverflow(inst),
             .shl_with_overflow => try cg.airShlOverflow(inst),
 
@@ -2864,14 +2702,14 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) Error!void {
             .ptr_add => try cg.airPtrAdd(inst),
             .ptr_sub => try cg.airPtrSub(inst),
 
-            .bit_and  => try cg.airBinOpSimple(inst, .bit_and),
-            .bit_or   => try cg.airBinOpSimple(inst, .bit_or),
-            .xor      => try cg.airBinOpSimple(inst, .bit_xor),
-            .bool_and => try cg.airBinOpSimple(inst, .l_and),
-            .bool_or  => try cg.airBinOpSimple(inst, .l_or),
+            .bit_and  => try cg.airBinOpSimple(inst, .OpBitwiseAnd),
+            .bit_or   => try cg.airBinOpSimple(inst, .OpBitwiseOr),
+            .xor      => try cg.airBinOpSimple(inst, .OpBitwiseXor),
+            .bool_and => try cg.airBinOpSimple(inst, .OpLogicalAnd),
+            .bool_or  => try cg.airBinOpSimple(inst, .OpLogicalOr),
 
-            .shl, .shl_exact => try cg.airShift(inst, .sll, .sll),
-            .shr, .shr_exact => try cg.airShift(inst, .srl, .sra),
+            .shl, .shl_exact => try cg.airShift(inst, .OpShiftLeftLogical, .OpShiftLeftLogical),
+            .shr, .shr_exact => try cg.airShift(inst, .OpShiftRightLogical, .OpShiftRightArithmetic),
 
             .min => try cg.airMinMax(inst, .min),
             .max => try cg.airMinMax(inst, .max),
@@ -2983,7 +2821,7 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) Error!void {
     try cg.inst_results.putNoClobber(gpa, inst, result_id);
 }
 
-fn airBinOpSimple(cg: *CodeGen, inst: Air.Inst.Index, op: BinaryOp) !?Id {
+fn airBinOpSimple(cg: *CodeGen, inst: Air.Inst.Index, op: Opcode) !?Id {
     const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
     const lhs = try cg.temporary(bin_op.lhs);
     const rhs = try cg.temporary(bin_op.rhs);
@@ -2992,7 +2830,7 @@ fn airBinOpSimple(cg: *CodeGen, inst: Air.Inst.Index, op: BinaryOp) !?Id {
     return try result.materialize(cg);
 }
 
-fn airShift(cg: *CodeGen, inst: Air.Inst.Index, unsigned: BinaryOp, signed: BinaryOp) !?Id {
+fn airShift(cg: *CodeGen, inst: Air.Inst.Index, unsigned: Opcode, signed: Opcode) !?Id {
     const zcu = cg.module.zcu;
     const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
 
@@ -3042,28 +2880,77 @@ fn airMinMax(cg: *CodeGen, inst: Air.Inst.Index, op: MinMax) !?Id {
 }
 
 fn minMax(cg: *CodeGen, lhs: Temporary, rhs: Temporary, op: MinMax) !Temporary {
+    const zcu = cg.module.zcu;
+    const target = zcu.getTarget();
     const info = cg.arithmeticTypeInfo(lhs.ty);
 
-    const binop: BinaryOp = switch (info.class) {
-        .float => switch (op) {
-            .min => .f_min,
-            .max => .f_max,
+    const v = cg.vectorization(.{ lhs, rhs });
+    const ops = v.components();
+    const results = cg.module.allocIds(ops);
+
+    const op_result_ty = lhs.ty.scalarType(zcu);
+    const op_result_ty_id = try cg.resolveType(op_result_ty, .direct);
+    const result_ty = try v.resultType(cg, lhs.ty);
+
+    const op_lhs = try v.prepare(cg, lhs);
+    const op_rhs = try v.prepare(cg, rhs);
+
+    const ext_inst: u32 = switch (target.os.tag) {
+        .opencl => switch (info.class) {
+            .float => switch (op) {
+                .min => 28, // fmin
+                .max => 27, // fmax
+            },
+            .integer,
+            .strange_integer,
+            .composite_integer,
+            => switch (info.signedness) {
+                .signed => switch (op) {
+                    .min => 158, // s_min
+                    .max => 156, // s_max
+                },
+                .unsigned => switch (op) {
+                    .min => 159, // u_min
+                    .max => 157, // u_max
+                },
+            },
+            .bool => unreachable,
         },
-        .integer, .strange_integer => switch (info.signedness) {
-            .signed => switch (op) {
-                .min => .s_min,
-                .max => .s_max,
+        .vulkan, .opengl => switch (info.class) {
+            .float => switch (op) {
+                .min => 37, // FMin
+                .max => 40, // FMax
             },
-            .unsigned => switch (op) {
-                .min => .u_min,
-                .max => .u_max,
+            .integer,
+            .strange_integer,
+            .composite_integer,
+            => switch (info.signedness) {
+                .signed => switch (op) {
+                    .min => 39, // SMin
+                    .max => 42, // SMax
+                },
+                .unsigned => switch (op) {
+                    .min => 38, // UMin
+                    .max => 41, // UMax
+                },
             },
+            .bool => unreachable,
         },
-        .composite_integer => unreachable, // TODO
-        .bool => unreachable,
+        else => unreachable,
     };
 
-    return try cg.buildBinary(binop, lhs, rhs);
+    const set = try cg.importExtendedSet();
+    for (0..ops) |i| {
+        try cg.body.emit(cg.module.gpa, .OpExtInst, .{
+            .id_result_type = op_result_ty_id,
+            .id_result = results.at(i),
+            .set = set,
+            .instruction = .{ .inst = ext_inst },
+            .id_ref_4 = &.{ op_lhs.at(i), op_rhs.at(i) },
+        });
+    }
+
+    return v.finalize(result_ty, results);
 }
 
 /// This function normalizes values to a canonical representation
@@ -3083,14 +2970,14 @@ fn normalize(cg: *CodeGen, value: Temporary, info: ArithmeticTypeInfo) !Temporar
             .unsigned => {
                 const mask_value = if (info.bits == 64) 0xFFFF_FFFF_FFFF_FFFF else (@as(u64, 1) << @as(u6, @intCast(info.bits))) - 1;
                 const mask_id = try cg.constInt(ty.scalarType(zcu), mask_value);
-                return try cg.buildBinary(.bit_and, value, Temporary.init(ty.scalarType(zcu), mask_id));
+                return try cg.buildBinary(.OpBitwiseAnd, value, Temporary.init(ty.scalarType(zcu), mask_id));
             },
             .signed => {
                // Shift left and right so that we can copy the sign bit that way.
                 const shift_amt_id = try cg.constInt(ty.scalarType(zcu), info.backing_bits - info.bits);
                 const shift_amt: Temporary = .init(ty.scalarType(zcu), shift_amt_id);
-                const left = try cg.buildBinary(.sll, value, shift_amt);
-                return try cg.buildBinary(.sra, left, shift_amt);
+                const left = try cg.buildBinary(.OpShiftLeftLogical, value, shift_amt);
+                return try cg.buildBinary(.OpShiftRightArithmetic, left, shift_amt);
             },
         },
     }
@@ -3108,7 +2995,7 @@ fn airDivFloor(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
         .integer, .strange_integer => {
             switch (info.signedness) {
                 .unsigned => {
-                    const result = try cg.buildBinary(.u_div, lhs, rhs);
+                    const result = try cg.buildBinary(.OpUDiv, lhs, rhs);
                     return try result.materialize(cg);
                 },
                 .signed => {},
@@ -3118,26 +3005,26 @@ fn airDivFloor(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
             //   (a / b) - (a % b != 0 && a < 0 != b < 0);
             // There shouldn't be any overflow issues.
 
-            const div = try cg.buildBinary(.s_div, lhs, rhs);
-            const rem = try cg.buildBinary(.s_rem, lhs, rhs);
+            const div = try cg.buildBinary(.OpSDiv, lhs, rhs);
+            const rem = try cg.buildBinary(.OpSRem, lhs, rhs);
 
             const zero: Temporary = .init(lhs.ty, try cg.constInt(lhs.ty, 0));
 
-            const rem_is_not_zero = try cg.buildCmp(.i_ne, rem, zero);
+            const rem_is_not_zero = try cg.buildCmp(.OpINotEqual, rem, zero);
 
             const result_negative = try cg.buildCmp(
-                .l_ne,
-                try cg.buildCmp(.s_lt, lhs, zero),
-                try cg.buildCmp(.s_lt, rhs, zero),
+                .OpLogicalNotEqual,
+                try cg.buildCmp(.OpSLessThan, lhs, zero),
+                try cg.buildCmp(.OpSLessThan, rhs, zero),
             );
             const rem_is_not_zero_and_result_is_negative = try cg.buildBinary(
-                .l_and,
+                .OpLogicalAnd,
                 rem_is_not_zero,
                 result_negative,
             );
 
             const result = try cg.buildBinary(
-                .i_sub,
+                .OpISub,
                 div,
                 try cg.intFromBool2(rem_is_not_zero_and_result_is_negative, div.ty),
             );
@@ -3145,7 +3032,7 @@ fn airDivFloor(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
             return try result.materialize(cg);
         },
         .float => {
-            const div = try cg.buildBinary(.f_div, lhs, rhs);
+            const div = try cg.buildBinary(.OpFDiv, lhs, rhs);
             const result = try cg.buildUnary(.floor, div);
             return try result.materialize(cg);
         },
@@ -3164,16 +3051,16 @@ fn airDivTrunc(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
         .composite_integer => unreachable, // TODO
         .integer, .strange_integer => switch (info.signedness) {
             .unsigned => {
-                const result = try cg.buildBinary(.u_div, lhs, rhs);
+                const result = try cg.buildBinary(.OpUDiv, lhs, rhs);
                 return try result.materialize(cg);
             },
             .signed => {
-                const result = try cg.buildBinary(.s_div, lhs, rhs);
+                const result = try cg.buildBinary(.OpSDiv, lhs, rhs);
                 return try result.materialize(cg);
             },
         },
         .float => {
-            const div = try cg.buildBinary(.f_div, lhs, rhs);
+            const div = try cg.buildBinary(.OpFDiv, lhs, rhs);
             const result = try cg.buildUnary(.trunc, div);
             return try result.materialize(cg);
         },
@@ -3191,9 +3078,9 @@ fn airUnOpSimple(cg: *CodeGen, inst: Air.Inst.Index, op: UnaryOp) !?Id {
 fn airArithOp(
     cg: *CodeGen,
     inst: Air.Inst.Index,
-    comptime fop: BinaryOp,
-    comptime sop: BinaryOp,
-    comptime uop: BinaryOp,
+    comptime fop: Opcode,
+    comptime sop: Opcode,
+    comptime uop: Opcode,
 ) !?Id {
     const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
 
@@ -3253,11 +3140,11 @@ fn abs(cg: *CodeGen, result_ty: Type, value: Temporary) !Temporary {
 fn airAddSubOverflow(
     cg: *CodeGen,
     inst: Air.Inst.Index,
-    comptime add: BinaryOp,
-    comptime ucmp: CmpPredicate,
-    comptime scmp: CmpPredicate,
+    comptime add: Opcode,
+    u_opcode: Opcode,
+    s_opcode: Opcode,
 ) !?Id {
-    _ = scmp;
+    _ = s_opcode;
     // Note: OpIAddCarry and OpISubBorrow are not really useful here: For unsigned numbers,
     // there is in both cases only one extra operation required. For signed operations,
     // the overflow bit is set then going from 0x80.. to 0x00.., but this doesn't actually
@@ -3285,7 +3172,7 @@ fn airAddSubOverflow(
     const overflowed = switch (info.signedness) {
         // Overflow happened if the result is smaller than either of the operands. It doesn't matter which.
         // For subtraction the conditions need to be swapped.
-        .unsigned => try cg.buildCmp(ucmp, result, lhs),
+        .unsigned => try cg.buildCmp(u_opcode, result, lhs),
         // For signed operations, we check the signs of the operands and the result.
         .signed => blk: {
             // Signed overflow detection using the sign bits of the operands and the result.
@@ -3297,19 +3184,19 @@ fn airAddSubOverflow(
             //   (sign(a) != sign(b)) && (sign(a) != sign(result))
             const zero: Temporary = .init(rhs.ty, try cg.constInt(rhs.ty, 0));
 
-            const lhs_is_neg = try cg.buildCmp(.s_lt, lhs, zero);
-            const rhs_is_neg = try cg.buildCmp(.s_lt, rhs, zero);
-            const result_is_neg = try cg.buildCmp(.s_lt, result, zero);
+            const lhs_is_neg = try cg.buildCmp(.OpSLessThan, lhs, zero);
+            const rhs_is_neg = try cg.buildCmp(.OpSLessThan, rhs, zero);
+            const result_is_neg = try cg.buildCmp(.OpSLessThan, result, zero);
 
-            const signs_match = try cg.buildCmp(.l_eq, lhs_is_neg, rhs_is_neg);
-            const result_sign_differs = try cg.buildCmp(.l_ne, lhs_is_neg, result_is_neg);
+            const signs_match = try cg.buildCmp(.OpLogicalEqual, lhs_is_neg, rhs_is_neg);
+            const result_sign_differs = try cg.buildCmp(.OpLogicalNotEqual, lhs_is_neg, result_is_neg);
 
-            const overflow_condition = if (add == .i_add)
+            const overflow_condition = if (add == .OpIAdd)
                 signs_match
-            else // .i_sub
+            else // .OpISub
                 try cg.buildUnary(.l_not, signs_match);
 
-            break :blk try cg.buildBinary(.l_and, overflow_condition, result_sign_differs);
+            break :blk try cg.buildCmp(.OpLogicalAnd, overflow_condition, result_sign_differs);
         },
     };
 
@@ -3361,23 +3248,23 @@ fn airMulOverflow(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
                 const casted_lhs = try cg.buildConvert(op_ty, lhs);
                 const casted_rhs = try cg.buildConvert(op_ty, rhs);
 
-                const full_result = try cg.buildBinary(.i_mul, casted_lhs, casted_rhs);
+                const full_result = try cg.buildBinary(.OpIMul, casted_lhs, casted_rhs);
 
                 const low_bits = try cg.buildConvert(lhs.ty, full_result);
                 const result = try cg.normalize(low_bits, info);
 
                 // Shift the result bits away to get the overflow bits.
                 const shift: Temporary = .init(full_result.ty, try cg.constInt(full_result.ty, info.bits));
-                const overflow = try cg.buildBinary(.srl, full_result, shift);
+                const overflow = try cg.buildBinary(.OpShiftRightLogical, full_result, shift);
 
                 // Directly check if its zero in the op_ty without converting first.
                 const zero: Temporary = .init(full_result.ty, try cg.constInt(full_result.ty, 0));
-                const overflowed = try cg.buildCmp(.i_ne, zero, overflow);
+                const overflowed = try cg.buildCmp(.OpINotEqual, zero, overflow);
 
                 break :blk .{ result, overflowed };
             }
 
-            const low_bits, const high_bits = try cg.buildWideMul(.u_mul_extended, lhs, rhs);
+            const low_bits, const high_bits = try cg.buildWideMul(.unsigned, lhs, rhs);
 
             // Truncate the result, if required.
             const result = try cg.normalize(low_bits, info);
@@ -3386,17 +3273,17 @@ fn airMulOverflow(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
             // high bits of the low word of the result (those outside the range of the
             // int) are nonzero.
             const zero: Temporary = .init(lhs.ty, try cg.constInt(lhs.ty, 0));
-            const high_overflowed = try cg.buildCmp(.i_ne, zero, high_bits);
+            const high_overflowed = try cg.buildCmp(.OpINotEqual, zero, high_bits);
 
             // If no overflow bits in low_bits, no extra work needs to be done.
             if (info.backing_bits == info.bits) break :blk .{ result, high_overflowed };
 
             // Shift the result bits away to get the overflow bits.
             const shift: Temporary = .init(lhs.ty, try cg.constInt(lhs.ty, info.bits));
-            const low_overflow = try cg.buildBinary(.srl, low_bits, shift);
-            const low_overflowed = try cg.buildCmp(.i_ne, zero, low_overflow);
+            const low_overflow = try cg.buildBinary(.OpShiftRightLogical, low_bits, shift);
+            const low_overflowed = try cg.buildCmp(.OpINotEqual, zero, low_overflow);
 
-            const overflowed = try cg.buildBinary(.l_or, low_overflowed, high_overflowed);
+            const overflowed = try cg.buildCmp(.OpLogicalOr, low_overflowed, high_overflowed);
 
             break :blk .{ result, overflowed };
         },
@@ -3412,16 +3299,16 @@ fn airMulOverflow(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
             //   (lhs > 0 && rhs < 0) || (lhs < 0 && rhs > 0)
 
             const zero: Temporary = .init(lhs.ty, try cg.constInt(lhs.ty, 0));
-            const lhs_negative = try cg.buildCmp(.s_lt, lhs, zero);
-            const rhs_negative = try cg.buildCmp(.s_lt, rhs, zero);
-            const lhs_positive = try cg.buildCmp(.s_gt, lhs, zero);
-            const rhs_positive = try cg.buildCmp(.s_gt, rhs, zero);
+            const lhs_negative = try cg.buildCmp(.OpSLessThan, lhs, zero);
+            const rhs_negative = try cg.buildCmp(.OpSLessThan, rhs, zero);
+            const lhs_positive = try cg.buildCmp(.OpSGreaterThan, lhs, zero);
+            const rhs_positive = try cg.buildCmp(.OpSGreaterThan, rhs, zero);
 
             // Set to `true` if we expect -1.
             const expected_overflow_bit = try cg.buildBinary(
-                .l_or,
-                try cg.buildBinary(.l_and, lhs_positive, rhs_negative),
-                try cg.buildBinary(.l_and, lhs_negative, rhs_positive),
+                .OpLogicalOr,
+                try cg.buildCmp(.OpLogicalAnd, lhs_positive, rhs_negative),
+                try cg.buildCmp(.OpLogicalAnd, lhs_negative, rhs_positive),
             );
 
             if (maybe_op_ty_bits) |op_ty_bits| {
@@ -3430,7 +3317,7 @@ fn airMulOverflow(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
                 const casted_lhs = try cg.buildConvert(op_ty, lhs);
                 const casted_rhs = try cg.buildConvert(op_ty, rhs);
 
-                const full_result = try cg.buildBinary(.i_mul, casted_lhs, casted_rhs);
+                const full_result = try cg.buildBinary(.OpIMul, casted_lhs, casted_rhs);
 
                 // Truncate to the result type.
                 const low_bits = try cg.buildConvert(lhs.ty, full_result);
@@ -3443,18 +3330,18 @@ fn airMulOverflow(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
                 const shift: Temporary = .init(full_result.ty, try cg.constInt(full_result.ty, info.bits - 1));
                 // Use SRA so that any sign bits are duplicated. Now we can just check if ALL bits are set
                 // for negative cases.
-                const overflow = try cg.buildBinary(.sra, full_result, shift);
+                const overflow = try cg.buildBinary(.OpShiftRightArithmetic, full_result, shift);
 
                 const long_all_set: Temporary = .init(full_result.ty, try cg.constInt(full_result.ty, -1));
                 const long_zero: Temporary = .init(full_result.ty, try cg.constInt(full_result.ty, 0));
                 const mask = try cg.buildSelect(expected_overflow_bit, long_all_set, long_zero);
 
-                const overflowed = try cg.buildCmp(.i_ne, mask, overflow);
+                const overflowed = try cg.buildCmp(.OpINotEqual, mask, overflow);
 
                 break :blk .{ result, overflowed };
             }
 
-            const low_bits, const high_bits = try cg.buildWideMul(.s_mul_extended, lhs, rhs);
+            const low_bits, const high_bits = try cg.buildWideMul(.signed, lhs, rhs);
 
             // Truncate result if required.
             const result = try cg.normalize(low_bits, info);
@@ -3465,7 +3352,7 @@ fn airMulOverflow(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
             // Like with unsigned, overflow happened if high_bits are not the ones we expect,
             // and we also need to check some ones from the low bits.
 
-            const high_overflowed = try cg.buildCmp(.i_ne, mask, high_bits);
+            const high_overflowed = try cg.buildCmp(.OpINotEqual, mask, high_bits);
 
             // If no overflow bits in low_bits, no extra work needs to be done.
             // Careful, we still have to check the sign bit, so this branch
@@ -3476,10 +3363,10 @@ fn airMulOverflow(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
             const shift: Temporary = .init(lhs.ty, try cg.constInt(lhs.ty, info.bits - 1));
             // Use SRA so that any sign bits are duplicated. Now we can just check if ALL bits are set
             // for negative cases.
-            const low_overflow = try cg.buildBinary(.sra, low_bits, shift);
-            const low_overflowed = try cg.buildCmp(.i_ne, mask, low_overflow);
+            const low_overflow = try cg.buildBinary(.OpShiftRightArithmetic, low_bits, shift);
+            const low_overflowed = try cg.buildCmp(.OpINotEqual, mask, low_overflow);
 
-            const overflowed = try cg.buildBinary(.l_or, low_overflowed, high_overflowed);
+            const overflowed = try cg.buildCmp(.OpLogicalOr, low_overflowed, high_overflowed);
 
             break :blk .{ result, overflowed };
         },
@@ -3517,15 +3404,15 @@ fn airShlOverflow(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
     // so just manually upcast it if required.
     const casted_shift = try cg.buildConvert(base.ty.scalarType(zcu), shift);
 
-    const left = try cg.buildBinary(.sll, base, casted_shift);
+    const left = try cg.buildBinary(.OpShiftLeftLogical, base, casted_shift);
     const result = try cg.normalize(left, info);
 
     const right = switch (info.signedness) {
-        .unsigned => try cg.buildBinary(.srl, result, casted_shift),
-        .signed => try cg.buildBinary(.sra, result, casted_shift),
+        .unsigned => try cg.buildBinary(.OpShiftRightLogical, result, casted_shift),
+        .signed => try cg.buildBinary(.OpShiftRightArithmetic, result, casted_shift),
     };
 
-    const overflowed = try cg.buildCmp(.i_ne, base, right);
+    const overflowed = try cg.buildCmp(.OpINotEqual, base, right);
     const ov = try cg.intFromBool(overflowed);
 
     const result_ty_id = try cg.resolveType(result_ty, .direct);
@@ -3957,19 +3844,19 @@ fn cmp(
 
             return switch (op) {
                 .eq => try cg.buildBinary(
-                    .l_and,
+                    .OpLogicalAnd,
                     try cg.cmp(.eq, lhs_valid, rhs_valid),
                     try cg.buildBinary(
-                        .l_or,
+                        .OpLogicalOr,
                         try cg.buildUnary(.l_not, lhs_valid),
                         try cg.cmp(.eq, lhs_pl, rhs_pl),
                     ),
                 ),
                 .neq => try cg.buildBinary(
-                    .l_or,
+                    .OpLogicalOr,
                     try cg.cmp(.neq, lhs_valid, rhs_valid),
                     try cg.buildBinary(
-                        .l_and,
+                        .OpLogicalAnd,
                         lhs_valid,
                         try cg.cmp(.neq, lhs_pl, rhs_pl),
                     ),
@@ -3981,37 +3868,37 @@ fn cmp(
     }
 
     const info = cg.arithmeticTypeInfo(scalar_ty);
-    const pred: CmpPredicate = switch (info.class) {
+    const pred: Opcode = switch (info.class) {
         .composite_integer => unreachable, // TODO
         .float => switch (op) {
-            .eq => .f_oeq,
-            .neq => .f_une,
-            .lt => .f_olt,
-            .lte => .f_ole,
-            .gt => .f_ogt,
-            .gte => .f_oge,
+            .eq => .OpFOrdEqual,
+            .neq => .OpFUnordNotEqual,
+            .lt => .OpFOrdLessThan,
+            .lte => .OpFOrdLessThanEqual,
+            .gt => .OpFOrdGreaterThan,
+            .gte => .OpFOrdGreaterThanEqual,
         },
         .bool => switch (op) {
-            .eq => .l_eq,
-            .neq => .l_ne,
+            .eq => .OpLogicalEqual,
+            .neq => .OpLogicalNotEqual,
             else => unreachable,
         },
         .integer, .strange_integer => switch (info.signedness) {
             .signed => switch (op) {
-                .eq => .i_eq,
-                .neq => .i_ne,
-                .lt => .s_lt,
-                .lte => .s_le,
-                .gt => .s_gt,
-                .gte => .s_ge,
+                .eq => .OpIEqual,
+                .neq => .OpINotEqual,
+                .lt => .OpSLessThan,
+                .lte => .OpSLessThanEqual,
+                .gt => .OpSGreaterThan,
+                .gte => .OpSGreaterThanEqual,
             },
             .unsigned => switch (op) {
-                .eq => .i_eq,
-                .neq => .i_ne,
-                .lt => .u_lt,
-                .lte => .u_le,
-                .gt => .u_gt,
-                .gte => .u_ge,
+                .eq => .OpIEqual,
+                .neq => .OpINotEqual,
+                .lt => .OpULessThan,
+                .lte => .OpULessThanEqual,
+                .gt => .OpUGreaterThan,
+                .gte => .OpUGreaterThanEqual,
             },
         },
     };
@@ -4312,12 +4199,12 @@ fn airAggregateInit(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
                         .ty = field_int_ty,
                         .value = .{ .singleton = field_int_id },
                     });
-                    const shifted = try cg.buildBinary(.sll, extended_int_conv, .{
+                    const shifted = try cg.buildBinary(.OpShiftLeftLogical, extended_int_conv, .{
                         .ty = backing_int_ty,
                         .value = .{ .singleton = shift_rhs },
                     });
                     const running_int_tmp = try cg.buildBinary(
-                        .bit_or,
+                        .OpBitwiseOr,
                         .{ .ty = backing_int_ty, .value = .{ .singleton = running_int_id } },
                         shifted,
                     );
@@ -4770,17 +4657,20 @@ fn airStructFieldVal(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
         .@"struct" => switch (object_ty.containerLayout(zcu)) {
             .@"packed" => {
                 const struct_ty = zcu.typeToPackedStruct(object_ty).?;
+                const struct_backing_int_bits = cg.module.backingIntBits(@intCast(object_ty.bitSize(zcu))).@"0";
                 const bit_offset = zcu.structPackedFieldBitOffset(struct_ty, field_index);
-                const bit_offset_id = try cg.constInt(.u16, bit_offset);
+                // We use the same int type the packed struct is backed by, because even though it would
+                // be valid SPIR-V to use a smaller type like u16, some implementations like PoCL will complain.
+                const bit_offset_id = try cg.constInt(object_ty, bit_offset);
                 const signedness = if (field_ty.isInt(zcu)) field_ty.intInfo(zcu).signedness else .unsigned;
                 const field_bit_size: u16 = @intCast(field_ty.bitSize(zcu));
                 const field_int_ty = try pt.intType(signedness, field_bit_size);
                 const shift_lhs: Temporary = .{ .ty = object_ty, .value = .{ .singleton = object_id } };
-                const shift = try cg.buildBinary(.srl, shift_lhs, .{ .ty = .u16, .value = .{ .singleton = bit_offset_id } });
+                const shift = try cg.buildBinary(.OpShiftRightLogical, shift_lhs, .{ .ty = object_ty, .value = .{ .singleton = bit_offset_id } });
                 const mask_id = try cg.constInt(object_ty, (@as(u64, 1) << @as(u6, @intCast(field_bit_size))) - 1);
-                const masked = try cg.buildBinary(.bit_and, shift, .{ .ty = object_ty, .value = .{ .singleton = mask_id } });
+                const masked = try cg.buildBinary(.OpBitwiseAnd, shift, .{ .ty = object_ty, .value = .{ .singleton = mask_id } });
                 const result_id = blk: {
-                    if (cg.module.backingIntBits(field_bit_size).@"0" == cg.module.backingIntBits(@intCast(object_ty.bitSize(zcu))).@"0")
+                    if (cg.module.backingIntBits(field_bit_size).@"0" == struct_backing_int_bits)
                         break :blk try cg.bitCast(field_int_ty, object_ty, try masked.materialize(cg));
                     const trunc = try cg.buildConvert(field_int_ty, masked);
                     break :blk try trunc.materialize(cg);
@@ -4799,7 +4689,7 @@ fn airStructFieldVal(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
                 const int_ty = try pt.intType(signedness, field_bit_size);
                 const mask_id = try cg.constInt(backing_int_ty, (@as(u64, 1) << @as(u6, @intCast(field_bit_size))) - 1);
                 const masked = try cg.buildBinary(
-                    .bit_and,
+                    .OpBitwiseAnd,
                     .{ .ty = backing_int_ty, .value = .{ .singleton = object_id } },
                     .{ .ty = backing_int_ty, .value = .{ .singleton = mask_id } },
                 );
@@ -4858,7 +4748,7 @@ fn airFieldParentPtr(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
         const field_offset_id = try cg.constInt(.usize, field_offset);
         const field_ptr_tmp: Temporary = .init(.usize, field_ptr_int);
         const field_offset_tmp: Temporary = .init(.usize, field_offset_id);
-        const result = try cg.buildBinary(.i_sub, field_ptr_tmp, field_offset_tmp);
+        const result = try cg.buildBinary(.OpISub, field_ptr_tmp, field_offset_tmp);
         break :base_ptr_int try result.materialize(cg);
     };
 
@@ -4947,7 +4837,6 @@ fn alloc(
     ty: Type,
     options: AllocOptions,
 ) !Id {
-    const target = cg.module.zcu.getTarget();
     const ty_id = try cg.resolveType(ty, .indirect);
     const ptr_fn_ty_id = try cg.module.ptrType(ty_id, .function);
 
@@ -4961,20 +4850,7 @@ fn alloc(
         .initializer = options.initializer,
     });
 
-    switch (target.os.tag) {
-        .vulkan, .opengl => return var_id,
-        else => {},
-    }
-
-    switch (options.storage_class) {
-        .generic => {
-            const ptr_gn_ty_id = try cg.module.ptrType(ty_id, .generic);
-            // Convert to a generic pointer
-            return cg.castToGeneric(ptr_gn_ty_id, var_id);
-        },
-        .function => return var_id,
-        else => unreachable,
-    }
+    return var_id;
 }
 
 fn airAlloc(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
src/arch/spirv/Module.zig
@@ -368,7 +368,10 @@ pub fn finalize(module: *Module, gpa: Allocator) ![]Word {
     }
     if (target.cpu.arch == .spirv64) try module.addCapability(.int64);
     if (target.cpu.has(.spirv, .int64)) try module.addCapability(.int64);
-    if (target.cpu.has(.spirv, .float16)) try module.addCapability(.float16);
+    if (target.cpu.has(.spirv, .float16)) {
+        if (target.os.tag == .opencl) try module.addExtension("cl_khr_fp16");
+        try module.addCapability(.float16);
+    }
     if (target.cpu.has(.spirv, .float64)) try module.addCapability(.float64);
     if (target.cpu.has(.spirv, .generic_pointer)) try module.addCapability(.generic_pointer);
     if (target.cpu.has(.spirv, .vector16)) try module.addCapability(.vector16);
@@ -920,7 +923,7 @@ pub fn debugString(module: *Module, string: []const u8) !Id {
 pub fn storageClass(module: *Module, as: std.builtin.AddressSpace) spec.StorageClass {
     const target = module.zcu.getTarget();
     return switch (as) {
-        .generic => if (target.cpu.has(.spirv, .generic_pointer)) .generic else .function,
+        .generic => .function,
         .global => switch (target.os.tag) {
             .opencl, .amdhsa => .cross_workgroup,
             else => .storage_buffer,