Commit 1a261917ce

Jacob Young <jacobly0@users.noreply.github.com>
2023-05-04 09:36:04
x86_64: implement `@ctz` and `@clz` for `u128`
1 parent 9bea854
Changed files (3)
src/arch/x86_64/CodeGen.zig
test/behavior/bugs/2114.zig
test/behavior/math.zig
src/arch/x86_64/CodeGen.zig
@@ -3798,19 +3798,38 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void {
 
         const dst_reg = try self.register_manager.allocReg(inst, gp);
         const dst_mcv = MCValue{ .register = dst_reg };
-        const dst_lock = self.register_manager.lockReg(dst_reg);
-        defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+        const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
+        defer self.register_manager.unlockReg(dst_lock);
 
+        const src_bits = src_ty.bitSize(self.target.*);
         if (Target.x86.featureSetHas(self.target.cpu.features, .lzcnt)) {
-            try self.genBinOpMir(.lzcnt, src_ty, dst_mcv, mat_src_mcv);
-            const extra_bits = self.regExtraBits(src_ty);
-            if (extra_bits > 0) {
-                try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = extra_bits });
-            }
+            if (src_bits <= 64) {
+                try self.genBinOpMir(.lzcnt, src_ty, dst_mcv, mat_src_mcv);
+
+                const extra_bits = self.regExtraBits(src_ty);
+                if (extra_bits > 0) {
+                    try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = extra_bits });
+                }
+            } else if (src_bits <= 128) {
+                const tmp_reg = try self.register_manager.allocReg(null, gp);
+                const tmp_mcv = MCValue{ .register = tmp_reg };
+                const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+                defer self.register_manager.unlockReg(tmp_lock);
+
+                try self.genBinOpMir(.lzcnt, Type.u64, dst_mcv, mat_src_mcv);
+                try self.genBinOpMir(.add, dst_ty, dst_mcv, .{ .immediate = 64 });
+                try self.genBinOpMir(.lzcnt, Type.u64, tmp_mcv, mat_src_mcv.address().offset(8).deref());
+                try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc);
+
+                if (src_bits < 128) {
+                    try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = 128 - src_bits });
+                }
+            } else return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
             break :result dst_mcv;
         }
 
-        const src_bits = src_ty.bitSize(self.target.*);
+        if (src_bits > 64)
+            return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
         if (math.isPowerOfTwo(src_bits)) {
             const imm_reg = try self.copyToTmpRegister(dst_ty, .{
                 .immediate = src_bits ^ (src_bits - 1),
@@ -3870,24 +3889,52 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
 
         if (Target.x86.featureSetHas(self.target.cpu.features, .bmi)) {
-            const extra_bits = self.regExtraBits(src_ty);
-            const masked_mcv = if (extra_bits > 0) masked: {
-                const mask_mcv = MCValue{
-                    .immediate = ((@as(u64, 1) << @intCast(u6, extra_bits)) - 1) <<
-                        @intCast(u6, src_bits),
-                };
-                const tmp_mcv = tmp: {
-                    if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) break :tmp src_mcv;
-                    try self.genSetReg(dst_reg, src_ty, src_mcv);
-                    break :tmp dst_mcv;
-                };
-                try self.genBinOpMir(.@"or", src_ty, tmp_mcv, mask_mcv);
-                break :masked tmp_mcv;
-            } else mat_src_mcv;
-            try self.genBinOpMir(.tzcnt, src_ty, dst_mcv, masked_mcv);
+            if (src_bits <= 64) {
+                const extra_bits = self.regExtraBits(src_ty);
+                const masked_mcv = if (extra_bits > 0) masked: {
+                    const tmp_mcv = tmp: {
+                        if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0))
+                            break :tmp src_mcv;
+                        try self.genSetReg(dst_reg, src_ty, src_mcv);
+                        break :tmp dst_mcv;
+                    };
+                    try self.genBinOpMir(
+                        .@"or",
+                        src_ty,
+                        tmp_mcv,
+                        .{ .immediate = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - extra_bits)) <<
+                            @intCast(u6, src_bits) },
+                    );
+                    break :masked tmp_mcv;
+                } else mat_src_mcv;
+                try self.genBinOpMir(.tzcnt, src_ty, dst_mcv, masked_mcv);
+            } else if (src_bits <= 128) {
+                const tmp_reg = try self.register_manager.allocReg(null, gp);
+                const tmp_mcv = MCValue{ .register = tmp_reg };
+                const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+                defer self.register_manager.unlockReg(tmp_lock);
+
+                const masked_mcv = if (src_bits < 128) masked: {
+                    try self.genCopy(Type.u64, dst_mcv, mat_src_mcv.address().offset(8).deref());
+                    try self.genBinOpMir(
+                        .@"or",
+                        Type.u64,
+                        dst_mcv,
+                        .{ .immediate = @as(u64, math.maxInt(u64)) << @intCast(u6, src_bits - 64) },
+                    );
+                    break :masked dst_mcv;
+                } else mat_src_mcv.address().offset(8).deref();
+                try self.genBinOpMir(.tzcnt, Type.u64, dst_mcv, masked_mcv);
+                try self.genBinOpMir(.add, dst_ty, dst_mcv, .{ .immediate = 64 });
+                try self.genBinOpMir(.tzcnt, Type.u64, tmp_mcv, mat_src_mcv);
+                try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc);
+            } else return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
             break :result dst_mcv;
         }
 
+        if (src_bits > 64)
+            return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
+
         const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits });
         try self.genBinOpMir(.bsf, src_ty, dst_mcv, mat_src_mcv);
 
test/behavior/bugs/2114.zig
@@ -9,7 +9,8 @@ fn ctz(x: anytype) usize {
 
 test "fixed" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64 and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .bmi)) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
test/behavior/math.zig
@@ -77,7 +77,8 @@ fn testClz() !void {
 }
 
 test "@clz big ints" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64 and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .lzcnt)) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO