Commit 7289a073e5

Alex Rønne Petersen <alex@alexrp.com>
2025-04-03 02:26:17
std.zig.system.x86: Synchronize CPUID/XGETBV checks with LLVM 19.
Also fix a bunch of cases where we didn't toggle features off if the relevant leaf isn't available, and switch XCR0 checks to a packed struct. Closes #23385.
1 parent de62dc8
Changed files (1)
lib
std
zig
system
lib/std/zig/system/x86.zig
@@ -2,11 +2,30 @@ const std = @import("std");
 const builtin = @import("builtin");
 const Target = std.Target;
 
-const XCR0_XMM = 0x02;
-const XCR0_YMM = 0x04;
-const XCR0_MASKREG = 0x20;
-const XCR0_ZMM0_15 = 0x40;
-const XCR0_ZMM16_31 = 0x80;
+/// Only covers EAX for now.
+const Xcr0 = packed struct(u32) {
+    x87: bool,
+    sse: bool,
+    avx: bool,
+    bndreg: bool,
+    bndcsr: bool,
+    opmask: bool,
+    zmm_hi256: bool,
+    hi16_zmm: bool,
+    pt: bool,
+    pkru: bool,
+    pasid: bool,
+    cet_u: bool,
+    cet_s: bool,
+    hdc: bool,
+    uintr: bool,
+    lbr: bool,
+    hwp: bool,
+    xtilecfg: bool,
+    xtiledata: bool,
+    apx: bool,
+    _reserved: u12,
+};
 
 fn setFeature(cpu: *Target.Cpu, feature: Target.x86.Feature, enabled: bool) void {
     const idx = @as(Target.Cpu.Feature.Set.Index, @intFromEnum(feature));
@@ -339,12 +358,6 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
 
     leaf = cpuid(1, 0);
 
-    setFeature(cpu, .cx8, bit(leaf.edx, 8));
-    setFeature(cpu, .cmov, bit(leaf.edx, 15));
-    setFeature(cpu, .mmx, bit(leaf.edx, 23));
-    setFeature(cpu, .fxsr, bit(leaf.edx, 24));
-    setFeature(cpu, .sse, bit(leaf.edx, 25));
-    setFeature(cpu, .sse2, bit(leaf.edx, 26));
     setFeature(cpu, .sse3, bit(leaf.ecx, 0));
     setFeature(cpu, .pclmul, bit(leaf.ecx, 1));
     setFeature(cpu, .ssse3, bit(leaf.ecx, 9));
@@ -356,13 +369,20 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
     setFeature(cpu, .aes, bit(leaf.ecx, 25));
     setFeature(cpu, .rdrnd, bit(leaf.ecx, 30));
 
+    setFeature(cpu, .cx8, bit(leaf.edx, 8));
+    setFeature(cpu, .cmov, bit(leaf.edx, 15));
+    setFeature(cpu, .mmx, bit(leaf.edx, 23));
+    setFeature(cpu, .fxsr, bit(leaf.edx, 24));
+    setFeature(cpu, .sse, bit(leaf.edx, 25));
+    setFeature(cpu, .sse2, bit(leaf.edx, 26));
+
     const has_xsave = bit(leaf.ecx, 27);
     const has_avx = bit(leaf.ecx, 28);
 
     // Make sure not to call xgetbv if xsave is not supported
-    const xcr0_eax = if (has_xsave and has_avx) getXCR0() else 0;
+    const xcr0: Xcr0 = if (has_xsave and has_avx) @bitCast(getXCR0()) else @bitCast(@as(u32, 0));
 
-    const has_avx_save = hasMask(xcr0_eax, XCR0_XMM | XCR0_YMM);
+    const has_avx_save = xcr0.sse and xcr0.avx;
 
     // LLVM approaches avx512_save by hardcoding it to true on Darwin,
     // because the kernel saves the context even if the bit is not set.
@@ -384,22 +404,26 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
     // Darwin lazily saves the AVX512 context on first use: trust that the OS will
     // save the AVX512 context if we use AVX512 instructions, even if the bit is not
     // set right now.
-    const has_avx512_save = switch (os_tag.isDarwin()) {
-        true => true,
-        false => hasMask(xcr0_eax, XCR0_MASKREG | XCR0_ZMM0_15 | XCR0_ZMM16_31),
-    };
+    const has_avx512_save = if (os_tag.isDarwin())
+        true
+    else
+        xcr0.zmm_hi256 and xcr0.hi16_zmm;
+
+    // AMX requires additional context to be saved by the OS.
+    const has_amx_save = xcr0.xtilecfg and xcr0.xtiledata;
 
     setFeature(cpu, .avx, has_avx_save);
-    setFeature(cpu, .fma, has_avx_save and bit(leaf.ecx, 12));
+    setFeature(cpu, .fma, bit(leaf.ecx, 12) and has_avx_save);
     // Only enable XSAVE if OS has enabled support for saving YMM state.
-    setFeature(cpu, .xsave, has_avx_save and bit(leaf.ecx, 26));
-    setFeature(cpu, .f16c, has_avx_save and bit(leaf.ecx, 29));
+    setFeature(cpu, .xsave, bit(leaf.ecx, 26) and has_avx_save);
+    setFeature(cpu, .f16c, bit(leaf.ecx, 29) and has_avx_save);
 
     leaf = cpuid(0x80000000, 0);
     const max_ext_level = leaf.eax;
 
     if (max_ext_level >= 0x80000001) {
         leaf = cpuid(0x80000001, 0);
+
         setFeature(cpu, .sahf, bit(leaf.ecx, 0));
         setFeature(cpu, .lzcnt, bit(leaf.ecx, 5));
         setFeature(cpu, .sse4a, bit(leaf.ecx, 6));
@@ -409,11 +433,21 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
         setFeature(cpu, .fma4, bit(leaf.ecx, 16) and has_avx_save);
         setFeature(cpu, .tbm, bit(leaf.ecx, 21));
         setFeature(cpu, .mwaitx, bit(leaf.ecx, 29));
+
         setFeature(cpu, .@"64bit", bit(leaf.edx, 29));
     } else {
         for ([_]Target.x86.Feature{
-            .sahf, .lzcnt, .sse4a, .prfchw, .xop,
-            .lwp,  .fma4,  .tbm,   .mwaitx, .@"64bit",
+            .sahf,
+            .lzcnt,
+            .sse4a,
+            .prfchw,
+            .xop,
+            .lwp,
+            .fma4,
+            .tbm,
+            .mwaitx,
+
+            .@"64bit",
         }) |feat| {
             setFeature(cpu, feat, false);
         }
@@ -422,10 +456,16 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
     // Misc. memory-related features.
     if (max_ext_level >= 0x80000008) {
         leaf = cpuid(0x80000008, 0);
+
         setFeature(cpu, .clzero, bit(leaf.ebx, 0));
+        setFeature(cpu, .rdpru, bit(leaf.ebx, 4));
         setFeature(cpu, .wbnoinvd, bit(leaf.ebx, 9));
     } else {
-        for ([_]Target.x86.Feature{ .clzero, .wbnoinvd }) |feat| {
+        for ([_]Target.x86.Feature{
+            .clzero,
+            .rdpru,
+            .wbnoinvd,
+        }) |feat| {
             setFeature(cpu, feat, false);
         }
     }
@@ -444,6 +484,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
         setFeature(cpu, .rtm, bit(leaf.ebx, 11));
         // AVX512 is only supported if the OS supports the context save for it.
         setFeature(cpu, .avx512f, bit(leaf.ebx, 16) and has_avx512_save);
+        setFeature(cpu, .evex512, bit(leaf.ebx, 16) and has_avx512_save);
         setFeature(cpu, .avx512dq, bit(leaf.ebx, 17) and has_avx512_save);
         setFeature(cpu, .rdseed, bit(leaf.ebx, 18));
         setFeature(cpu, .adx, bit(leaf.ebx, 19));
@@ -470,8 +511,8 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
         setFeature(cpu, .avx512vnni, bit(leaf.ecx, 11) and has_avx512_save);
         setFeature(cpu, .avx512bitalg, bit(leaf.ecx, 12) and has_avx512_save);
         setFeature(cpu, .avx512vpopcntdq, bit(leaf.ecx, 14) and has_avx512_save);
-        setFeature(cpu, .avx512vp2intersect, bit(leaf.edx, 8) and has_avx512_save);
         setFeature(cpu, .rdpid, bit(leaf.ecx, 22));
+        setFeature(cpu, .kl, bit(leaf.ecx, 23));
         setFeature(cpu, .cldemote, bit(leaf.ecx, 25));
         setFeature(cpu, .movdiri, bit(leaf.ecx, 27));
         setFeature(cpu, .movdir64b, bit(leaf.ecx, 28));
@@ -487,32 +528,153 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
         // leaves using cpuid, since that information is ignored while
         // detecting features using the "-march=native" flag.
         // For more info, see X86 ISA docs.
-        setFeature(cpu, .pconfig, bit(leaf.edx, 18));
         setFeature(cpu, .uintr, bit(leaf.edx, 5));
+        setFeature(cpu, .avx512vp2intersect, bit(leaf.edx, 8) and has_avx512_save);
+        setFeature(cpu, .serialize, bit(leaf.edx, 14));
+        setFeature(cpu, .tsxldtrk, bit(leaf.edx, 16));
+        setFeature(cpu, .pconfig, bit(leaf.edx, 18));
+        setFeature(cpu, .amx_bf16, bit(leaf.edx, 22) and has_amx_save);
+        setFeature(cpu, .avx512fp16, bit(leaf.edx, 23) and has_avx512_save);
+        setFeature(cpu, .amx_tile, bit(leaf.edx, 24) and has_amx_save);
+        setFeature(cpu, .amx_int8, bit(leaf.edx, 25) and has_amx_save);
 
-        // TODO I feel unsure about this check.
-        //      It doesn't really seem to check for 7.1, just for 7.
-        //      Is this a sound assumption to make?
-        //      Note that this is what other implementations do, so I kind of trust it.
-        const has_leaf_7_1 = max_level >= 7;
-        if (has_leaf_7_1) {
+        if (leaf.eax >= 1) {
             leaf = cpuid(0x7, 0x1);
+
+            setFeature(cpu, .sha512, bit(leaf.eax, 0));
+            setFeature(cpu, .sm3, bit(leaf.eax, 1));
+            setFeature(cpu, .sm4, bit(leaf.eax, 2));
+            setFeature(cpu, .raoint, bit(leaf.eax, 3));
+            setFeature(cpu, .avxvnni, bit(leaf.eax, 4) and has_avx_save);
             setFeature(cpu, .avx512bf16, bit(leaf.eax, 5) and has_avx512_save);
+            setFeature(cpu, .cmpccxadd, bit(leaf.eax, 7));
+            setFeature(cpu, .amx_fp16, bit(leaf.eax, 21) and has_amx_save);
+            setFeature(cpu, .hreset, bit(leaf.eax, 22));
+            setFeature(cpu, .avxifma, bit(leaf.eax, 23) and has_avx_save);
+
+            setFeature(cpu, .avxvnniint8, bit(leaf.edx, 4) and has_avx_save);
+            setFeature(cpu, .avxneconvert, bit(leaf.edx, 5) and has_avx_save);
+            setFeature(cpu, .amx_complex, bit(leaf.edx, 8) and has_amx_save);
+            setFeature(cpu, .avxvnniint16, bit(leaf.edx, 10) and has_avx_save);
+            setFeature(cpu, .prefetchi, bit(leaf.edx, 14));
+            setFeature(cpu, .usermsr, bit(leaf.edx, 15));
+            setFeature(cpu, .avx10_1_256, bit(leaf.edx, 19));
+            // APX
+            setFeature(cpu, .egpr, bit(leaf.edx, 21));
+            setFeature(cpu, .push2pop2, bit(leaf.edx, 21));
+            setFeature(cpu, .ppx, bit(leaf.edx, 21));
+            setFeature(cpu, .ndd, bit(leaf.edx, 21));
+            setFeature(cpu, .ccmp, bit(leaf.edx, 21));
+            setFeature(cpu, .cf, bit(leaf.edx, 21));
         } else {
-            setFeature(cpu, .avx512bf16, false);
+            for ([_]Target.x86.Feature{
+                .sha512,
+                .sm3,
+                .sm4,
+                .raoint,
+                .avxvnni,
+                .avx512bf16,
+                .cmpccxadd,
+                .amx_fp16,
+                .hreset,
+                .avxifma,
+
+                .avxvnniint8,
+                .avxneconvert,
+                .amx_complex,
+                .avxvnniint16,
+                .prefetchi,
+                .usermsr,
+                .avx10_1_256,
+                .egpr,
+                .push2pop2,
+                .ppx,
+                .ndd,
+                .ccmp,
+                .cf,
+            }) |feat| {
+                setFeature(cpu, feat, false);
+            }
         }
     } else {
         for ([_]Target.x86.Feature{
-            .fsgsbase,           .sgx,        .bmi,          .avx2,
-            .bmi2,               .invpcid,    .rtm,          .avx512f,
-            .avx512dq,           .rdseed,     .adx,          .avx512ifma,
-            .clflushopt,         .clwb,       .avx512pf,     .avx512er,
-            .avx512cd,           .sha,        .avx512bw,     .avx512vl,
-            .prefetchwt1,        .avx512vbmi, .pku,          .waitpkg,
-            .avx512vbmi2,        .shstk,      .gfni,         .vaes,
-            .vpclmulqdq,         .avx512vnni, .avx512bitalg, .avx512vpopcntdq,
-            .avx512vp2intersect, .rdpid,      .cldemote,     .movdiri,
-            .movdir64b,          .enqcmd,     .pconfig,      .avx512bf16,
+            .fsgsbase,
+            .sgx,
+            .bmi,
+            .avx2,
+            .smep,
+            .bmi2,
+            .invpcid,
+            .rtm,
+            .avx512f,
+            .evex512,
+            .avx512dq,
+            .rdseed,
+            .adx,
+            .smap,
+            .avx512ifma,
+            .clflushopt,
+            .clwb,
+            .avx512pf,
+            .avx512er,
+            .avx512cd,
+            .sha,
+            .avx512bw,
+            .avx512vl,
+
+            .prefetchwt1,
+            .avx512vbmi,
+            .pku,
+            .waitpkg,
+            .avx512vbmi2,
+            .shstk,
+            .gfni,
+            .vaes,
+            .vpclmulqdq,
+            .avx512vnni,
+            .avx512bitalg,
+            .avx512vpopcntdq,
+            .rdpid,
+            .kl,
+            .cldemote,
+            .movdiri,
+            .movdir64b,
+            .enqcmd,
+
+            .uintr,
+            .avx512vp2intersect,
+            .serialize,
+            .tsxldtrk,
+            .pconfig,
+            .amx_bf16,
+            .avx512fp16,
+            .amx_tile,
+            .amx_int8,
+
+            .sha512,
+            .sm3,
+            .sm4,
+            .raoint,
+            .avxvnni,
+            .avx512bf16,
+            .cmpccxadd,
+            .amx_fp16,
+            .hreset,
+            .avxifma,
+
+            .avxvnniint8,
+            .avxneconvert,
+            .amx_complex,
+            .avxvnniint16,
+            .prefetchi,
+            .usermsr,
+            .avx10_1_256,
+            .egpr,
+            .push2pop2,
+            .ppx,
+            .ndd,
+            .ccmp,
+            .cf,
         }) |feat| {
             setFeature(cpu, feat, false);
         }
@@ -520,21 +682,55 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
 
     if (max_level >= 0xD and has_avx_save) {
         leaf = cpuid(0xD, 0x1);
+
         // Only enable XSAVE if OS has enabled support for saving YMM state.
         setFeature(cpu, .xsaveopt, bit(leaf.eax, 0));
         setFeature(cpu, .xsavec, bit(leaf.eax, 1));
         setFeature(cpu, .xsaves, bit(leaf.eax, 3));
     } else {
-        for ([_]Target.x86.Feature{ .xsaveopt, .xsavec, .xsaves }) |feat| {
+        for ([_]Target.x86.Feature{
+            .xsaveopt,
+            .xsavec,
+            .xsaves,
+        }) |feat| {
             setFeature(cpu, feat, false);
         }
     }
 
     if (max_level >= 0x14) {
         leaf = cpuid(0x14, 0);
+
         setFeature(cpu, .ptwrite, bit(leaf.ebx, 4));
     } else {
-        setFeature(cpu, .ptwrite, false);
+        for ([_]Target.x86.Feature{
+            .ptwrite,
+        }) |feat| {
+            setFeature(cpu, feat, false);
+        }
+    }
+
+    if (max_level >= 0x19) {
+        leaf = cpuid(0x19, 0);
+
+        setFeature(cpu, .widekl, bit(leaf.ebx, 2));
+    } else {
+        for ([_]Target.x86.Feature{
+            .widekl,
+        }) |feat| {
+            setFeature(cpu, feat, false);
+        }
+    }
+
+    if (max_level >= 0x24) {
+        leaf = cpuid(0x24, 0);
+
+        setFeature(cpu, .avx10_1_512, bit(leaf.ebx, 18));
+    } else {
+        for ([_]Target.x86.Feature{
+            .avx10_1_512,
+        }) |feat| {
+            setFeature(cpu, feat, false);
+        }
     }
 }