Commit f199182567

alichay <allisonalichay@gmail.com>
2020-03-06 05:12:15
Cleaned up CPU detection and fixed incorrect detection bits.
1 parent e24f29b
Changed files (3)
lib
std
src-self-hosted
lib/std/zig/system/x86.zig
@@ -10,70 +10,66 @@ fn setFeature(cpu: *Target.Cpu, feature: Target.x86.Feature, enabled: bool) void
     else cpu.features.removeFeature(idx);
 }
 
-fn hasFeature(cpu: *Target.Cpu, feature: Target.x86.Feature) bool {
-    const idx = @as(Target.Cpu.Feature.Set.Index, @enumToInt(feature));
-    return cpu.features.isEnabled(idx);
-}
-
 inline fn bit(input: u32, offset: u5) bool {
     return (input >> offset) & 1 != 0;
 }
 
-pub fn detectNativeCpuAndFeatures(cpu: *Target.Cpu) void {
+pub fn detectNativeCpuAndFeatures(cross_target: CrossTarget) Target.Cpu {
 
-    defer {
-        // Whenever we find a model, add that model's featureset.
-        cpu.features.addFeatureSet(cpu.model.features);
-    }
+    var arch = cross_target.getCpuArch();
 
-    // When we can't identify a specific model,
-    // we guess based on processor features.
-    // This seems to be the accepted standard.
+    var cpu = Target.Cpu {
+        .arch = arch,
+        .model = Target.Cpu.Model.baseline(arch),
+        .features = Target.Cpu.Feature.Set.empty,
+    };
 
-    detectNativeFeatures(cpu);
+    detectNativeFeatures(&cpu, cross_target.getOsTag());
 
     var leaf = cpuid(0, 0);
     const max_leaf = leaf.eax;
     const vendor = leaf.ebx;
-    if(max_leaf < 1) {
-        cpu.model = &Target.x86.cpu.generic;
-        return;
-    }
 
-    leaf = cpuid(0x1, 0);
+    if(max_leaf > 0) {
+
+        leaf = cpuid(0x1, 0);
 
-    const brand_id = leaf.ebx & 0xff;
-    var family: u32 = 0;
-    var model: u32 = 0;
+        const brand_id = leaf.ebx & 0xff;
+        var family: u32 = 0;
+        var model: u32 = 0;
 
-    { // Detect model and family
-        family = (leaf.eax >> 8) & 0xf;
-        model = (leaf.eax >> 4) & 0xf;
-        if (family == 6 or family == 0xf) {
-            if (family == 0xf) {
-                family += (leaf.eax >> 20) & 0xff;
+        { // Detect model and family
+            family = (leaf.eax >> 8) & 0xf;
+            model = (leaf.eax >> 4) & 0xf;
+            if (family == 6 or family == 0xf) {
+                if (family == 0xf) {
+                    family += (leaf.eax >> 20) & 0xff;
+                }
+                model += ((leaf.eax >> 16) & 0xf) << 4;
             }
-            model += ((leaf.eax >> 16) & 0xf) << 4;
         }
-    }
 
-    switch(vendor) {
-        0x756e6547 => {
-            detectIntelProcessor(cpu, family, model, brand_id);
-        },
-        0x68747541 => {
-            detectAMDProcessor(cpu, family, model);
-        },
-        else => {
-            cpu.model = &Target.x86.cpu.generic;
-        },
+        switch(vendor) {
+            0x756e6547 => {
+                detectIntelProcessor(&cpu, family, model, brand_id);
+            },
+            0x68747541 => {
+                detectAMDProcessor(&cpu, family, model);
+            },
+            else => {},
+        }
     }
 
+    var model_features = cpu.model.features;
+    model_features.populateDependencies(cpu.arch.allFeaturesList());
+    cpu.features.addFeatureSet(model_features);
+
+    return cpu;
+
 }
 
 fn detectIntelProcessor(cpu: *Target.Cpu, family: u32, model: u32, brand_id: u32) void {
     if (brand_id != 0) {
-        cpu.model = &Target.x86.cpu.generic;
         return;
     }
     switch(family) {
@@ -86,7 +82,7 @@ fn detectIntelProcessor(cpu: *Target.Cpu, family: u32, model: u32, brand_id: u32
             return;
         },
         5 => {
-            if(hasFeature(cpu, .mmx)) {
+            if(Target.x86.featureSetHas(cpu.features, .mmx)) {
                 cpu.model = &Target.x86.cpu.pentium_mmx;
                 return;
             }
@@ -152,10 +148,10 @@ fn detectIntelProcessor(cpu: *Target.Cpu, family: u32, model: u32, brand_id: u32
                     return;
                 },
                 0x55 => {
-                    if(hasFeature(cpu, .avx512bf16)) {
+                    if(Target.x86.featureSetHas(cpu.features, .avx512bf16)) {
                         cpu.model = &Target.x86.cpu.cooperlake;
                         return;
-                    } else if(hasFeature(cpu, .avx512vnni)) {
+                    } else if(Target.x86.featureSetHas(cpu.features, .avx512vnni)) {
                         cpu.model = &Target.x86.cpu.cascadelake;
                         return;
                     } else {
@@ -204,114 +200,18 @@ fn detectIntelProcessor(cpu: *Target.Cpu, family: u32, model: u32, brand_id: u32
                     return;
                 },
                 else => {
-                    // Unknown, try to guess.
-                    // TODO detect tigerlake host
-                    if(hasFeature(cpu, .avx512vp2intersect)) {
-                        // TODO no tigerlake entry in Target.x86.cpu
-                        //cpu.model = &Target.x86.cpu.tigerlake;
-                        cpu.model = &Target.x86.cpu.nehalem;
-                        return;
-                    }
-                    if(hasFeature(cpu, .avx512vbmi2)) {
-                        cpu.model = &Target.x86.cpu.icelake_client;
-                        return;
-                    }
-                    if(hasFeature(cpu, .avx512vbmi)) {
-                        cpu.model = &Target.x86.cpu.cannonlake;
-                        return;
-                    }
-                    if(hasFeature(cpu, .avx512bf16)) {
-                        cpu.model = &Target.x86.cpu.cooperlake;
-                        return;
-                    }
-                    if(hasFeature(cpu, .avx512vnni)) {
-                        cpu.model = &Target.x86.cpu.cascadelake;
-                        return;
-                    }
-                    if(hasFeature(cpu, .avx512vl)) {
-                        cpu.model = &Target.x86.cpu.skylake_avx512;
-                        return;
-                    }
-                    if(hasFeature(cpu, .avx512er)) {
-                        cpu.model = &Target.x86.cpu.knl;
-                        return;
-                    }
-                    if(hasFeature(cpu, .clflushopt)) {
-                        if(hasFeature(cpu, .sha)) {
-                            cpu.model = &Target.x86.cpu.goldmont;
-                            return;
-                        } else {
-                            cpu.model = &Target.x86.cpu.skylake;
-                            return;
-                        }
-                    }
-                    if(hasFeature(cpu, .adx)) {
-                        cpu.model = &Target.x86.cpu.broadwell;
-                        return;
-                    }
-                    if(hasFeature(cpu, .avx2)) {
-                        cpu.model = &Target.x86.cpu.haswell;
-                        return;
-                    }
-                    if(hasFeature(cpu, .avx)) {
-                        cpu.model = &Target.x86.cpu.sandybridge;
-                        return;
-                    }
-                    if(hasFeature(cpu, .sse4_2)) {
-                        if(hasFeature(cpu, .movbe)) {
-                            cpu.model = &Target.x86.cpu.silvermont;
-                            return;
-                        } else {
-                            cpu.model = &Target.x86.cpu.nehalem;
-                            return;
-                        }
-                    }
-                    if(hasFeature(cpu, .sse4_1)) {
-                        cpu.model = &Target.x86.cpu.penryn;
-                        return;
-                    }
-                    if(hasFeature(cpu, .sse3)) {
-                        if(hasFeature(cpu, .movbe)) {
-                            cpu.model = &Target.x86.cpu.bonnell;
-                            return;
-                        } else {
-                            cpu.model = &Target.x86.cpu.core2;
-                            return;
-                        }
-                    }
-
-                    if(hasFeature(cpu, .@"64bit")) {
-                        cpu.model = &Target.x86.cpu.core2;
-                        return;
-                    }
-
-                    if(hasFeature(cpu, .sse3)) {
-                        cpu.model = &Target.x86.cpu.yonah;
-                        return;
-                    }
-                    if(hasFeature(cpu, .sse2)) {
-                        cpu.model = &Target.x86.cpu.pentium_m;
-                        return;
-                    }
-                    if(hasFeature(cpu, .sse)) {
-                        cpu.model = &Target.x86.cpu.pentium3;
-                        return;
-                    }
-                    if(hasFeature(cpu, .mmx)) {
-                        cpu.model = &Target.x86.cpu.pentium2;
-                        return;
-                    }
-                    cpu.model = &Target.x86.cpu.pentiumpro;
+                    // Unknown CPU.
+                    // Default to baseline x86_64 or i386 cpu.
                     return;
                 },
             }
         },
         15 => {
-            if(hasFeature(cpu, .@"64bit")) {
+            if(Target.x86.featureSetHas(cpu.features, .@"64bit")) {
                 cpu.model = &Target.x86.cpu.nocona;
                 return;
             }
-            if(hasFeature(cpu, .sse3)) {
+            if(Target.x86.featureSetHas(cpu.features, .sse3)) {
                 cpu.model = &Target.x86.cpu.prescott;
                 return;
             }
@@ -319,7 +219,8 @@ fn detectIntelProcessor(cpu: *Target.Cpu, family: u32, model: u32, brand_id: u32
             return;
         },
         else => {
-            cpu.model = &Target.x86.cpu.generic;
+            // Unknown CPU.
+            // Default to baseline x86_64 or i386 cpu.
             return;
         }
     }
@@ -358,7 +259,7 @@ fn detectAMDProcessor(cpu: *Target.Cpu, family: u32, model: u32) void {
             return;
         },
         6 => {
-            if(hasFeature(cpu, .sse)) {
+            if(Target.x86.featureSetHas(cpu.features, .sse)) {
                 cpu.model = &Target.x86.cpu.athlon_xp;
                 return;
             }
@@ -366,7 +267,7 @@ fn detectAMDProcessor(cpu: *Target.Cpu, family: u32, model: u32) void {
             return;
         },
         15 => {
-            if(hasFeature(cpu, .sse3)) {
+            if(Target.x86.featureSetHas(cpu.features, .sse3)) {
                 cpu.model = &Target.x86.cpu.k8_sse3;
                 return;
             }
@@ -410,13 +311,12 @@ fn detectAMDProcessor(cpu: *Target.Cpu, family: u32, model: u32) void {
             return;
         },
         else => {
-            cpu.model = &Target.x86.cpu.generic;
             return;
         }
     }
 }
 
-fn detectNativeFeatures(cpu: *Target.Cpu) void {
+fn detectNativeFeatures(cpu: *Target.Cpu, os_type: Target.Os.Tag) void {
 
     var leaf = cpuid(0, 0);
 
@@ -442,23 +342,42 @@ fn detectNativeFeatures(cpu: *Target.Cpu) void {
     setFeature(cpu, .aes,    bit(leaf.ecx, 25));
     setFeature(cpu, .rdrnd,  bit(leaf.ecx, 30));
 
-    const has_avx_save = bit(leaf.ecx, 27) and
-                         bit(leaf.ecx, 28) and
-                         ((leaf.eax & 0x6) == 0x6);
+    leaf.eax = getXCR0();
+
+    const has_avx = bit(leaf.ecx, 27) and
+                    bit(leaf.ecx, 28) and
+                    ((leaf.eax & 0x6) == 0x6);
+
+    // LLVM approaches avx512_save by hardcoding it to true on Darwin,
+    // because the kernel saves the context even if the bit is not set.
+    // https://github.com/llvm/llvm-project/blob/bca373f73fc82728a8335e7d6cd164e8747139ec/llvm/lib/Support/Host.cpp#L1378
+    //
+    // Google approaches this by using a different series of checks and flags,
+    // and this may report the feature more accurately on a technically correct
+    // but ultimately less useful level.
+    // https://github.com/google/cpu_features/blob/b5c271c53759b2b15ff91df19bd0b32f2966e275/src/cpuinfo_x86.c#L113
+    // (called from https://github.com/google/cpu_features/blob/b5c271c53759b2b15ff91df19bd0b32f2966e275/src/cpuinfo_x86.c#L1052)
+    //
+    // Right now, we use LLVM's approach, because even if the target doesn't support
+    // the feature, the kernel should provide the same functionality transparently,
+    // so the implementation details don't make a difference.
+    // That said, this flag impacts other CPU features' availability,
+    // so until we can verify that this doesn't come with side affects,
+    // we'll say TODO verify this.
 
     // Darwin lazily saves the AVX512 context on first use: trust that the OS will
-    // save the AVX512 context if we use AVX512 instructions, even the bit is not
+    // save the AVX512 context if we use AVX512 instructions, even if the bit is not
     // set right now.
-    const has_avx512_save = switch(Target.current.isDarwin()) {
+    const has_avx512_save = switch(os_type.isDarwin()) {
         true  => true,
-        false => has_avx_save and ((leaf.eax & 0xE0) == 0xE0),
+        false => has_avx and ((leaf.eax & 0xE0) == 0xE0),
     };
 
-    setFeature(cpu, .avx,    has_avx_save);
-    setFeature(cpu, .fma,    has_avx_save and bit(leaf.ecx, 12));
+    setFeature(cpu, .avx,    has_avx);
+    setFeature(cpu, .fma,    has_avx and bit(leaf.ecx, 12));
     // Only enable XSAVE if OS has enabled support for saving YMM state.
-    setFeature(cpu, .xsave,  has_avx_save and bit(leaf.ecx, 26));
-    setFeature(cpu, .f16c,   has_avx_save and bit(leaf.ecx, 29));
+    setFeature(cpu, .xsave,  has_avx and bit(leaf.ecx, 26));
+    setFeature(cpu, .f16c,   has_avx and bit(leaf.ecx, 29));
 
     leaf = cpuid(0x80000000, 0);
     const max_ext_level = leaf.eax;
@@ -469,9 +388,9 @@ fn detectNativeFeatures(cpu: *Target.Cpu) void {
         setFeature(cpu, .lzcnt,     bit(leaf.ecx,  5));
         setFeature(cpu, .sse4a,     bit(leaf.ecx,  6));
         setFeature(cpu, .prfchw,    bit(leaf.ecx,  8));
-        setFeature(cpu, .xop,       bit(leaf.ecx, 11) and has_avx_save);
+        setFeature(cpu, .xop,       bit(leaf.ecx, 11) and has_avx);
         setFeature(cpu, .lwp,       bit(leaf.ecx, 15));
-        setFeature(cpu, .fma4,      bit(leaf.ecx, 16) and has_avx_save);
+        setFeature(cpu, .fma4,      bit(leaf.ecx, 16) and has_avx);
         setFeature(cpu, .tbm,       bit(leaf.ecx, 21));
         setFeature(cpu, .mwaitx,    bit(leaf.ecx, 29));
         setFeature(cpu, .@"64bit",  bit(leaf.edx, 29));
@@ -486,7 +405,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu) void {
 
     // Misc. memory-related features.
     if(max_ext_level >= 0x80000008) {
-        leaf = cpuid(80000008, 0);
+        leaf = cpuid(0x80000008, 0);
         setFeature(cpu, .clzero,   bit(leaf.ebx, 0));
         setFeature(cpu, .wbnoinvd, bit(leaf.ebx, 9));
     } else {
@@ -495,14 +414,14 @@ fn detectNativeFeatures(cpu: *Target.Cpu) void {
         }
     }
 
-    if(max_level >= 7) {
+    if(max_level >= 0x7) {
         leaf = cpuid(0x7, 0);
 
         setFeature(cpu, .fsgsbase,        bit(leaf.ebx,  0));
         setFeature(cpu, .sgx,             bit(leaf.ebx,  2));
         setFeature(cpu, .bmi,             bit(leaf.ebx,  3));
         // AVX2 is only supported if we have the OS save support from AVX.
-        setFeature(cpu, .avx2,            bit(leaf.ebx,  5) and has_avx_save);
+        setFeature(cpu, .avx2,            bit(leaf.ebx,  5) and has_avx);
         setFeature(cpu, .bmi2,            bit(leaf.ebx,  8));
         setFeature(cpu, .invpcid,         bit(leaf.ebx, 10));
         setFeature(cpu, .rtm,             bit(leaf.ebx, 11));
@@ -528,8 +447,8 @@ fn detectNativeFeatures(cpu: *Target.Cpu) void {
         setFeature(cpu, .avx512vbmi2,        bit(leaf.ecx,  6) and has_avx512_save);
         setFeature(cpu, .shstk,              bit(leaf.ecx,  7));
         setFeature(cpu, .gfni,               bit(leaf.ecx,  8));
-        setFeature(cpu, .vaes,               bit(leaf.ecx,  9) and has_avx_save);
-        setFeature(cpu, .vpclmulqdq,         bit(leaf.ecx, 10) and has_avx_save);
+        setFeature(cpu, .vaes,               bit(leaf.ecx,  9) and has_avx);
+        setFeature(cpu, .vpclmulqdq,         bit(leaf.ecx, 10) and has_avx);
         setFeature(cpu, .avx512vnni,         bit(leaf.ecx, 11) and has_avx512_save);
         setFeature(cpu, .avx512bitalg,       bit(leaf.ecx, 12) and has_avx512_save);
         setFeature(cpu, .avx512vpopcntdq,    bit(leaf.ecx, 14) and has_avx512_save);
@@ -580,7 +499,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu) void {
         }
     }
 
-    if(max_level >= 0xD and has_avx_save) {
+    if(max_level >= 0xD and has_avx) {
         leaf = cpuid(0xD, 0x1);
         // Only enable XSAVE if OS has enabled support for saving YMM state.
         setFeature(cpu, .xsaveopt, bit(leaf.eax, 0));
lib/std/zig/system.zig
@@ -171,6 +171,8 @@ pub const NativeTargetInfo = struct {
 
     dynamic_linker: DynamicLinker = DynamicLinker{},
 
+    cpu_detected: bool = false,
+
     pub const DynamicLinker = Target.DynamicLinker;
 
     pub const DetectError = error{
@@ -191,6 +193,9 @@ pub const NativeTargetInfo = struct {
     /// deinitialization method.
     /// TODO Remove the Allocator requirement from this function.
     pub fn detect(allocator: *Allocator, cross_target: CrossTarget) DetectError!NativeTargetInfo {
+
+        var cpu_detected = true;
+
         const cpu = switch (cross_target.cpu_model) {
             .native => detectNativeCpuAndFeatures(cross_target),
             .baseline => baselineCpuAndFeatures(cross_target),
@@ -203,6 +208,11 @@ pub const NativeTargetInfo = struct {
                 cross_target.updateCpuFeatures(&adjusted_model.features);
                 break :blk adjusted_model;
             },
+        } orelse backup_cpu_detection: {
+
+            // Temporarily use LLVM's cpu info as a backup
+            cpu_detected = false;
+            break :backup_cpu_detection baselineCpuAndFeatures(cross_target);
         };
 
         var os = Target.Os.defaultVersionRange(cross_target.getOsTag());
@@ -318,7 +328,9 @@ pub const NativeTargetInfo = struct {
             os.version_range.linux.glibc = glibc;
         }
 
-        return detectAbiAndDynamicLinker(allocator, cpu, os, cross_target);
+        var target = try detectAbiAndDynamicLinker(allocator, cpu, os, cross_target);
+        target.cpu_detected = cpu_detected;
+        return target;
     }
 
     /// First we attempt to use the executable's own binary. If it is dynamically
@@ -843,19 +855,15 @@ pub const NativeTargetInfo = struct {
         }
     }
 
-    fn detectNativeCpuAndFeatures(cross_target: CrossTarget) Target.Cpu {
-
-        var baseline = baselineCpuAndFeatures(cross_target);
+    fn detectNativeCpuAndFeatures(cross_target: CrossTarget) ?Target.Cpu {
 
         switch(Target.current.cpu.arch) {
             .x86_64, .i386 => {
-                const x86_detection = @import("system/x86.zig");
-                x86_detection.detectNativeCpuAndFeatures(&baseline);
-                return baseline;
+                return @import("system/x86.zig").detectNativeCpuAndFeatures(cross_target);
             },
             else => {
-                // // TODO Detect native CPU model & features. Until that is implemented we use baseline.
-                return baseline;
+                // TODO flesh out CPU detection for more than just x86.
+                return null;
             }
         }
     }
src-self-hosted/stage2.zig
@@ -1154,8 +1154,7 @@ fn enumInt(comptime Enum: type, int: c_int) Enum {
 
 fn crossTargetToTarget(cross_target: CrossTarget, dynamic_linker_ptr: *?[*:0]u8) !Target {
     var info = try std.zig.system.NativeTargetInfo.detect(std.heap.c_allocator, cross_target);
-    if ((cross_target.cpu_arch == null or cross_target.cpu_model == .native) and
-        (Target.current.cpu.arch != .i386 and Target.current.cpu.arch != .x86_64)) {
+    if ((cross_target.cpu_arch == null or cross_target.cpu_model == .native) and !info.cpu_detected) {
         // TODO We want to just use detected_info.target but implementing
         // CPU model & feature detection is todo so here we rely on LLVM.
         const llvm = @import("llvm.zig");