Commit 1f61a00a41

Jakub Konka <kubkon@jakubkonka.com>
2022-11-28 21:36:56
Merge pull request #13659 from ziglang/arm-win-cpu-features
windows: add native CPU and features detection for Armv8 chips
1 parent 3357425
lib/std/os/windows/kernel32.zig
@@ -10,6 +10,7 @@ const DWORD = windows.DWORD;
 const FILE_INFO_BY_HANDLE_CLASS = windows.FILE_INFO_BY_HANDLE_CLASS;
 const HANDLE = windows.HANDLE;
 const HMODULE = windows.HMODULE;
+const HKEY = windows.HKEY;
 const HRESULT = windows.HRESULT;
 const LARGE_INTEGER = windows.LARGE_INTEGER;
 const LPCWSTR = windows.LPCWSTR;
@@ -57,6 +58,8 @@ const UCHAR = windows.UCHAR;
 const FARPROC = windows.FARPROC;
 const INIT_ONCE_FN = windows.INIT_ONCE_FN;
 const PMEMORY_BASIC_INFORMATION = windows.PMEMORY_BASIC_INFORMATION;
+const REGSAM = windows.REGSAM;
+const LSTATUS = windows.LSTATUS;
 
 pub extern "kernel32" fn AddVectoredExceptionHandler(First: c_ulong, Handler: ?VECTORED_EXCEPTION_HANDLER) callconv(WINAPI) ?*anyopaque;
 pub extern "kernel32" fn RemoveVectoredExceptionHandler(Handle: HANDLE) callconv(WINAPI) c_ulong;
@@ -231,6 +234,7 @@ pub extern "kernel32" fn GetQueuedCompletionStatusEx(
 
 pub extern "kernel32" fn GetSystemInfo(lpSystemInfo: *SYSTEM_INFO) callconv(WINAPI) void;
 pub extern "kernel32" fn GetSystemTimeAsFileTime(*FILETIME) callconv(WINAPI) void;
+pub extern "kernel32" fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) BOOL;
 
 pub extern "kernel32" fn HeapCreate(flOptions: DWORD, dwInitialSize: SIZE_T, dwMaximumSize: SIZE_T) callconv(WINAPI) ?HANDLE;
 pub extern "kernel32" fn HeapDestroy(hHeap: HANDLE) callconv(WINAPI) BOOL;
@@ -411,3 +415,11 @@ pub extern "kernel32" fn SleepConditionVariableSRW(
 pub extern "kernel32" fn TryAcquireSRWLockExclusive(s: *SRWLOCK) callconv(WINAPI) BOOLEAN;
 pub extern "kernel32" fn AcquireSRWLockExclusive(s: *SRWLOCK) callconv(WINAPI) void;
 pub extern "kernel32" fn ReleaseSRWLockExclusive(s: *SRWLOCK) callconv(WINAPI) void;
+
+pub extern "kernel32" fn RegOpenKeyExW(
+    hkey: HKEY,
+    lpSubKey: LPCWSTR,
+    ulOptions: DWORD,
+    samDesired: REGSAM,
+    phkResult: *HKEY,
+) callconv(WINAPI) LSTATUS;
lib/std/os/windows/ntdll.zig
@@ -22,6 +22,8 @@ const RTL_OSVERSIONINFOW = windows.RTL_OSVERSIONINFOW;
 const FILE_BASIC_INFORMATION = windows.FILE_BASIC_INFORMATION;
 const SIZE_T = windows.SIZE_T;
 const CURDIR = windows.CURDIR;
+const PCWSTR = windows.PCWSTR;
+const RTL_QUERY_REGISTRY_TABLE = windows.RTL_QUERY_REGISTRY_TABLE;
 
 pub const THREADINFOCLASS = enum(c_int) {
     ThreadBasicInformation,
@@ -253,3 +255,17 @@ pub extern "ntdll" fn NtUnlockFile(
     Length: *const LARGE_INTEGER,
     Key: ?*ULONG,
 ) callconv(WINAPI) NTSTATUS;
+
+pub extern "ntdll" fn NtOpenKey(
+    KeyHandle: *HANDLE,
+    DesiredAccess: ACCESS_MASK,
+    ObjectAttributes: OBJECT_ATTRIBUTES,
+) callconv(WINAPI) NTSTATUS;
+
+pub extern "ntdll" fn RtlQueryRegistryValues(
+    RelativeTo: ULONG,
+    Path: PCWSTR,
+    QueryTable: [*]RTL_QUERY_REGISTRY_TABLE,
+    Context: ?*anyopaque,
+    Environment: ?*anyopaque,
+) callconv(WINAPI) NTSTATUS;
lib/std/os/windows.zig
@@ -2089,6 +2089,7 @@ pub const LPWSTR = [*:0]WCHAR;
 pub const LPCWSTR = [*:0]const WCHAR;
 pub const PVOID = *anyopaque;
 pub const PWSTR = [*:0]WCHAR;
+pub const PCWSTR = [*:0]const WCHAR;
 pub const SIZE_T = usize;
 pub const UINT = c_uint;
 pub const ULONG_PTR = usize;
@@ -2104,6 +2105,7 @@ pub const USHORT = u16;
 pub const SHORT = i16;
 pub const ULONG = u32;
 pub const LONG = i32;
+pub const ULONG64 = u64;
 pub const ULONGLONG = u64;
 pub const LONGLONG = i64;
 pub const HLOCAL = HANDLE;
@@ -2504,6 +2506,7 @@ pub const STANDARD_RIGHTS_READ = READ_CONTROL;
 pub const STANDARD_RIGHTS_WRITE = READ_CONTROL;
 pub const STANDARD_RIGHTS_EXECUTE = READ_CONTROL;
 pub const STANDARD_RIGHTS_REQUIRED = DELETE | READ_CONTROL | WRITE_DAC | WRITE_OWNER;
+pub const MAXIMUM_ALLOWED = 0x02000000;
 
 // disposition for NtCreateFile
 pub const FILE_SUPERSEDE = 0;
@@ -2872,9 +2875,143 @@ pub const PROV_RSA_FULL = 1;
 
 pub const REGSAM = ACCESS_MASK;
 pub const ACCESS_MASK = DWORD;
-pub const HKEY = *opaque {};
 pub const LSTATUS = LONG;
 
+pub const HKEY = *opaque {};
+
+pub const HKEY_LOCAL_MACHINE: HKEY = @intToPtr(HKEY, 0x80000002);
+
+/// Combines the STANDARD_RIGHTS_REQUIRED, KEY_QUERY_VALUE, KEY_SET_VALUE, KEY_CREATE_SUB_KEY,
+/// KEY_ENUMERATE_SUB_KEYS, KEY_NOTIFY, and KEY_CREATE_LINK access rights.
+pub const KEY_ALL_ACCESS = 0xF003F;
+/// Reserved for system use.
+pub const KEY_CREATE_LINK = 0x0020;
+/// Required to create a subkey of a registry key.
+pub const KEY_CREATE_SUB_KEY = 0x0004;
+/// Required to enumerate the subkeys of a registry key.
+pub const KEY_ENUMERATE_SUB_KEYS = 0x0008;
+/// Equivalent to KEY_READ.
+pub const KEY_EXECUTE = 0x20019;
+/// Required to request change notifications for a registry key or for subkeys of a registry key.
+pub const KEY_NOTIFY = 0x0010;
+/// Required to query the values of a registry key.
+pub const KEY_QUERY_VALUE = 0x0001;
+/// Combines the STANDARD_RIGHTS_READ, KEY_QUERY_VALUE, KEY_ENUMERATE_SUB_KEYS, and KEY_NOTIFY values.
+pub const KEY_READ = 0x20019;
+/// Required to create, delete, or set a registry value.
+pub const KEY_SET_VALUE = 0x0002;
+/// Indicates that an application on 64-bit Windows should operate on the 32-bit registry view.
+/// This flag is ignored by 32-bit Windows.
+pub const KEY_WOW64_32KEY = 0x0200;
+/// Indicates that an application on 64-bit Windows should operate on the 64-bit registry view.
+/// This flag is ignored by 32-bit Windows.
+pub const KEY_WOW64_64KEY = 0x0100;
+/// Combines the STANDARD_RIGHTS_WRITE, KEY_SET_VALUE, and KEY_CREATE_SUB_KEY access rights.
+pub const KEY_WRITE = 0x20006;
+
+/// Open symbolic link.
+pub const REG_OPTION_OPEN_LINK: DWORD = 0x8;
+
+pub const RTL_QUERY_REGISTRY_TABLE = extern struct {
+    QueryRoutine: RTL_QUERY_REGISTRY_ROUTINE,
+    Flags: ULONG,
+    Name: ?PWSTR,
+    EntryContext: ?*anyopaque,
+    DefaultType: ULONG,
+    DefaultData: ?*anyopaque,
+    DefaultLength: ULONG,
+};
+
+pub const RTL_QUERY_REGISTRY_ROUTINE = ?std.meta.FnPtr(fn (
+    PWSTR,
+    ULONG,
+    ?*anyopaque,
+    ULONG,
+    ?*anyopaque,
+    ?*anyopaque,
+) callconv(WINAPI) NTSTATUS);
+
+/// Path is a full path
+pub const RTL_REGISTRY_ABSOLUTE = 0;
+/// \Registry\Machine\System\CurrentControlSet\Services
+pub const RTL_REGISTRY_SERVICES = 1;
+/// \Registry\Machine\System\CurrentControlSet\Control
+pub const RTL_REGISTRY_CONTROL = 2;
+/// \Registry\Machine\Software\Microsoft\Windows NT\CurrentVersion
+pub const RTL_REGISTRY_WINDOWS_NT = 3;
+/// \Registry\Machine\Hardware\DeviceMap
+pub const RTL_REGISTRY_DEVICEMAP = 4;
+/// \Registry\User\CurrentUser
+pub const RTL_REGISTRY_USER = 5;
+pub const RTL_REGISTRY_MAXIMUM = 6;
+
+/// Low order bits are registry handle
+pub const RTL_REGISTRY_HANDLE = 0x40000000;
+/// Indicates the key node is optional
+pub const RTL_REGISTRY_OPTIONAL = 0x80000000;
+
+/// Name is a subkey and remainder of table or until next subkey are value
+/// names for that subkey to look at.
+pub const RTL_QUERY_REGISTRY_SUBKEY = 0x00000001;
+
+/// Reset current key to original key for this and all following table entries.
+pub const RTL_QUERY_REGISTRY_TOPKEY = 0x00000002;
+
+/// Fail if no match found for this table entry.
+pub const RTL_QUERY_REGISTRY_REQUIRED = 0x00000004;
+
+/// Used to mark a table entry that has no value name, just wants a call out, not
+/// an enumeration of all values.
+pub const RTL_QUERY_REGISTRY_NOVALUE = 0x00000008;
+
+/// Used to suppress the expansion of REG_MULTI_SZ into multiple callouts or
+/// to prevent the expansion of environment variable values in REG_EXPAND_SZ.
+pub const RTL_QUERY_REGISTRY_NOEXPAND = 0x00000010;
+
+/// QueryRoutine field ignored.  EntryContext field points to location to store value.
+/// For null terminated strings, EntryContext points to UNICODE_STRING structure that
+/// that describes maximum size of buffer. If .Buffer field is NULL then a buffer is
+/// allocated.
+pub const RTL_QUERY_REGISTRY_DIRECT = 0x00000020;
+
+/// Used to delete value keys after they are queried.
+pub const RTL_QUERY_REGISTRY_DELETE = 0x00000040;
+
+/// Use this flag with the RTL_QUERY_REGISTRY_DIRECT flag to verify that the REG_XXX type
+/// of the stored registry value matches the type expected by the caller.
+/// If the types do not match, the call fails.
+pub const RTL_QUERY_REGISTRY_TYPECHECK = 0x00000100;
+
+pub const REG = struct {
+    /// No value type
+    pub const NONE: ULONG = 0;
+    /// Unicode nul terminated string
+    pub const SZ: ULONG = 1;
+    /// Unicode nul terminated string (with environment variable references)
+    pub const EXPAND_SZ: ULONG = 2;
+    /// Free form binary
+    pub const BINARY: ULONG = 3;
+    /// 32-bit number
+    pub const DWORD: ULONG = 4;
+    /// 32-bit number (same as REG_DWORD)
+    pub const DWORD_LITTLE_ENDIAN: ULONG = 4;
+    /// 32-bit number
+    pub const DWORD_BIG_ENDIAN: ULONG = 5;
+    /// Symbolic Link (unicode)
+    pub const LINK: ULONG = 6;
+    /// Multiple Unicode strings
+    pub const MULTI_SZ: ULONG = 7;
+    /// Resource list in the resource map
+    pub const RESOURCE_LIST: ULONG = 8;
+    /// Resource list in the hardware description
+    pub const FULL_RESOURCE_DESCRIPTOR: ULONG = 9;
+    pub const RESOURCE_REQUIREMENTS_LIST: ULONG = 10;
+    /// 64-bit number
+    pub const QWORD: ULONG = 11;
+    /// 64-bit number (same as REG_QWORD)
+    pub const QWORD_LITTLE_ENDIAN: ULONG = 11;
+};
+
 pub const FILE_NOTIFY_INFORMATION = extern struct {
     NextEntryOffset: DWORD,
     Action: DWORD,
@@ -3715,3 +3852,305 @@ pub const CTRL_LOGOFF_EVENT: DWORD = 5;
 pub const CTRL_SHUTDOWN_EVENT: DWORD = 6;
 
 pub const HANDLER_ROUTINE = std.meta.FnPtr(fn (dwCtrlType: DWORD) callconv(WINAPI) BOOL);
+
+/// Processor feature enumeration.
+pub const PF = enum(DWORD) {
+    /// On a Pentium, a floating-point precision error can occur in rare circumstances.
+    FLOATING_POINT_PRECISION_ERRATA = 0,
+
+    /// Floating-point operations are emulated using software emulator.
+    /// This function returns a nonzero value if floating-point operations are emulated; otherwise, it returns zero.
+    FLOATING_POINT_EMULATED = 1,
+
+    /// The atomic compare and exchange operation (cmpxchg) is available.
+    COMPARE_EXCHANGE_DOUBLE = 2,
+
+    /// The MMX instruction set is available.
+    MMX_INSTRUCTIONS_AVAILABLE = 3,
+
+    PPC_MOVEMEM_64BIT_OK = 4,
+    ALPHA_BYTE_INSTRUCTIONS = 5,
+
+    /// The SSE instruction set is available.
+    XMMI_INSTRUCTIONS_AVAILABLE = 6,
+
+    /// The 3D-Now instruction is available.
+    @"3DNOW_INSTRUCTIONS_AVAILABLE" = 7,
+
+    /// The RDTSC instruction is available.
+    RDTSC_INSTRUCTION_AVAILABLE = 8,
+
+    /// The processor is PAE-enabled.
+    PAE_ENABLED = 9,
+
+    /// The SSE2 instruction set is available.
+    XMMI64_INSTRUCTIONS_AVAILABLE = 10,
+
+    SSE_DAZ_MODE_AVAILABLE = 11,
+
+    /// Data execution prevention is enabled.
+    NX_ENABLED = 12,
+
+    /// The SSE3 instruction set is available.
+    SSE3_INSTRUCTIONS_AVAILABLE = 13,
+
+    /// The atomic compare and exchange 128-bit operation (cmpxchg16b) is available.
+    COMPARE_EXCHANGE128 = 14,
+
+    /// The atomic compare 64 and exchange 128-bit operation (cmp8xchg16) is available.
+    COMPARE64_EXCHANGE128 = 15,
+
+    /// The processor channels are enabled.
+    CHANNELS_ENABLED = 16,
+
+    /// The processor implements the XSAVI and XRSTOR instructions.
+    XSAVE_ENABLED = 17,
+
+    /// The VFP/Neon: 32 x 64bit register bank is present.
+    /// This flag has the same meaning as PF_ARM_VFP_EXTENDED_REGISTERS.
+    ARM_VFP_32_REGISTERS_AVAILABLE = 18,
+
+    /// This ARM processor implements the ARM v8 NEON instruction set.
+    ARM_NEON_INSTRUCTIONS_AVAILABLE = 19,
+
+    /// Second Level Address Translation is supported by the hardware.
+    SECOND_LEVEL_ADDRESS_TRANSLATION = 20,
+
+    /// Virtualization is enabled in the firmware and made available by the operating system.
+    VIRT_FIRMWARE_ENABLED = 21,
+
+    /// RDFSBASE, RDGSBASE, WRFSBASE, and WRGSBASE instructions are available.
+    RDWRFSGBASE_AVAILABLE = 22,
+
+    /// _fastfail() is available.
+    FASTFAIL_AVAILABLE = 23,
+
+    /// The divide instruction_available.
+    ARM_DIVIDE_INSTRUCTION_AVAILABLE = 24,
+
+    /// The 64-bit load/store atomic instructions are available.
+    ARM_64BIT_LOADSTORE_ATOMIC = 25,
+
+    /// The external cache is available.
+    ARM_EXTERNAL_CACHE_AVAILABLE = 26,
+
+    /// The floating-point multiply-accumulate instruction is available.
+    ARM_FMAC_INSTRUCTIONS_AVAILABLE = 27,
+
+    RDRAND_INSTRUCTION_AVAILABLE = 28,
+
+    /// This ARM processor implements the ARM v8 instructions set.
+    ARM_V8_INSTRUCTIONS_AVAILABLE = 29,
+
+    /// This ARM processor implements the ARM v8 extra cryptographic instructions (i.e., AES, SHA1 and SHA2).
+    ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE = 30,
+
+    /// This ARM processor implements the ARM v8 extra CRC32 instructions.
+    ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE = 31,
+
+    RDTSCP_INSTRUCTION_AVAILABLE = 32,
+    RDPID_INSTRUCTION_AVAILABLE = 33,
+
+    /// This ARM processor implements the ARM v8.1 atomic instructions (e.g., CAS, SWP).
+    ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE = 34,
+
+    MONITORX_INSTRUCTION_AVAILABLE = 35,
+
+    /// The SSSE3 instruction set is available.
+    SSSE3_INSTRUCTIONS_AVAILABLE = 36,
+
+    /// The SSE4_1 instruction set is available.
+    SSE4_1_INSTRUCTIONS_AVAILABLE = 37,
+
+    /// The SSE4_2 instruction set is available.
+    SSE4_2_INSTRUCTIONS_AVAILABLE = 38,
+
+    /// The AVX instruction set is available.
+    AVX_INSTRUCTIONS_AVAILABLE = 39,
+
+    /// The AVX2 instruction set is available.
+    AVX2_INSTRUCTIONS_AVAILABLE = 40,
+
+    /// The AVX512F instruction set is available.
+    AVX512F_INSTRUCTIONS_AVAILABLE = 41,
+
+    ERMS_AVAILABLE = 42,
+
+    /// This ARM processor implements the ARM v8.2 Dot Product (DP) instructions.
+    ARM_V82_DP_INSTRUCTIONS_AVAILABLE = 43,
+
+    /// This ARM processor implements the ARM v8.3 JavaScript conversion (JSCVT) instructions.
+    ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE = 44,
+};
+
+pub const MAX_WOW64_SHARED_ENTRIES = 16;
+pub const PROCESSOR_FEATURE_MAX = 64;
+pub const MAXIMUM_XSTATE_FEATURES = 64;
+
+pub const KSYSTEM_TIME = extern struct {
+    LowPart: ULONG,
+    High1Time: LONG,
+    High2Time: LONG,
+};
+
+pub const NT_PRODUCT_TYPE = enum(INT) {
+    NtProductWinNt = 1,
+    NtProductLanManNt,
+    NtProductServer,
+};
+
+pub const ALTERNATIVE_ARCHITECTURE_TYPE = enum(INT) {
+    StandardDesign,
+    NEC98x86,
+    EndAlternatives,
+};
+
+pub const XSTATE_FEATURE = extern struct {
+    Offset: ULONG,
+    Size: ULONG,
+};
+
+pub const XSTATE_CONFIGURATION = extern struct {
+    EnabledFeatures: ULONG64,
+    Size: ULONG,
+    OptimizedSave: ULONG,
+    Features: [MAXIMUM_XSTATE_FEATURES]XSTATE_FEATURE,
+};
+
+/// Shared Kernel User Data
+pub const KUSER_SHARED_DATA = extern struct {
+    TickCountLowDeprecated: ULONG,
+    TickCountMultiplier: ULONG,
+    InterruptTime: KSYSTEM_TIME,
+    SystemTime: KSYSTEM_TIME,
+    TimeZoneBias: KSYSTEM_TIME,
+    ImageNumberLow: USHORT,
+    ImageNumberHigh: USHORT,
+    NtSystemRoot: [260]WCHAR,
+    MaxStackTraceDepth: ULONG,
+    CryptoExponent: ULONG,
+    TimeZoneId: ULONG,
+    LargePageMinimum: ULONG,
+    AitSamplingValue: ULONG,
+    AppCompatFlag: ULONG,
+    RNGSeedVersion: ULONGLONG,
+    GlobalValidationRunlevel: ULONG,
+    TimeZoneBiasStamp: LONG,
+    NtBuildNumber: ULONG,
+    NtProductType: NT_PRODUCT_TYPE,
+    ProductTypeIsValid: BOOLEAN,
+    Reserved0: [1]BOOLEAN,
+    NativeProcessorArchitecture: USHORT,
+    NtMajorVersion: ULONG,
+    NtMinorVersion: ULONG,
+    ProcessorFeatures: [PROCESSOR_FEATURE_MAX]BOOLEAN,
+    Reserved1: ULONG,
+    Reserved3: ULONG,
+    TimeSlip: ULONG,
+    AlternativeArchitecture: ALTERNATIVE_ARCHITECTURE_TYPE,
+    BootId: ULONG,
+    SystemExpirationDate: LARGE_INTEGER,
+    SuiteMaskY: ULONG,
+    KdDebuggerEnabled: BOOLEAN,
+    DummyUnion1: extern union {
+        MitigationPolicies: UCHAR,
+        Alt: packed struct {
+            NXSupportPolicy: u2,
+            SEHValidationPolicy: u2,
+            CurDirDevicesSkippedForDlls: u2,
+            Reserved: u2,
+        },
+    },
+    CyclesPerYield: USHORT,
+    ActiveConsoleId: ULONG,
+    DismountCount: ULONG,
+    ComPlusPackage: ULONG,
+    LastSystemRITEventTickCount: ULONG,
+    NumberOfPhysicalPages: ULONG,
+    SafeBootMode: BOOLEAN,
+    DummyUnion2: extern union {
+        VirtualizationFlags: UCHAR,
+        Alt: packed struct {
+            ArchStartedInEl2: u1,
+            QcSlIsSupported: u1,
+            SpareBits: u6,
+        },
+    },
+    Reserved12: [2]UCHAR,
+    DummyUnion3: extern union {
+        SharedDataFlags: ULONG,
+        Alt: packed struct {
+            DbgErrorPortPresent: u1,
+            DbgElevationEnabled: u1,
+            DbgVirtEnabled: u1,
+            DbgInstallerDetectEnabled: u1,
+            DbgLkgEnabled: u1,
+            DbgDynProcessorEnabled: u1,
+            DbgConsoleBrokerEnabled: u1,
+            DbgSecureBootEnabled: u1,
+            DbgMultiSessionSku: u1,
+            DbgMultiUsersInSessionSku: u1,
+            DbgStateSeparationEnabled: u1,
+            SpareBits: u21,
+        },
+    },
+    DataFlagsPad: [1]ULONG,
+    TestRetInstruction: ULONGLONG,
+    QpcFrequency: LONGLONG,
+    SystemCall: ULONG,
+    Reserved2: ULONG,
+    SystemCallPad: [2]ULONGLONG,
+    DummyUnion4: extern union {
+        TickCount: KSYSTEM_TIME,
+        TickCountQuad: ULONG64,
+        Alt: extern struct {
+            ReservedTickCountOverlay: [3]ULONG,
+            TickCountPad: [1]ULONG,
+        },
+    },
+    Cookie: ULONG,
+    CookiePad: [1]ULONG,
+    ConsoleSessionForegroundProcessId: LONGLONG,
+    TimeUpdateLock: ULONGLONG,
+    BaselineSystemTimeQpc: ULONGLONG,
+    BaselineInterruptTimeQpc: ULONGLONG,
+    QpcSystemTimeIncrement: ULONGLONG,
+    QpcInterruptTimeIncrement: ULONGLONG,
+    QpcSystemTimeIncrementShift: UCHAR,
+    QpcInterruptTimeIncrementShift: UCHAR,
+    UnparkedProcessorCount: USHORT,
+    EnclaveFeatureMask: [4]ULONG,
+    TelemetryCoverageRound: ULONG,
+    UserModeGlobalLogger: [16]USHORT,
+    ImageFileExecutionOptions: ULONG,
+    LangGenerationCount: ULONG,
+    Reserved4: ULONGLONG,
+    InterruptTimeBias: ULONGLONG,
+    QpcBias: ULONGLONG,
+    ActiveProcessorCount: ULONG,
+    ActiveGroupCount: UCHAR,
+    Reserved9: UCHAR,
+    DummyUnion5: extern union {
+        QpcData: USHORT,
+        Alt: extern struct {
+            QpcBypassEnabled: UCHAR,
+            QpcShift: UCHAR,
+        },
+    },
+    TimeZoneBiasEffectiveStart: LARGE_INTEGER,
+    TimeZoneBiasEffectiveEnd: LARGE_INTEGER,
+    XState: XSTATE_CONFIGURATION,
+    FeatureConfigurationChangeStamp: KSYSTEM_TIME,
+    Spare: ULONG,
+    UserPointerAuthMask: ULONG64,
+};
+
+/// Read-only user-mode address for the shared data.
+/// https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/ntexapi_x/kuser_shared_data/index.htm
+/// https://msrc-blog.microsoft.com/2022/04/05/randomizing-the-kuser_shared_data-structure-on-windows/
+pub const SharedUserData: *const KUSER_SHARED_DATA = @intToPtr(*const KUSER_SHARED_DATA, 0x7FFE0000);
+
+pub fn IsProcessorFeaturePresent(feature: PF) bool {
+    if (@enumToInt(feature) >= PROCESSOR_FEATURE_MAX) return false;
+    return SharedUserData.ProcessorFeatures[@enumToInt(feature)] == 1;
+}
lib/std/zig/system/arm.zig
@@ -0,0 +1,134 @@
+const std = @import("std");
+
+pub const CoreInfo = struct {
+    architecture: u8 = 0,
+    implementer: u8 = 0,
+    variant: u8 = 0,
+    part: u16 = 0,
+};
+
+pub const cpu_models = struct {
+    // Shorthands to simplify the tables below.
+    const A32 = std.Target.arm.cpu;
+    const A64 = std.Target.aarch64.cpu;
+
+    const E = struct {
+        part: u16,
+        variant: ?u8 = null, // null if matches any variant
+        m32: ?*const std.Target.Cpu.Model = null,
+        m64: ?*const std.Target.Cpu.Model = null,
+    };
+
+    // implementer = 0x41
+    const ARM = [_]E{
+        E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null },
+        E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null },
+        E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null },
+        E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null },
+        E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null },
+        E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null },
+        E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null },
+        E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null },
+        E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null },
+        E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null },
+        E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null },
+        E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null },
+        E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null },
+        E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null },
+        E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null },
+        E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null },
+        E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null },
+        E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null },
+        E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null },
+        E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null },
+        E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null },
+        E{ .part = 0xc60, .m32 = &A32.cortex_m0plus, .m64 = null },
+        E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null },
+        E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 },
+        E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 },
+        E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 },
+        E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 },
+        E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 },
+        E{ .part = 0xd09, .m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 },
+        E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 },
+        E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 },
+        E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 },
+        E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 },
+        E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null },
+        E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null },
+        E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null },
+        E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 },
+        E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c },
+        // This is a guess based on https://www.notebookcheck.net/Qualcomm-Snapdragon-8cx-Gen-3-Processor-Benchmarks-and-Specs.652916.0.html
+        E{ .part = 0xd4c, .m32 = &A32.cortex_x1c, .m64 = &A64.cortex_x1c },
+        E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 },
+        E{ .part = 0xd02, .m64 = &A64.cortex_a34 },
+        E{ .part = 0xd06, .m64 = &A64.cortex_a65 },
+        E{ .part = 0xd43, .m64 = &A64.cortex_a65ae },
+    };
+    // implementer = 0x42
+    const Broadcom = [_]E{
+        E{ .part = 0x516, .m64 = &A64.thunderx2t99 },
+    };
+    // implementer = 0x43
+    const Cavium = [_]E{
+        E{ .part = 0x0a0, .m64 = &A64.thunderx },
+        E{ .part = 0x0a2, .m64 = &A64.thunderxt81 },
+        E{ .part = 0x0a3, .m64 = &A64.thunderxt83 },
+        E{ .part = 0x0a1, .m64 = &A64.thunderxt88 },
+        E{ .part = 0x0af, .m64 = &A64.thunderx2t99 },
+    };
+    // implementer = 0x46
+    const Fujitsu = [_]E{
+        E{ .part = 0x001, .m64 = &A64.a64fx },
+    };
+    // implementer = 0x48
+    const HiSilicon = [_]E{
+        E{ .part = 0xd01, .m64 = &A64.tsv110 },
+    };
+    // implementer = 0x4e
+    const Nvidia = [_]E{
+        E{ .part = 0x004, .m64 = &A64.carmel },
+    };
+    // implementer = 0x50
+    const Ampere = [_]E{
+        E{ .part = 0x000, .variant = 3, .m64 = &A64.emag },
+        E{ .part = 0x000, .m64 = &A64.xgene1 },
+    };
+    // implementer = 0x51
+    const Qualcomm = [_]E{
+        E{ .part = 0x06f, .m32 = &A32.krait },
+        E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo },
+        E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo },
+        E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A64.kryo },
+        E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 },
+        E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 },
+        E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 },
+        E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 },
+        E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 },
+        E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 },
+        E{ .part = 0xc00, .m64 = &A64.falkor },
+        E{ .part = 0xc01, .m64 = &A64.saphira },
+    };
+
+    pub fn isKnown(core: CoreInfo, is_64bit: bool) ?*const std.Target.Cpu.Model {
+        const models = switch (core.implementer) {
+            0x41 => &ARM,
+            0x42 => &Broadcom,
+            0x43 => &Cavium,
+            0x46 => &Fujitsu,
+            0x48 => &HiSilicon,
+            0x50 => &Ampere,
+            0x51 => &Qualcomm,
+            else => return null,
+        };
+
+        for (models) |model| {
+            if (model.part == core.part and
+                (model.variant == null or model.variant.? == core.variant))
+                return if (is_64bit) model.m64 else model.m32;
+        }
+
+        return null;
+    }
+};
lib/std/zig/system/linux.zig
@@ -159,129 +159,7 @@ const ArmCpuinfoImpl = struct {
         is_really_v6: bool = false,
     };
 
-    const cpu_models = struct {
-        // Shorthands to simplify the tables below.
-        const A32 = Target.arm.cpu;
-        const A64 = Target.aarch64.cpu;
-
-        const E = struct {
-            part: u16,
-            variant: ?u8 = null, // null if matches any variant
-            m32: ?*const Target.Cpu.Model = null,
-            m64: ?*const Target.Cpu.Model = null,
-        };
-
-        // implementer = 0x41
-        const ARM = [_]E{
-            E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null },
-            E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null },
-            E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null },
-            E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null },
-            E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null },
-            E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null },
-            E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null },
-            E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null },
-            E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null },
-            E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null },
-            E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null },
-            E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null },
-            E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null },
-            E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null },
-            E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null },
-            E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null },
-            E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null },
-            E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null },
-            E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null },
-            E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null },
-            E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null },
-            E{ .part = 0xc60, .m32 = &A32.cortex_m0plus, .m64 = null },
-            E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null },
-            E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 },
-            E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 },
-            E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 },
-            E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 },
-            E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 },
-            E{ .part = 0xd09, .m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 },
-            E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 },
-            E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 },
-            E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 },
-            E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 },
-            E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null },
-            E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null },
-            E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null },
-            E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 },
-            E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c },
-            E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 },
-            E{ .part = 0xd02, .m64 = &A64.cortex_a34 },
-            E{ .part = 0xd06, .m64 = &A64.cortex_a65 },
-            E{ .part = 0xd43, .m64 = &A64.cortex_a65ae },
-        };
-        // implementer = 0x42
-        const Broadcom = [_]E{
-            E{ .part = 0x516, .m64 = &A64.thunderx2t99 },
-        };
-        // implementer = 0x43
-        const Cavium = [_]E{
-            E{ .part = 0x0a0, .m64 = &A64.thunderx },
-            E{ .part = 0x0a2, .m64 = &A64.thunderxt81 },
-            E{ .part = 0x0a3, .m64 = &A64.thunderxt83 },
-            E{ .part = 0x0a1, .m64 = &A64.thunderxt88 },
-            E{ .part = 0x0af, .m64 = &A64.thunderx2t99 },
-        };
-        // implementer = 0x46
-        const Fujitsu = [_]E{
-            E{ .part = 0x001, .m64 = &A64.a64fx },
-        };
-        // implementer = 0x48
-        const HiSilicon = [_]E{
-            E{ .part = 0xd01, .m64 = &A64.tsv110 },
-        };
-        // implementer = 0x4e
-        const Nvidia = [_]E{
-            E{ .part = 0x004, .m64 = &A64.carmel },
-        };
-        // implementer = 0x50
-        const Ampere = [_]E{
-            E{ .part = 0x000, .variant = 3, .m64 = &A64.emag },
-            E{ .part = 0x000, .m64 = &A64.xgene1 },
-        };
-        // implementer = 0x51
-        const Qualcomm = [_]E{
-            E{ .part = 0x06f, .m32 = &A32.krait },
-            E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo },
-            E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo },
-            E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A64.kryo },
-            E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 },
-            E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 },
-            E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 },
-            E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 },
-            E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 },
-            E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 },
-            E{ .part = 0xc00, .m64 = &A64.falkor },
-            E{ .part = 0xc01, .m64 = &A64.saphira },
-        };
-
-        fn isKnown(core: CoreInfo, is_64bit: bool) ?*const Target.Cpu.Model {
-            const models = switch (core.implementer) {
-                0x41 => &ARM,
-                0x42 => &Broadcom,
-                0x43 => &Cavium,
-                0x46 => &Fujitsu,
-                0x48 => &HiSilicon,
-                0x50 => &Ampere,
-                0x51 => &Qualcomm,
-                else => return null,
-            };
-
-            for (models) |model| {
-                if (model.part == core.part and
-                    (model.variant == null or model.variant.? == core.variant))
-                    return if (is_64bit) model.m64 else model.m32;
-            }
-
-            return null;
-        }
-    };
+    const cpu_models = @import("arm.zig").cpu_models;
 
     fn addOne(self: *ArmCpuinfoImpl) void {
         if (self.have_fields == 4 and self.core_no < self.cores.len) {
@@ -346,7 +224,12 @@ const ArmCpuinfoImpl = struct {
 
         var known_models: [self.cores.len]?*const Target.Cpu.Model = undefined;
         for (self.cores[0..self.core_no]) |core, i| {
-            known_models[i] = cpu_models.isKnown(core, is_64bit);
+            known_models[i] = cpu_models.isKnown(.{
+                .architecture = core.architecture,
+                .implementer = core.implementer,
+                .variant = core.variant,
+                .part = core.part,
+            }, is_64bit);
         }
 
         // XXX We pick the first core on big.LITTLE systems, hopefully the
lib/std/zig/system/NativeTargetInfo.zig
@@ -978,6 +978,7 @@ fn detectNativeCpuAndFeatures(cpu_arch: Target.Cpu.Arch, os: Target.Os, cross_ta
     switch (builtin.os.tag) {
         .linux => return linux.detectNativeCpuAndFeatures(),
         .macos => return darwin.macos.detectNativeCpuAndFeatures(),
+        .windows => return windows.detectNativeCpuAndFeatures(),
         else => {},
     }
 
lib/std/zig/system/windows.zig
@@ -1,6 +1,12 @@
 const std = @import("std");
+const builtin = @import("builtin");
+const mem = std.mem;
+const Target = std.Target;
 
 pub const WindowsVersion = std.Target.Os.WindowsVersion;
+pub const PF = std.os.windows.PF;
+pub const REG = std.os.windows.REG;
+pub const IsProcessorFeaturePresent = std.os.windows.IsProcessorFeaturePresent;
 
 /// Returns the highest known WindowsVersion deduced from reported runtime information.
 /// Discards information about in-between versions we don't differentiate.
@@ -38,3 +44,318 @@ pub fn detectRuntimeVersion() WindowsVersion {
 
     return @intToEnum(WindowsVersion, version);
 }
+
+// Technically, a registry value can be as long as 1MB. However, MS recommends storing
+// values larger than 2048 bytes in a file rather than directly in the registry, and since we
+// are only accessing a system hive \Registry\Machine, we stick to MS guidelines.
+// https://learn.microsoft.com/en-us/windows/win32/sysinfo/registry-element-size-limits
+const max_value_len = 2048;
+
+const RegistryPair = struct {
+    key: []const u8,
+    value: std.os.windows.ULONG,
+};
+
+fn getCpuInfoFromRegistry(
+    core: usize,
+    comptime pairs_num: comptime_int,
+    comptime pairs: [pairs_num]RegistryPair,
+    out_buf: *[pairs_num][max_value_len]u8,
+) !void {
+    // Originally, I wanted to issue a single call with a more complex table structure such that we
+    // would sequentially visit each CPU#d subkey in the registry and pull the value of interest into
+    // a buffer, however, NT seems to be expecting a single buffer per each table meaning we would
+    // end up pulling only the last CPU core info, overwriting everything else.
+    // If anyone can come up with a solution to this, please do!
+    const table_size = 1 + pairs.len;
+    var table: [table_size + 1]std.os.windows.RTL_QUERY_REGISTRY_TABLE = undefined;
+
+    const topkey = std.unicode.utf8ToUtf16LeStringLiteral("\\Registry\\Machine\\HARDWARE\\DESCRIPTION\\System\\CentralProcessor");
+
+    const max_cpu_buf = 4;
+    var next_cpu_buf: [max_cpu_buf]u8 = undefined;
+    const next_cpu = try std.fmt.bufPrint(&next_cpu_buf, "{d}", .{core});
+
+    var subkey: [max_cpu_buf + 1]u16 = undefined;
+    const subkey_len = try std.unicode.utf8ToUtf16Le(&subkey, next_cpu);
+    subkey[subkey_len] = 0;
+
+    table[0] = .{
+        .QueryRoutine = null,
+        .Flags = std.os.windows.RTL_QUERY_REGISTRY_SUBKEY | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED,
+        .Name = subkey[0..subkey_len :0],
+        .EntryContext = null,
+        .DefaultType = REG.NONE,
+        .DefaultData = null,
+        .DefaultLength = 0,
+    };
+
+    inline for (pairs) |pair, i| {
+        const ctx: *anyopaque = blk: {
+            switch (pair.value) {
+                REG.SZ,
+                REG.EXPAND_SZ,
+                REG.MULTI_SZ,
+                => {
+                    var buf: [max_value_len / 2]u16 = undefined;
+                    var unicode = std.os.windows.UNICODE_STRING{
+                        .Length = 0,
+                        .MaximumLength = max_value_len,
+                        .Buffer = &buf,
+                    };
+                    break :blk &unicode;
+                },
+
+                REG.DWORD,
+                REG.DWORD_BIG_ENDIAN,
+                => {
+                    var buf: [4]u8 = undefined;
+                    break :blk &buf;
+                },
+
+                REG.QWORD => {
+                    var buf: [8]u8 = undefined;
+                    break :blk &buf;
+                },
+
+                else => unreachable,
+            }
+        };
+        const key_namee = std.unicode.utf8ToUtf16LeStringLiteral(pair.key);
+
+        table[i + 1] = .{
+            .QueryRoutine = null,
+            .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED,
+            .Name = @intToPtr([*:0]u16, @ptrToInt(key_namee)),
+            .EntryContext = ctx,
+            .DefaultType = REG.NONE,
+            .DefaultData = null,
+            .DefaultLength = 0,
+        };
+    }
+
+    // Table sentinel
+    table[table_size] = .{
+        .QueryRoutine = null,
+        .Flags = 0,
+        .Name = null,
+        .EntryContext = null,
+        .DefaultType = 0,
+        .DefaultData = null,
+        .DefaultLength = 0,
+    };
+
+    const res = std.os.windows.ntdll.RtlQueryRegistryValues(
+        std.os.windows.RTL_REGISTRY_ABSOLUTE,
+        topkey,
+        &table,
+        null,
+        null,
+    );
+    switch (res) {
+        .SUCCESS => {
+            inline for (pairs) |pair, i| switch (pair.value) {
+                REG.NONE => unreachable,
+
+                REG.SZ,
+                REG.EXPAND_SZ,
+                REG.MULTI_SZ,
+                => {
+                    const entry = @ptrCast(*align(1) const std.os.windows.UNICODE_STRING, table[i + 1].EntryContext);
+                    const len = try std.unicode.utf16leToUtf8(out_buf[i][0..], entry.Buffer[0 .. entry.Length / 2]);
+                    out_buf[i][len] = 0;
+                },
+
+                REG.DWORD,
+                REG.DWORD_BIG_ENDIAN,
+                REG.QWORD,
+                => {
+                    const entry = @ptrCast([*]align(1) const u8, table[i + 1].EntryContext);
+                    switch (pair.value) {
+                        REG.DWORD, REG.DWORD_BIG_ENDIAN => {
+                            mem.copy(u8, out_buf[i][0..4], entry[0..4]);
+                        },
+                        REG.QWORD => {
+                            mem.copy(u8, out_buf[i][0..8], entry[0..8]);
+                        },
+                        else => unreachable,
+                    }
+                },
+
+                else => unreachable,
+            };
+        },
+        else => return error.Unexpected,
+    }
+}
+
+fn getCpuCount() usize {
+    return std.os.windows.peb().NumberOfProcessors;
+}
+
+const ArmCpuInfoImpl = struct {
+    cores: [4]CoreInfo = undefined,
+    core_no: usize = 0,
+    have_fields: usize = 0,
+
+    const CoreInfo = @import("arm.zig").CoreInfo;
+    const cpu_models = @import("arm.zig").cpu_models;
+
+    const Data = struct {
+        cp_4000: []const u8,
+        identifier: []const u8,
+    };
+
+    fn parseDataHook(self: *ArmCpuInfoImpl, data: Data) !void {
+        const info = &self.cores[self.core_no];
+        info.* = .{};
+
+        // CPU part
+        info.part = mem.readIntLittle(u16, data.cp_4000[0..2]) >> 4;
+        self.have_fields += 1;
+
+        // CPU implementer
+        info.implementer = data.cp_4000[3];
+        self.have_fields += 1;
+
+        var tokens = mem.tokenize(u8, data.identifier, " ");
+        while (tokens.next()) |token| {
+            if (mem.eql(u8, "Family", token)) {
+                // CPU architecture
+                const family = tokens.next() orelse continue;
+                info.architecture = try std.fmt.parseInt(u8, family, 10);
+                self.have_fields += 1;
+                break;
+            }
+        } else return;
+
+        self.addOne();
+    }
+
+    fn addOne(self: *ArmCpuInfoImpl) void {
+        if (self.have_fields == 3 and self.core_no < self.cores.len) {
+            if (self.core_no > 0) {
+                // Deduplicate the core info.
+                for (self.cores[0..self.core_no]) |it| {
+                    if (std.meta.eql(it, self.cores[self.core_no]))
+                        return;
+                }
+            }
+            self.core_no += 1;
+        }
+    }
+
+    fn finalize(self: ArmCpuInfoImpl, arch: Target.Cpu.Arch) ?Target.Cpu {
+        if (self.core_no == 0) return null;
+
+        const is_64bit = switch (arch) {
+            .aarch64, .aarch64_be, .aarch64_32 => true,
+            else => false,
+        };
+
+        var known_models: [self.cores.len]?*const Target.Cpu.Model = undefined;
+        for (self.cores[0..self.core_no]) |core, i| {
+            known_models[i] = cpu_models.isKnown(core, is_64bit);
+        }
+
+        // XXX We pick the first core on big.LITTLE systems, hopefully the
+        // LITTLE one.
+        const model = known_models[0] orelse return null;
+        return Target.Cpu{
+            .arch = arch,
+            .model = model,
+            .features = model.features,
+        };
+    }
+};
+
+const ArmCpuInfoParser = CpuInfoParser(ArmCpuInfoImpl);
+
+fn CpuInfoParser(comptime impl: anytype) type {
+    return struct {
+        fn parse(arch: Target.Cpu.Arch) !?Target.Cpu {
+            var obj: impl = .{};
+            var out_buf: [2][max_value_len]u8 = undefined;
+
+            var i: usize = 0;
+            while (i < getCpuCount()) : (i += 1) {
+                try getCpuInfoFromRegistry(i, 2, .{
+                    .{ .key = "CP 4000", .value = REG.QWORD },
+                    .{ .key = "Identifier", .value = REG.SZ },
+                }, &out_buf);
+
+                const cp_4000 = out_buf[0][0..8];
+                const identifier = mem.sliceTo(out_buf[1][0..], 0);
+
+                try obj.parseDataHook(.{
+                    .cp_4000 = cp_4000,
+                    .identifier = identifier,
+                });
+            }
+
+            return obj.finalize(arch);
+        }
+    };
+}
+
+fn genericCpu(comptime arch: Target.Cpu.Arch) Target.Cpu {
+    return .{
+        .arch = arch,
+        .model = Target.Cpu.Model.generic(arch),
+        .features = Target.Cpu.Feature.Set.empty,
+    };
+}
+
+pub fn detectNativeCpuAndFeatures() ?Target.Cpu {
+    const current_arch = builtin.cpu.arch;
+    switch (current_arch) {
+        .aarch64, .aarch64_be, .aarch64_32 => {
+            var cpu = cpu: {
+                var maybe_cpu = ArmCpuInfoParser.parse(current_arch) catch break :cpu genericCpu(current_arch);
+                break :cpu maybe_cpu orelse genericCpu(current_arch);
+            };
+
+            const Feature = Target.aarch64.Feature;
+
+            // Override any features that are either present or absent
+            if (IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE)) {
+                cpu.features.addFeature(@enumToInt(Feature.neon));
+            } else {
+                cpu.features.removeFeature(@enumToInt(Feature.neon));
+            }
+
+            if (IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) {
+                cpu.features.addFeature(@enumToInt(Feature.crc));
+            } else {
+                cpu.features.removeFeature(@enumToInt(Feature.crc));
+            }
+
+            if (IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
+                cpu.features.addFeature(@enumToInt(Feature.crypto));
+            } else {
+                cpu.features.removeFeature(@enumToInt(Feature.crypto));
+            }
+
+            if (IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) {
+                cpu.features.addFeature(@enumToInt(Feature.lse));
+            } else {
+                cpu.features.removeFeature(@enumToInt(Feature.lse));
+            }
+
+            if (IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) {
+                cpu.features.addFeature(@enumToInt(Feature.dotprod));
+            } else {
+                cpu.features.removeFeature(@enumToInt(Feature.dotprod));
+            }
+
+            if (IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) {
+                cpu.features.addFeature(@enumToInt(Feature.jsconv));
+            } else {
+                cpu.features.removeFeature(@enumToInt(Feature.jsconv));
+            }
+
+            return cpu;
+        },
+        else => {},
+    }
+}