Commit b55377a5ab

Jacob Young <jacobly0@users.noreply.github.com>
2023-10-24 18:08:26
x86_64: pass more tests
* 128-bit integer multiplication with overflow * more instruction encodings used by std inline asm * implement the `try_ptr` air instruction * follow correct stack frame abi * enable full panic handler * enable stack traces
1 parent 030da45
lib/compiler_rt/int.zig
@@ -42,8 +42,6 @@ pub fn __divmodti4(a: i128, b: i128, rem: *i128) callconv(.C) i128 {
 }
 
 test "test_divmodti4" {
-    if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const cases = [_][4]i128{
         [_]i128{ 0, 1, 0, 0 },
         [_]i128{ 0, -1, 0, 0 },
lib/std/compress/lzma/test.zig
@@ -23,8 +23,6 @@ fn testDecompressError(expected: anyerror, compressed: []const u8) !void {
 }
 
 test "LZMA: decompress empty world" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     try testDecompressEqual(
         "",
         &[_]u8{
@@ -88,8 +86,6 @@ test "LZMA: known size with end of payload marker" {
 }
 
 test "LZMA: too big uncompressed size in header" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     try testDecompressError(
         error.CorruptInput,
         @embedFile("testdata/bad-too_big_size-with_eopm.lzma"),
@@ -97,8 +93,6 @@ test "LZMA: too big uncompressed size in header" {
 }
 
 test "LZMA: too small uncompressed size in header" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     try testDecompressError(
         error.CorruptInput,
         @embedFile("testdata/bad-too_small_size-without_eopm-3.lzma"),
lib/std/crypto/25519/curve25519.zig
@@ -1,5 +1,4 @@
 const std = @import("std");
-const builtin = @import("builtin");
 const crypto = std.crypto;
 
 const IdentityElementError = crypto.errors.IdentityElementError;
@@ -112,8 +111,6 @@ pub const Curve25519 = struct {
 };
 
 test "curve25519" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var s = [32]u8{ 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 };
     const p = try Curve25519.basePoint.clampedMul(s);
     try p.rejectIdentity();
@@ -128,8 +125,6 @@ test "curve25519" {
 }
 
 test "curve25519 small order check" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var s: [32]u8 = [_]u8{1} ++ [_]u8{0} ** 31;
     const small_order_ss: [7][32]u8 = .{
         .{
lib/std/crypto/25519/ed25519.zig
@@ -484,8 +484,6 @@ pub const Ed25519 = struct {
 };
 
 test "ed25519 key pair creation" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var seed: [32]u8 = undefined;
     _ = try fmt.hexToBytes(seed[0..], "8052030376d47112be7f73ed7a019293dd12ad910b654455798b4667d73de166");
     const key_pair = try Ed25519.KeyPair.create(seed);
lib/std/crypto/25519/edwards25519.zig
@@ -1,5 +1,4 @@
 const std = @import("std");
-const builtin = @import("builtin");
 const crypto = std.crypto;
 const debug = std.debug;
 const fmt = std.fmt;
@@ -495,8 +494,6 @@ pub const Edwards25519 = struct {
 const htest = @import("../test.zig");
 
 test "edwards25519 packing/unpacking" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const s = [_]u8{170} ++ [_]u8{0} ** 31;
     var b = Edwards25519.basePoint;
     const pk = try b.mul(s);
@@ -533,8 +530,6 @@ test "edwards25519 packing/unpacking" {
 }
 
 test "edwards25519 point addition/subtraction" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var s1: [32]u8 = undefined;
     var s2: [32]u8 = undefined;
     crypto.random.bytes(&s1);
@@ -549,8 +544,6 @@ test "edwards25519 point addition/subtraction" {
 }
 
 test "edwards25519 uniform-to-point" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var r = [32]u8{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
     var p = Edwards25519.fromUniform(r);
     try htest.assertEqual("0691eee3cf70a0056df6bfa03120635636581b5c4ea571dfc680f78c7e0b4137", p.toBytes()[0..]);
@@ -562,8 +555,6 @@ test "edwards25519 uniform-to-point" {
 
 // Test vectors from draft-irtf-cfrg-hash-to-curve-12
 test "edwards25519 hash-to-curve operation" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var p = Edwards25519.fromString(true, "QUUX-V01-CS02-with-edwards25519_XMD:SHA-512_ELL2_RO_", "abc");
     try htest.assertEqual("31558a26887f23fb8218f143e69d5f0af2e7831130bd5b432ef23883b895839a", p.toBytes()[0..]);
 
@@ -572,8 +563,6 @@ test "edwards25519 hash-to-curve operation" {
 }
 
 test "edwards25519 implicit reduction of invalid scalars" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const s = [_]u8{0} ** 31 ++ [_]u8{255};
     const p1 = try Edwards25519.basePoint.mulPublic(s);
     const p2 = try Edwards25519.basePoint.mul(s);
lib/std/crypto/25519/ristretto255.zig
@@ -168,8 +168,6 @@ pub const Ristretto255 = struct {
 };
 
 test "ristretto255" {
-    if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const p = Ristretto255.basePoint;
     var buf: [256]u8 = undefined;
     try std.testing.expectEqualStrings(try std.fmt.bufPrint(&buf, "{s}", .{std.fmt.fmtSliceHexUpper(&p.toBytes())}), "E2F2AE0A6ABC4E71A884A961C500515F58E30B6AA582DD8DB6A65945E08D2D76");
lib/std/crypto/25519/x25519.zig
@@ -1,5 +1,4 @@
 const std = @import("std");
-const builtin = @import("builtin");
 const crypto = std.crypto;
 const mem = std.mem;
 const fmt = std.fmt;
@@ -83,8 +82,6 @@ pub const X25519 = struct {
 const htest = @import("../test.zig");
 
 test "x25519 public key calculation from secret key" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var sk: [32]u8 = undefined;
     var pk_expected: [32]u8 = undefined;
     _ = try fmt.hexToBytes(sk[0..], "8052030376d47112be7f73ed7a019293dd12ad910b654455798b4667d73de166");
@@ -94,8 +91,6 @@ test "x25519 public key calculation from secret key" {
 }
 
 test "x25519 rfc7748 vector1" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const secret_key = [32]u8{ 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 };
     const public_key = [32]u8{ 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c };
 
@@ -106,8 +101,6 @@ test "x25519 rfc7748 vector1" {
 }
 
 test "x25519 rfc7748 vector2" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const secret_key = [32]u8{ 0x4b, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c, 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5, 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4, 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x0d };
     const public_key = [32]u8{ 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3, 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c, 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e, 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x93 };
 
@@ -118,8 +111,6 @@ test "x25519 rfc7748 vector2" {
 }
 
 test "x25519 rfc7748 one iteration" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const initial_value = [32]u8{ 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
     const expected_output = [32]u8{ 0x42, 0x2c, 0x8e, 0x7a, 0x62, 0x27, 0xd7, 0xbc, 0xa1, 0x35, 0x0b, 0x3e, 0x2b, 0xb7, 0x27, 0x9f, 0x78, 0x97, 0xb8, 0x7b, 0xb6, 0x85, 0x4b, 0x78, 0x3c, 0x60, 0xe8, 0x03, 0x11, 0xae, 0x30, 0x79 };
 
@@ -180,8 +171,6 @@ test "x25519 rfc7748 1,000,000 iterations" {
 }
 
 test "edwards25519 -> curve25519 map" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const ed_kp = try crypto.sign.Ed25519.KeyPair.create([_]u8{0x42} ** 32);
     const mont_kp = try X25519.KeyPair.fromEd25519(ed_kp);
     try htest.assertEqual("90e7595fc89e52fdfddce9c6a43d74dbf6047025ee0462d2d172e8b6a2841d6e", &mont_kp.secret_key);
lib/std/crypto/chacha20.zig
@@ -821,8 +821,6 @@ test "crypto.chacha20 test vector sunscreen" {
 
 // https://tools.ietf.org/html/draft-agl-tls-chacha20poly1305-04#section-7
 test "crypto.chacha20 test vector 1" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const expected_result = [_]u8{
         0x76, 0xb8, 0xe0, 0xad, 0xa0, 0xf1, 0x3d, 0x90,
         0x40, 0x5d, 0x6a, 0xe5, 0x53, 0x86, 0xbd, 0x28,
@@ -857,8 +855,6 @@ test "crypto.chacha20 test vector 1" {
 }
 
 test "crypto.chacha20 test vector 2" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const expected_result = [_]u8{
         0x45, 0x40, 0xf0, 0x5a, 0x9f, 0x1f, 0xb2, 0x96,
         0xd7, 0x73, 0x6e, 0x7b, 0x20, 0x8e, 0x3c, 0x96,
@@ -893,8 +889,6 @@ test "crypto.chacha20 test vector 2" {
 }
 
 test "crypto.chacha20 test vector 3" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const expected_result = [_]u8{
         0xde, 0x9c, 0xba, 0x7b, 0xf3, 0xd6, 0x9e, 0xf5,
         0xe7, 0x86, 0xdc, 0x63, 0x97, 0x3f, 0x65, 0x3a,
@@ -929,8 +923,6 @@ test "crypto.chacha20 test vector 3" {
 }
 
 test "crypto.chacha20 test vector 4" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const expected_result = [_]u8{
         0xef, 0x3f, 0xdf, 0xd6, 0xc6, 0x15, 0x78, 0xfb,
         0xf5, 0xcf, 0x35, 0xbd, 0x3d, 0xd3, 0x3b, 0x80,
@@ -965,8 +957,6 @@ test "crypto.chacha20 test vector 4" {
 }
 
 test "crypto.chacha20 test vector 5" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const expected_result = [_]u8{
         0xf7, 0x98, 0xa1, 0x89, 0xf1, 0x95, 0xe6, 0x69,
         0x82, 0x10, 0x5f, 0xfb, 0x64, 0x0b, 0xb7, 0x75,
lib/std/crypto/salsa20.zig
@@ -598,8 +598,6 @@ test "xsalsa20poly1305 secretbox" {
 }
 
 test "xsalsa20poly1305 box" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var msg: [100]u8 = undefined;
     var msg2: [msg.len]u8 = undefined;
     var nonce: [Box.nonce_length]u8 = undefined;
@@ -614,8 +612,6 @@ test "xsalsa20poly1305 box" {
 }
 
 test "xsalsa20poly1305 sealedbox" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var msg: [100]u8 = undefined;
     var msg2: [msg.len]u8 = undefined;
     var boxed: [msg.len + SealedBox.seal_length]u8 = undefined;
lib/std/fmt/parse_float.zig
@@ -84,8 +84,6 @@ test "fmt.parseFloat nan and inf" {
 }
 
 test "fmt.parseFloat #11169" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     try expectEqual(try parseFloat(f128, "9007199254740993.0"), 9007199254740993.0);
 }
 
@@ -162,8 +160,6 @@ test "fmt.parseFloat hex.f64" {
     try testing.expectEqual(try parseFloat(f64, "-0x1p-1074"), -math.floatTrueMin(f64));
 }
 test "fmt.parseFloat hex.f128" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     try testing.expectEqual(try parseFloat(f128, "0x1p0"), 1.0);
     try testing.expectEqual(try parseFloat(f128, "-0x1p-1"), -0.5);
     try testing.expectEqual(try parseFloat(f128, "0x10p+10"), 16384.0);
lib/std/fs/test.zig
@@ -1604,8 +1604,6 @@ test "File.Permissions" {
     if (builtin.os.tag == .wasi)
         return error.SkipZigTest;
 
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var tmp = tmpDir(.{});
     defer tmp.cleanup();
 
@@ -1632,8 +1630,6 @@ test "File.PermissionsUnix" {
     if (builtin.os.tag == .windows or builtin.os.tag == .wasi)
         return error.SkipZigTest;
 
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var tmp = tmpDir(.{});
     defer tmp.cleanup();
 
lib/std/json/static_test.zig
@@ -372,8 +372,6 @@ test "test all types" {
 }
 
 test "parse" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     try testing.expectEqual(false, try parseFromSliceLeaky(bool, testing.allocator, "false", .{}));
     try testing.expectEqual(true, try parseFromSliceLeaky(bool, testing.allocator, "true", .{}));
     try testing.expectEqual(@as(u1, 1), try parseFromSliceLeaky(u1, testing.allocator, "1", .{}));
@@ -405,8 +403,6 @@ test "parse into enum" {
 }
 
 test "parse into that allocates a slice" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     {
         // string as string
         const parsed = try parseFromSlice([]u8, testing.allocator, "\"foo\"", .{});
@@ -427,16 +423,12 @@ test "parse into that allocates a slice" {
 }
 
 test "parse into sentinel slice" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const parsed = try parseFromSlice([:0]const u8, testing.allocator, "\"\\n\"", .{});
     defer parsed.deinit();
     try testing.expect(std.mem.eql(u8, parsed.value, "\n"));
 }
 
 test "parse into tagged union" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const T = union(enum) {
         nothing,
         int: i32,
@@ -452,8 +444,6 @@ test "parse into tagged union" {
 }
 
 test "parse into tagged union errors" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const T = union(enum) {
         nothing,
         int: i32,
@@ -485,8 +475,6 @@ test "parse into struct with no fields" {
 const test_const_value: usize = 123;
 
 test "parse into struct with default const pointer field" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const T = struct { a: *const usize = &test_const_value };
     const parsed = try parseFromSlice(T, testing.allocator, "{}", .{});
     defer parsed.deinit();
@@ -502,8 +490,6 @@ const test_default_str_slice: [2][]const u8 = [_][]const u8{
 };
 
 test "freeing parsed structs with pointers to default values" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const T = struct {
         int: *const usize = &test_default_usize,
         int_ptr: *allowzero align(1) const usize = test_default_usize_ptr,
@@ -517,15 +503,11 @@ test "freeing parsed structs with pointers to default values" {
 }
 
 test "parse into struct where destination and source lengths mismatch" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const T = struct { a: [2]u8 };
     try testing.expectError(error.LengthMismatch, parseFromSlice(T, testing.allocator, "{\"a\": \"bbb\"}", .{}));
 }
 
 test "parse into struct with misc fields" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const T = struct {
         int: i64,
         float: f64,
@@ -601,8 +583,6 @@ test "parse into struct with misc fields" {
 }
 
 test "parse into struct with strings and arrays with sentinels" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const T = struct {
         language: [:0]const u8,
         language_without_sentinel: []const u8,
@@ -631,8 +611,6 @@ test "parse into struct with strings and arrays with sentinels" {
 }
 
 test "parse into struct with duplicate field" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const options_first = ParseOptions{ .duplicate_field_behavior = .use_first };
     const options_last = ParseOptions{ .duplicate_field_behavior = .use_last };
 
@@ -652,8 +630,6 @@ test "parse into struct with duplicate field" {
 }
 
 test "parse into struct ignoring unknown fields" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const T = struct {
         int: i64,
         language: []const u8,
@@ -692,8 +668,6 @@ test "parse into struct ignoring unknown fields" {
 }
 
 test "parse into tuple" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const Union = union(enum) {
         char: u8,
         float: f64,
@@ -749,8 +723,6 @@ const ParseIntoRecursiveUnionDefinitionValue = union(enum) {
 };
 
 test "parse into recursive union definition" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const T = struct {
         values: ParseIntoRecursiveUnionDefinitionValue,
     };
@@ -772,8 +744,6 @@ const ParseIntoDoubleRecursiveUnionValueSecond = union(enum) {
 };
 
 test "parse into double recursive union definition" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const T = struct {
         values: ParseIntoDoubleRecursiveUnionValueFirst,
     };
@@ -785,8 +755,6 @@ test "parse into double recursive union definition" {
 }
 
 test "parse exponential into int" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const T = struct { int: i64 };
     const r = try parseFromSliceLeaky(T, testing.allocator, "{ \"int\": 4.2e2 }", .{});
     try testing.expectEqual(@as(i64, 420), r.int);
@@ -795,8 +763,6 @@ test "parse exponential into int" {
 }
 
 test "parseFromTokenSource" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     {
         var scanner = JsonScanner.initCompleteInput(testing.allocator, "123");
         defer scanner.deinit();
@@ -816,8 +782,6 @@ test "parseFromTokenSource" {
 }
 
 test "max_value_len" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     try testing.expectError(error.ValueTooLong, parseFromSlice([]u8, testing.allocator, "\"0123456789\"", .{ .max_value_len = 5 }));
 }
 
@@ -856,8 +820,6 @@ fn assertKey(
     }
 }
 test "json parse partial" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const Inner = struct {
         num: u32,
         yes: bool,
@@ -913,8 +875,6 @@ test "json parse partial" {
 }
 
 test "json parse allocate when streaming" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const T = struct {
         not_const: []u8,
         is_const: []const u8,
lib/std/json/stringify_test.zig
@@ -198,8 +198,6 @@ test "stringify struct" {
 }
 
 test "emit_strings_as_arrays" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     // Should only affect string values, not object keys.
     try testStringify("{\"foo\":\"bar\"}", .{ .foo = "bar" }, .{});
     try testStringify("{\"foo\":[98,97,114]}", .{ .foo = "bar" }, .{ .emit_strings_as_arrays = true });
lib/std/net/test.zig
@@ -5,7 +5,6 @@ const mem = std.mem;
 const testing = std.testing;
 
 test "parse and render IPv6 addresses" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if (builtin.os.tag == .wasi) return error.SkipZigTest;
 
     var buffer: [100]u8 = undefined;
@@ -71,7 +70,6 @@ test "invalid but parseable IPv6 scope ids" {
 }
 
 test "parse and render IPv4 addresses" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if (builtin.os.tag == .wasi) return error.SkipZigTest;
 
     var buffer: [18]u8 = undefined;
lib/std/os/linux/x86_64.zig
@@ -431,11 +431,11 @@ fn getContextInternal() callconv(.Naked) usize {
         \\ leaq %[stack_offset:c](%%rdi), %%rsi
         \\ movq %%rdi, %%r8
         \\ xorl %%edi, %%edi
-        \\ movq %[sigaltstack], %%rax
+        \\ movl %[sigaltstack], %%eax
         \\ syscall
         \\ testq %%rax, %%rax
         \\ jnz 0f
-        \\ movq %[sigprocmask], %%rax
+        \\ movl %[sigprocmask], %%eax
         \\ xorl %%esi, %%esi
         \\ leaq %[sigmask_offset:c](%%r8), %%rdx
         \\ movl %[sigset_size], %%r10d
lib/std/os/test.zig
@@ -509,8 +509,6 @@ fn iter_fn(info: *dl_phdr_info, size: usize, counter: *usize) IterFnError!void {
 test "dl_iterate_phdr" {
     if (builtin.object_format != .elf) return error.SkipZigTest;
 
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var counter: usize = 0;
     try os.dl_iterate_phdr(&counter, IterFnError, iter_fn);
     try expect(counter != 0);
@@ -804,8 +802,6 @@ test "sigaction" {
     if (native_os == .wasi or native_os == .windows)
         return error.SkipZigTest;
 
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     // https://github.com/ziglang/zig/issues/7427
     if (native_os == .linux and builtin.target.cpu.arch == .x86)
         return error.SkipZigTest;
lib/std/rand/test.zig
@@ -158,8 +158,6 @@ fn testRandomEnumValue() !void {
 }
 
 test "Random intLessThan" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     @setEvalBranchQuota(10000);
     try testRandomIntLessThan();
     try comptime testRandomIntLessThan();
lib/std/Thread/Condition.zig
@@ -371,6 +371,8 @@ test "Condition - signal" {
         return error.SkipZigTest;
     }
 
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
+
     const num_threads = 4;
 
     const SignalTest = struct {
lib/std/zig/CrossTarget.zig
@@ -791,8 +791,6 @@ fn parseOs(result: *CrossTarget, diags: *ParseOptions.Diagnostics, text: []const
 }
 
 test "CrossTarget.parse" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     if (builtin.target.isGnuLibC()) {
         var cross_target = try CrossTarget.parse(.{});
         cross_target.setGnuLibCVersion(2, 1, 1);
lib/std/zig/fmt.zig
@@ -95,8 +95,6 @@ pub fn fmtEscapes(bytes: []const u8) std.fmt.Formatter(stringEscape) {
 }
 
 test "escape invalid identifiers" {
-    if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const expectFmt = std.testing.expectFmt;
     try expectFmt("@\"while\"", "{}", .{fmtId("while")});
     try expectFmt("hello", "{}", .{fmtId("hello")});
lib/std/builtin.zig
@@ -738,7 +738,6 @@ pub fn default_panic(msg: []const u8, error_return_trace: ?*StackTrace, ret_addr
     if (builtin.zig_backend == .stage2_wasm or
         builtin.zig_backend == .stage2_arm or
         builtin.zig_backend == .stage2_aarch64 or
-        builtin.zig_backend == .stage2_x86_64 or
         builtin.zig_backend == .stage2_x86 or
         builtin.zig_backend == .stage2_riscv64 or
         builtin.zig_backend == .stage2_sparc64 or
lib/std/debug.zig
@@ -671,8 +671,8 @@ pub const StackIterator = struct {
             if (self.unwind_state) |*unwind_state| {
                 if (!unwind_state.failed) {
                     if (unwind_state.dwarf_context.pc == 0) return null;
+                    defer self.fp = unwind_state.dwarf_context.getFp() catch 0;
                     if (self.next_unwind()) |return_address| {
-                        self.fp = unwind_state.dwarf_context.getFp() catch 0;
                         return return_address;
                     } else |err| {
                         unwind_state.last_error = err;
lib/std/fmt.zig
@@ -2129,8 +2129,6 @@ test "int.small" {
 }
 
 test "int.specifier" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     {
         const value: u8 = 'a';
         try expectFmt("u8: a\n", "u8: {c}\n", .{value});
@@ -2181,8 +2179,6 @@ test "int.padded" {
 }
 
 test "buffer" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     {
         var buf1: [32]u8 = undefined;
         var fbs = std.io.fixedBufferStream(&buf1);
@@ -2379,8 +2375,6 @@ test "float.scientific" {
 }
 
 test "float.scientific.precision" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     try expectFmt("f64: 1.40971e-42", "f64: {e:.5}", .{@as(f64, 1.409706e-42)});
     try expectFmt("f64: 1.00000e-09", "f64: {e:.5}", .{@as(f64, @as(f32, @bitCast(@as(u32, 814313563))))});
     try expectFmt("f64: 7.81250e-03", "f64: {e:.5}", .{@as(f64, @as(f32, @bitCast(@as(u32, 1006632960))))});
@@ -2457,8 +2451,6 @@ test "float.hexadecimal.precision" {
 }
 
 test "float.decimal" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     try expectFmt("f64: 152314000000000000000000000000", "f64: {d}", .{@as(f64, 1.52314e+29)});
     try expectFmt("f32: 0", "f32: {d}", .{@as(f32, 0.0)});
     try expectFmt("f32: 0", "f32: {d:.0}", .{@as(f32, 0.0)});
@@ -2482,8 +2474,6 @@ test "float.decimal" {
 }
 
 test "float.libc.sanity" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     try expectFmt("f64: 0.00001", "f64: {d:.5}", .{@as(f64, @as(f32, @bitCast(@as(u32, 916964781))))});
     try expectFmt("f64: 0.00001", "f64: {d:.5}", .{@as(f64, @as(f32, @bitCast(@as(u32, 925353389))))});
     try expectFmt("f64: 0.10000", "f64: {d:.5}", .{@as(f64, @as(f32, @bitCast(@as(u32, 1036831278))))});
@@ -2503,8 +2493,6 @@ test "float.libc.sanity" {
 }
 
 test "custom" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const Vec2 = struct {
         const SelfType = @This();
         x: f32,
@@ -2683,8 +2671,6 @@ test "formatFloatValue with comptime_float" {
 }
 
 test "formatType max_depth" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const Vec2 = struct {
         const SelfType = @This();
         x: f32,
@@ -2759,14 +2745,10 @@ test "positional" {
 }
 
 test "positional with specifier" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     try expectFmt("10.0", "{0d:.1}", .{@as(f64, 9.999)});
 }
 
 test "positional/alignment/width/precision" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     try expectFmt("10.0", "{0d: >3.1}", .{@as(f64, 9.999)});
 }
 
lib/std/json.zig
@@ -24,8 +24,6 @@ test Scanner {
 }
 
 test parseFromSlice {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var parsed_str = try parseFromSlice([]const u8, testing.allocator, "\"a\\u0020b\"", .{});
     defer parsed_str.deinit();
     try testing.expectEqualSlices(u8, "a b", parsed_str.value);
@@ -44,8 +42,6 @@ test Value {
 }
 
 test writeStream {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     var out = ArrayList(u8).init(testing.allocator);
     defer out.deinit();
     var write_stream = writeStream(out.writer(), .{ .whitespace = .indent_2 });
lib/std/mem.zig
@@ -1756,10 +1756,8 @@ test "comptime read/write int" {
 }
 
 test "readIntBig and readIntLittle" {
-    switch (builtin.zig_backend) {
-        .stage2_c, .stage2_x86_64 => return error.SkipZigTest,
-        else => {},
-    }
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
 
     try testing.expect(readIntSliceBig(u0, &[_]u8{}) == 0x0);
     try testing.expect(readIntSliceLittle(u0, &[_]u8{}) == 0x0);
lib/std/target.zig
@@ -733,7 +733,14 @@ pub const Target = struct {
 
                 /// Adds the specified feature set but not its dependencies.
                 pub fn addFeatureSet(set: *Set, other_set: Set) void {
-                    set.ints = @as(@Vector(usize_count, usize), set.ints) | @as(@Vector(usize_count, usize), other_set.ints);
+                    switch (builtin.zig_backend) {
+                        .stage2_x86_64 => {
+                            for (&set.ints, other_set.ints) |*set_int, other_set_int| set_int.* |= other_set_int;
+                        },
+                        else => {
+                            set.ints = @as(@Vector(usize_count, usize), set.ints) | @as(@Vector(usize_count, usize), other_set.ints);
+                        },
+                    }
                 }
 
                 /// Removes the specified feature but not its dependents.
@@ -745,7 +752,14 @@ pub const Target = struct {
 
                 /// Removes the specified feature but not its dependents.
                 pub fn removeFeatureSet(set: *Set, other_set: Set) void {
-                    set.ints = @as(@Vector(usize_count, usize), set.ints) & ~@as(@Vector(usize_count, usize), other_set.ints);
+                    switch (builtin.zig_backend) {
+                        .stage2_x86_64 => {
+                            for (&set.ints, other_set.ints) |*set_int, other_set_int| set_int.* &= ~other_set_int;
+                        },
+                        else => {
+                            set.ints = @as(@Vector(usize_count, usize), set.ints) & ~@as(@Vector(usize_count, usize), other_set.ints);
+                        },
+                    }
                 }
 
                 pub fn populateDependencies(set: *Set, all_features_list: []const Cpu.Feature) void {
@@ -774,10 +788,20 @@ pub const Target = struct {
                 }
 
                 pub fn isSuperSetOf(set: Set, other_set: Set) bool {
-                    const V = @Vector(usize_count, usize);
-                    const set_v: V = set.ints;
-                    const other_v: V = other_set.ints;
-                    return @reduce(.And, (set_v & other_v) == other_v);
+                    switch (builtin.zig_backend) {
+                        .stage2_x86_64 => {
+                            var result = true;
+                            for (&set.ints, other_set.ints) |*set_int, other_set_int|
+                                result = result and (set_int.* & other_set_int) == other_set_int;
+                            return result;
+                        },
+                        else => {
+                            const V = @Vector(usize_count, usize);
+                            const set_v: V = set.ints;
+                            const other_v: V = other_set.ints;
+                            return @reduce(.And, (set_v & other_v) == other_v);
+                        },
+                    }
                 }
             };
 
lib/std/Uri.zig
@@ -735,8 +735,6 @@ test "Special test" {
 }
 
 test "URI escaping" {
-    if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
     const input = "\\รถ/ รครถรŸ ~~.adas-https://canvas:123/#ads&&sad";
     const expected = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad";
 
src/arch/x86_64/bits.zig
@@ -470,6 +470,7 @@ pub const Memory = union(enum) {
     };
 
     pub const PtrSize = enum {
+        none,
         byte,
         word,
         dword,
@@ -508,6 +509,7 @@ pub const Memory = union(enum) {
 
         pub fn bitSize(s: PtrSize) u64 {
             return switch (s) {
+                .none => 0,
                 .byte => 8,
                 .word => 16,
                 .dword => 32,
@@ -518,6 +520,17 @@ pub const Memory = union(enum) {
                 .zword => 512,
             };
         }
+
+        pub fn format(
+            s: PtrSize,
+            comptime _: []const u8,
+            _: std.fmt.FormatOptions,
+            writer: anytype,
+        ) @TypeOf(writer).Error!void {
+            if (s == .none) return;
+            try writer.writeAll(@tagName(s));
+            try writer.writeAll(" ptr");
+        }
     };
 
     pub const Sib = struct {
src/arch/x86_64/CodeGen.zig
@@ -1145,7 +1145,7 @@ fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 {
 }
 
 /// A `cc` of `.z_and_np` clobbers `reg2`!
-fn asmCmovccRegisterRegister(self: *Self, cc: bits.Condition, reg1: Register, reg2: Register) !void {
+fn asmCmovccRegisterRegister(self: *Self, cc: Condition, reg1: Register, reg2: Register) !void {
     _ = try self.addInst(.{
         .tag = switch (cc) {
             else => .cmov,
@@ -1168,7 +1168,7 @@ fn asmCmovccRegisterRegister(self: *Self, cc: bits.Condition, reg1: Register, re
 }
 
 /// A `cc` of `.z_and_np` is not supported by this encoding!
-fn asmCmovccRegisterMemory(self: *Self, cc: bits.Condition, reg: Register, m: Memory) !void {
+fn asmCmovccRegisterMemory(self: *Self, cc: Condition, reg: Register, m: Memory) !void {
     _ = try self.addInst(.{
         .tag = switch (cc) {
             else => .cmov,
@@ -1204,7 +1204,7 @@ fn asmCmovccRegisterMemory(self: *Self, cc: bits.Condition, reg: Register, m: Me
     });
 }
 
-fn asmSetccRegister(self: *Self, reg: Register, cc: bits.Condition) !void {
+fn asmSetccRegister(self: *Self, cc: Condition, reg: Register) !void {
     _ = try self.addInst(.{
         .tag = switch (cc) {
             else => .set,
@@ -1228,7 +1228,7 @@ fn asmSetccRegister(self: *Self, reg: Register, cc: bits.Condition) !void {
     });
 }
 
-fn asmSetccMemory(self: *Self, m: Memory, cc: bits.Condition) !void {
+fn asmSetccMemory(self: *Self, cc: Condition, m: Memory) !void {
     const payload = switch (m) {
         .sib => try self.addExtra(Mir.MemorySib.encode(m)),
         .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
@@ -1279,7 +1279,7 @@ fn asmJmpReloc(self: *Self, target: Mir.Inst.Index) !Mir.Inst.Index {
     });
 }
 
-fn asmJccReloc(self: *Self, target: Mir.Inst.Index, cc: bits.Condition) !Mir.Inst.Index {
+fn asmJccReloc(self: *Self, cc: Condition, target: Mir.Inst.Index) !Mir.Inst.Index {
     return self.addInst(.{
         .tag = switch (cc) {
             else => .j,
@@ -1759,8 +1759,8 @@ fn gen(self: *Self) InnerError!void {
     const cc = abi.resolveCallingConvention(fn_info.cc, self.target.*);
     if (cc != .Naked) {
         try self.asmRegister(.{ ._, .push }, .rbp);
-        const backpatch_push_callee_preserved_regs = try self.asmPlaceholder();
         try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp);
+        const backpatch_push_callee_preserved_regs = try self.asmPlaceholder();
         const backpatch_frame_align = try self.asmPlaceholder();
         const backpatch_frame_align_extra = try self.asmPlaceholder();
         const backpatch_stack_alloc = try self.asmPlaceholder();
@@ -1805,7 +1805,7 @@ fn gen(self: *Self) InnerError!void {
                     );
 
                 try self.asmRegisterImmediate(.{ ._, .cmp }, .al, Immediate.u(info.fp_count));
-                const skip_sse_reloc = try self.asmJccReloc(undefined, .na);
+                const skip_sse_reloc = try self.asmJccReloc(.na, undefined);
 
                 const vec_2_f64 = try mod.vectorType(.{ .len = 2, .child = .f64_type });
                 for (abi.SysV.c_abi_sse_param_regs[info.fp_count..], info.fp_count..) |reg, reg_i|
@@ -1913,11 +1913,14 @@ fn gen(self: *Self) InnerError!void {
         }
         if (need_frame_align or need_stack_adjust) {
             self.mir_instructions.set(backpatch_stack_dealloc, .{
-                .tag = .mov,
-                .ops = .rr,
-                .data = .{ .rr = .{
+                .tag = .lea,
+                .ops = .rm_sib,
+                .data = .{ .rx = .{
                     .r1 = .rsp,
-                    .r2 = .rbp,
+                    .payload = try self.addExtra(Mir.MemorySib.encode(Memory.sib(.qword, .{
+                        .base = .{ .reg = .rbp },
+                        .disp = -frame_layout.save_reg_list.size(),
+                    }))),
                 } },
             });
         }
@@ -2262,7 +2265,7 @@ fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void {
                 const tag_val = try mod.enumValueFieldIndex(enum_ty, index);
                 const tag_mcv = try self.genTypedValue(.{ .ty = enum_ty, .val = tag_val });
                 try self.genBinOpMir(.{ ._, .cmp }, enum_ty, enum_mcv, tag_mcv);
-                const skip_reloc = try self.asmJccReloc(undefined, .ne);
+                const skip_reloc = try self.asmJccReloc(.ne, undefined);
 
                 try self.genSetMem(
                     .{ .reg = ret_reg },
@@ -2298,8 +2301,8 @@ fn getValue(self: *Self, value: MCValue, inst: ?Air.Inst.Index) void {
 fn freeValue(self: *Self, value: MCValue) !void {
     switch (value) {
         .register => |reg| {
-            if (reg.class() == .x87) try self.asmRegister(.{ .f_, .free }, reg);
             self.register_manager.freeReg(reg);
+            if (reg.class() == .x87) try self.asmRegister(.{ .f_, .free }, reg);
         },
         .register_pair => |regs| for (regs) |reg| self.register_manager.freeReg(reg),
         .register_offset => |reg_off| self.register_manager.freeReg(reg_off.reg),
@@ -2416,17 +2419,19 @@ fn computeFrameLayout(self: *Self, cc: std.builtin.CallingConvention) !FrameLayo
     const callee_preserved_regs =
         abi.getCalleePreservedRegs(abi.resolveCallingConvention(cc, self.target.*));
     for (callee_preserved_regs) |reg| {
-        if (self.register_manager.isRegAllocated(reg)) {
+        if (self.register_manager.isRegAllocated(reg) or true) {
             save_reg_list.push(callee_preserved_regs, reg);
         }
     }
 
-    var rbp_offset: i32 = @intCast(save_reg_list.count() * 8);
+    var rbp_offset: i32 = 0;
     self.setFrameLoc(.base_ptr, .rbp, &rbp_offset, false);
     self.setFrameLoc(.ret_addr, .rbp, &rbp_offset, false);
     self.setFrameLoc(.args_frame, .rbp, &rbp_offset, false);
-    const stack_frame_align_offset =
-        if (need_align_stack) 0 else frame_offset[@intFromEnum(FrameIndex.args_frame)];
+    const stack_frame_align_offset = if (need_align_stack)
+        0
+    else
+        save_reg_list.size() + frame_offset[@intFromEnum(FrameIndex.args_frame)];
 
     var rsp_offset: i32 = 0;
     self.setFrameLoc(.call_frame, .rsp, &rsp_offset, true);
@@ -3242,7 +3247,7 @@ fn airAddSat(self: *Self, inst: Air.Inst.Index) !void {
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
     const ty = self.typeOf(bin_op.lhs);
     if (ty.zigTypeTag(mod) == .Vector or ty.abiSize(mod) > 8) return self.fail(
-        "TODO implement addMulSat for {}",
+        "TODO implement airAddSat for {}",
         .{ty.fmt(mod)},
     );
 
@@ -3325,7 +3330,7 @@ fn airSubSat(self: *Self, inst: Air.Inst.Index) !void {
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
     const ty = self.typeOf(bin_op.lhs);
     if (ty.zigTypeTag(mod) == .Vector or ty.abiSize(mod) > 8) return self.fail(
-        "TODO implement addMulSat for {}",
+        "TODO implement airSubSat for {}",
         .{ty.fmt(mod)},
     );
 
@@ -3401,7 +3406,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void {
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
     const ty = self.typeOf(bin_op.lhs);
     if (ty.zigTypeTag(mod) == .Vector or ty.abiSize(mod) > 8) return self.fail(
-        "TODO implement addMulSat for {}",
+        "TODO implement airMulSat for {}",
         .{ty.fmt(mod)},
     );
 
@@ -3617,7 +3622,7 @@ fn genSetFrameTruncatedOverflowCompare(
     defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
 
     const overflow_reg = temp_regs[0];
-    if (overflow_cc) |cc| try self.asmSetccRegister(overflow_reg.to8(), cc);
+    if (overflow_cc) |cc| try self.asmSetccRegister(cc, overflow_reg.to8());
 
     const scratch_reg = temp_regs[1];
     const hi_limb_off = if (int_info.bits <= 64) 0 else (int_info.bits - 1) / 64 * 8;
@@ -3631,7 +3636,7 @@ fn genSetFrameTruncatedOverflowCompare(
 
     const eq_reg = temp_regs[2];
     if (overflow_cc) |_| {
-        try self.asmSetccRegister(eq_reg.to8(), .ne);
+        try self.asmSetccRegister(.ne, eq_reg.to8());
         try self.genBinOpMir(
             .{ ._, .@"or" },
             Type.u8,
@@ -3660,28 +3665,150 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
     const mod = self.bin_file.options.module.?;
     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
+    const tuple_ty = self.typeOfIndex(inst);
     const dst_ty = self.typeOf(bin_op.lhs);
     const result: MCValue = switch (dst_ty.zigTypeTag(mod)) {
         .Vector => return self.fail("TODO implement airMulWithOverflow for {}", .{dst_ty.fmt(mod)}),
         .Int => result: {
+            const dst_info = dst_ty.intInfo(mod);
+            const lhs_active_bits = self.activeIntBits(bin_op.lhs);
+            const rhs_active_bits = self.activeIntBits(bin_op.rhs);
+            const src_bits = @max(lhs_active_bits, rhs_active_bits, dst_info.bits / 2);
+            const src_ty = try mod.intType(dst_info.signedness, src_bits);
+
+            if (src_bits > 64 and src_bits <= 128 and
+                dst_info.bits > 64 and dst_info.bits <= 128) switch (dst_info.signedness) {
+                .signed => {
+                    const ptr_c_int = try mod.singleMutPtrType(Type.c_int);
+                    const overflow = try self.allocTempRegOrMem(Type.c_int, false);
+                    const result = try self.genCall(.{ .lib = .{
+                        .return_type = .i128_type,
+                        .param_types = &.{ .i128_type, .i128_type, ptr_c_int.toIntern() },
+                        .callee = "__muloti4",
+                    } }, &.{ Type.i128, Type.i128, ptr_c_int }, &.{
+                        .{ .air_ref = bin_op.lhs },
+                        .{ .air_ref = bin_op.rhs },
+                        overflow.address(),
+                    });
+
+                    const dst_mcv = try self.allocRegOrMem(inst, false);
+                    try self.genSetMem(
+                        .{ .frame = dst_mcv.load_frame.index },
+                        @intCast(tuple_ty.structFieldOffset(0, mod)),
+                        tuple_ty.structFieldType(0, mod),
+                        result,
+                    );
+                    try self.asmMemoryImmediate(
+                        .{ ._, .cmp },
+                        overflow.mem(self.memPtrSize(Type.c_int)),
+                        Immediate.s(0),
+                    );
+                    try self.genSetMem(
+                        .{ .frame = dst_mcv.load_frame.index },
+                        @intCast(tuple_ty.structFieldOffset(1, mod)),
+                        tuple_ty.structFieldType(1, mod),
+                        .{ .eflags = .ne },
+                    );
+                    try self.freeValue(overflow);
+                    break :result dst_mcv;
+                },
+                .unsigned => {
+                    try self.spillEflagsIfOccupied();
+                    try self.spillRegisters(&.{ .rax, .rdx });
+                    const reg_locks = self.register_manager.lockRegsAssumeUnused(2, .{ .rax, .rdx });
+                    defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
+
+                    const tmp_regs =
+                        try self.register_manager.allocRegs(4, .{null} ** 4, abi.RegisterClass.gp);
+                    const tmp_locks = self.register_manager.lockRegsAssumeUnused(4, tmp_regs);
+                    defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock);
+
+                    const lhs_mcv = try self.resolveInst(bin_op.lhs);
+                    const rhs_mcv = try self.resolveInst(bin_op.rhs);
+
+                    if (lhs_mcv.isMemory())
+                        try self.asmRegisterMemory(.{ ._, .mov }, .rax, lhs_mcv.mem(.qword))
+                    else
+                        try self.asmRegisterRegister(.{ ._, .mov }, .rax, lhs_mcv.register_pair[0]);
+                    if (rhs_mcv.isMemory()) try self.asmRegisterMemory(
+                        .{ ._, .mov },
+                        tmp_regs[0],
+                        rhs_mcv.address().offset(8).deref().mem(.qword),
+                    ) else try self.asmRegisterRegister(
+                        .{ ._, .mov },
+                        tmp_regs[0],
+                        rhs_mcv.register_pair[1],
+                    );
+                    try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_regs[0], tmp_regs[0]);
+                    try self.asmSetccRegister(.nz, tmp_regs[1].to8());
+                    try self.asmRegisterRegister(.{ .i_, .mul }, tmp_regs[0], .rax);
+                    try self.asmSetccRegister(.o, tmp_regs[2].to8());
+                    if (rhs_mcv.isMemory())
+                        try self.asmMemory(.{ ._, .mul }, rhs_mcv.mem(.qword))
+                    else
+                        try self.asmRegister(.{ ._, .mul }, rhs_mcv.register_pair[0]);
+                    try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]);
+                    try self.asmSetccRegister(.c, tmp_regs[3].to8());
+                    try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[2].to8(), tmp_regs[3].to8());
+                    if (lhs_mcv.isMemory()) try self.asmRegisterMemory(
+                        .{ ._, .mov },
+                        tmp_regs[0],
+                        lhs_mcv.address().offset(8).deref().mem(.qword),
+                    ) else try self.asmRegisterRegister(
+                        .{ ._, .mov },
+                        tmp_regs[0],
+                        lhs_mcv.register_pair[1],
+                    );
+                    try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_regs[0], tmp_regs[0]);
+                    try self.asmSetccRegister(.nz, tmp_regs[3].to8());
+                    try self.asmRegisterRegister(
+                        .{ ._, .@"and" },
+                        tmp_regs[1].to8(),
+                        tmp_regs[3].to8(),
+                    );
+                    try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
+                    if (rhs_mcv.isMemory())
+                        try self.asmRegisterMemory(.{ .i_, .mul }, tmp_regs[0], rhs_mcv.mem(.qword))
+                    else
+                        try self.asmRegisterRegister(
+                            .{ .i_, .mul },
+                            tmp_regs[0],
+                            rhs_mcv.register_pair[0],
+                        );
+                    try self.asmSetccRegister(.o, tmp_regs[2].to8());
+                    try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
+                    try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]);
+                    try self.asmSetccRegister(.c, tmp_regs[2].to8());
+                    try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
+
+                    const dst_mcv = try self.allocRegOrMem(inst, false);
+                    try self.genSetMem(
+                        .{ .frame = dst_mcv.load_frame.index },
+                        @intCast(tuple_ty.structFieldOffset(0, mod)),
+                        tuple_ty.structFieldType(0, mod),
+                        .{ .register_pair = .{ .rax, .rdx } },
+                    );
+                    try self.genSetMem(
+                        .{ .frame = dst_mcv.load_frame.index },
+                        @intCast(tuple_ty.structFieldOffset(1, mod)),
+                        tuple_ty.structFieldType(1, mod),
+                        .{ .register = tmp_regs[1] },
+                    );
+                    break :result dst_mcv;
+                },
+            };
+
             try self.spillEflagsIfOccupied();
             try self.spillRegisters(&.{ .rax, .rdx });
 
-            const dst_info = dst_ty.intInfo(mod);
             const cc: Condition = switch (dst_info.signedness) {
                 .unsigned => .c,
                 .signed => .o,
             };
 
-            const lhs_active_bits = self.activeIntBits(bin_op.lhs);
-            const rhs_active_bits = self.activeIntBits(bin_op.rhs);
-            const src_bits = @max(lhs_active_bits, rhs_active_bits, dst_info.bits / 2);
-            const src_ty = try mod.intType(dst_info.signedness, src_bits);
-
             const lhs = try self.resolveInst(bin_op.lhs);
             const rhs = try self.resolveInst(bin_op.rhs);
 
-            const tuple_ty = self.typeOfIndex(inst);
             const extra_bits = if (dst_info.bits <= 64)
                 self.regExtraBits(dst_ty)
             else
@@ -3741,6 +3868,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
 fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue, rhs: MCValue) !void {
     const mod = self.bin_file.options.module.?;
     const abi_size: u32 = @intCast(ty.abiSize(mod));
+    const bit_size: u32 = @intCast(self.regBitSize(ty));
     if (abi_size > 8) {
         return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{});
     }
@@ -3752,14 +3880,14 @@ fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue
         .div => switch (tag[0]) {
             ._ => {
                 const hi_reg: Register =
-                    switch (self.regBitSize(ty)) {
+                    switch (bit_size) {
                     8 => .ah,
                     16, 32, 64 => .edx,
                     else => unreachable,
                 };
                 try self.asmRegisterRegister(.{ ._, .xor }, hi_reg, hi_reg);
             },
-            .i_ => try self.asmOpOnly(.{ ._, switch (self.regBitSize(ty)) {
+            .i_ => try self.asmOpOnly(.{ ._, switch (bit_size) {
                 8 => .cbw,
                 16 => .cwd,
                 32 => .cdq,
@@ -3782,6 +3910,7 @@ fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue
         ),
         else => unreachable,
     }
+    if (tag[1] == .div and bit_size == 8) try self.asmRegisterRegister(.{ ._, .mov }, .dl, .ah);
 }
 
 /// Always returns a register.
@@ -3981,54 +4110,12 @@ fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void {
 
 fn airUnwrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void {
     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
-    const err_union_ty = self.typeOf(ty_op.operand);
+    const operand_ty = self.typeOf(ty_op.operand);
     const operand = try self.resolveInst(ty_op.operand);
-    const result = try self.genUnwrapErrorUnionPayloadMir(inst, err_union_ty, operand);
+    const result = try self.genUnwrapErrUnionPayloadMir(inst, operand_ty, operand);
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
-fn genUnwrapErrorUnionPayloadMir(
-    self: *Self,
-    maybe_inst: ?Air.Inst.Index,
-    err_union_ty: Type,
-    err_union: MCValue,
-) !MCValue {
-    const mod = self.bin_file.options.module.?;
-    const payload_ty = err_union_ty.errorUnionPayload(mod);
-
-    const result: MCValue = result: {
-        if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none;
-
-        const payload_off = errUnionPayloadOffset(payload_ty, mod);
-        switch (err_union) {
-            .load_frame => |frame_addr| break :result .{ .load_frame = .{
-                .index = frame_addr.index,
-                .off = frame_addr.off + @as(i32, @intCast(payload_off)),
-            } },
-            .register => |reg| {
-                // TODO reuse operand
-                const eu_lock = self.register_manager.lockReg(reg);
-                defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
-
-                const result_mcv: MCValue = if (maybe_inst) |inst|
-                    try self.copyToRegisterWithInstTracking(inst, err_union_ty, err_union)
-                else
-                    .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) };
-                if (payload_off > 0) try self.genShiftBinOpMir(
-                    .{ ._r, .sh },
-                    err_union_ty,
-                    result_mcv,
-                    .{ .immediate = @as(u6, @intCast(payload_off * 8)) },
-                ) else try self.truncateRegister(payload_ty, result_mcv.register);
-                break :result result_mcv;
-            },
-            else => return self.fail("TODO implement genUnwrapErrorUnionPayloadMir for {}", .{err_union}),
-        }
-    };
-
-    return result;
-}
-
 // *(E!T) -> E
 fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void {
     const mod = self.bin_file.options.module.?;
@@ -4067,38 +4154,11 @@ fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void {
 
 // *(E!T) -> *T
 fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void {
-    const mod = self.bin_file.options.module.?;
     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
-
-    const src_ty = self.typeOf(ty_op.operand);
-    const src_mcv = try self.resolveInst(ty_op.operand);
-    const src_reg = switch (src_mcv) {
-        .register => |reg| reg,
-        else => try self.copyToTmpRegister(src_ty, src_mcv),
-    };
-    const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
-    defer self.register_manager.unlockReg(src_lock);
-
-    const dst_ty = self.typeOfIndex(inst);
-    const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
-        src_reg
-    else
-        try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
-    const dst_mcv = MCValue{ .register = dst_reg };
-    const dst_lock = self.register_manager.lockReg(dst_reg);
-    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
-    const eu_ty = src_ty.childType(mod);
-    const pl_ty = eu_ty.errorUnionPayload(mod);
-    const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, mod));
-    const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
-    try self.asmRegisterMemory(
-        .{ ._, .lea },
-        registerAlias(dst_reg, dst_abi_size),
-        Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }),
-    );
-
-    return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
+    const operand_ty = self.typeOf(ty_op.operand);
+    const operand = try self.resolveInst(ty_op.operand);
+    const result = try self.genUnwrapErrUnionPayloadPtrMir(inst, operand_ty, operand);
+    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
 fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void {
@@ -4150,6 +4210,71 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
+fn genUnwrapErrUnionPayloadMir(
+    self: *Self,
+    maybe_inst: ?Air.Inst.Index,
+    err_union_ty: Type,
+    err_union: MCValue,
+) !MCValue {
+    const mod = self.bin_file.options.module.?;
+    const payload_ty = err_union_ty.errorUnionPayload(mod);
+
+    const result: MCValue = result: {
+        if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none;
+
+        const payload_off: u31 = @intCast(errUnionPayloadOffset(payload_ty, mod));
+        switch (err_union) {
+            .load_frame => |frame_addr| break :result .{ .load_frame = .{
+                .index = frame_addr.index,
+                .off = frame_addr.off + payload_off,
+            } },
+            .register => |reg| {
+                // TODO reuse operand
+                const eu_lock = self.register_manager.lockReg(reg);
+                defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
+
+                const result_mcv: MCValue = if (maybe_inst) |inst|
+                    try self.copyToRegisterWithInstTracking(inst, err_union_ty, err_union)
+                else
+                    .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) };
+                if (payload_off > 0) try self.genShiftBinOpMir(
+                    .{ ._r, .sh },
+                    err_union_ty,
+                    result_mcv,
+                    .{ .immediate = @as(u6, @intCast(payload_off * 8)) },
+                ) else try self.truncateRegister(payload_ty, result_mcv.register);
+                break :result result_mcv;
+            },
+            else => return self.fail("TODO implement genUnwrapErrUnionPayloadMir for {}", .{err_union}),
+        }
+    };
+
+    return result;
+}
+
+fn genUnwrapErrUnionPayloadPtrMir(
+    self: *Self,
+    maybe_inst: ?Air.Inst.Index,
+    ptr_ty: Type,
+    ptr_mcv: MCValue,
+) !MCValue {
+    const mod = self.bin_file.options.module.?;
+    const err_union_ty = ptr_ty.childType(mod);
+    const payload_ty = err_union_ty.errorUnionPayload(mod);
+
+    const result: MCValue = result: {
+        const payload_off = errUnionPayloadOffset(payload_ty, mod);
+        const result_mcv: MCValue = if (maybe_inst) |inst|
+            try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr_mcv)
+        else
+            .{ .register = try self.copyToTmpRegister(ptr_ty, ptr_mcv) };
+        try self.genBinOpMir(.{ ._, .add }, ptr_ty, result_mcv, .{ .immediate = payload_off });
+        break :result result_mcv;
+    };
+
+    return result;
+}
+
 fn airErrReturnTrace(self: *Self, inst: Air.Inst.Index) !void {
     _ = inst;
     return self.fail("TODO implement airErrReturnTrace for {}", .{self.target.cpu.arch});
@@ -6425,7 +6550,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
                         break :result .{ .eflags = ro.eflags };
                     } else {
                         const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
-                        try self.asmSetccRegister(dst_reg.to8(), ro.eflags);
+                        try self.asmSetccRegister(ro.eflags, dst_reg.to8());
                         break :result .{ .register = dst_reg.to8() };
                     },
                     else => unreachable,
@@ -9712,55 +9837,59 @@ fn genCall(self: *Self, info: union(enum) {
     switch (info) {
         .air => |callee| if (try self.air.value(callee, mod)) |func_value| {
             const func_key = mod.intern_pool.indexToKey(func_value.ip_index);
-            if (switch (func_key) {
-                .func => |func| func.owner_decl,
+            switch (switch (func_key) {
+                else => func_key,
                 .ptr => |ptr| switch (ptr.addr) {
-                    .decl => |decl| decl,
-                    else => null,
+                    .decl => |decl| mod.intern_pool.indexToKey(try mod.declPtr(decl).internValue(mod)),
+                    else => func_key,
                 },
-                else => null,
-            }) |owner_decl| {
-                if (self.bin_file.cast(link.File.Elf)) |elf_file| {
-                    const sym_index = try elf_file.getOrCreateMetadataForDecl(owner_decl);
-                    const sym = elf_file.symbol(sym_index);
-                    _ = try sym.getOrCreateZigGotEntry(sym_index, elf_file);
-                    if (self.bin_file.options.pic) {
-                        try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym.esym_index });
+            }) {
+                .func => |func| {
+                    try mod.markDeclAlive(mod.declPtr(func.owner_decl));
+                    if (self.bin_file.cast(link.File.Elf)) |elf_file| {
+                        const sym_index = try elf_file.getOrCreateMetadataForDecl(func.owner_decl);
+                        const sym = elf_file.symbol(sym_index);
+                        _ = try sym.getOrCreateZigGotEntry(sym_index, elf_file);
+                        if (self.bin_file.options.pic) {
+                            try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym.esym_index });
+                            try self.asmRegister(.{ ._, .call }, .rax);
+                        } else {
+                            _ = try self.addInst(.{
+                                .tag = .call,
+                                .ops = .direct_got_reloc,
+                                .data = .{ .reloc = .{
+                                    .atom_index = try self.owner.getSymbolIndex(self),
+                                    .sym_index = sym.esym_index,
+                                } },
+                            });
+                        }
+                    } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
+                        const atom = try coff_file.getOrCreateAtomForDecl(func.owner_decl);
+                        const sym_index = coff_file.getAtom(atom).getSymbolIndex().?;
+                        try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index });
                         try self.asmRegister(.{ ._, .call }, .rax);
-                    } else {
-                        _ = try self.addInst(.{
-                            .tag = .call,
-                            .ops = .direct_got_reloc,
-                            .data = .{ .reloc = .{
-                                .atom_index = try self.owner.getSymbolIndex(self),
-                                .sym_index = sym.esym_index,
-                            } },
-                        });
-                    }
-                } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
-                    const atom = try coff_file.getOrCreateAtomForDecl(owner_decl);
-                    const sym_index = coff_file.getAtom(atom).getSymbolIndex().?;
-                    try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index });
-                    try self.asmRegister(.{ ._, .call }, .rax);
-                } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
-                    const atom = try macho_file.getOrCreateAtomForDecl(owner_decl);
-                    const sym_index = macho_file.getAtom(atom).getSymbolIndex().?;
-                    try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index });
-                    try self.asmRegister(.{ ._, .call }, .rax);
-                } else if (self.bin_file.cast(link.File.Plan9)) |p9| {
-                    const atom_index = try p9.seeDecl(owner_decl);
-                    const atom = p9.getAtom(atom_index);
-                    try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{
-                        .base = .{ .reg = .ds },
-                        .disp = @intCast(atom.getOffsetTableAddress(p9)),
-                    }));
-                } else unreachable;
-            } else if (func_value.getExternFunc(mod)) |extern_func| {
-                const lib_name = mod.intern_pool.stringToSliceUnwrap(extern_func.lib_name);
-                const decl_name = mod.intern_pool.stringToSlice(mod.declPtr(extern_func.decl).name);
-                try self.genExternSymbolRef(.call, lib_name, decl_name);
-            } else {
-                return self.fail("TODO implement calling bitcasted functions", .{});
+                    } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
+                        const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl);
+                        const sym_index = macho_file.getAtom(atom).getSymbolIndex().?;
+                        try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index });
+                        try self.asmRegister(.{ ._, .call }, .rax);
+                    } else if (self.bin_file.cast(link.File.Plan9)) |p9| {
+                        const atom_index = try p9.seeDecl(func.owner_decl);
+                        const atom = p9.getAtom(atom_index);
+                        try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{
+                            .base = .{ .reg = .ds },
+                            .disp = @intCast(atom.getOffsetTableAddress(p9)),
+                        }));
+                    } else unreachable;
+                },
+                .extern_func => |extern_func| {
+                    const owner_decl = mod.declPtr(extern_func.decl);
+                    try mod.markDeclAlive(owner_decl);
+                    const lib_name = mod.intern_pool.stringToSliceUnwrap(extern_func.lib_name);
+                    const decl_name = mod.intern_pool.stringToSlice(owner_decl.name);
+                    try self.genExternSymbolRef(.call, lib_name, decl_name);
+                },
+                else => return self.fail("TODO implement calling bitcasted functions", .{}),
             }
         } else {
             assert(self.typeOf(callee).zigTypeTag(mod) == .Pointer);
@@ -10241,47 +10370,46 @@ fn airTry(self: *Self, inst: Air.Inst.Index) !void {
     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
     const extra = self.air.extraData(Air.Try, pl_op.payload);
     const body = self.air.extra[extra.end..][0..extra.data.body_len];
-    const err_union_ty = self.typeOf(pl_op.operand);
-    const result = try self.genTry(inst, pl_op.operand, body, err_union_ty, false);
+    const operand_ty = self.typeOf(pl_op.operand);
+    const result = try self.genTry(inst, pl_op.operand, body, operand_ty, false);
     return self.finishAir(inst, result, .{ .none, .none, .none });
 }
 
 fn airTryPtr(self: *Self, inst: Air.Inst.Index) !void {
-    const mod = self.bin_file.options.module.?;
     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
     const extra = self.air.extraData(Air.TryPtr, ty_pl.payload);
     const body = self.air.extra[extra.end..][0..extra.data.body_len];
-    const err_union_ty = self.typeOf(extra.data.ptr).childType(mod);
-    const result = try self.genTry(inst, extra.data.ptr, body, err_union_ty, true);
+    const operand_ty = self.typeOf(extra.data.ptr);
+    const result = try self.genTry(inst, extra.data.ptr, body, operand_ty, true);
     return self.finishAir(inst, result, .{ .none, .none, .none });
 }
 
 fn genTry(
     self: *Self,
     inst: Air.Inst.Index,
-    err_union: Air.Inst.Ref,
+    operand: Air.Inst.Ref,
     body: []const Air.Inst.Index,
-    err_union_ty: Type,
+    operand_ty: Type,
     operand_is_ptr: bool,
 ) !MCValue {
-    if (operand_is_ptr) {
-        return self.fail("TODO genTry for pointers", .{});
-    }
     const liveness_cond_br = self.liveness.getCondBr(inst);
 
-    const err_union_mcv = try self.resolveInst(err_union);
-    const is_err_mcv = try self.isErr(null, err_union_ty, err_union_mcv);
+    const operand_mcv = try self.resolveInst(operand);
+    const is_err_mcv = if (operand_is_ptr)
+        try self.isErrPtr(null, operand_ty, operand_mcv)
+    else
+        try self.isErr(null, operand_ty, operand_mcv);
 
     const reloc = try self.genCondBrMir(Type.anyerror, is_err_mcv);
 
     if (self.liveness.operandDies(inst, 0)) {
-        if (Air.refToIndex(err_union)) |err_union_inst| try self.processDeath(err_union_inst);
+        if (Air.refToIndex(operand)) |operand_inst| try self.processDeath(operand_inst);
     }
 
     self.scope_generation += 1;
     const state = try self.saveState();
 
-    for (liveness_cond_br.else_deaths) |operand| try self.processDeath(operand);
+    for (liveness_cond_br.else_deaths) |death| try self.processDeath(death);
     try self.genBody(body);
     try self.restoreState(state, &.{}, .{
         .emit_instructions = false,
@@ -10292,12 +10420,14 @@ fn genTry(
 
     try self.performReloc(reloc);
 
-    for (liveness_cond_br.then_deaths) |operand| try self.processDeath(operand);
+    for (liveness_cond_br.then_deaths) |death| try self.processDeath(death);
 
     const result = if (self.liveness.isUnused(inst))
         .unreach
+    else if (operand_is_ptr)
+        try self.genUnwrapErrUnionPayloadPtrMir(inst, operand_ty, operand_mcv)
     else
-        try self.genUnwrapErrorUnionPayloadMir(inst, err_union_ty, err_union_mcv);
+        try self.genUnwrapErrUnionPayloadMir(inst, operand_ty, operand_mcv);
     return result;
 }
 
@@ -10348,12 +10478,12 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !Mir.Inst.Index {
     switch (mcv) {
         .eflags => |cc| {
             // Here we map the opposites since the jump is to the false branch.
-            return self.asmJccReloc(undefined, cc.negate());
+            return self.asmJccReloc(cc.negate(), undefined);
         },
         .register => |reg| {
             try self.spillEflagsIfOccupied();
             try self.asmRegisterImmediate(.{ ._, .@"test" }, reg, Immediate.u(1));
-            return self.asmJccReloc(undefined, .e);
+            return self.asmJccReloc(.e, undefined);
         },
         .immediate,
         .load_frame,
@@ -10391,7 +10521,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void {
     self.scope_generation += 1;
     const state = try self.saveState();
 
-    for (liveness_cond_br.then_deaths) |operand| try self.processDeath(operand);
+    for (liveness_cond_br.then_deaths) |death| try self.processDeath(death);
     try self.genBody(then_body);
     try self.restoreState(state, &.{}, .{
         .emit_instructions = false,
@@ -10402,7 +10532,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void {
 
     try self.performReloc(reloc);
 
-    for (liveness_cond_br.else_deaths) |operand| try self.processDeath(operand);
+    for (liveness_cond_br.else_deaths) |death| try self.processDeath(death);
     try self.genBody(else_body);
     try self.restoreState(state, &.{}, .{
         .emit_instructions = false,
@@ -10517,11 +10647,11 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC
 
 fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
     const mod = self.bin_file.options.module.?;
-    try self.spillEflagsIfOccupied();
-
     const opt_ty = ptr_ty.childType(mod);
     const pl_ty = opt_ty.optionalChild(mod);
 
+    try self.spillEflagsIfOccupied();
+
     const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload(mod))
         .{ .off = 0, .ty = if (pl_ty.isSlice(mod)) pl_ty.slicePtrFieldType(mod) else pl_ty }
     else
@@ -10548,27 +10678,24 @@ fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue)
     return .{ .eflags = .e };
 }
 
-fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) !MCValue {
+fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) !MCValue {
     const mod = self.bin_file.options.module.?;
-    const err_type = ty.errorUnionSet(mod);
-
-    if (err_type.errorSetIsEmpty(mod)) {
-        return MCValue{ .immediate = 0 }; // always false
-    }
+    const err_ty = eu_ty.errorUnionSet(mod);
+    if (err_ty.errorSetIsEmpty(mod)) return MCValue{ .immediate = 0 }; // always false
 
     try self.spillEflagsIfOccupied();
 
-    const err_off = errUnionErrorOffset(ty.errorUnionPayload(mod), mod);
-    switch (operand) {
+    const err_off: u31 = @intCast(errUnionErrorOffset(eu_ty.errorUnionPayload(mod), mod));
+    switch (eu_mcv) {
         .register => |reg| {
             const eu_lock = self.register_manager.lockReg(reg);
             defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
 
-            const tmp_reg = try self.copyToTmpRegister(ty, operand);
+            const tmp_reg = try self.copyToTmpRegister(eu_ty, eu_mcv);
             if (err_off > 0) {
                 try self.genShiftBinOpMir(
                     .{ ._r, .sh },
-                    ty,
+                    eu_ty,
                     .{ .register = tmp_reg },
                     .{ .immediate = @as(u6, @intCast(err_off * 8)) },
                 );
@@ -10587,19 +10714,63 @@ fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) !
             Type.anyerror,
             .{ .load_frame = .{
                 .index = frame_addr.index,
-                .off = frame_addr.off + @as(i32, @intCast(err_off)),
+                .off = frame_addr.off + err_off,
             } },
             .{ .immediate = 0 },
         ),
-        else => return self.fail("TODO implement isErr for {}", .{operand}),
+        else => return self.fail("TODO implement isErr for {}", .{eu_mcv}),
     }
 
     if (maybe_inst) |inst| self.eflags_inst = inst;
     return MCValue{ .eflags = .a };
 }
 
-fn isNonErr(self: *Self, inst: Air.Inst.Index, ty: Type, operand: MCValue) !MCValue {
-    const is_err_res = try self.isErr(inst, ty, operand);
+fn isErrPtr(self: *Self, maybe_inst: ?Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
+    const mod = self.bin_file.options.module.?;
+    const eu_ty = ptr_ty.childType(mod);
+    const err_ty = eu_ty.errorUnionSet(mod);
+    if (err_ty.errorSetIsEmpty(mod)) return MCValue{ .immediate = 0 }; // always false
+
+    try self.spillEflagsIfOccupied();
+
+    const ptr_reg = switch (ptr_mcv) {
+        .register => |reg| reg,
+        else => try self.copyToTmpRegister(ptr_ty, ptr_mcv),
+    };
+    const ptr_lock = self.register_manager.lockReg(ptr_reg);
+    defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
+
+    const err_off: u31 = @intCast(errUnionErrorOffset(eu_ty.errorUnionPayload(mod), mod));
+    try self.asmMemoryImmediate(
+        .{ ._, .cmp },
+        Memory.sib(self.memPtrSize(Type.anyerror), .{
+            .base = .{ .reg = ptr_reg },
+            .disp = err_off,
+        }),
+        Immediate.u(0),
+    );
+
+    if (maybe_inst) |inst| self.eflags_inst = inst;
+    return MCValue{ .eflags = .a };
+}
+
+fn isNonErr(self: *Self, inst: Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) !MCValue {
+    const is_err_res = try self.isErr(inst, eu_ty, eu_mcv);
+    switch (is_err_res) {
+        .eflags => |cc| {
+            assert(cc == .a);
+            return MCValue{ .eflags = cc.negate() };
+        },
+        .immediate => |imm| {
+            assert(imm == 0);
+            return MCValue{ .immediate = @intFromBool(imm == 0) };
+        },
+        else => unreachable,
+    }
+}
+
+fn isNonErrPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue {
+    const is_err_res = try self.isErrPtr(inst, ptr_ty, ptr_mcv);
     switch (is_err_res) {
         .eflags => |cc| {
             assert(cc == .a);
@@ -10607,7 +10778,7 @@ fn isNonErr(self: *Self, inst: Air.Inst.Index, ty: Type, operand: MCValue) !MCVa
         },
         .immediate => |imm| {
             assert(imm == 0);
-            return MCValue{ .immediate = 1 };
+            return MCValue{ .immediate = @intFromBool(imm == 0) };
         },
         else => unreachable,
     }
@@ -10660,29 +10831,10 @@ fn airIsErr(self: *Self, inst: Air.Inst.Index) !void {
 }
 
 fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void {
-    const mod = self.bin_file.options.module.?;
     const un_op = self.air.instructions.items(.data)[inst].un_op;
-
-    const operand_ptr = try self.resolveInst(un_op);
-    const operand_ptr_lock: ?RegisterLock = switch (operand_ptr) {
-        .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
-        else => null,
-    };
-    defer if (operand_ptr_lock) |lock| self.register_manager.unlockReg(lock);
-
-    const operand: MCValue = blk: {
-        if (self.reuseOperand(inst, un_op, 0, operand_ptr)) {
-            // The MCValue that holds the pointer can be re-used as the value.
-            break :blk operand_ptr;
-        } else {
-            break :blk try self.allocRegOrMem(inst, true);
-        }
-    };
-    const ptr_ty = self.typeOf(un_op);
-    try self.load(operand, ptr_ty, operand_ptr);
-
-    const result = try self.isErr(inst, ptr_ty.childType(mod), operand);
-
+    const operand = try self.resolveInst(un_op);
+    const ty = self.typeOf(un_op);
+    const result = try self.isErrPtr(inst, ty, operand);
     return self.finishAir(inst, result, .{ un_op, .none, .none });
 }
 
@@ -10695,29 +10847,10 @@ fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void {
 }
 
 fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void {
-    const mod = self.bin_file.options.module.?;
     const un_op = self.air.instructions.items(.data)[inst].un_op;
-
-    const operand_ptr = try self.resolveInst(un_op);
-    const operand_ptr_lock: ?RegisterLock = switch (operand_ptr) {
-        .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
-        else => null,
-    };
-    defer if (operand_ptr_lock) |lock| self.register_manager.unlockReg(lock);
-
-    const operand: MCValue = blk: {
-        if (self.reuseOperand(inst, un_op, 0, operand_ptr)) {
-            // The MCValue that holds the pointer can be re-used as the value.
-            break :blk operand_ptr;
-        } else {
-            break :blk try self.allocRegOrMem(inst, true);
-        }
-    };
-    const ptr_ty = self.typeOf(un_op);
-    try self.load(operand, ptr_ty, operand_ptr);
-
-    const result = try self.isNonErr(inst, ptr_ty.childType(mod), operand);
-
+    const operand = try self.resolveInst(un_op);
+    const ty = self.typeOf(un_op);
+    const result = try self.isNonErrPtr(inst, ty, operand);
     return self.finishAir(inst, result, .{ un_op, .none, .none });
 }
 
@@ -10810,7 +10943,7 @@ fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void {
         for (items, relocs, 0..) |item, *reloc, i| {
             const item_mcv = try self.resolveInst(item);
             try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, item_mcv);
-            reloc.* = try self.asmJccReloc(undefined, if (i < relocs.len - 1) .e else .ne);
+            reloc.* = try self.asmJccReloc(if (i < relocs.len - 1) .e else .ne, undefined);
         }
 
         for (liveness.deaths[case_i]) |operand| try self.processDeath(operand);
@@ -11210,6 +11343,15 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
             mnem_size = null;
             break :mnem std.meta.stringToEnum(Instruction.Mnemonic, mnem_str);
         } orelse return self.fail("invalid mnemonic: '{s}'", .{mnem_str});
+        if (@as(?Memory.PtrSize, switch (mnem_tag) {
+            .fldenv, .fnstenv, .fstenv => .none,
+            .ldmxcsr, .stmxcsr, .vldmxcsr, .vstmxcsr => .dword,
+            else => null,
+        })) |fixed_mnem_size| {
+            if (mnem_size) |size| if (size != fixed_mnem_size)
+                return self.fail("invalid size: '{s}'", .{mnem_str});
+            mnem_size = fixed_mnem_size;
+        }
         const mnem_name = @tagName(mnem_tag);
         const mnem_fixed_tag: Mir.Inst.FixedTag = for (std.enums.values(Mir.Inst.Fixes)) |fixes| {
             const fixes_name = @tagName(fixes);
@@ -11359,8 +11501,31 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
                 if (sib_it.next()) |_| return self.fail("invalid memory operand: '{s}'", .{op_str});
                 op.* = .{ .mem = Memory.sib(mnem_size orelse
                     return self.fail("unknown size: '{s}'", .{op_str}), .{
-                    .disp = if (open > 0) std.fmt.parseInt(i32, op_str[0..open], 0) catch
-                        return self.fail("invalid displacement: '{s}'", .{op_str}) else 0,
+                    .disp = if (mem.startsWith(u8, op_str[0..open], "%[") and
+                        mem.endsWith(u8, op_str[0..open], "]"))
+                    disp: {
+                        const colon = mem.indexOfScalarPos(u8, op_str[0..open], "%[".len, ':');
+                        const modifier = if (colon) |colon_pos|
+                            op_str[colon_pos + ":".len .. open - "]".len]
+                        else
+                            "";
+                        break :disp switch (args.items[
+                            arg_map.get(op_str["%[".len .. colon orelse open - "]".len]) orelse
+                                return self.fail("no matching constraint: '{s}'", .{op_str})
+                        ]) {
+                            .immediate => |imm| if (mem.eql(u8, modifier, "") or
+                                mem.eql(u8, modifier, "c"))
+                                math.cast(i32, @as(i64, @bitCast(imm))) orelse
+                                    return self.fail("invalid displacement: '{s}'", .{op_str})
+                            else
+                                return self.fail("invalid modifier: '{s}'", .{modifier}),
+                            else => return self.fail("invalid constraint: '{s}'", .{op_str}),
+                        };
+                    } else if (open > 0)
+                        std.fmt.parseInt(i32, op_str[0..open], 0) catch
+                            return self.fail("invalid displacement: '{s}'", .{op_str})
+                    else
+                        0,
                     .base = if (base_str.len > 0) .{ .reg = parseRegName(base_str["%%".len..]) orelse
                         return self.fail("invalid base register: '{s}'", .{base_str}) } else .none,
                     .scale_index = if (index_str.len > 0) .{
@@ -11840,7 +12005,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
         .reserved_frame,
         => unreachable,
         .undef => {},
-        .eflags => |cc| try self.asmSetccRegister(dst_reg.to8(), cc),
+        .eflags => |cc| try self.asmSetccRegister(cc, dst_reg.to8()),
         .immediate => |imm| {
             if (imm == 0) {
                 // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit
@@ -12110,7 +12275,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal
                 );
             },
         },
-        .eflags => |cc| try self.asmSetccMemory(Memory.sib(.byte, .{ .base = base, .disp = disp }), cc),
+        .eflags => |cc| try self.asmSetccMemory(cc, Memory.sib(.byte, .{ .base = base, .disp = disp })),
         .register => |src_reg| try (try self.moveStrategy(ty, switch (base) {
             .none => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
             .reg => |reg| switch (reg) {
@@ -12870,7 +13035,7 @@ fn atomicOp(
                 ptr_mem,
                 registerAlias(tmp_reg, val_abi_size),
             );
-            _ = try self.asmJccReloc(loop, .ne);
+            _ = try self.asmJccReloc(.ne, loop);
             return if (unused) .unreach else .{ .register = .rax };
         } else {
             try self.asmRegisterMemory(.{ ._, .mov }, .rax, Memory.sib(.qword, .{
@@ -12932,7 +13097,7 @@ fn atomicOp(
                 }),
             };
             try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem);
-            _ = try self.asmJccReloc(loop, .ne);
+            _ = try self.asmJccReloc(.ne, loop);
 
             if (unused) return .unreach;
             const dst_mcv = try self.allocTempRegOrMem(val_ty, false);
@@ -13084,7 +13249,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
 
             try self.genSetReg(len_reg, Type.usize, len);
 
-            const skip_reloc = try self.asmJccReloc(undefined, .z);
+            const skip_reloc = try self.asmJccReloc(.z, undefined);
             try self.store(slice_ptr_ty, ptr, src_val);
 
             const second_elem_ptr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
@@ -14041,7 +14206,7 @@ fn airVaArg(self: *Self, inst: Air.Inst.Index) !void {
                     try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, Immediate.u(
                         abi.SysV.c_abi_int_param_regs.len * 8,
                     ));
-                    const mem_reloc = try self.asmJccReloc(undefined, .ae);
+                    const mem_reloc = try self.asmJccReloc(.ae, undefined);
 
                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area);
                     if (!unused)
@@ -14080,7 +14245,7 @@ fn airVaArg(self: *Self, inst: Air.Inst.Index) !void {
                     try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, Immediate.u(
                         abi.SysV.c_abi_int_param_regs.len * 8 + abi.SysV.c_abi_sse_param_regs.len * 16,
                     ));
-                    const mem_reloc = try self.asmJccReloc(undefined, .ae);
+                    const mem_reloc = try self.asmJccReloc(.ae, undefined);
 
                     try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area);
                     if (!unused)
src/arch/x86_64/Disassembler.zig
@@ -38,7 +38,7 @@ pub fn next(dis: *Disassembler) Error!?Instruction {
 
     const enc = try dis.parseEncoding(prefixes) orelse return error.UnknownOpcode;
     switch (enc.data.op_en) {
-        .np => return inst(enc, .{}),
+        .zo => return inst(enc, .{}),
         .d, .i => {
             const imm = try dis.parseImm(enc.data.ops[0]);
             return inst(enc, .{
src/arch/x86_64/encoder.zig
@@ -102,7 +102,7 @@ pub const Instruction = struct {
                 .reg => |reg| try writer.writeAll(@tagName(reg)),
                 .mem => |mem| switch (mem) {
                     .rip => |rip| {
-                        try writer.print("{s} ptr [rip", .{@tagName(rip.ptr_size)});
+                        try writer.print("{} [rip", .{rip.ptr_size});
                         if (rip.disp != 0) try writer.print(" {c} 0x{x}", .{
                             @as(u8, if (rip.disp < 0) '-' else '+'),
                             @abs(rip.disp),
@@ -110,7 +110,7 @@ pub const Instruction = struct {
                         try writer.writeByte(']');
                     },
                     .sib => |sib| {
-                        try writer.print("{s} ptr ", .{@tagName(sib.ptr_size)});
+                        try writer.print("{} ", .{sib.ptr_size});
 
                         if (mem.isSegmentRegister()) {
                             return writer.print("{s}:0x{x}", .{ @tagName(sib.base.reg), sib.disp });
@@ -222,7 +222,7 @@ pub const Instruction = struct {
         }
 
         switch (data.op_en) {
-            .np, .o => {},
+            .zo, .o => {},
             .i, .d => try encodeImm(inst.ops[0].imm, data.ops[0], encoder),
             .zi, .oi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder),
             .fd => try encoder.imm64(inst.ops[1].mem.moffs.offset),
@@ -300,7 +300,7 @@ pub const Instruction = struct {
         }
 
         const segment_override: ?Register = switch (op_en) {
-            .i, .zi, .o, .oi, .d, .np => null,
+            .zo, .i, .zi, .o, .oi, .d => null,
             .fd => inst.ops[1].mem.base().reg,
             .td => inst.ops[0].mem.base().reg,
             .rm, .rmi, .rm0 => if (inst.ops[1].isSegmentRegister())
@@ -336,7 +336,7 @@ pub const Instruction = struct {
         rex.w = inst.encoding.data.mode == .long;
 
         switch (op_en) {
-            .np, .i, .zi, .fd, .td, .d => {},
+            .zo, .i, .zi, .fd, .td, .d => {},
             .o, .oi => rex.b = inst.ops[0].reg.isExtended(),
             .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0 => {
                 const r_op = switch (op_en) {
@@ -370,7 +370,7 @@ pub const Instruction = struct {
         vex.w = inst.encoding.data.mode.isLong();
 
         switch (op_en) {
-            .np, .i, .zi, .fd, .td, .d => {},
+            .zo, .i, .zi, .fd, .td, .d => {},
             .o, .oi => vex.b = inst.ops[0].reg.isExtended(),
             .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr => {
                 const r_op = switch (op_en) {
src/arch/x86_64/Encoding.zig
@@ -166,7 +166,7 @@ pub fn format(
     for (opc) |byte| try writer.print("{x:0>2} ", .{byte});
 
     switch (encoding.data.op_en) {
-        .np, .fd, .td, .i, .zi, .d => {},
+        .zo, .fd, .td, .i, .zi, .d => {},
         .o, .oi => {
             const tag = switch (encoding.data.ops[0]) {
                 .r8 => "rb",
@@ -203,7 +203,7 @@ pub fn format(
             try writer.print("{s} ", .{tag});
         },
         .rvmr => try writer.writeAll("/is4 "),
-        .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr => {},
+        .zo, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr => {},
     }
 
     try writer.print("{s} ", .{@tagName(encoding.mnemonic)});
@@ -246,7 +246,7 @@ pub const Mnemonic = enum {
     movsx, movsxd, movzx, mul,
     neg, nop, not,
     @"or",
-    pause, pop, popcnt, push,
+    pause, pop, popcnt, popfq, push, pushfq,
     rcl, rcr, ret, rol, ror,
     sal, sar, sbb,
     scas, scasb, scasd, scasq, scasw,
@@ -260,7 +260,7 @@ pub const Mnemonic = enum {
     ud2,
     xadd, xchg, xgetbv, xor,
     // X87
-    fabs, fchs, ffree, fisttp, fld, fst, fstp,
+    fabs, fchs, ffree, fisttp, fld, fldenv, fnstenv, fst, fstenv, fstp,
     // MMX
     movd, movq,
     packssdw, packsswb, packuswb,
@@ -280,6 +280,7 @@ pub const Mnemonic = enum {
     cmpps, cmpss,
     cvtpi2ps, cvtps2pi, cvtsi2ss, cvtss2si, cvttps2pi, cvttss2si,
     divps, divss,
+    ldmxcsr,
     maxps, maxss,
     minps, minss,
     movaps, movhlps, movlhps,
@@ -291,6 +292,7 @@ pub const Mnemonic = enum {
     pmaxsw, pmaxub, pminsw, pminub, pmovmskb,
     shufps,
     sqrtps, sqrtss,
+    stmxcsr,
     subps, subss,
     ucomiss,
     xorps,
@@ -358,6 +360,7 @@ pub const Mnemonic = enum {
     vdivpd, vdivps, vdivsd, vdivss,
     vextractf128, vextractps,
     vinsertf128, vinsertps,
+    vldmxcsr,
     vmaxpd, vmaxps, vmaxsd, vmaxss,
     vminpd, vminps, vminsd, vminss,
     vmovapd, vmovaps,
@@ -397,6 +400,7 @@ pub const Mnemonic = enum {
     vroundpd, vroundps, vroundsd, vroundss,
     vshufpd, vshufps,
     vsqrtpd, vsqrtps, vsqrtsd, vsqrtss,
+    vstmxcsr,
     vsubpd, vsubps, vsubsd, vsubss,
     vxorpd, vxorps,
     // F16C
@@ -411,7 +415,7 @@ pub const Mnemonic = enum {
 
 pub const OpEn = enum {
     // zig fmt: off
-    np,
+    zo,
     o, oi,
     i, zi,
     d, m,
@@ -481,6 +485,7 @@ pub const Op = enum {
             .mem => |mem| switch (mem) {
                 .moffs => .moffs,
                 .sib, .rip => switch (mem.bitSize()) {
+                    0 => .m,
                     8 => .m8,
                     16 => .m16,
                     32 => .m32,
@@ -610,7 +615,7 @@ pub const Op = enum {
             .imm8s, .imm16s, .imm32s,
             .rel8, .rel16, .rel32,
             .unity,
-            =>  true,
+            => true,
             else => false,
         };
         // zig fmt: on
@@ -626,7 +631,7 @@ pub const Op = enum {
             .mm_m64,
             .xmm_m32, .xmm_m64, .xmm_m128,
             .ymm_m256,
-            =>  true,
+            => true,
             else => false,
         };
         // zig fmt: on
@@ -657,7 +662,7 @@ pub const Op = enum {
     /// Given an operand `op` checks if `target` is a subset for the purposes of the encoding.
     pub fn isSubset(op: Op, target: Op) bool {
         switch (op) {
-            .m, .o16, .o32, .o64 => unreachable,
+            .o16, .o32, .o64 => unreachable,
             .moffs, .sreg => return op == target,
             .none => switch (target) {
                 .o16, .o32, .o64, .none => return true,
src/arch/x86_64/encodings.zig
@@ -121,17 +121,16 @@ pub const table = [_]Entry{
     .{ .bts, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 5, .none,  .none },
     .{ .bts, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 5, .long,  .none },
 
-    // This is M encoding according to Intel, but D makes more sense here.
     .{ .call, .d, &.{ .rel32 }, &.{ 0xe8 }, 0, .none, .none },
     .{ .call, .m, &.{ .rm64  }, &.{ 0xff }, 2, .none, .none },
 
-    .{ .cbw,  .np, &.{ .o16 }, &.{ 0x98 }, 0, .short, .none },
-    .{ .cwde, .np, &.{ .o32 }, &.{ 0x98 }, 0, .none,  .none },
-    .{ .cdqe, .np, &.{ .o64 }, &.{ 0x98 }, 0, .long,  .none },
+    .{ .cbw,  .zo, &.{ .o16 }, &.{ 0x98 }, 0, .short, .none },
+    .{ .cwde, .zo, &.{ .o32 }, &.{ 0x98 }, 0, .none,  .none },
+    .{ .cdqe, .zo, &.{ .o64 }, &.{ 0x98 }, 0, .long,  .none },
 
-    .{ .cwd, .np, &.{ .o16 }, &.{ 0x99 }, 0, .short, .none },
-    .{ .cdq, .np, &.{ .o32 }, &.{ 0x99 }, 0, .none,  .none },
-    .{ .cqo, .np, &.{ .o64 }, &.{ 0x99 }, 0, .long,  .none },
+    .{ .cwd, .zo, &.{ .o16 }, &.{ 0x99 }, 0, .short, .none },
+    .{ .cdq, .zo, &.{ .o32 }, &.{ 0x99 }, 0, .none,  .none },
+    .{ .cqo, .zo, &.{ .o64 }, &.{ 0x99 }, 0, .long,  .none },
 
     .{ .cmova,   .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .none },
     .{ .cmova,   .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none,  .none },
@@ -247,15 +246,15 @@ pub const table = [_]Entry{
     .{ .cmp, .rm, &.{ .r32,  .rm32   }, &.{ 0x3b }, 0, .none,  .none },
     .{ .cmp, .rm, &.{ .r64,  .rm64   }, &.{ 0x3b }, 0, .long,  .none },
 
-    .{ .cmps,  .np, &.{ .m8,   .m8   }, &.{ 0xa6 }, 0, .none,  .none },
-    .{ .cmps,  .np, &.{ .m16,  .m16  }, &.{ 0xa7 }, 0, .short, .none },
-    .{ .cmps,  .np, &.{ .m32,  .m32  }, &.{ 0xa7 }, 0, .none,  .none },
-    .{ .cmps,  .np, &.{ .m64,  .m64  }, &.{ 0xa7 }, 0, .long,  .none },
+    .{ .cmps,  .zo, &.{ .m8,   .m8   }, &.{ 0xa6 }, 0, .none,  .none },
+    .{ .cmps,  .zo, &.{ .m16,  .m16  }, &.{ 0xa7 }, 0, .short, .none },
+    .{ .cmps,  .zo, &.{ .m32,  .m32  }, &.{ 0xa7 }, 0, .none,  .none },
+    .{ .cmps,  .zo, &.{ .m64,  .m64  }, &.{ 0xa7 }, 0, .long,  .none },
 
-    .{ .cmpsb, .np, &.{}, &.{ 0xa6 }, 0, .none,  .none },
-    .{ .cmpsw, .np, &.{}, &.{ 0xa7 }, 0, .short, .none },
-    .{ .cmpsd, .np, &.{}, &.{ 0xa7 }, 0, .none,  .none },
-    .{ .cmpsq, .np, &.{}, &.{ 0xa7 }, 0, .long,  .none },
+    .{ .cmpsb, .zo, &.{}, &.{ 0xa6 }, 0, .none,  .none },
+    .{ .cmpsw, .zo, &.{}, &.{ 0xa7 }, 0, .short, .none },
+    .{ .cmpsd, .zo, &.{}, &.{ 0xa7 }, 0, .none,  .none },
+    .{ .cmpsq, .zo, &.{}, &.{ 0xa7 }, 0, .long,  .none },
 
     .{ .cmpxchg, .mr, &.{ .rm8,  .r8  }, &.{ 0x0f, 0xb0 }, 0, .none,  .none },
     .{ .cmpxchg, .mr, &.{ .rm8,  .r8  }, &.{ 0x0f, 0xb0 }, 0, .rex,   .none },
@@ -266,7 +265,7 @@ pub const table = [_]Entry{
     .{ .cmpxchg8b,  .m, &.{ .m64  }, &.{ 0x0f, 0xc7 }, 1, .none, .none },
     .{ .cmpxchg16b, .m, &.{ .m128 }, &.{ 0x0f, 0xc7 }, 1, .long, .none },
 
-    .{ .cpuid, .np, &.{}, &.{ 0x0f, 0xa2 }, 0, .none, .none },
+    .{ .cpuid, .zo, &.{}, &.{ 0x0f, 0xa2 }, 0, .none, .none },
 
     .{ .div, .m, &.{ .rm8  }, &.{ 0xf6 }, 6, .none,  .none },
     .{ .div, .m, &.{ .rm8  }, &.{ 0xf6 }, 6, .rex,   .none },
@@ -295,7 +294,7 @@ pub const table = [_]Entry{
     .{ .imul, .rmi, &.{ .r32,  .rm32, .imm32 }, &.{ 0x69       }, 0, .none,  .none },
     .{ .imul, .rmi, &.{ .r64,  .rm64, .imm32 }, &.{ 0x69       }, 0, .long,  .none },
 
-    .{ .int3, .np, &.{}, &.{ 0xcc }, 0, .none, .none },
+    .{ .int3, .zo, &.{}, &.{ 0xcc }, 0, .none, .none },
 
     .{ .ja,    .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none },
     .{ .jae,   .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none },
@@ -336,23 +335,23 @@ pub const table = [_]Entry{
     .{ .lea, .rm, &.{ .r32, .m }, &.{ 0x8d }, 0, .none,  .none },
     .{ .lea, .rm, &.{ .r64, .m }, &.{ 0x8d }, 0, .long,  .none },
 
-    .{ .lfence, .np, &.{}, &.{ 0x0f, 0xae, 0xe8 }, 0, .none, .none },
+    .{ .lfence, .zo, &.{}, &.{ 0x0f, 0xae, 0xe8 }, 0, .none, .none },
 
-    .{ .lods,  .np, &.{ .m8  }, &.{ 0xac }, 0, .none,  .none },
-    .{ .lods,  .np, &.{ .m16 }, &.{ 0xad }, 0, .short, .none },
-    .{ .lods,  .np, &.{ .m32 }, &.{ 0xad }, 0, .none,  .none },
-    .{ .lods,  .np, &.{ .m64 }, &.{ 0xad }, 0, .long,  .none },
+    .{ .lods,  .zo, &.{ .m8  }, &.{ 0xac }, 0, .none,  .none },
+    .{ .lods,  .zo, &.{ .m16 }, &.{ 0xad }, 0, .short, .none },
+    .{ .lods,  .zo, &.{ .m32 }, &.{ 0xad }, 0, .none,  .none },
+    .{ .lods,  .zo, &.{ .m64 }, &.{ 0xad }, 0, .long,  .none },
 
-    .{ .lodsb, .np, &.{}, &.{ 0xac }, 0, .none,  .none },
-    .{ .lodsw, .np, &.{}, &.{ 0xad }, 0, .short, .none },
-    .{ .lodsd, .np, &.{}, &.{ 0xad }, 0, .none,  .none },
-    .{ .lodsq, .np, &.{}, &.{ 0xad }, 0, .long,  .none },
+    .{ .lodsb, .zo, &.{}, &.{ 0xac }, 0, .none,  .none },
+    .{ .lodsw, .zo, &.{}, &.{ 0xad }, 0, .short, .none },
+    .{ .lodsd, .zo, &.{}, &.{ 0xad }, 0, .none,  .none },
+    .{ .lodsq, .zo, &.{}, &.{ 0xad }, 0, .long,  .none },
 
     .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt },
     .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none,  .lzcnt },
     .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long,  .lzcnt },
 
-    .{ .mfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none },
+    .{ .mfence, .zo, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none },
 
     .{ .mov, .mr, &.{ .rm8,     .r8      }, &.{ 0x88 }, 0, .none,  .none },
     .{ .mov, .mr, &.{ .rm8,     .r8      }, &.{ 0x88 }, 0, .rex,   .none },
@@ -396,15 +395,15 @@ pub const table = [_]Entry{
     .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none,  .movbe },
     .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long,  .movbe },
 
-    .{ .movs,  .np, &.{ .m8,  .m8  }, &.{ 0xa4 }, 0, .none,  .none },
-    .{ .movs,  .np, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .short, .none },
-    .{ .movs,  .np, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, .none,  .none },
-    .{ .movs,  .np, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long,  .none },
+    .{ .movs,  .zo, &.{ .m8,  .m8  }, &.{ 0xa4 }, 0, .none,  .none },
+    .{ .movs,  .zo, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .short, .none },
+    .{ .movs,  .zo, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, .none,  .none },
+    .{ .movs,  .zo, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long,  .none },
 
-    .{ .movsb, .np, &.{}, &.{ 0xa4 }, 0, .none,  .none },
-    .{ .movsw, .np, &.{}, &.{ 0xa5 }, 0, .short, .none },
-    .{ .movsd, .np, &.{}, &.{ 0xa5 }, 0, .none,  .none },
-    .{ .movsq, .np, &.{}, &.{ 0xa5 }, 0, .long,  .none },
+    .{ .movsb, .zo, &.{}, &.{ 0xa4 }, 0, .none,  .none },
+    .{ .movsw, .zo, &.{}, &.{ 0xa5 }, 0, .short, .none },
+    .{ .movsd, .zo, &.{}, &.{ 0xa5 }, 0, .none,  .none },
+    .{ .movsq, .zo, &.{}, &.{ 0xa5 }, 0, .long,  .none },
 
     .{ .movsx, .rm, &.{ .r16, .rm8  }, &.{ 0x0f, 0xbe }, 0, .short,     .none },
     .{ .movsx, .rm, &.{ .r16, .rm8  }, &.{ 0x0f, 0xbe }, 0, .rex_short, .none },
@@ -440,7 +439,7 @@ pub const table = [_]Entry{
     .{ .neg, .m, &.{ .rm32 }, &.{ 0xf7 }, 3, .none,  .none },
     .{ .neg, .m, &.{ .rm64 }, &.{ 0xf7 }, 3, .long,  .none },
 
-    .{ .nop, .np, &.{}, &.{ 0x90 }, 0, .none, .none },
+    .{ .nop, .zo, &.{}, &.{ 0x90 }, 0, .none, .none },
 
     .{ .not, .m, &.{ .rm8  }, &.{ 0xf6 }, 2, .none,  .none },
     .{ .not, .m, &.{ .rm8  }, &.{ 0xf6 }, 2, .rex,   .none },
@@ -471,7 +470,7 @@ pub const table = [_]Entry{
     .{ .@"or", .rm, &.{ .r32,  .rm32   }, &.{ 0x0b }, 0, .none,  .none },
     .{ .@"or", .rm, &.{ .r64,  .rm64   }, &.{ 0x0b }, 0, .long,  .none },
 
-    .{ .pause, .np, &.{}, &.{ 0xf3, 0x90 }, 0, .none, .none },
+    .{ .pause, .zo, &.{}, &.{ 0xf3, 0x90 }, 0, .none, .none },
 
     .{ .pop, .o, &.{ .r16  }, &.{ 0x58 }, 0, .short, .none },
     .{ .pop, .o, &.{ .r64  }, &.{ 0x58 }, 0, .none,  .none },
@@ -482,6 +481,8 @@ pub const table = [_]Entry{
     .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none,  .popcnt },
     .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long,  .popcnt },
 
+    .{ .popfq, .zo, &.{}, &.{ 0x9d }, 0, .none, .none },
+
     .{ .push, .o, &.{ .r16   }, &.{ 0x50 }, 0, .short, .none },
     .{ .push, .o, &.{ .r64   }, &.{ 0x50 }, 0, .none,  .none },
     .{ .push, .m, &.{ .rm16  }, &.{ 0xff }, 6, .short, .none },
@@ -490,7 +491,9 @@ pub const table = [_]Entry{
     .{ .push, .i, &.{ .imm16 }, &.{ 0x68 }, 0, .short, .none },
     .{ .push, .i, &.{ .imm32 }, &.{ 0x68 }, 0, .none,  .none },
 
-    .{ .ret, .np, &.{}, &.{ 0xc3 }, 0, .none, .none },
+    .{ .pushfq, .zo, &.{}, &.{ 0x9c }, 0, .none, .none },
+
+    .{ .ret, .zo, &.{}, &.{ 0xc3 }, 0, .none, .none },
 
     .{ .rcl, .m1, &.{ .rm8,  .unity }, &.{ 0xd0 }, 2, .none,  .none },
     .{ .rcl, .m1, &.{ .rm8,  .unity }, &.{ 0xd0 }, 2, .rex,   .none },
@@ -611,15 +614,15 @@ pub const table = [_]Entry{
     .{ .sbb, .rm, &.{ .r32,  .rm32   }, &.{ 0x1b }, 0, .none,  .none },
     .{ .sbb, .rm, &.{ .r64,  .rm64   }, &.{ 0x1b }, 0, .long,  .none },
 
-    .{ .scas,  .np, &.{ .m8  }, &.{ 0xae }, 0, .none,  .none },
-    .{ .scas,  .np, &.{ .m16 }, &.{ 0xaf }, 0, .short, .none },
-    .{ .scas,  .np, &.{ .m32 }, &.{ 0xaf }, 0, .none,  .none },
-    .{ .scas,  .np, &.{ .m64 }, &.{ 0xaf }, 0, .long,  .none },
+    .{ .scas,  .zo, &.{ .m8  }, &.{ 0xae }, 0, .none,  .none },
+    .{ .scas,  .zo, &.{ .m16 }, &.{ 0xaf }, 0, .short, .none },
+    .{ .scas,  .zo, &.{ .m32 }, &.{ 0xaf }, 0, .none,  .none },
+    .{ .scas,  .zo, &.{ .m64 }, &.{ 0xaf }, 0, .long,  .none },
 
-    .{ .scasb, .np, &.{}, &.{ 0xae }, 0, .none,  .none },
-    .{ .scasw, .np, &.{}, &.{ 0xaf }, 0, .short, .none },
-    .{ .scasd, .np, &.{}, &.{ 0xaf }, 0, .none,  .none },
-    .{ .scasq, .np, &.{}, &.{ 0xaf }, 0, .long,  .none },
+    .{ .scasb, .zo, &.{}, &.{ 0xae }, 0, .none,  .none },
+    .{ .scasw, .zo, &.{}, &.{ 0xaf }, 0, .short, .none },
+    .{ .scasd, .zo, &.{}, &.{ 0xaf }, 0, .none,  .none },
+    .{ .scasq, .zo, &.{}, &.{ 0xaf }, 0, .long,  .none },
 
     .{ .seta,   .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none, .none },
     .{ .seta,   .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex,  .none },
@@ -682,7 +685,7 @@ pub const table = [_]Entry{
     .{ .setz,   .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none, .none },
     .{ .setz,   .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex,  .none },
 
-    .{ .sfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf8 }, 0, .none, .none },
+    .{ .sfence, .zo, &.{}, &.{ 0x0f, 0xae, 0xf8 }, 0, .none, .none },
 
     .{ .shl, .m1, &.{ .rm8,  .unity }, &.{ 0xd0 }, 4, .none,  .none },
     .{ .shl, .m1, &.{ .rm8,  .unity }, &.{ 0xd0 }, 4, .rex,   .none },
@@ -730,15 +733,15 @@ pub const table = [_]Entry{
     .{ .shrd, .mrc, &.{ .rm32, .r32, .cl   }, &.{ 0x0f, 0xad }, 0, .none,  .none },
     .{ .shrd, .mrc, &.{ .rm64, .r64, .cl   }, &.{ 0x0f, 0xad }, 0, .long,  .none },
 
-    .{ .stos,  .np, &.{ .m8  }, &.{ 0xaa }, 0, .none,  .none },
-    .{ .stos,  .np, &.{ .m16 }, &.{ 0xab }, 0, .short, .none },
-    .{ .stos,  .np, &.{ .m32 }, &.{ 0xab }, 0, .none,  .none },
-    .{ .stos,  .np, &.{ .m64 }, &.{ 0xab }, 0, .long,  .none },
+    .{ .stos,  .zo, &.{ .m8  }, &.{ 0xaa }, 0, .none,  .none },
+    .{ .stos,  .zo, &.{ .m16 }, &.{ 0xab }, 0, .short, .none },
+    .{ .stos,  .zo, &.{ .m32 }, &.{ 0xab }, 0, .none,  .none },
+    .{ .stos,  .zo, &.{ .m64 }, &.{ 0xab }, 0, .long,  .none },
 
-    .{ .stosb, .np, &.{}, &.{ 0xaa }, 0, .none,  .none },
-    .{ .stosw, .np, &.{}, &.{ 0xab }, 0, .short, .none },
-    .{ .stosd, .np, &.{}, &.{ 0xab }, 0, .none,  .none },
-    .{ .stosq, .np, &.{}, &.{ 0xab }, 0, .long,  .none },
+    .{ .stosb, .zo, &.{}, &.{ 0xaa }, 0, .none,  .none },
+    .{ .stosw, .zo, &.{}, &.{ 0xab }, 0, .short, .none },
+    .{ .stosd, .zo, &.{}, &.{ 0xab }, 0, .none,  .none },
+    .{ .stosq, .zo, &.{}, &.{ 0xab }, 0, .long,  .none },
 
     .{ .sub, .zi, &.{ .al,   .imm8   }, &.{ 0x2c }, 0, .none,  .none },
     .{ .sub, .zi, &.{ .ax,   .imm16  }, &.{ 0x2d }, 0, .short, .none },
@@ -763,7 +766,7 @@ pub const table = [_]Entry{
     .{ .sub, .rm, &.{ .r32,  .rm32   }, &.{ 0x2b }, 0, .none,  .none },
     .{ .sub, .rm, &.{ .r64,  .rm64   }, &.{ 0x2b }, 0, .long,  .none },
 
-    .{ .syscall, .np, &.{}, &.{ 0x0f, 0x05 }, 0, .none, .none },
+    .{ .syscall, .zo, &.{}, &.{ 0x0f, 0x05 }, 0, .none, .none },
 
     .{ .@"test", .zi, &.{ .al,   .imm8   }, &.{ 0xa8 }, 0, .none,  .none },
     .{ .@"test", .zi, &.{ .ax,   .imm16  }, &.{ 0xa9 }, 0, .short, .none },
@@ -784,7 +787,7 @@ pub const table = [_]Entry{
     .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none,  .bmi },
     .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long,  .bmi },
 
-    .{ .ud2, .np, &.{}, &.{ 0x0f, 0x0b }, 0, .none, .none },
+    .{ .ud2, .zo, &.{}, &.{ 0x0f, 0x0b }, 0, .none, .none },
 
     .{ .xadd, .mr, &.{ .rm8,  .r8  }, &.{ 0x0f, 0xc0 }, 0, .none,  .none },
     .{ .xadd, .mr, &.{ .rm8,  .r8  }, &.{ 0x0f, 0xc0 }, 0, .rex,   .none },
@@ -809,7 +812,7 @@ pub const table = [_]Entry{
     .{ .xchg, .rm, &.{ .r32,  .rm32 }, &.{ 0x87 }, 0, .none,  .none },
     .{ .xchg, .rm, &.{ .r64,  .rm64 }, &.{ 0x87 }, 0, .long,  .none },
 
-    .{ .xgetbv, .np, &.{}, &.{ 0x0f, 0x01 }, 0, .none, .none },
+    .{ .xgetbv, .zo, &.{}, &.{ 0x0f, 0x01, 0xd0 }, 0, .none, .none },
 
     .{ .xor, .zi, &.{ .al,   .imm8   }, &.{ 0x34 }, 0, .none,  .none },
     .{ .xor, .zi, &.{ .ax,   .imm16  }, &.{ 0x35 }, 0, .short, .none },
@@ -835,9 +838,9 @@ pub const table = [_]Entry{
     .{ .xor, .rm, &.{ .r64,  .rm64   }, &.{ 0x33 }, 0, .long,  .none },
 
     // X87
-    .{ .fabs, .np, &.{}, &.{ 0xd9, 0xe1 }, 0, .none, .x87 },
+    .{ .fabs, .zo, &.{}, &.{ 0xd9, 0xe1 }, 0, .none, .x87 },
 
-    .{ .fchs, .np, &.{}, &.{ 0xd9, 0xe0 }, 0, .none, .x87 },
+    .{ .fchs, .zo, &.{}, &.{ 0xd9, 0xe0 }, 0, .none, .x87 },
 
     .{ .ffree, .o, &.{ .st }, &.{ 0xdd, 0xc0 }, 0, .none, .x87 },
 
@@ -850,6 +853,8 @@ pub const table = [_]Entry{
     .{ .fld, .m, &.{ .m80 }, &.{ 0xdb       }, 5, .none, .x87 },
     .{ .fld, .o, &.{ .st  }, &.{ 0xd9, 0xc0 }, 0, .none, .x87 },
 
+    .{ .fldenv, .m, &.{ .m }, &.{ 0xd9 }, 4, .none, .x87 },
+
     .{ .fst,  .m, &.{ .m32 }, &.{ 0xd9       }, 2, .none, .x87 },
     .{ .fst,  .m, &.{ .m64 }, &.{ 0xdd       }, 2, .none, .x87 },
     .{ .fst,  .o, &.{ .st  }, &.{ 0xdd, 0xd0 }, 0, .none, .x87 },
@@ -858,6 +863,9 @@ pub const table = [_]Entry{
     .{ .fstp, .m, &.{ .m80 }, &.{ 0xdb       }, 7, .none, .x87 },
     .{ .fstp, .o, &.{ .st  }, &.{ 0xdd, 0xd8 }, 0, .none, .x87 },
 
+    .{ .fstenv,  .m, &.{ .m }, &.{ 0x9b, 0xd9 }, 6, .none, .x87 },
+    .{ .fnstenv, .m, &.{ .m }, &.{       0xd9 }, 6, .none, .x87 },
+
     // SSE
     .{ .addps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .none, .sse },
 
@@ -890,6 +898,8 @@ pub const table = [_]Entry{
 
     .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .none, .sse },
 
+    .{ .ldmxcsr, .m, &.{ .m32 }, &.{ 0x0f, 0xae }, 2, .none, .sse },
+
     .{ .maxps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5f }, 0, .none, .sse },
 
     .{ .maxss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .none, .sse },
@@ -929,6 +939,8 @@ pub const table = [_]Entry{
 
     .{ .sqrtss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .none, .sse },
 
+    .{ .stmxcsr, .m, &.{ .m32 }, &.{ 0x0f, 0xae }, 3, .none, .sse },
+
     .{ .subps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse },
 
     .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse },
@@ -1365,6 +1377,8 @@ pub const table = [_]Entry{
 
     .{ .vinsertps, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .vex_128_wig, .avx },
 
+    .{ .vldmxcsr, .m, &.{ .m32 }, &.{ 0x0f, 0xae }, 2, .vex_lz_wig, .avx },
+
     .{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx },
     .{ .vmaxpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_256_wig, .avx },
 
@@ -1635,6 +1649,8 @@ pub const table = [_]Entry{
 
     .{ .vsqrtss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .vex_lig_wig, .avx },
 
+    .{ .vstmxcsr, .m, &.{ .m32 }, &.{ 0x0f, 0xae }, 3, .vex_lz_wig, .avx },
+
     .{ .vsubpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5c }, 0, .vex_128_wig, .avx },
     .{ .vsubpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5c }, 0, .vex_256_wig, .avx },
 
src/arch/x86_64/Mir.zig
@@ -394,8 +394,12 @@ pub const Inst = struct {
         pop,
         /// Return the count of number of bits set to 1
         popcnt,
+        /// Pop stack into EFLAGS register
+        popfq,
         /// Push
         push,
+        /// Push EFLAGS register onto the stack
+        pushfq,
         /// Rotate left through carry
         /// Rotate right through carry
         rc,
@@ -458,8 +462,14 @@ pub const Inst = struct {
         istt,
         /// Load floating-point value
         ld,
+        /// Load x87 FPU environment
+        ldenv,
+        /// Store x87 FPU environment
+        nstenv,
         /// Store floating-point value
         st,
+        /// Store x87 FPU environment
+        stenv,
 
         /// Pack with signed saturation
         ackssw,
@@ -505,6 +515,11 @@ pub const Inst = struct {
         /// Subtract packed unsigned integers with unsigned saturation
         subus,
 
+        /// Load MXCSR register
+        ldmxcsr,
+        /// Store MXCSR register state
+        stmxcsr,
+
         /// Convert packed doubleword integers to packed single-precision floating-point values
         /// Convert packed doubleword integers to packed double-precision floating-point values
         cvtpi2,
@@ -1079,9 +1094,13 @@ pub const RegisterList = struct {
         return self.bitset.iterator(options);
     }
 
-    pub fn count(self: Self) u32 {
+    pub fn count(self: Self) i32 {
         return @intCast(self.bitset.count());
     }
+
+    pub fn size(self: Self) i32 {
+        return @intCast(self.bitset.count() * 8);
+    }
 };
 
 pub const Imm32 = struct {
src/codegen.zig
@@ -850,7 +850,7 @@ fn genDeclRef(
     bin_file: *link.File,
     src_loc: Module.SrcLoc,
     tv: TypedValue,
-    decl_index: Module.Decl.Index,
+    ptr_decl_index: Module.Decl.Index,
 ) CodeGenError!GenResult {
     const mod = bin_file.options.module.?;
     log.debug("genDeclRef: ty = {}, val = {}", .{ tv.ty.fmt(mod), tv.val.fmtValue(tv.ty, mod) });
@@ -859,6 +859,12 @@ fn genDeclRef(
     const ptr_bits = target.ptrBitWidth();
     const ptr_bytes: u64 = @divExact(ptr_bits, 8);
 
+    const ptr_decl = mod.declPtr(ptr_decl_index);
+    const decl_index = switch (mod.intern_pool.indexToKey(try ptr_decl.internValue(mod))) {
+        .func => |func| func.owner_decl,
+        .extern_func => |extern_func| extern_func.decl,
+        else => ptr_decl_index,
+    };
     const decl = mod.declPtr(decl_index);
 
     if (!decl.ty.isFnOrHasRuntimeBitsIgnoreComptime(mod)) {
src/Module.zig
@@ -5846,7 +5846,8 @@ pub const Feature = enum {
 pub fn backendSupportsFeature(mod: Module, feature: Feature) bool {
     return switch (feature) {
         .panic_fn => mod.comp.bin_file.options.target.ofmt == .c or
-            mod.comp.bin_file.options.use_llvm,
+            mod.comp.bin_file.options.use_llvm or
+            mod.comp.bin_file.options.target.cpu.arch == .x86_64,
         .panic_unwrap_error => mod.comp.bin_file.options.target.ofmt == .c or
             mod.comp.bin_file.options.use_llvm,
         .safety_check_formatted => mod.comp.bin_file.options.target.ofmt == .c or
test/behavior/math.zig
@@ -670,11 +670,11 @@ fn should_not_be_zero(x: f128) !void {
 
 test "128-bit multiplication" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArmOrThumb()) return error.SkipZigTest;
 
     {
test/tests.zig
@@ -1006,7 +1006,7 @@ pub fn addModuleTests(b: *std.Build, options: ModuleTestOptions) *Step {
 
         // TODO get std lib tests passing for other self-hosted backends.
         if ((test_target.target.getCpuArch() != .x86_64 or
-            test_target.target.getObjectFormat() != .elf) and
+            test_target.target.getOsTag() != .linux) and
             test_target.use_llvm == false and mem.eql(u8, options.name, "std"))
             continue;