Commit 509be7cf1f
Changed files (45)
lib/std/atomic/Atomic.zig
@@ -467,8 +467,6 @@ test "Atomic.fetchSub" {
}
test "Atomic.fetchMin" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
inline for (atomicIntTypes()) |Int| {
inline for (atomic_rmw_orderings) |ordering| {
var x = Atomic(Int).init(5);
lib/std/atomic/queue.zig
@@ -175,8 +175,6 @@ const puts_per_thread = 500;
const put_thread_count = 3;
test "std.atomic.Queue" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
var plenty_of_memory = try std.heap.page_allocator.alloc(u8, 300 * 1024);
defer std.heap.page_allocator.free(plenty_of_memory);
lib/std/compress/zstandard.zig
@@ -264,8 +264,6 @@ fn testReader(data: []const u8, comptime expected: []const u8) !void {
}
test "zstandard decompression" {
- if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const uncompressed = @embedFile("testdata/rfc8478.txt");
const compressed3 = @embedFile("testdata/rfc8478.txt.zst.3");
const compressed19 = @embedFile("testdata/rfc8478.txt.zst.19");
lib/std/crypto/25519/ed25519.zig
@@ -1,5 +1,4 @@
const std = @import("std");
-const builtin = @import("builtin");
const crypto = std.crypto;
const debug = std.debug;
const fmt = std.fmt;
@@ -276,8 +275,8 @@ pub const Ed25519 = struct {
pub fn fromSecretKey(secret_key: SecretKey) (NonCanonicalError || EncodingError || IdentityElementError)!KeyPair {
// It is critical for EdDSA to use the correct public key.
// In order to enforce this, a SecretKey implicitly includes a copy of the public key.
- // In Debug mode, we can still afford checking that the public key is correct for extra safety.
- if (builtin.mode == .Debug) {
+ // With runtime safety, we can still afford checking that the public key is correct.
+ if (std.debug.runtime_safety) {
const pk_p = try Curve.fromBytes(secret_key.publicKeyBytes());
const recomputed_kp = try create(secret_key.seed());
debug.assert(mem.eql(u8, &recomputed_kp.public_key.toBytes(), &pk_p.toBytes()));
@@ -493,8 +492,6 @@ test "ed25519 key pair creation" {
}
test "ed25519 signature" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
var seed: [32]u8 = undefined;
_ = try fmt.hexToBytes(seed[0..], "8052030376d47112be7f73ed7a019293dd12ad910b654455798b4667d73de166");
const key_pair = try Ed25519.KeyPair.create(seed);
@@ -507,8 +504,6 @@ test "ed25519 signature" {
}
test "ed25519 batch verification" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
var i: usize = 0;
while (i < 100) : (i += 1) {
const key_pair = try Ed25519.KeyPair.create(null);
@@ -538,8 +533,6 @@ test "ed25519 batch verification" {
}
test "ed25519 test vectors" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const Vec = struct {
msg_hex: *const [64:0]u8,
public_key_hex: *const [64:0]u8,
@@ -642,8 +635,6 @@ test "ed25519 test vectors" {
}
test "ed25519 with blind keys" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const BlindKeyPair = Ed25519.key_blinding.BlindKeyPair;
// Create a standard Ed25519 key pair
@@ -667,8 +658,6 @@ test "ed25519 with blind keys" {
}
test "ed25519 signatures with streaming" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const kp = try Ed25519.KeyPair.create(null);
var signer = try kp.signer(null);
lib/std/crypto/Certificate/Bundle.zig
@@ -318,8 +318,6 @@ const MapContext = struct {
test "scan for OS-provided certificates" {
if (builtin.os.tag == .wasi) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
var bundle: Bundle = .{};
defer bundle.deinit(std.testing.allocator);
lib/std/crypto/pcurves/p256.zig
@@ -478,7 +478,5 @@ pub const AffineCoordinates = struct {
};
test {
- if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
_ = @import("tests/p256.zig");
}
lib/std/crypto/aes.zig
@@ -28,8 +28,6 @@ pub const Aes128 = impl.Aes128;
pub const Aes256 = impl.Aes256;
test "ctr" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
// NIST SP 800-38A pp 55-58
const ctr = @import("modes.zig").ctr;
lib/std/crypto/aes_gcm.zig
@@ -1,5 +1,4 @@
const std = @import("std");
-const builtin = @import("builtin");
const assert = std.debug.assert;
const crypto = std.crypto;
const debug = std.debug;
@@ -42,7 +41,7 @@ fn AesGcm(comptime Aes: anytype) type {
mac.pad();
mem.writeInt(u32, j[nonce_length..][0..4], 2, .big);
- modes.ctr(@TypeOf(aes), aes, c, m, j, std.builtin.Endian.big);
+ modes.ctr(@TypeOf(aes), aes, c, m, j, .big);
mac.update(c[0..m.len][0..]);
mac.pad();
@@ -104,7 +103,7 @@ fn AesGcm(comptime Aes: anytype) type {
}
mem.writeInt(u32, j[nonce_length..][0..4], 2, .big);
- modes.ctr(@TypeOf(aes), aes, m, c, j, std.builtin.Endian.big);
+ modes.ctr(@TypeOf(aes), aes, m, c, j, .big);
}
};
}
@@ -113,8 +112,6 @@ const htest = @import("test.zig");
const testing = std.testing;
test "Aes256Gcm - Empty message and no associated data" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const key: [Aes256Gcm.key_length]u8 = [_]u8{0x69} ** Aes256Gcm.key_length;
const nonce: [Aes256Gcm.nonce_length]u8 = [_]u8{0x42} ** Aes256Gcm.nonce_length;
const ad = "";
@@ -127,8 +124,6 @@ test "Aes256Gcm - Empty message and no associated data" {
}
test "Aes256Gcm - Associated data only" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const key: [Aes256Gcm.key_length]u8 = [_]u8{0x69} ** Aes256Gcm.key_length;
const nonce: [Aes256Gcm.nonce_length]u8 = [_]u8{0x42} ** Aes256Gcm.nonce_length;
const m = "";
@@ -141,8 +136,6 @@ test "Aes256Gcm - Associated data only" {
}
test "Aes256Gcm - Message only" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const key: [Aes256Gcm.key_length]u8 = [_]u8{0x69} ** Aes256Gcm.key_length;
const nonce: [Aes256Gcm.nonce_length]u8 = [_]u8{0x42} ** Aes256Gcm.nonce_length;
const m = "Test with message only";
@@ -160,8 +153,6 @@ test "Aes256Gcm - Message only" {
}
test "Aes256Gcm - Message and associated data" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const key: [Aes256Gcm.key_length]u8 = [_]u8{0x69} ** Aes256Gcm.key_length;
const nonce: [Aes256Gcm.nonce_length]u8 = [_]u8{0x42} ** Aes256Gcm.nonce_length;
const m = "Test with message";
lib/std/crypto/argon2.zig
@@ -896,8 +896,6 @@ test "kdf" {
}
test "phc format hasher" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const allocator = std.testing.allocator;
const password = "testpass";
@@ -913,8 +911,6 @@ test "phc format hasher" {
}
test "password hash and password verify" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const allocator = std.testing.allocator;
const password = "testpass";
lib/std/crypto/bcrypt.zig
@@ -1,5 +1,4 @@
const std = @import("std");
-const builtin = @import("builtin");
const base64 = std.base64;
const crypto = std.crypto;
const debug = std.debug;
@@ -754,8 +753,6 @@ pub fn strVerify(
}
test "bcrypt codec" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
var salt: [salt_length]u8 = undefined;
crypto.random.bytes(&salt);
var salt_str: [salt_str_length]u8 = undefined;
@@ -766,8 +763,6 @@ test "bcrypt codec" {
}
test "bcrypt crypt format" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
var hash_options = HashOptions{
.params = .{ .rounds_log = 5 },
.encoding = .crypt,
@@ -808,8 +803,6 @@ test "bcrypt crypt format" {
}
test "bcrypt phc format" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
var hash_options = HashOptions{
.params = .{ .rounds_log = 5 },
.encoding = .phc,
lib/std/crypto/Certificate.zig
@@ -614,18 +614,18 @@ const Date = struct {
};
pub fn parseTimeDigits(text: *const [2]u8, min: u8, max: u8) !u8 {
- const nn: @Vector(2, u16) = .{ text[0], text[1] };
- const zero: @Vector(2, u16) = .{ '0', '0' };
- const mm: @Vector(2, u16) = .{ 10, 1 };
- const result = @reduce(.Add, (nn -% zero) *% mm);
+ const result = if (use_vectors) result: {
+ const nn: @Vector(2, u16) = .{ text[0], text[1] };
+ const zero: @Vector(2, u16) = .{ '0', '0' };
+ const mm: @Vector(2, u16) = .{ 10, 1 };
+ break :result @reduce(.Add, (nn -% zero) *% mm);
+ } else std.fmt.parseInt(u8, text, 10) catch return error.CertificateTimeInvalid;
if (result < min) return error.CertificateTimeInvalid;
if (result > max) return error.CertificateTimeInvalid;
return @truncate(result);
}
test parseTimeDigits {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const expectEqual = std.testing.expectEqual;
try expectEqual(@as(u8, 0), try parseTimeDigits("00", 0, 99));
try expectEqual(@as(u8, 99), try parseTimeDigits("99", 0, 99));
@@ -638,17 +638,17 @@ test parseTimeDigits {
}
pub fn parseYear4(text: *const [4]u8) !u16 {
- const nnnn: @Vector(4, u32) = .{ text[0], text[1], text[2], text[3] };
- const zero: @Vector(4, u32) = .{ '0', '0', '0', '0' };
- const mmmm: @Vector(4, u32) = .{ 1000, 100, 10, 1 };
- const result = @reduce(.Add, (nnnn -% zero) *% mmmm);
+ const result = if (use_vectors) result: {
+ const nnnn: @Vector(4, u32) = .{ text[0], text[1], text[2], text[3] };
+ const zero: @Vector(4, u32) = .{ '0', '0', '0', '0' };
+ const mmmm: @Vector(4, u32) = .{ 1000, 100, 10, 1 };
+ break :result @reduce(.Add, (nnnn -% zero) *% mmmm);
+ } else std.fmt.parseInt(u16, text, 10) catch return error.CertificateTimeInvalid;
if (result > 9999) return error.CertificateTimeInvalid;
return @truncate(result);
}
test parseYear4 {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const expectEqual = std.testing.expectEqual;
try expectEqual(@as(u16, 0), try parseYear4("0000"));
try expectEqual(@as(u16, 9999), try parseYear4("9999"));
@@ -1124,4 +1124,4 @@ pub const rsa = struct {
}
};
-const builtin = @import("builtin");
+const use_vectors = @import("builtin").zig_backend != .stage2_x86_64;
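(Not part of the commit: a minimal standalone sketch, assuming only std.fmt and std.testing, showing that the new scalar fallback and the original vector reduction parse the same two-digit value. The helper name viaVector is made up for illustration; the same reasoning applies to parseYear4 here and to parseInt3 in http/Client.zig below.)

const std = @import("std");

// Mirrors the vector path of parseTimeDigits shown above.
fn viaVector(text: *const [2]u8) u16 {
    const nn: @Vector(2, u16) = .{ text[0], text[1] };
    const zero: @Vector(2, u16) = .{ '0', '0' };
    const mm: @Vector(2, u16) = .{ 10, 1 };
    return @reduce(.Add, (nn -% zero) *% mm);
}

test "scalar fallback agrees with vector path" {
    const scalar = try std.fmt.parseInt(u8, "07", 10);
    try std.testing.expectEqual(@as(u16, scalar), viaVector("07"));
}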
lib/std/crypto/cmac.zig
@@ -1,5 +1,4 @@
const std = @import("std");
-const builtin = @import("builtin");
const crypto = std.crypto;
const mem = std.mem;
@@ -94,8 +93,6 @@ pub fn Cmac(comptime BlockCipher: type) type {
const testing = std.testing;
test "CmacAes128 - Example 1: len = 0" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const key = [_]u8{
0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c,
};
@@ -109,8 +106,6 @@ test "CmacAes128 - Example 1: len = 0" {
}
test "CmacAes128 - Example 2: len = 16" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const key = [_]u8{
0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c,
};
@@ -126,8 +121,6 @@ test "CmacAes128 - Example 2: len = 16" {
}
test "CmacAes128 - Example 3: len = 40" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const key = [_]u8{
0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c,
};
@@ -145,8 +138,6 @@ test "CmacAes128 - Example 3: len = 40" {
}
test "CmacAes128 - Example 4: len = 64" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const key = [_]u8{
0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c,
};
lib/std/crypto/ecdsa.zig
@@ -373,7 +373,6 @@ pub fn Ecdsa(comptime Curve: type, comptime Hash: type) type {
test "ECDSA - Basic operations over EcdsaP384Sha384" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
const Scheme = EcdsaP384Sha384;
const kp = try Scheme.KeyPair.create(null);
@@ -407,7 +406,6 @@ test "ECDSA - Basic operations over Secp256k1" {
test "ECDSA - Basic operations over EcdsaP384Sha256" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
const Scheme = Ecdsa(crypto.ecc.P384, crypto.hash.sha2.Sha256);
const kp = try Scheme.KeyPair.create(null);
@@ -424,7 +422,6 @@ test "ECDSA - Basic operations over EcdsaP384Sha256" {
test "ECDSA - Verifying a existing signature with EcdsaP384Sha256" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
const Scheme = Ecdsa(crypto.ecc.P384, crypto.hash.sha2.Sha256);
// zig fmt: off
@@ -469,7 +466,6 @@ const TestVector = struct {
test "ECDSA - Test vectors from Project Wycheproof" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
const vectors = [_]TestVector{
.{ .key = "042927b10512bae3eddcfe467828128bad2903269919f7086069c8c4df6c732838c7787964eaac00e5921fb1498a60f4606766b3d9685001558d1a974e7341513e", .msg = "313233343030", .sig = "304402202ba3a8be6b94d5ec80a6d9d1190a436effe50d85a1eee859b8cc6af9bd5c2e1802204cd60b855d442f5b3c7b11eb6c4e0ae7525fe710fab9aa7c77a67f79e6fadd76", .result = .valid },
@@ -884,7 +880,6 @@ fn tvTry(vector: TestVector) !void {
test "ECDSA - Sec1 encoding/decoding" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
const Scheme = EcdsaP384Sha384;
const kp = try Scheme.KeyPair.create(null);
lib/std/crypto/ghash_polyval.zig
@@ -422,8 +422,6 @@ fn Hash(comptime endian: std.builtin.Endian, comptime shift_key: bool) type {
const htest = @import("test.zig");
test "ghash" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const key = [_]u8{0x42} ** 16;
const m = [_]u8{0x69} ** 256;
@@ -441,8 +439,6 @@ test "ghash" {
}
test "ghash2" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
var key: [16]u8 = undefined;
var i: usize = 0;
while (i < key.len) : (i += 1) {
lib/std/crypto/phc_encoding.zig
@@ -1,7 +1,6 @@
// https://github.com/P-H-C/phc-string-format
const std = @import("std");
-const builtin = @import("builtin");
const fmt = std.fmt;
const io = std.io;
const mem = std.mem;
@@ -264,8 +263,6 @@ fn kvSplit(str: []const u8) !struct { key: []const u8, value: []const u8 } {
}
test "phc format - encoding/decoding" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const Input = struct {
str: []const u8,
HashResult: type,
lib/std/crypto/sha2.zig
@@ -238,7 +238,7 @@ fn Sha2x32(comptime params: Sha2Params32) type {
return;
},
// C backend doesn't currently support passing vectors to inline asm.
- .x86_64 => if (builtin.zig_backend != .stage2_c and comptime std.Target.x86.featureSetHasAll(builtin.cpu.features, .{ .sha, .avx2 })) {
+ .x86_64 => if (builtin.zig_backend != .stage2_c and builtin.zig_backend != .stage2_x86_64 and comptime std.Target.x86.featureSetHasAll(builtin.cpu.features, .{ .sha, .avx2 })) {
var x: v4u32 = [_]u32{ d.s[5], d.s[4], d.s[1], d.s[0] };
var y: v4u32 = [_]u32{ d.s[7], d.s[6], d.s[3], d.s[2] };
const s_v = @as(*[16]v4u32, @ptrCast(&s));
lib/std/fmt/parse_float.zig
@@ -83,8 +83,6 @@ test "fmt.parseFloat #11169" {
}
test "fmt.parseFloat hex.special" {
- if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
try testing.expect(math.isNan(try parseFloat(f32, "nAn")));
try testing.expect(math.isPositiveInf(try parseFloat(f32, "iNf")));
try testing.expect(math.isPositiveInf(try parseFloat(f32, "+Inf")));
lib/std/hash/xxhash.zig
@@ -2,6 +2,7 @@ const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
const expectEqual = std.testing.expectEqual;
+const native_endian = builtin.cpu.arch.endian();
const rotl = std.math.rotl;
@@ -472,7 +473,7 @@ pub const XxHash3 = struct {
}
inline fn swap(x: anytype) @TypeOf(x) {
- return if (builtin.cpu.arch.endian() == .big) @byteSwap(x) else x;
+ return if (native_endian == .big) @byteSwap(x) else x;
}
inline fn disableAutoVectorization(x: anytype) void {
lib/std/http/Client.zig
@@ -9,6 +9,7 @@ const net = std.net;
const Uri = std.Uri;
const Allocator = mem.Allocator;
const assert = std.debug.assert;
+const use_vectors = builtin.zig_backend != .stage2_x86_64;
const Client = @This();
const proto = @import("protocol.zig");
@@ -408,7 +409,7 @@ pub const Response = struct {
else => return error.HttpHeadersInvalid,
};
if (first_line[8] != ' ') return error.HttpHeadersInvalid;
- const status = @as(http.Status, @enumFromInt(parseInt3(first_line[9..12].*)));
+ const status: http.Status = @enumFromInt(parseInt3(first_line[9..12]));
const reason = mem.trimLeft(u8, first_line[12..], " ");
res.version = version;
@@ -481,20 +482,24 @@ pub const Response = struct {
}
inline fn int64(array: *const [8]u8) u64 {
- return @as(u64, @bitCast(array.*));
+ return @bitCast(array.*);
}
- fn parseInt3(nnn: @Vector(3, u8)) u10 {
- const zero: @Vector(3, u8) = .{ '0', '0', '0' };
- const mmm: @Vector(3, u10) = .{ 100, 10, 1 };
- return @reduce(.Add, @as(@Vector(3, u10), nnn -% zero) *% mmm);
+ fn parseInt3(text: *const [3]u8) u10 {
+ if (use_vectors) {
+ const nnn: @Vector(3, u8) = text.*;
+ const zero: @Vector(3, u8) = .{ '0', '0', '0' };
+ const mmm: @Vector(3, u10) = .{ 100, 10, 1 };
+ return @reduce(.Add, @as(@Vector(3, u10), nnn -% zero) *% mmm);
+ }
+ return std.fmt.parseInt(u10, text, 10) catch unreachable;
}
test parseInt3 {
const expectEqual = testing.expectEqual;
- try expectEqual(@as(u10, 0), parseInt3("000".*));
- try expectEqual(@as(u10, 418), parseInt3("418".*));
- try expectEqual(@as(u10, 999), parseInt3("999".*));
+ try expectEqual(@as(u10, 0), parseInt3("000"));
+ try expectEqual(@as(u10, 418), parseInt3("418"));
+ try expectEqual(@as(u10, 999), parseInt3("999"));
}
version: http.Version,
@@ -1588,7 +1593,8 @@ test {
if (builtin.os.tag == .wasi) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
+ if (builtin.zig_backend == .stage2_x86_64 and
+ !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .avx)) return error.SkipZigTest;
std.testing.refAllDecls(@This());
}
lib/std/http/protocol.zig
@@ -1,8 +1,10 @@
const std = @import("../std.zig");
+const builtin = @import("builtin");
const testing = std.testing;
const mem = std.mem;
const assert = std.debug.assert;
+const use_vectors = builtin.zig_backend != .stage2_x86_64;
pub const State = enum {
/// Begin header parsing states.
@@ -83,7 +85,7 @@ pub const HeadersParser = struct {
/// first byte of content is located at `bytes[result]`.
pub fn findHeadersEnd(r: *HeadersParser, bytes: []const u8) u32 {
const vector_len: comptime_int = @max(std.simd.suggestVectorSize(u8) orelse 1, 8);
- const len = @as(u32, @intCast(bytes.len));
+ const len: u32 = @intCast(bytes.len);
var index: u32 = 0;
while (true) {
@@ -175,18 +177,27 @@ pub const HeadersParser = struct {
continue;
},
else => {
- const Vector = @Vector(vector_len, u8);
- // const BoolVector = @Vector(vector_len, bool);
- const BitVector = @Vector(vector_len, u1);
- const SizeVector = @Vector(vector_len, u8);
-
const chunk = bytes[index..][0..vector_len];
- const v: Vector = chunk.*;
- const matches_r = @as(BitVector, @bitCast(v == @as(Vector, @splat('\r'))));
- const matches_n = @as(BitVector, @bitCast(v == @as(Vector, @splat('\n'))));
- const matches_or: SizeVector = matches_r | matches_n;
-
- const matches = @reduce(.Add, matches_or);
+ const matches = if (use_vectors) matches: {
+ const Vector = @Vector(vector_len, u8);
+ // const BoolVector = @Vector(vector_len, bool);
+ const BitVector = @Vector(vector_len, u1);
+ const SizeVector = @Vector(vector_len, u8);
+
+ const v: Vector = chunk.*;
+ const matches_r: BitVector = @bitCast(v == @as(Vector, @splat('\r')));
+ const matches_n: BitVector = @bitCast(v == @as(Vector, @splat('\n')));
+ const matches_or: SizeVector = matches_r | matches_n;
+
+ break :matches @reduce(.Add, matches_or);
+ } else matches: {
+ var matches: u8 = 0;
+ for (chunk) |byte| switch (byte) {
+ '\r', '\n' => matches += 1,
+ else => {},
+ };
+ break :matches matches;
+ };
switch (matches) {
0 => {},
1 => switch (chunk[vector_len - 1]) {
lib/std/http/Server.zig
@@ -736,8 +736,6 @@ test "HTTP server handles a chunked transfer coding request" {
return error.SkipZigTest;
}
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const native_endian = comptime builtin.cpu.arch.endian();
if (builtin.zig_backend == .stage2_llvm and native_endian == .big) {
// https://github.com/ziglang/zig/issues/13782
lib/std/math/big/int.zig
@@ -1318,7 +1318,7 @@ pub const Mutable = struct {
///
/// `limbs_buffer` is used for temporary storage.
/// The amount required is given by `calcPowLimbsBufferLen`.
- pub fn pow(r: *Mutable, a: Const, b: u32, limbs_buffer: []Limb) !void {
+ pub fn pow(r: *Mutable, a: Const, b: u32, limbs_buffer: []Limb) void {
assert(r.limbs.ptr != a.limbs.ptr); // illegal aliasing
// Handle all the trivial cases first
@@ -3213,7 +3213,7 @@ pub const Managed = struct {
var m = try Managed.initCapacity(rma.allocator, needed_limbs);
errdefer m.deinit();
var m_mut = m.toMutable();
- try m_mut.pow(a.toConst(), b, limbs_buffer);
+ m_mut.pow(a.toConst(), b, limbs_buffer);
m.setMetadata(m_mut.positive, m_mut.len);
rma.deinit();
@@ -3221,7 +3221,7 @@ pub const Managed = struct {
} else {
try rma.ensureCapacity(needed_limbs);
var rma_mut = rma.toMutable();
- try rma_mut.pow(a.toConst(), b, limbs_buffer);
+ rma_mut.pow(a.toConst(), b, limbs_buffer);
rma.setMetadata(rma_mut.positive, rma_mut.len);
}
}
lib/std/math/big/int_test.zig
@@ -2568,8 +2568,6 @@ test "big.int const to managed" {
}
test "big.int pow" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
{
var a = try Managed.initSet(testing.allocator, -3);
defer a.deinit();
@@ -2763,8 +2761,6 @@ fn popCountTest(val: *const Managed, bit_count: usize, expected: usize) !void {
}
test "big int conversion read/write twos complement" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
var a = try Managed.initSet(testing.allocator, (1 << 493) - 1);
defer a.deinit();
var b = try Managed.initSet(testing.allocator, (1 << 493) - 1);
@@ -2863,8 +2859,6 @@ test "big int write twos complement +/- zero" {
}
test "big int conversion write twos complement with padding" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
var a = try Managed.initSet(testing.allocator, 0x01_ffffffff_ffffffff_ffffffff);
defer a.deinit();
lib/std/net/test.zig
@@ -60,7 +60,7 @@ test "parse and render IPv6 addresses" {
}
test "invalid but parseable IPv6 scope ids" {
- if (builtin.os.tag != .linux or comptime !builtin.os.tag.isDarwin()) {
+ if (builtin.os.tag != .linux and comptime !builtin.os.tag.isDarwin()) {
// Currently, resolveIp6 with alphanumerical scope IDs only works on Linux.
// TODO Make this test pass on other operating systems.
return error.SkipZigTest;
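(Illustrative only, not part of the commit: a small truth-table test for the `or` -> `and` fix above. With `or`, no target can make both clauses false, so the test was skipped on every OS, including Linux and macOS; with `and`, it is skipped only when the target is neither.)

const std = @import("std");

test "skip condition truth table for the or -> and fix" {
    const Tag = std.Target.Os.Tag;
    const targets = [_]Tag{ .linux, .macos, .windows };
    for (targets) |tag| {
        const old_skip = tag != .linux or !tag.isDarwin();
        const new_skip = tag != .linux and !tag.isDarwin();
        // Old condition: always true, so the test never ran anywhere.
        try std.testing.expect(old_skip);
        // New condition: true only off Linux/Darwin (here, only for windows).
        try std.testing.expectEqual(tag == .windows, new_skip);
    }
}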
lib/std/os/test.zig
@@ -375,8 +375,6 @@ fn testThreadIdFn(thread_id: *Thread.Id) void {
test "std.Thread.getCurrentId" {
if (builtin.single_threaded) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
var thread_current_id: Thread.Id = undefined;
const thread = try Thread.spawn(.{}, testThreadIdFn, .{&thread_current_id});
thread.join();
@@ -420,8 +418,6 @@ test "cpu count" {
test "thread local storage" {
if (builtin.single_threaded) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const thread1 = try Thread.spawn(.{}, testTls, .{});
const thread2 = try Thread.spawn(.{}, testTls, .{});
try testTls();
lib/std/rand/test.zig
@@ -1,5 +1,4 @@
const std = @import("../std.zig");
-const builtin = @import("builtin");
const math = std.math;
const DefaultPrng = std.rand.DefaultPrng;
const Random = std.rand.Random;
@@ -200,8 +199,6 @@ fn testRandomIntLessThan() !void {
}
test "Random intAtMost" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
@setEvalBranchQuota(10000);
try testRandomIntAtMost();
try comptime testRandomIntAtMost();
@@ -242,8 +239,6 @@ fn testRandomIntAtMost() !void {
}
test "Random Biased" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
var prng = DefaultPrng.init(0);
const random = prng.random();
// Not thoroughly checking the logic here.
@@ -452,8 +447,6 @@ test "CSPRNG" {
}
test "Random weightedIndex" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
// Make sure weightedIndex works for various integers and floats
inline for (.{ u64, i4, f32, f64 }) |T| {
var prng = DefaultPrng.init(0);
lib/std/Thread/Condition.zig
@@ -324,8 +324,6 @@ test "Condition - wait and signal" {
return error.SkipZigTest;
}
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const num_threads = 4;
const MultiWait = struct {
@@ -371,8 +369,6 @@ test "Condition - signal" {
return error.SkipZigTest;
}
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const num_threads = 4;
const SignalTest = struct {
@@ -440,8 +436,6 @@ test "Condition - multi signal" {
return error.SkipZigTest;
}
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const num_threads = 4;
const num_iterations = 4;
@@ -504,8 +498,6 @@ test "Condition - broadcasting" {
return error.SkipZigTest;
}
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const num_threads = 10;
const BroadcastTest = struct {
@@ -573,8 +565,6 @@ test "Condition - broadcasting - wake all threads" {
return error.SkipZigTest;
}
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
var num_runs: usize = 1;
const num_threads = 10;
lib/std/Thread/Mutex.zig
@@ -289,8 +289,6 @@ test "Mutex - many contended" {
return error.SkipZigTest;
}
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const num_threads = 4;
const num_increments = 1000;
lib/std/Thread/RwLock.zig
@@ -297,8 +297,6 @@ test "RwLock - concurrent access" {
if (builtin.single_threaded)
return;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const num_writers: usize = 2;
const num_readers: usize = 4;
const num_writes: usize = 10000;
lib/std/Thread/Semaphore.zig
@@ -39,8 +39,6 @@ test "Thread.Semaphore" {
return error.SkipZigTest;
}
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const TestContext = struct {
sem: *Semaphore,
n: *i32,
lib/std/zig/tokenizer.zig
@@ -1,5 +1,4 @@
const std = @import("../std.zig");
-const builtin = @import("builtin");
pub const Token = struct {
tag: Tag,
@@ -1450,8 +1449,6 @@ test "chars" {
}
test "invalid token characters" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
try testTokenize("#", &.{.invalid});
try testTokenize("`", &.{.invalid});
try testTokenize("'c", &.{.invalid});
@@ -1571,8 +1568,6 @@ test "pipe and then invalid" {
}
test "line comment and doc comment" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
try testTokenize("//", &.{});
try testTokenize("// a / b", &.{});
try testTokenize("// /", &.{});
@@ -1647,8 +1642,6 @@ test "range literals" {
}
test "number literals decimal" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
try testTokenize("0", &.{.number_literal});
try testTokenize("1", &.{.number_literal});
try testTokenize("2", &.{.number_literal});
@@ -1897,8 +1890,6 @@ test "invalid token with unfinished escape right before eof" {
}
test "saturating operators" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
try testTokenize("<<", &.{.angle_bracket_angle_bracket_left});
try testTokenize("<<|", &.{.angle_bracket_angle_bracket_left_pipe});
try testTokenize("<<|=", &.{.angle_bracket_angle_bracket_left_pipe_equal});
lib/std/base64.zig
@@ -355,8 +355,6 @@ pub const Base64DecoderWithIgnore = struct {
};
test "base64" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
@setEvalBranchQuota(8000);
try testBase64();
try comptime testAllApis(standard, "comptime", "Y29tcHRpbWU=");
@@ -377,8 +375,6 @@ test "base64 padding dest overflow" {
}
test "base64 url_safe_no_pad" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
@setEvalBranchQuota(8000);
try testBase64UrlSafeNoPad();
try comptime testAllApis(url_safe_no_pad, "comptime", "Y29tcHRpbWU");
lib/std/bit_set.zig
@@ -1638,7 +1638,6 @@ fn testStaticBitSet(comptime Set: type) !void {
test "IntegerBitSet" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
try testStaticBitSet(IntegerBitSet(0));
try testStaticBitSet(IntegerBitSet(1));
@@ -1651,8 +1650,6 @@ test "IntegerBitSet" {
}
test "ArrayBitSet" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
inline for (.{ 0, 1, 2, 31, 32, 33, 63, 64, 65, 254, 500, 3000 }) |size| {
try testStaticBitSet(ArrayBitSet(u8, size));
try testStaticBitSet(ArrayBitSet(u16, size));
lib/std/math.zig
@@ -492,8 +492,6 @@ pub fn shl(comptime T: type, a: T, shift_amt: anytype) T {
}
test "shl" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .aarch64) {
// https://github.com/ziglang/zig/issues/12012
return error.SkipZigTest;
@@ -539,8 +537,6 @@ pub fn shr(comptime T: type, a: T, shift_amt: anytype) T {
}
test "shr" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .aarch64) {
// https://github.com/ziglang/zig/issues/12012
return error.SkipZigTest;
@@ -587,8 +583,6 @@ pub fn rotr(comptime T: type, x: T, r: anytype) T {
}
test "rotr" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .aarch64) {
// https://github.com/ziglang/zig/issues/12012
return error.SkipZigTest;
@@ -634,8 +628,6 @@ pub fn rotl(comptime T: type, x: T, r: anytype) T {
}
test "rotl" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .aarch64) {
// https://github.com/ziglang/zig/issues/12012
return error.SkipZigTest;
@@ -764,8 +756,6 @@ pub fn divTrunc(comptime T: type, numerator: T, denominator: T) !T {
}
test "divTrunc" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
try testDivTrunc();
try comptime testDivTrunc();
}
@@ -790,8 +780,6 @@ pub fn divFloor(comptime T: type, numerator: T, denominator: T) !T {
}
test "divFloor" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
try testDivFloor();
try comptime testDivFloor();
}
@@ -829,8 +817,6 @@ pub fn divCeil(comptime T: type, numerator: T, denominator: T) !T {
}
test "divCeil" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
try testDivCeil();
try comptime testDivCeil();
}
@@ -875,8 +861,6 @@ pub fn divExact(comptime T: type, numerator: T, denominator: T) !T {
}
test "divExact" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
try testDivExact();
try comptime testDivExact();
}
@@ -903,8 +887,6 @@ pub fn mod(comptime T: type, numerator: T, denominator: T) !T {
}
test "mod" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
try testMod();
try comptime testMod();
}
@@ -931,8 +913,6 @@ pub fn rem(comptime T: type, numerator: T, denominator: T) !T {
}
test "rem" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
try testRem();
try comptime testRem();
}
@@ -1285,7 +1265,8 @@ pub fn lerp(a: anytype, b: anytype, t: anytype) @TypeOf(a, b, t) {
}
test "lerp" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
+ if (builtin.zig_backend == .stage2_x86_64 and
+ !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .fma)) return error.SkipZigTest;
try testing.expectEqual(@as(f64, 75), lerp(50, 100, 0.5));
try testing.expectEqual(@as(f32, 43.75), lerp(50, 25, 0.25));
lib/std/mem.zig
@@ -315,8 +315,6 @@ pub fn zeroes(comptime T: type) T {
}
test "zeroes" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
const C_struct = extern struct {
x: u32,
y: u32 align(128),
@@ -4342,8 +4340,6 @@ pub fn alignInSlice(slice: anytype, comptime new_alignment: usize) ?AlignedSlice
}
test "read/write(Var)PackedInt" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
switch (builtin.cpu.arch) {
// This test generates too much code to execute on WASI.
// LLVM backend fails with "too many locals: locals exceed maximum"
lib/std/once.zig
@@ -46,8 +46,6 @@ fn incr() void {
}
test "Once executes its function just once" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
if (builtin.single_threaded) {
global_once.call();
global_once.call();
src/arch/x86_64/bits.zig
@@ -237,7 +237,7 @@ pub const Register = enum(u7) {
return @intCast(@intFromEnum(reg) - base);
}
- pub fn bitSize(reg: Register) u64 {
+ pub fn bitSize(reg: Register) u10 {
return switch (@intFromEnum(reg)) {
// zig fmt: off
@intFromEnum(Register.rax) ... @intFromEnum(Register.r15) => 64,
src/arch/x86_64/CodeGen.zig
@@ -388,7 +388,7 @@ pub const MCValue = union(enum) {
};
}
- fn mem(mcv: MCValue, size: Memory.Size) Memory {
+ fn mem(mcv: MCValue, function: *Self, size: Memory.Size) !Memory {
return switch (mcv) {
.none,
.unreach,
@@ -409,7 +409,6 @@ pub const MCValue = union(enum) {
.lea_frame,
.reserved_frame,
.air_ref,
- .load_symbol,
.lea_symbol,
=> unreachable,
.memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr| .{
@@ -433,6 +432,19 @@ pub const MCValue = union(enum) {
.disp = frame_addr.off,
} },
},
+ .load_symbol => |sym_off| {
+ assert(sym_off.off == 0);
+ return .{
+ .base = .{ .reloc = .{
+ .atom_index = try function.owner.getSymbolIndex(function),
+ .sym_index = sym_off.sym,
+ } },
+ .mod = .{ .rm = .{
+ .size = size,
+ .disp = sym_off.off,
+ } },
+ };
+ },
};
}
@@ -722,12 +734,14 @@ const InstTracking = struct {
const FrameAlloc = struct {
abi_size: u31,
+ spill_pad: u3,
abi_align: Alignment,
ref_count: u16,
- fn init(alloc_abi: struct { size: u64, alignment: Alignment }) FrameAlloc {
+ fn init(alloc_abi: struct { size: u64, pad: u3 = 0, alignment: Alignment }) FrameAlloc {
return .{
.abi_size = @intCast(alloc_abi.size),
+ .spill_pad = alloc_abi.pad,
.abi_align = alloc_abi.alignment,
.ref_count = 0,
};
@@ -738,6 +752,20 @@ const FrameAlloc = struct {
.alignment = ty.abiAlignment(mod),
});
}
+ fn initSpill(ty: Type, mod: *Module) FrameAlloc {
+ const abi_size = ty.abiSize(mod);
+ const spill_size = if (abi_size < 8)
+ math.ceilPowerOfTwoAssert(u64, abi_size)
+ else
+ std.mem.alignForward(u64, abi_size, 8);
+ return init(.{
+ .size = spill_size,
+ .pad = @intCast(spill_size - abi_size),
+ .alignment = ty.abiAlignment(mod).maxStrict(
+ Alignment.fromNonzeroByteUnits(@min(spill_size, 8)),
+ ),
+ });
+ }
};
const StackAllocation = struct {
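(Aside, not part of the diff: a worked example of the rounding done by initSpill above, using assumed ABI sizes of 3 and 12 bytes. Sizes below 8 round up to the next power of two; larger sizes round up to a multiple of 8, and the difference becomes spill_pad.)

const std = @import("std");

comptime {
    // abi_size 3  -> spill slot 4 bytes (next power of two), pad 1.
    std.debug.assert(std.math.ceilPowerOfTwoAssert(u64, 3) == 4);
    // abi_size 12 -> spill slot 16 bytes (aligned forward to 8), pad 4.
    std.debug.assert(std.mem.alignForward(u64, 12, 8) == 16);
}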
@@ -1668,8 +1696,7 @@ fn gen(self: *Self) InnerError!void {
// The address where to store the return value for the caller is in a
// register which the callee is free to clobber. Therefore, we purposely
// spill it to stack immediately.
- const frame_index =
- try self.allocFrameIndex(FrameAlloc.initType(Type.usize, mod));
+ const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(Type.usize, mod));
try self.genSetMem(
.{ .frame = frame_index },
0,
@@ -2434,7 +2461,7 @@ fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: b
}
}
- const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ty, mod));
+ const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(ty, mod));
return .{ .load_frame = .{ .index = frame_index } };
}
@@ -2445,7 +2472,10 @@ fn regClassForType(self: *Self, ty: Type) RegisterManager.RegisterBitSet {
80 => abi.RegisterClass.x87,
else => abi.RegisterClass.sse,
},
- .Vector => abi.RegisterClass.sse,
+ .Vector => switch (ty.childType(mod).toIntern()) {
+ .bool_type => abi.RegisterClass.gp,
+ else => abi.RegisterClass.sse,
+ },
else => abi.RegisterClass.gp,
};
}
@@ -2699,7 +2729,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
.{ .v_ss, .cvtsd2 },
dst_reg,
dst_reg,
- src_mcv.mem(.qword),
+ try src_mcv.mem(self, .qword),
) else try self.asmRegisterRegisterRegister(
.{ .v_ss, .cvtsd2 },
dst_reg,
@@ -2711,7 +2741,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._ss, .cvtsd2 },
dst_reg,
- src_mcv.mem(.qword),
+ try src_mcv.mem(self, .qword),
) else try self.asmRegisterRegister(
.{ ._ss, .cvtsd2 },
dst_reg,
@@ -2798,7 +2828,7 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
.{ .v_sd, .cvtss2 },
dst_reg,
dst_reg,
- src_mcv.mem(.dword),
+ try src_mcv.mem(self, .dword),
) else try self.asmRegisterRegisterRegister(
.{ .v_sd, .cvtss2 },
dst_reg,
@@ -2810,7 +2840,7 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._sd, .cvtss2 },
dst_reg,
- src_mcv.mem(.dword),
+ try src_mcv.mem(self, .dword),
) else try self.asmRegisterRegister(
.{ ._sd, .cvtss2 },
dst_reg,
@@ -2851,8 +2881,8 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void {
};
const dst_mcv = if (dst_int_info.bits <= src_storage_bits and
- std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable ==
- std.math.divCeil(u32, src_storage_bits, 64) catch unreachable and
+ math.divCeil(u16, dst_int_info.bits, 64) catch unreachable ==
+ math.divCeil(u32, src_storage_bits, 64) catch unreachable and
self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
const dst_mcv = try self.allocRegOrMem(inst, true);
try self.genCopy(min_ty, dst_mcv, src_mcv);
@@ -2869,22 +2899,28 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void {
break :result .{ .register = registerAlias(dst_mcv.getReg().?, abi_size) };
}
- const src_limbs_len = std.math.divCeil(u16, src_int_info.bits, 64) catch unreachable;
- const dst_limbs_len = std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable;
+ const src_limbs_len = math.divCeil(u16, src_int_info.bits, 64) catch unreachable;
+ const dst_limbs_len = math.divCeil(u16, dst_int_info.bits, 64) catch unreachable;
- const high_mcv = dst_mcv.address().offset((src_limbs_len - 1) * 8).deref();
- const high_reg = try self.copyToTmpRegister(switch (src_int_info.signedness) {
- .signed => Type.isize,
- .unsigned => Type.usize,
- }, high_mcv);
+ const high_mcv: MCValue = if (dst_mcv.isMemory())
+ dst_mcv.address().offset((src_limbs_len - 1) * 8).deref()
+ else
+ .{ .register = dst_mcv.register_pair[1] };
+ const high_reg = if (high_mcv.isRegister())
+ high_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(switch (src_int_info.signedness) {
+ .signed => Type.isize,
+ .unsigned => Type.usize,
+ }, high_mcv);
const high_lock = self.register_manager.lockRegAssumeUnused(high_reg);
defer self.register_manager.unlockReg(high_lock);
const high_bits = src_int_info.bits % 64;
if (high_bits > 0) {
- const high_ty = try mod.intType(extend, high_bits);
- try self.truncateRegister(high_ty, high_reg);
- try self.genCopy(Type.usize, high_mcv, .{ .register = high_reg });
+ try self.truncateRegister(src_ty, high_reg);
+ const high_ty = if (dst_int_info.bits >= 64) Type.usize else dst_ty;
+ try self.genCopy(high_ty, high_mcv, .{ .register = high_reg });
}
if (dst_limbs_len > src_limbs_len) try self.genInlineMemset(
@@ -2995,14 +3031,14 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void {
.{ .vp_, .@"and" },
dst_reg,
dst_reg,
- splat_addr_mcv.deref().mem(Memory.Size.fromSize(splat_abi_size)),
+ try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)),
);
try self.asmRegisterRegisterRegister(mir_tag, dst_reg, dst_reg, dst_reg);
} else {
try self.asmRegisterMemory(
.{ .p_, .@"and" },
dst_reg,
- splat_addr_mcv.deref().mem(Memory.Size.fromSize(splat_abi_size)),
+ try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)),
);
try self.asmRegisterRegister(mir_tag, dst_reg, dst_reg);
}
@@ -3048,7 +3084,7 @@ fn airSlice(self: *Self, inst: Air.Inst.Index) !void {
const len = try self.resolveInst(bin_op.rhs);
const len_ty = self.typeOf(bin_op.rhs);
- const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, mod));
+ const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(slice_ty, mod));
try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr);
try self.genSetMem(
.{ .frame = frame_index },
@@ -3068,8 +3104,36 @@ fn airUnOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
}
fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void {
+ const mod = self.bin_file.options.module.?;
const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);
+
+ const dst_ty = self.typeOfIndex(inst);
+ if (dst_ty.isAbiInt(mod)) {
+ const abi_size: u32 = @intCast(dst_ty.abiSize(mod));
+ const bit_size: u32 = @intCast(dst_ty.bitSize(mod));
+ if (abi_size * 8 > bit_size) {
+ const dst_lock = switch (dst_mcv) {
+ .register => |dst_reg| self.register_manager.lockRegAssumeUnused(dst_reg),
+ else => null,
+ };
+ defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+
+ if (dst_mcv.isRegister()) {
+ try self.truncateRegister(dst_ty, dst_mcv.getReg().?);
+ } else {
+ const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
+ const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+ defer self.register_manager.unlockReg(tmp_lock);
+
+ const hi_ty = try mod.intType(.unsigned, @intCast((dst_ty.bitSize(mod) - 1) % 64 + 1));
+ const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref();
+ try self.genSetReg(tmp_reg, hi_ty, hi_mcv);
+ try self.truncateRegister(dst_ty, tmp_reg);
+ try self.genCopy(hi_ty, hi_mcv, .{ .register = tmp_reg });
+ }
+ }
+ }
return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
}
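(Aside, not part of the diff: the new masking covers integers whose ABI size over-allocates bits. For example, a u36 occupies 8 bytes, so abi_size * 8 = 64 > 36, and after a wrapping add the top 28 bits can hold garbage that later reads of the value would otherwise observe; the code above re-truncates either the result register or the high limb in memory.)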
@@ -3176,7 +3240,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void {
if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._, .mov },
tmp_reg,
- mat_lhs_mcv.address().offset(8).deref().mem(.qword),
+ try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword),
) else try self.asmRegisterRegister(
.{ ._, .mov },
tmp_reg,
@@ -3200,7 +3264,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void {
if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._, .xor },
tmp_reg,
- mat_rhs_mcv.address().offset(8).deref().mem(.qword),
+ try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword),
) else try self.asmRegisterRegister(
.{ ._, .xor },
tmp_reg,
@@ -3300,12 +3364,12 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void {
try self.asmRegisterMemory(
.{ ._, .add },
tmp_regs[0],
- mat_rhs_mcv.mem(.qword),
+ try mat_rhs_mcv.mem(self, .qword),
);
try self.asmRegisterMemory(
.{ ._, .adc },
tmp_regs[1],
- mat_rhs_mcv.address().offset(8).deref().mem(.qword),
+ try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword),
);
} else for (
[_]Mir.Inst.Tag{ .add, .adc },
@@ -3534,7 +3598,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void {
if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._, .mov },
tmp_reg,
- mat_lhs_mcv.address().offset(8).deref().mem(.qword),
+ try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword),
) else try self.asmRegisterRegister(
.{ ._, .mov },
tmp_reg,
@@ -3558,7 +3622,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void {
if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._, .xor },
tmp_reg,
- mat_rhs_mcv.address().offset(8).deref().mem(.qword),
+ try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword),
) else try self.asmRegisterRegister(
.{ ._, .xor },
tmp_reg,
@@ -3567,7 +3631,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void {
try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, Immediate.u(63));
try self.asmRegister(.{ ._, .not }, tmp_reg);
- try self.asmMemoryImmediate(.{ ._, .cmp }, overflow.mem(.dword), Immediate.s(0));
+ try self.asmMemoryImmediate(.{ ._, .cmp }, try overflow.mem(self, .dword), Immediate.s(0));
try self.freeValue(overflow);
try self.asmCmovccRegisterRegister(.ne, dst_mcv.register_pair[0], tmp_reg);
try self.asmRegisterImmediate(.{ ._c, .bt }, tmp_reg, Immediate.u(63));
@@ -3665,7 +3729,7 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
}
const frame_index =
- try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod));
+ try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod));
try self.genSetMem(
.{ .frame = frame_index },
@intCast(tuple_ty.structFieldOffset(1, mod)),
@@ -3682,7 +3746,7 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
}
const frame_index =
- try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod));
+ try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod));
try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
break :result .{ .load_frame = .{ .index = frame_index } };
},
@@ -3738,7 +3802,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
}
const frame_index =
- try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod));
+ try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod));
try self.genSetMem(
.{ .frame = frame_index },
@intCast(tuple_ty.structFieldOffset(1, mod)),
@@ -3755,7 +3819,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
}
const frame_index =
- try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod));
+ try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod));
try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
break :result .{ .load_frame = .{ .index = frame_index } };
},
@@ -3874,7 +3938,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
);
try self.asmMemoryImmediate(
.{ ._, .cmp },
- overflow.mem(self.memSize(Type.c_int)),
+ try overflow.mem(self, self.memSize(Type.c_int)),
Immediate.s(0),
);
try self.genSetMem(
@@ -3926,14 +3990,19 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
};
defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock);
- if (mat_lhs_mcv.isMemory())
- try self.asmRegisterMemory(.{ ._, .mov }, .rax, mat_lhs_mcv.mem(.qword))
- else
- try self.asmRegisterRegister(.{ ._, .mov }, .rax, mat_lhs_mcv.register_pair[0]);
+ if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
+ .{ ._, .mov },
+ .rax,
+ try mat_lhs_mcv.mem(self, .qword),
+ ) else try self.asmRegisterRegister(
+ .{ ._, .mov },
+ .rax,
+ mat_lhs_mcv.register_pair[0],
+ );
if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._, .mov },
tmp_regs[0],
- mat_rhs_mcv.address().offset(8).deref().mem(.qword),
+ try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword),
) else try self.asmRegisterRegister(
.{ ._, .mov },
tmp_regs[0],
@@ -3944,7 +4013,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
try self.asmRegisterRegister(.{ .i_, .mul }, tmp_regs[0], .rax);
try self.asmSetccRegister(.o, tmp_regs[2].to8());
if (mat_rhs_mcv.isMemory())
- try self.asmMemory(.{ ._, .mul }, mat_rhs_mcv.mem(.qword))
+ try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .qword))
else
try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]);
try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]);
@@ -3953,7 +4022,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._, .mov },
tmp_regs[0],
- mat_lhs_mcv.address().offset(8).deref().mem(.qword),
+ try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword),
) else try self.asmRegisterRegister(
.{ ._, .mov },
tmp_regs[0],
@@ -3967,14 +4036,15 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
tmp_regs[3].to8(),
);
try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
- if (mat_rhs_mcv.isMemory())
- try self.asmRegisterMemory(.{ .i_, .mul }, tmp_regs[0], mat_rhs_mcv.mem(.qword))
- else
- try self.asmRegisterRegister(
- .{ .i_, .mul },
- tmp_regs[0],
- mat_rhs_mcv.register_pair[0],
- );
+ if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
+ .{ .i_, .mul },
+ tmp_regs[0],
+ try mat_rhs_mcv.mem(self, .qword),
+ ) else try self.asmRegisterRegister(
+ .{ .i_, .mul },
+ tmp_regs[0],
+ mat_rhs_mcv.register_pair[0],
+ );
try self.asmSetccRegister(.o, tmp_regs[2].to8());
try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8());
try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]);
@@ -4020,8 +4090,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
self.eflags_inst = inst;
break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } };
} else {
- const frame_index =
- try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod));
+ const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod));
try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
break :result .{ .load_frame = .{ .index = frame_index } };
},
@@ -4032,8 +4101,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
src_ty.fmt(mod), dst_ty.fmt(mod),
});
- const frame_index =
- try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod));
+ const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, mod));
if (dst_info.bits >= lhs_active_bits + rhs_active_bits) {
try self.genSetMem(
.{ .frame = frame_index },
@@ -4106,7 +4174,7 @@ fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue
.register => |reg| try self.asmRegister(tag, registerAlias(reg, abi_size)),
.memory, .indirect, .load_frame => try self.asmMemory(
tag,
- mat_rhs.mem(Memory.Size.fromSize(abi_size)),
+ try mat_rhs.mem(self, Memory.Size.fromSize(abi_size)),
),
else => unreachable,
}
@@ -4160,8 +4228,8 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa
);
try self.asmCmovccRegisterRegister(
.z,
- registerAlias(divisor, abi_size),
- registerAlias(.rdx, abi_size),
+ registerAlias(divisor, @max(abi_size, 2)),
+ registerAlias(.rdx, @max(abi_size, 2)),
);
try self.genBinOpMir(.{ ._, .add }, ty, .{ .register = divisor }, .{ .register = .rax });
return MCValue{ .register = divisor };
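(Aside, not part of the diff: the @max(abi_size, 2) widening here is presumably because x86 cmovcc has no 8-bit register form, so a 1-byte operand has to go through its 16-bit register alias.)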
@@ -4171,47 +4239,268 @@ fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void {
const mod = self.bin_file.options.module.?;
const bin_op = self.air.instructions.items(.data)[inst].bin_op;
- try self.spillRegisters(&.{.rcx});
-
- const tag = self.air.instructions.items(.tag)[inst];
- try self.register_manager.getReg(.rcx, null);
- const lhs = try self.resolveInst(bin_op.lhs);
- const rhs = try self.resolveInst(bin_op.rhs);
+ const air_tags = self.air.instructions.items(.tag);
+ const tag = air_tags[inst];
const lhs_ty = self.typeOf(bin_op.lhs);
const rhs_ty = self.typeOf(bin_op.rhs);
+ const result: MCValue = result: {
+ switch (lhs_ty.zigTypeTag(mod)) {
+ .Int => {
+ try self.spillRegisters(&.{.rcx});
+ try self.register_manager.getReg(.rcx, null);
+ const lhs_mcv = try self.resolveInst(bin_op.lhs);
+ const rhs_mcv = try self.resolveInst(bin_op.rhs);
- const dst_mcv = try self.genShiftBinOp(tag, inst, lhs, rhs, lhs_ty, rhs_ty);
- switch (tag) {
- .shr, .shr_exact, .shl_exact => {},
- .shl => switch (dst_mcv) {
- .register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg),
- .register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]),
- .load_frame => |frame_addr| {
- const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
- const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
- defer self.register_manager.unlockReg(tmp_lock);
+ const dst_mcv = try self.genShiftBinOp(tag, inst, lhs_mcv, rhs_mcv, lhs_ty, rhs_ty);
+ switch (tag) {
+ .shr, .shr_exact, .shl_exact => {},
+ .shl => switch (dst_mcv) {
+ .register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg),
+ .register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]),
+ .load_frame => |frame_addr| {
+ const tmp_reg =
+ try self.register_manager.allocReg(null, abi.RegisterClass.gp);
+ const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+ defer self.register_manager.unlockReg(tmp_lock);
- const lhs_bits: u31 = @intCast(lhs_ty.bitSize(mod));
- const tmp_ty = if (lhs_bits > 64) Type.usize else lhs_ty;
- const off = frame_addr.off + lhs_bits / 64 * 8;
- try self.genSetReg(
- tmp_reg,
- tmp_ty,
- .{ .load_frame = .{ .index = frame_addr.index, .off = off } },
- );
- try self.truncateRegister(lhs_ty, tmp_reg);
- try self.genSetMem(
- .{ .frame = frame_addr.index },
- off,
- tmp_ty,
- .{ .register = tmp_reg },
- );
+ const lhs_bits: u31 = @intCast(lhs_ty.bitSize(mod));
+ const tmp_ty = if (lhs_bits > 64) Type.usize else lhs_ty;
+ const off = frame_addr.off + (lhs_bits - 1) / 64 * 8;
+ try self.genSetReg(
+ tmp_reg,
+ tmp_ty,
+ .{ .load_frame = .{ .index = frame_addr.index, .off = off } },
+ );
+ try self.truncateRegister(lhs_ty, tmp_reg);
+ try self.genSetMem(
+ .{ .frame = frame_addr.index },
+ off,
+ tmp_ty,
+ .{ .register = tmp_reg },
+ );
+ },
+ else => {},
+ },
+ else => unreachable,
+ }
+ break :result dst_mcv;
+ },
+ .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
+ .Int => if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.childType(mod).intInfo(mod).bits) {
+ else => null,
+ 16 => switch (lhs_ty.vectorLen(mod)) {
+ else => null,
+ 1...8 => switch (tag) {
+ else => unreachable,
+ .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx))
+ .{ .vp_w, .sra }
+ else
+ .{ .p_w, .sra },
+ .unsigned => if (self.hasFeature(.avx))
+ .{ .vp_w, .srl }
+ else
+ .{ .p_w, .srl },
+ },
+ .shl, .shl_exact => if (self.hasFeature(.avx))
+ .{ .vp_w, .sll }
+ else
+ .{ .p_w, .sll },
+ },
+ 9...16 => switch (tag) {
+ else => unreachable,
+ .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .sra } else null,
+ .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .srl } else null,
+ },
+ .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_w, .sll } else null,
+ },
+ },
+ 32 => switch (lhs_ty.vectorLen(mod)) {
+ else => null,
+ 1...4 => switch (tag) {
+ else => unreachable,
+ .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx))
+ .{ .vp_d, .sra }
+ else
+ .{ .p_d, .sra },
+ .unsigned => if (self.hasFeature(.avx))
+ .{ .vp_d, .srl }
+ else
+ .{ .p_d, .srl },
+ },
+ .shl, .shl_exact => if (self.hasFeature(.avx))
+ .{ .vp_d, .sll }
+ else
+ .{ .p_d, .sll },
+ },
+ 5...8 => switch (tag) {
+ else => unreachable,
+ .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .sra } else null,
+ .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .srl } else null,
+ },
+ .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_d, .sll } else null,
+ },
+ },
+ 64 => switch (lhs_ty.vectorLen(mod)) {
+ else => null,
+ 1...2 => switch (tag) {
+ else => unreachable,
+ .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx))
+ .{ .vp_q, .sra }
+ else
+ .{ .p_q, .sra },
+ .unsigned => if (self.hasFeature(.avx))
+ .{ .vp_q, .srl }
+ else
+ .{ .p_q, .srl },
+ },
+ .shl, .shl_exact => if (self.hasFeature(.avx))
+ .{ .vp_q, .sll }
+ else
+ .{ .p_q, .sll },
+ },
+ 3...4 => switch (tag) {
+ else => unreachable,
+ .shr, .shr_exact => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx2)) .{ .vp_q, .sra } else null,
+ .unsigned => if (self.hasFeature(.avx2)) .{ .vp_q, .srl } else null,
+ },
+ .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_q, .sll } else null,
+ },
+ },
+ })) |mir_tag| if (try self.air.value(bin_op.rhs, mod)) |rhs_val| {
+ switch (mod.intern_pool.indexToKey(rhs_val.toIntern())) {
+ .aggregate => |rhs_aggregate| switch (rhs_aggregate.storage) {
+ .repeated_elem => |rhs_elem| {
+ const abi_size: u32 = @intCast(lhs_ty.abiSize(mod));
+
+ const lhs_mcv = try self.resolveInst(bin_op.lhs);
+ const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and
+ self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
+ .{lhs_mcv.getReg().?} ** 2
+ else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{
+ try self.register_manager.allocReg(inst, abi.RegisterClass.sse),
+ lhs_mcv.getReg().?,
+ } else .{(try self.copyToRegisterWithInstTracking(
+ inst,
+ lhs_ty,
+ lhs_mcv,
+ )).register} ** 2;
+ const reg_locks =
+ self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg });
+ defer for (reg_locks) |reg_lock| if (reg_lock) |lock|
+ self.register_manager.unlockReg(lock);
+
+ const shift_imm =
+ Immediate.u(@intCast(rhs_elem.toValue().toUnsignedInt(mod)));
+ if (self.hasFeature(.avx)) try self.asmRegisterRegisterImmediate(
+ mir_tag,
+ registerAlias(dst_reg, abi_size),
+ registerAlias(lhs_reg, abi_size),
+ shift_imm,
+ ) else {
+ assert(dst_reg.id() == lhs_reg.id());
+ try self.asmRegisterImmediate(
+ mir_tag,
+ registerAlias(dst_reg, abi_size),
+ shift_imm,
+ );
+ }
+ break :result .{ .register = dst_reg };
+ },
+ else => {},
+ },
+ else => {},
+ }
+ } else if (Air.refToIndex(bin_op.rhs)) |rhs_inst| switch (air_tags[rhs_inst]) {
+ .splat => {
+ const abi_size: u32 = @intCast(lhs_ty.abiSize(mod));
+
+ const lhs_mcv = try self.resolveInst(bin_op.lhs);
+ const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and
+ self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv))
+ .{lhs_mcv.getReg().?} ** 2
+ else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{
+ try self.register_manager.allocReg(inst, abi.RegisterClass.sse),
+ lhs_mcv.getReg().?,
+ } else .{(try self.copyToRegisterWithInstTracking(
+ inst,
+ lhs_ty,
+ lhs_mcv,
+ )).register} ** 2;
+ const reg_locks = self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg });
+ defer for (reg_locks) |reg_lock| if (reg_lock) |lock|
+ self.register_manager.unlockReg(lock);
+
+ const shift_reg =
+ try self.copyToTmpRegister(rhs_ty, .{ .air_ref = bin_op.rhs });
+ const shift_lock = self.register_manager.lockRegAssumeUnused(shift_reg);
+ defer self.register_manager.unlockReg(shift_lock);
+
+ const mask_ty = try mod.vectorType(.{ .len = 16, .child = .u8_type });
+ const mask_mcv = try self.genTypedValue(.{
+ .ty = mask_ty,
+ .val = (try mod.intern(.{ .aggregate = .{
+ .ty = mask_ty.toIntern(),
+ .storage = .{ .elems = &([1]InternPool.Index{
+ (try rhs_ty.childType(mod).maxIntScalar(mod, Type.u8)).toIntern(),
+ } ++ [1]InternPool.Index{
+ (try mod.intValue(Type.u8, 0)).toIntern(),
+ } ** 15) },
+ } })).toValue(),
+ });
+ const mask_addr_reg =
+ try self.copyToTmpRegister(Type.usize, mask_mcv.address());
+ const mask_addr_lock = self.register_manager.lockRegAssumeUnused(mask_addr_reg);
+ defer self.register_manager.unlockReg(mask_addr_lock);
+
+ if (self.hasFeature(.avx)) {
+ try self.asmRegisterRegisterMemory(
+ .{ .vp_, .@"and" },
+ shift_reg.to128(),
+ shift_reg.to128(),
+ .{
+ .base = .{ .reg = mask_addr_reg },
+ .mod = .{ .rm = .{ .size = .xword } },
+ },
+ );
+ try self.asmRegisterRegisterRegister(
+ mir_tag,
+ registerAlias(dst_reg, abi_size),
+ registerAlias(lhs_reg, abi_size),
+ shift_reg.to128(),
+ );
+ } else {
+ try self.asmRegisterMemory(
+ .{ .p_, .@"and" },
+ shift_reg.to128(),
+ .{
+ .base = .{ .reg = mask_addr_reg },
+ .mod = .{ .rm = .{ .size = .xword } },
+ },
+ );
+ assert(dst_reg.id() == lhs_reg.id());
+ try self.asmRegisterRegister(
+ mir_tag,
+ registerAlias(dst_reg, abi_size),
+ shift_reg.to128(),
+ );
+ }
+ break :result .{ .register = dst_reg };
+ },
+ else => {},
+ },
+ else => {},
},
else => {},
- },
- else => unreachable,
- }
- return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
+ }
+ return self.fail("TODO implement airShlShrBinOp for {}", .{lhs_ty.fmt(mod)});
+ };
+ return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
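
The vector-shift lowering above only selects a psll/psrl/psra form when the element size is 16, 32, or 64 bits and the shift amount is either a comptime-known repeated element or a runtime `.splat`; every other shape still falls through to the TODO failure. A minimal user-level case that should take the new path (illustrative sketch, not part of the patch; assumes the usual `std.testing` API):

    const std = @import("std");

    test "vector shift by a uniform amount" {
        var lhs: @Vector(8, u16) = .{ 1, 2, 4, 8, 16, 32, 64, 128 };
        _ = &lhs; // keep the operand runtime-known
        // The shift amount is the same in every lane, matching the
        // repeated-element / splat cases handled above.
        const amt: @Vector(8, u4) = [1]u4{3} ** 8;
        const shifted = lhs << amt;
        try std.testing.expect(shifted[0] == 8 and shifted[7] == 1024);
    }
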
fn airShlSat(self: *Self, inst: Air.Inst.Index) !void {
@@ -4230,12 +4519,18 @@ fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void {
const opt_mcv = try self.resolveInst(ty_op.operand);
if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) {
- switch (opt_mcv) {
- .register => |reg| try self.truncateRegister(pl_ty, reg),
- .register_overflow => |ro| try self.truncateRegister(pl_ty, ro.reg),
+ const pl_mcv: MCValue = switch (opt_mcv) {
+ .register_overflow => |ro| pl: {
+ self.eflags_inst = null; // actually stop tracking the overflow part
+ break :pl .{ .register = ro.reg };
+ },
+ else => opt_mcv,
+ };
+ switch (pl_mcv) {
+ .register => |pl_reg| try self.truncateRegister(pl_ty, pl_reg),
else => {},
}
- break :result opt_mcv;
+ break :result pl_mcv;
}
const pl_mcv = try self.allocRegOrMem(inst, true);
@@ -4472,8 +4767,9 @@ fn genUnwrapErrUnionPayloadMir(
const eu_lock = self.register_manager.lockReg(reg);
defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
- const result_mcv: MCValue = if (maybe_inst) |inst|
- try self.copyToRegisterWithInstTracking(inst, err_union_ty, err_union)
+ const payload_in_gp = self.regClassForType(payload_ty).supersetOf(abi.RegisterClass.gp);
+ const result_mcv: MCValue = if (payload_in_gp and maybe_inst != null)
+ try self.copyToRegisterWithInstTracking(maybe_inst.?, err_union_ty, err_union)
else
.{ .register = try self.copyToTmpRegister(err_union_ty, err_union) };
if (payload_off > 0) try self.genShiftBinOpMir(
@@ -4482,7 +4778,12 @@ fn genUnwrapErrUnionPayloadMir(
result_mcv,
.{ .immediate = @as(u6, @intCast(payload_off * 8)) },
) else try self.truncateRegister(payload_ty, result_mcv.register);
- break :result result_mcv;
+ break :result if (payload_in_gp)
+ result_mcv
+ else if (maybe_inst) |inst|
+ try self.copyToRegisterWithInstTracking(inst, payload_ty, result_mcv)
+ else
+ .{ .register = try self.copyToTmpRegister(payload_ty, result_mcv) };
},
else => return self.fail("TODO implement genUnwrapErrUnionPayloadMir for {}", .{err_union}),
}
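
genUnwrapErrUnionPayloadMir now checks whether the payload even belongs in a general-purpose register before reusing the error-union register, and otherwise copies the payload into its own register class at the end. A case with an SSE-class payload that should reach the new `payload_in_gp == false` branch (illustrative sketch only):

    const std = @import("std");

    fn half(ok: bool) error{Nope}!f64 {
        return if (ok) 0.5 else error.Nope;
    }

    test "unwrap an error union with a float payload" {
        var ok = true;
        _ = &ok; // keep the condition runtime-known
        // The f64 payload wants an SSE register, while the error union
        // itself is shuffled through general-purpose registers.
        try std.testing.expect((try half(ok)) == 0.5);
    }
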
@@ -4593,7 +4894,7 @@ fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void {
const result: MCValue = result: {
if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .{ .immediate = 0 };
- const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, mod));
+ const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(eu_ty, mod));
const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, mod));
const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, mod));
try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, operand);
@@ -4615,7 +4916,7 @@ fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void {
const result: MCValue = result: {
if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result try self.resolveInst(ty_op.operand);
- const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, mod));
+ const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(eu_ty, mod));
const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, mod));
const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, mod));
try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, .undef);
@@ -4770,14 +5071,19 @@ fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue {
fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
const mod = self.bin_file.options.module.?;
const bin_op = self.air.instructions.items(.data)[inst].bin_op;
- const slice_ty = self.typeOf(bin_op.lhs);
- const slice_ptr_field_type = slice_ty.slicePtrFieldType(mod);
- const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs);
- const dst_mcv = try self.allocRegOrMem(inst, false);
- try self.load(dst_mcv, slice_ptr_field_type, elem_ptr);
+ const result: MCValue = result: {
+ const elem_ty = self.typeOfIndex(inst);
+ if (!elem_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none;
- return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
+ const slice_ty = self.typeOf(bin_op.lhs);
+ const slice_ptr_field_type = slice_ty.slicePtrFieldType(mod);
+ const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs);
+ const dst_mcv = try self.allocRegOrMem(inst, false);
+ try self.load(dst_mcv, slice_ptr_field_type, elem_ptr);
+ break :result dst_mcv;
+ };
+ return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void {
@@ -4810,11 +5116,10 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void {
};
defer if (index_lock) |lock| self.register_manager.unlockReg(lock);
- const offset_reg = try self.elemOffset(index_ty, index, elem_abi_size);
- const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
- defer self.register_manager.unlockReg(offset_reg_lock);
-
const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
+ const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
+ defer self.register_manager.unlockReg(addr_lock);
+
switch (array) {
.register => {
const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, mod));
@@ -4843,6 +5148,10 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void {
else => return self.fail("TODO implement array_elem_val when array is {}", .{array}),
}
+ const offset_reg = try self.elemOffset(index_ty, index, elem_abi_size);
+ const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
+ defer self.register_manager.unlockReg(offset_lock);
+
// TODO we could allocate register here, but need to expect addr register and potentially
// offset register.
try self.spillEflagsIfOccupied();
@@ -5093,7 +5402,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void {
.{ ._, .sub },
dst_ty,
dst_mcv,
- .{ .immediate = 8 + self.regExtraBits(src_ty) },
+ .{ .immediate = 32 - src_bits },
);
} else if (src_bits <= 64) {
try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, dst_mcv, mat_src_mcv);
@@ -5361,7 +5670,9 @@ fn airPopCount(self: *Self, inst: Air.Inst.Index) !void {
mat_src_mcv
else
.{ .register = mat_src_mcv.register_pair[0] }, false);
- try self.genPopCount(tmp_regs[1], Type.usize, if (mat_src_mcv.isMemory())
+ const src_info = src_ty.intInfo(mod);
+ const hi_ty = try mod.intType(src_info.signedness, (src_info.bits - 1) % 64 + 1);
+ try self.genPopCount(tmp_regs[1], hi_ty, if (mat_src_mcv.isMemory())
mat_src_mcv.address().offset(8).deref()
else
.{ .register = mat_src_mcv.register_pair[1] }, false);
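
For integers wider than 64 bits the popcount is split into two limbs; the high limb is now counted with a type of `(bits - 1) % 64 + 1` bits so stray bits above the integer's width cannot leak into the result. Illustrative sketch (assumes the single-argument `@popCount` builtin):

    const std = @import("std");

    test "popcount of a 127-bit integer" {
        var x: u127 = (1 << 126) | 0xff;
        _ = &x; // keep the operand runtime-known
        // The low limb contributes 8 set bits, the high limb 1; the high
        // limb is only 63 bits wide ((127 - 1) % 64 + 1).
        try std.testing.expect(@popCount(x) == 9);
    }
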
@@ -5383,9 +5694,13 @@ fn genPopCount(
const src_abi_size: u32 = @intCast(src_ty.abiSize(mod));
if (self.hasFeature(.popcnt)) return self.genBinOpMir(
.{ ._, .popcnt },
- if (src_abi_size > 1) src_ty else Type.u16,
+ if (src_abi_size > 1) src_ty else Type.u32,
.{ .register = dst_reg },
- src_mcv,
+ if (src_abi_size > 1) src_mcv else src: {
+ if (!dst_contains_src) try self.genSetReg(dst_reg, src_ty, src_mcv);
+ try self.truncateRegister(try src_ty.toUnsigned(mod), dst_reg);
+ break :src .{ .register = dst_reg };
+ },
);
const mask = @as(u64, math.maxInt(u64)) >> @intCast(64 - src_abi_size * 8);
@@ -5517,9 +5832,9 @@ fn genByteSwap(
try self.asmRegisterMemory(
.{ ._, .movbe },
dst_regs[0],
- src_mcv.address().offset(8).deref().mem(.qword),
+ try src_mcv.address().offset(8).deref().mem(self, .qword),
);
- try self.asmRegisterMemory(.{ ._, .movbe }, dst_regs[1], src_mcv.mem(.qword));
+ try self.asmRegisterMemory(.{ ._, .movbe }, dst_regs[1], try src_mcv.mem(self, .qword));
} else for (dst_regs, src_mcv.register_pair) |dst_reg, src_reg| {
try self.asmRegisterRegister(.{ ._, .mov }, dst_reg.to64(), src_reg.to64());
try self.asmRegister(.{ ._, .bswap }, dst_reg.to64());
@@ -5762,7 +6077,7 @@ fn floatSign(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type)
else => unreachable,
} });
const sign_mem: Memory = if (sign_mcv.isMemory())
- sign_mcv.mem(Memory.Size.fromSize(abi_size))
+ try sign_mcv.mem(self, Memory.Size.fromSize(abi_size))
else
.{
.base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) },
@@ -5945,7 +6260,7 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: Ro
mir_tag,
dst_alias,
dst_alias,
- src_mcv.mem(Memory.Size.fromSize(abi_size)),
+ try src_mcv.mem(self, Memory.Size.fromSize(abi_size)),
Immediate.u(@as(u5, @bitCast(mode))),
) else try self.asmRegisterRegisterRegisterImmediate(
mir_tag,
@@ -5960,7 +6275,7 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: Ro
else => if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
mir_tag,
dst_alias,
- src_mcv.mem(Memory.Size.fromSize(abi_size)),
+ try src_mcv.mem(self, Memory.Size.fromSize(abi_size)),
Immediate.u(@as(u5, @bitCast(mode))),
) else try self.asmRegisterRegisterImmediate(
mir_tag,
@@ -6000,7 +6315,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void {
.memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
.l,
registerAlias(dst_mcv.register, cmov_abi_size),
- src_mcv.mem(Memory.Size.fromSize(cmov_abi_size)),
+ try src_mcv.mem(self, Memory.Size.fromSize(cmov_abi_size)),
),
else => {
const val_reg = try self.copyToTmpRegister(ty, src_mcv);
@@ -6100,7 +6415,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void {
if (src_mcv.isMemory()) try self.asmRegisterMemory(
mir_tag,
dst_alias,
- src_mcv.mem(self.memSize(ty)),
+ try src_mcv.mem(self, self.memSize(ty)),
) else try self.asmRegisterRegister(
mir_tag,
dst_alias,
@@ -6206,7 +6521,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ .v_ps, .cvtph2 },
wide_reg,
- src_mcv.mem(Memory.Size.fromSize(
+ try src_mcv.mem(self, Memory.Size.fromSize(
@intCast(@divExact(wide_reg.bitSize(), 16)),
)),
) else try self.asmRegisterRegister(
@@ -6254,7 +6569,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
mir_tag,
dst_reg,
dst_reg,
- src_mcv.mem(Memory.Size.fromSize(abi_size)),
+ try src_mcv.mem(self, Memory.Size.fromSize(abi_size)),
) else try self.asmRegisterRegisterRegister(
mir_tag,
dst_reg,
@@ -6267,7 +6582,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
else => if (src_mcv.isMemory()) try self.asmRegisterMemory(
mir_tag,
dst_reg,
- src_mcv.mem(Memory.Size.fromSize(abi_size)),
+ try src_mcv.mem(self, Memory.Size.fromSize(abi_size)),
) else try self.asmRegisterRegister(
mir_tag,
dst_reg,
@@ -6332,7 +6647,7 @@ fn reuseOperandAdvanced(
return false;
switch (mcv) {
- .register, .register_pair => for (mcv.getRegs()) |reg| {
+ .register, .register_pair, .register_overflow => for (mcv.getRegs()) |reg| {
// If it's in the registers table, need to associate the register(s) with the
// new instruction.
if (maybe_tracked_inst) |tracked_inst| {
@@ -6346,6 +6661,10 @@ fn reuseOperandAdvanced(
.load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false,
else => return false,
}
+ switch (mcv) {
+ .eflags, .register_overflow => self.eflags_inst = maybe_tracked_inst,
+ else => {},
+ }
// Prevent the operand deaths processing code from deallocating it.
self.liveness.clearOperandDeath(inst, op_index);
@@ -6363,11 +6682,36 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn
if (!val_ty.hasRuntimeBitsIgnoreComptime(mod)) return;
const val_abi_size: u32 = @intCast(val_ty.abiSize(mod));
+ if (ptr_info.packed_offset.bit_offset % 8 == 0) {
+ try self.load(
+ dst_mcv,
+ ptr_ty,
+ ptr_mcv.offset(@intCast(@divExact(ptr_info.packed_offset.bit_offset, 8))),
+ );
+ const val_bit_size: u32 = @intCast(val_ty.bitSize(mod));
+ if (val_abi_size * 8 > val_bit_size) {
+ if (dst_mcv.isRegister()) {
+ try self.truncateRegister(val_ty, dst_mcv.getReg().?);
+ } else {
+ const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
+ const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+ defer self.register_manager.unlockReg(tmp_lock);
+
+ const hi_mcv = dst_mcv.address().offset(@intCast(val_bit_size / 64 * 8)).deref();
+ try self.genSetReg(tmp_reg, Type.usize, hi_mcv);
+ try self.truncateRegister(val_ty, tmp_reg);
+ try self.genCopy(Type.usize, hi_mcv, .{ .register = tmp_reg });
+ }
+ }
+ return;
+ }
+
if (val_abi_size > 8) return self.fail("TODO implement packed load of {}", .{val_ty.fmt(mod)});
const limb_abi_size: u32 = @min(val_abi_size, 8);
const limb_abi_bits = limb_abi_size * 8;
- const val_byte_off: i32 = @intCast(ptr_info.packed_offset.bit_offset / limb_abi_bits * limb_abi_size);
+ const val_byte_off: i32 =
+ @intCast(ptr_info.packed_offset.bit_offset / limb_abi_bits * limb_abi_size);
const val_bit_off = ptr_info.packed_offset.bit_offset % limb_abi_bits;
const val_extra_bits = self.regExtraBits(val_ty);
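
Byte-aligned packed loads now short-circuit into a plain load plus a truncation of any bits beyond the value's bit width, instead of the shift-and-mask limb path below. A user-level shape that should hit the new fast path (illustrative sketch):

    const std = @import("std");

    const P = packed struct {
        a: u8,
        b: u24, // bit offset 8, i.e. byte-aligned
    };

    test "load a byte-aligned packed field through a pointer" {
        var p = P{ .a = 0xaa, .b = 0xbbccdd };
        const field_ptr = &p.b; // packed pointer with bit_offset % 8 == 0
        try std.testing.expect(field_ptr.* == 0xbbccdd);
    }
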
@@ -6530,7 +6874,7 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In
.base = .{ .reg = ptr_reg },
.mod = .{ .rm = .{
.size = Memory.Size.fromSize(limb_abi_size),
- .disp = src_byte_off + limb_i * limb_abi_bits,
+ .disp = src_byte_off + limb_i * limb_abi_size,
} },
};
@@ -6575,6 +6919,22 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In
limb_mem,
registerAlias(tmp_reg, limb_abi_size),
);
+ } else if (src_bit_size <= 128 and src_bit_off == 0) {
+ const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
+ const tmp_mcv = MCValue{ .register = tmp_reg };
+ const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+ defer self.register_manager.unlockReg(tmp_lock);
+
+ try self.genSetReg(tmp_reg, limb_ty, switch (limb_i) {
+ 0 => src_mcv,
+ else => src_mcv.address().offset(limb_i * limb_abi_size).deref(),
+ });
+ try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
+ try self.asmMemoryRegister(
+ .{ ._, .@"or" },
+ limb_mem,
+ registerAlias(tmp_reg, limb_abi_size),
+ );
} else return self.fail("TODO: implement packed store of {}", .{src_ty.fmt(mod)});
}
}
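
The store side gains a matching branch for byte-aligned values up to 128 bits, which masks each limb in a temporary register and ORs it into place. A packed field wider than 64 bits that should exercise it (illustrative sketch):

    const std = @import("std");

    const W = packed struct {
        wide: u96, // bit offset 0, spans two 64-bit limbs
        tail: u32,
    };

    test "store a wide byte-aligned packed field" {
        var w = W{ .wide = 0, .tail = 0xdeadbeef };
        w.wide = 0xaaaa_bbbb_cccc_dddd_eeee; // 80-bit value into the u96 field
        try std.testing.expect(w.wide == 0xaaaa_bbbb_cccc_dddd_eeee);
        try std.testing.expect(w.tail == 0xdeadbeef); // neighbouring field untouched
    }
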
@@ -6808,17 +7168,17 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
.register_overflow => |ro| {
switch (index) {
// Get wrapped value for overflow operation.
- 0 => break :result if (self.liveness.operandDies(inst, 0))
- .{ .register = ro.reg }
- else
- try self.copyToRegisterWithInstTracking(
- inst,
- Type.usize,
- .{ .register = ro.reg },
- ),
+ 0 => if (self.reuseOperand(inst, extra.struct_operand, 0, src_mcv)) {
+ self.eflags_inst = null; // actually stop tracking the overflow part
+ break :result .{ .register = ro.reg };
+ } else break :result try self.copyToRegisterWithInstTracking(
+ inst,
+ Type.usize,
+ .{ .register = ro.reg },
+ ),
// Get overflow bit.
- 1 => if (self.liveness.operandDies(inst, 0)) {
- self.eflags_inst = inst;
+ 1 => if (self.reuseOperandAdvanced(inst, extra.struct_operand, 0, src_mcv, null)) {
+ self.eflags_inst = inst; // actually keep tracking the overflow part
break :result .{ .eflags = ro.eflags };
} else {
const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
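
Extracting the two fields of an overflow tuple now ties register and EFLAGS reuse to whether the operand can actually be reused, rather than only to operand death. The construct involved (illustrative sketch, assuming the tuple-returning `@addWithOverflow` of current Zig):

    const std = @import("std");

    test "overflow arithmetic yields a wrapped value and an overflow bit" {
        var a: u8 = 200;
        var b: u8 = 100;
        _ = &a;
        _ = &b; // keep both operands runtime-known
        const res = @addWithOverflow(a, b);
        // The backend above keeps res[0] in a register and may keep
        // res[1] tracked in EFLAGS until it is observed.
        try std.testing.expect(res[0] == 44); // 300 wrapped to u8
        try std.testing.expect(res[1] == 1);
    }
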
@@ -6833,11 +7193,12 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
if (field_off % 8 == 0) {
const off_mcv =
src_mcv.address().offset(@intCast(@divExact(field_off, 8))).deref();
+ const field_bit_size = field_ty.bitSize(mod);
if (field_abi_size <= 8) {
const int_ty = try mod.intType(
if (field_ty.isAbiInt(mod)) field_ty.intInfo(mod).signedness else .unsigned,
- @intCast(field_ty.bitSize(mod)),
+ @intCast(field_bit_size),
);
const dst_reg = try self.register_manager.allocReg(
@@ -6856,10 +7217,24 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
}
- if (self.reuseOperand(inst, operand, 0, src_mcv)) break :result off_mcv;
-
- const dst_mcv = try self.allocRegOrMem(inst, true);
- try self.genCopy(field_ty, dst_mcv, off_mcv);
+ const dst_mcv = if (self.reuseOperand(inst, operand, 0, src_mcv))
+ off_mcv
+ else dst: {
+ const dst_mcv = try self.allocRegOrMem(inst, true);
+ try self.genCopy(field_ty, dst_mcv, off_mcv);
+ break :dst dst_mcv;
+ };
+ if (field_abi_size * 8 > field_bit_size and dst_mcv.isMemory()) {
+ const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
+ const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+ defer self.register_manager.unlockReg(tmp_lock);
+
+ const hi_mcv =
+ dst_mcv.address().offset(@intCast(field_bit_size / 64 * 8)).deref();
+ try self.genSetReg(tmp_reg, Type.usize, hi_mcv);
+ try self.truncateRegister(field_ty, tmp_reg);
+ try self.genCopy(Type.usize, hi_mcv, .{ .register = tmp_reg });
+ }
break :result dst_mcv;
}
@@ -7013,7 +7388,25 @@ fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air:
} else try self.genUnOpMir(.{ ._, .not }, limb_ty, limb_mcv);
}
},
- .neg => try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv),
+ .neg => {
+ try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv);
+ const abi_size: u16 = @intCast(src_ty.abiSize(mod));
+ const bit_size = src_ty.intInfo(mod).bits;
+ if (abi_size * 8 > bit_size) {
+ if (dst_mcv.isRegister()) {
+ try self.truncateRegister(src_ty, dst_mcv.getReg().?);
+ } else {
+ const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
+ const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+ defer self.register_manager.unlockReg(tmp_lock);
+
+ const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref();
+ try self.genSetReg(tmp_reg, Type.usize, hi_mcv);
+ try self.truncateRegister(src_ty, tmp_reg);
+ try self.genCopy(Type.usize, hi_mcv, .{ .register = tmp_reg });
+ }
+ }
+ },
else => unreachable,
}
return dst_mcv;
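
Negation now also truncates the bits above the integer's width, so the in-register (or in-memory, for wide integers) representation stays canonical. The semantics being preserved (illustrative sketch):

    const std = @import("std");

    test "wrapping negation of a 3-bit integer" {
        var x: u3 = 1;
        _ = &x; // keep the operand runtime-known
        // neg on the 8-bit register produces 0xff; the extra 5 bits must be
        // truncated so the stored u3 value is 7, not garbage.
        try std.testing.expect(-%x == 7);
    }
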
@@ -7054,7 +7447,7 @@ fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MC
},
.indirect, .load_frame => try self.asmMemory(
mir_tag,
- dst_mcv.mem(Memory.Size.fromSize(abi_size)),
+ try dst_mcv.mem(self, Memory.Size.fromSize(abi_size)),
),
}
}
@@ -7552,27 +7945,27 @@ fn genMulDivBinOp(
defer self.register_manager.unlockReg(tmp_lock);
if (mat_lhs_mcv.isMemory())
- try self.asmRegisterMemory(.{ ._, .mov }, .rax, mat_lhs_mcv.mem(.qword))
+ try self.asmRegisterMemory(.{ ._, .mov }, .rax, try mat_lhs_mcv.mem(self, .qword))
else
try self.asmRegisterRegister(.{ ._, .mov }, .rax, mat_lhs_mcv.register_pair[0]);
if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._, .mov },
tmp_reg,
- mat_rhs_mcv.address().offset(8).deref().mem(.qword),
+ try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword),
) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_rhs_mcv.register_pair[1]);
try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, .rax);
if (mat_rhs_mcv.isMemory())
- try self.asmMemory(.{ ._, .mul }, mat_rhs_mcv.mem(.qword))
+ try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .qword))
else
try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]);
try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg);
if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._, .mov },
tmp_reg,
- mat_lhs_mcv.address().offset(8).deref().mem(.qword),
+ try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword),
) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_lhs_mcv.register_pair[1]);
if (mat_rhs_mcv.isMemory())
- try self.asmRegisterMemory(.{ .i_, .mul }, tmp_reg, mat_rhs_mcv.mem(.qword))
+ try self.asmRegisterMemory(.{ .i_, .mul }, tmp_reg, try mat_rhs_mcv.mem(self, .qword))
else
try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, mat_rhs_mcv.register_pair[0]);
try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg);
@@ -7833,7 +8226,7 @@ fn genBinOp(
.{ .vp_w, .insr },
dst_reg,
dst_reg,
- rhs_mcv.mem(.word),
+ try rhs_mcv.mem(self, .word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.{ .vp_, .unpcklwd },
@@ -7858,7 +8251,7 @@ fn genBinOp(
mir_tag,
dst_reg,
dst_reg,
- src_mcv.mem(Memory.Size.fromBitSize(float_bits)),
+ try src_mcv.mem(self, Memory.Size.fromBitSize(float_bits)),
) else try self.asmRegisterRegisterRegister(
mir_tag,
dst_reg,
@@ -7877,7 +8270,7 @@ fn genBinOp(
if (src_mcv.isMemory()) try self.asmRegisterMemory(
mir_tag,
dst_reg,
- src_mcv.mem(Memory.Size.fromBitSize(float_bits)),
+ try src_mcv.mem(self, Memory.Size.fromBitSize(float_bits)),
) else try self.asmRegisterRegister(
mir_tag,
dst_reg,
@@ -7919,12 +8312,18 @@ fn genBinOp(
};
}
- if ((lhs_ty.scalarType(mod).isRuntimeFloat() and
+ const sse_op = switch (lhs_ty.zigTypeTag(mod)) {
+ else => false,
+ .Float => true,
+ .Vector => switch (lhs_ty.childType(mod).toIntern()) {
+ .bool_type => false,
+ else => true,
+ },
+ };
+ if (sse_op and ((lhs_ty.scalarType(mod).isRuntimeFloat() and
lhs_ty.scalarType(mod).floatBits(self.target.*) == 80) or
- lhs_ty.abiSize(mod) > @as(u6, if (self.hasFeature(.avx)) 32 else 16))
- return self.fail("TODO implement genBinOp for {s} {}", .{
- @tagName(air_tag), lhs_ty.fmt(mod),
- });
+ lhs_ty.abiSize(mod) > @as(u6, if (self.hasFeature(.avx)) 32 else 16)))
+ return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(air_tag), lhs_ty.fmt(mod) });
const maybe_mask_reg = switch (air_tag) {
else => null,
@@ -7941,10 +8340,16 @@ fn genBinOp(
if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null;
defer if (mask_lock) |lock| self.register_manager.unlockReg(lock);
- const ordered_air = if (lhs_ty.isVector(mod) and lhs_ty.childType(mod).isAbiInt(mod) and
- switch (air_tag) {
- .cmp_lt, .cmp_gte => true,
- else => false,
+ const ordered_air = if (lhs_ty.isVector(mod) and switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
+ .Int => switch (air_tag) {
+ .cmp_lt, .cmp_gte => true,
+ else => false,
+ },
+ .Float => switch (air_tag) {
+ .cmp_gte, .cmp_gt => true,
+ else => false,
+ },
+ else => unreachable,
}) .{ .lhs = rhs_air, .rhs = lhs_air } else .{ .lhs = lhs_air, .rhs = rhs_air };
const lhs_mcv = try self.resolveInst(ordered_air.lhs);
@@ -7971,14 +8376,12 @@ fn genBinOp(
.xor,
.min,
.max,
+ .cmp_eq,
+ .cmp_neq,
=> true,
else => false,
};
- const vec_op = switch (lhs_ty.zigTypeTag(mod)) {
- else => false,
- .Float, .Vector => true,
- };
const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) {
.register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null },
@@ -8000,23 +8403,23 @@ fn genBinOp(
var flipped = false;
var copied_to_dst = true;
const dst_mcv: MCValue = dst: {
+ const tracked_inst = switch (air_tag) {
+ else => maybe_inst,
+ .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => null,
+ };
if (maybe_inst) |inst| {
- const tracked_inst = switch (air_tag) {
- else => inst,
- .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => null,
- };
- if ((!vec_op or lhs_mcv.isRegister()) and
+ if ((!sse_op or lhs_mcv.isRegister()) and
self.reuseOperandAdvanced(inst, ordered_air.lhs, 0, lhs_mcv, tracked_inst))
break :dst lhs_mcv;
- if (is_commutative and (!vec_op or rhs_mcv.isRegister()) and
+ if (is_commutative and (!sse_op or rhs_mcv.isRegister()) and
self.reuseOperandAdvanced(inst, ordered_air.rhs, 1, rhs_mcv, tracked_inst))
{
flipped = true;
break :dst rhs_mcv;
}
}
- const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
- if (vec_op and lhs_mcv.isRegister() and self.hasFeature(.avx))
+ const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, tracked_inst, true);
+ if (sse_op and lhs_mcv.isRegister() and self.hasFeature(.avx))
copied_to_dst = false
else
try self.genCopy(lhs_ty, dst_mcv, lhs_mcv);
@@ -8046,7 +8449,7 @@ fn genBinOp(
};
defer for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock);
- if (!vec_op) {
+ if (!sse_op) {
switch (air_tag) {
.add,
.add_wrap,
@@ -8130,17 +8533,25 @@ fn genBinOp(
try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]);
if (src_mcv.isMemory()) {
- try self.asmRegisterMemory(.{ ._, .cmp }, dst_regs[0], src_mcv.mem(.qword));
+ try self.asmRegisterMemory(
+ .{ ._, .cmp },
+ dst_regs[0],
+ try src_mcv.mem(self, .qword),
+ );
try self.asmRegisterMemory(
.{ ._, .sbb },
tmp_reg,
- src_mcv.address().offset(8).deref().mem(.qword),
+ try src_mcv.address().offset(8).deref().mem(self, .qword),
+ );
+ try self.asmCmovccRegisterMemory(
+ cc,
+ dst_regs[0],
+ try src_mcv.mem(self, .qword),
);
- try self.asmCmovccRegisterMemory(cc, dst_regs[0], src_mcv.mem(.qword));
try self.asmCmovccRegisterMemory(
cc,
dst_regs[1],
- src_mcv.address().offset(8).deref().mem(.qword),
+ try src_mcv.address().offset(8).deref().mem(self, .qword),
);
} else {
try self.asmRegisterRegister(
@@ -8292,7 +8703,7 @@ fn genBinOp(
.{ .vp_w, .insr },
dst_reg,
dst_reg,
- src_mcv.mem(.word),
+ try src_mcv.mem(self, .word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.{ .vp_, .unpcklwd },
@@ -8738,7 +9149,7 @@ fn genBinOp(
.{ .vp_w, .insr },
dst_reg,
dst_reg,
- src_mcv.mem(.word),
+ try src_mcv.mem(self, .word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.{ .vp_, .unpcklwd },
@@ -8784,7 +9195,7 @@ fn genBinOp(
if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
.{ .vp_d, .insr },
dst_reg,
- src_mcv.mem(.dword),
+ try src_mcv.mem(self, .dword),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.{ .v_ps, .unpckl },
@@ -8836,7 +9247,7 @@ fn genBinOp(
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ .v_ps, .cvtph2 },
tmp_reg,
- src_mcv.mem(.qword),
+ try src_mcv.mem(self, .qword),
) else try self.asmRegisterRegister(
.{ .v_ps, .cvtph2 },
tmp_reg,
@@ -8879,7 +9290,7 @@ fn genBinOp(
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ .v_ps, .cvtph2 },
tmp_reg,
- src_mcv.mem(.xword),
+ try src_mcv.mem(self, .xword),
) else try self.asmRegisterRegister(
.{ .v_ps, .cvtph2 },
tmp_reg,
@@ -8925,6 +9336,13 @@ fn genBinOp(
=> if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
.max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
.min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
+ .cmp_lt,
+ .cmp_lte,
+ .cmp_eq,
+ .cmp_gte,
+ .cmp_gt,
+ .cmp_neq,
+ => if (self.hasFeature(.avx)) .{ .v_ss, .cmp } else .{ ._ss, .cmp },
else => unreachable,
},
2...4 => switch (air_tag) {
@@ -8938,7 +9356,14 @@ fn genBinOp(
=> if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div },
.max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max },
.min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min },
- else => unreachable,
+ .cmp_lt,
+ .cmp_lte,
+ .cmp_eq,
+ .cmp_gte,
+ .cmp_gt,
+ .cmp_neq,
+ => if (self.hasFeature(.avx)) .{ .v_ps, .cmp } else .{ ._ps, .cmp },
+ else => unreachable,
},
5...8 => if (self.hasFeature(.avx)) switch (air_tag) {
.add => .{ .v_ps, .add },
@@ -8947,6 +9372,7 @@ fn genBinOp(
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
.min => .{ .v_ps, .min },
+ .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_ps, .cmp },
else => unreachable,
} else null,
else => null,
@@ -8963,6 +9389,13 @@ fn genBinOp(
=> if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
.max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
.min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
+ .cmp_lt,
+ .cmp_lte,
+ .cmp_eq,
+ .cmp_gte,
+ .cmp_gt,
+ .cmp_neq,
+ => if (self.hasFeature(.avx)) .{ .v_sd, .cmp } else .{ ._sd, .cmp },
else => unreachable,
},
2 => switch (air_tag) {
@@ -8976,6 +9409,13 @@ fn genBinOp(
=> if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div },
.max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max },
.min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min },
+ .cmp_lt,
+ .cmp_lte,
+ .cmp_eq,
+ .cmp_gte,
+ .cmp_gt,
+ .cmp_neq,
+ => if (self.hasFeature(.avx)) .{ .v_pd, .cmp } else .{ ._pd, .cmp },
else => unreachable,
},
3...4 => if (self.hasFeature(.avx)) switch (air_tag) {
@@ -8984,6 +9424,7 @@ fn genBinOp(
.mul => .{ .v_pd, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div },
.max => .{ .v_pd, .max },
+ .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_pd, .cmp },
.min => .{ .v_pd, .min },
else => unreachable,
} else null,
@@ -9004,43 +9445,96 @@ fn genBinOp(
const lhs_copy_lock = if (lhs_copy_reg) |reg| self.register_manager.lockReg(reg) else null;
defer if (lhs_copy_lock) |lock| self.register_manager.unlockReg(lock);
- if (self.hasFeature(.avx)) {
- const lhs_reg =
- if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
- if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
- mir_tag,
- dst_reg,
- lhs_reg,
- src_mcv.mem(switch (lhs_ty.zigTypeTag(mod)) {
- else => Memory.Size.fromSize(abi_size),
- .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()),
- }),
- ) else try self.asmRegisterRegisterRegister(
- mir_tag,
- dst_reg,
- lhs_reg,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
- );
- } else {
- assert(copied_to_dst);
- if (src_mcv.isMemory()) try self.asmRegisterMemory(
- mir_tag,
- dst_reg,
- src_mcv.mem(switch (lhs_ty.zigTypeTag(mod)) {
- else => Memory.Size.fromSize(abi_size),
- .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()),
- }),
- ) else try self.asmRegisterRegister(
- mir_tag,
- dst_reg,
- registerAlias(if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
- );
+ switch (mir_tag[1]) {
+ else => if (self.hasFeature(.avx)) {
+ const lhs_reg =
+ if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
+ if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ mir_tag,
+ dst_reg,
+ lhs_reg,
+ try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(mod)) {
+ else => Memory.Size.fromSize(abi_size),
+ .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()),
+ }),
+ ) else try self.asmRegisterRegisterRegister(
+ mir_tag,
+ dst_reg,
+ lhs_reg,
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
+ );
+ } else {
+ assert(copied_to_dst);
+ if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ mir_tag,
+ dst_reg,
+ try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(mod)) {
+ else => Memory.Size.fromSize(abi_size),
+ .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()),
+ }),
+ ) else try self.asmRegisterRegister(
+ mir_tag,
+ dst_reg,
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
+ );
+ },
+ .cmp => {
+ const imm = Immediate.u(switch (air_tag) {
+ .cmp_eq => 0,
+ .cmp_lt, .cmp_gt => 1,
+ .cmp_lte, .cmp_gte => 2,
+ .cmp_neq => 4,
+ else => unreachable,
+ });
+ if (self.hasFeature(.avx)) {
+ const lhs_reg =
+ if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
+ if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ mir_tag,
+ dst_reg,
+ lhs_reg,
+ try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(mod)) {
+ else => Memory.Size.fromSize(abi_size),
+ .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()),
+ }),
+ imm,
+ ) else try self.asmRegisterRegisterRegisterImmediate(
+ mir_tag,
+ dst_reg,
+ lhs_reg,
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
+ imm,
+ );
+ } else {
+ assert(copied_to_dst);
+ if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ mir_tag,
+ dst_reg,
+ try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(mod)) {
+ else => Memory.Size.fromSize(abi_size),
+ .Vector => Memory.Size.fromBitSize(dst_reg.bitSize()),
+ }),
+ imm,
+ ) else try self.asmRegisterRegisterImmediate(
+ mir_tag,
+ dst_reg,
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
+ imm,
+ );
+ }
+ },
}
switch (air_tag) {
@@ -9281,48 +9775,46 @@ fn genBinOp(
);
}
},
- .cmp_lt,
- .cmp_lte,
- .cmp_eq,
- .cmp_gte,
- .cmp_gt,
- .cmp_neq,
- => {
- switch (air_tag) {
- .cmp_lt,
- .cmp_eq,
- .cmp_gt,
- => {},
- .cmp_lte,
- .cmp_gte,
- .cmp_neq,
- => {
- const unsigned_ty = try lhs_ty.toUnsigned(mod);
- const not_mcv = try self.genTypedValue(.{
- .ty = lhs_ty,
- .val = try unsigned_ty.maxInt(mod, unsigned_ty),
- });
- const not_mem: Memory = if (not_mcv.isMemory())
- not_mcv.mem(Memory.Size.fromSize(abi_size))
- else
- .{ .base = .{
- .reg = try self.copyToTmpRegister(Type.usize, not_mcv.address()),
- }, .mod = .{ .rm = .{ .size = Memory.Size.fromSize(abi_size) } } };
- switch (mir_tag[0]) {
- .vp_b, .vp_d, .vp_q, .vp_w => try self.asmRegisterRegisterMemory(
- .{ .vp_, .xor },
- dst_reg,
- dst_reg,
- not_mem,
- ),
- .p_b, .p_d, .p_q, .p_w => try self.asmRegisterMemory(
- .{ .p_, .xor },
- dst_reg,
- not_mem,
- ),
- else => unreachable,
- }
+ .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => {
+ switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
+ .Int => switch (air_tag) {
+ .cmp_lt,
+ .cmp_eq,
+ .cmp_gt,
+ => {},
+ .cmp_lte,
+ .cmp_gte,
+ .cmp_neq,
+ => {
+ const unsigned_ty = try lhs_ty.toUnsigned(mod);
+ const not_mcv = try self.genTypedValue(.{
+ .ty = lhs_ty,
+ .val = try unsigned_ty.maxInt(mod, unsigned_ty),
+ });
+ const not_mem: Memory = if (not_mcv.isMemory())
+ try not_mcv.mem(self, Memory.Size.fromSize(abi_size))
+ else
+ .{ .base = .{
+ .reg = try self.copyToTmpRegister(Type.usize, not_mcv.address()),
+ }, .mod = .{ .rm = .{ .size = Memory.Size.fromSize(abi_size) } } };
+ switch (mir_tag[0]) {
+ .vp_b, .vp_d, .vp_q, .vp_w => try self.asmRegisterRegisterMemory(
+ .{ .vp_, .xor },
+ dst_reg,
+ dst_reg,
+ not_mem,
+ ),
+ .p_b, .p_d, .p_q, .p_w => try self.asmRegisterMemory(
+ .{ .p_, .xor },
+ dst_reg,
+ not_mem,
+ ),
+ else => unreachable,
+ }
+ },
+ else => unreachable,
},
+ .Float => {},
else => unreachable,
}
@@ -9331,8 +9823,12 @@ fn genBinOp(
defer self.register_manager.unlockReg(gp_lock);
try self.asmRegisterRegister(switch (mir_tag[0]) {
- .vp_b, .vp_d, .vp_q, .vp_w => .{ .vp_b, .movmsk },
+ ._pd, ._sd => .{ ._pd, .movmsk },
+ ._ps, ._ss => .{ ._ps, .movmsk },
.p_b, .p_d, .p_q, .p_w => .{ .p_b, .movmsk },
+ .v_pd, .v_sd => .{ .v_pd, .movmsk },
+ .v_ps, .v_ss => .{ .v_ps, .movmsk },
+ .vp_b, .vp_d, .vp_q, .vp_w => .{ .vp_b, .movmsk },
else => unreachable,
}, gp_reg.to32(), dst_reg);
return .{ .register = gp_reg };
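
With the cmpps/cmppd selection and the extended movmsk handling above, elementwise float-vector comparisons can now be lowered directly. A user-level example of the operation (illustrative sketch):

    const std = @import("std");

    test "elementwise compare of f32 vectors" {
        var a: @Vector(4, f32) = .{ 1.0, 2.0, 3.0, 4.0 };
        var b: @Vector(4, f32) = .{ 4.0, 2.0, 1.0, 8.0 };
        _ = &a;
        _ = &b; // keep both operands runtime-known
        // Per the table above, cmp_lt maps to a cmp instruction with
        // predicate 1, and the bool lanes are extracted with movmskps.
        const lt = a < b; // @Vector(4, bool)
        try std.testing.expect(lt[0] and !lt[1] and !lt[2] and lt[3]);
    }
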
@@ -9459,13 +9955,13 @@ fn genBinOpMir(
.load_frame,
.lea_frame,
=> {
- blk: {
- return self.asmRegisterMemory(mir_limb_tag, dst_alias, switch (src_mcv) {
+ direct: {
+ try self.asmRegisterMemory(mir_limb_tag, dst_alias, switch (src_mcv) {
.memory => |addr| .{
.base = .{ .reg = .ds },
.mod = .{ .rm = .{
.size = Memory.Size.fromSize(limb_abi_size),
- .disp = math.cast(i32, addr + off) orelse break :blk,
+ .disp = math.cast(i32, addr + off) orelse break :direct,
} },
},
.indirect => |reg_off| .{
@@ -9482,8 +9978,9 @@ fn genBinOpMir(
.disp = frame_addr.off + off,
} },
},
- else => break :blk,
+ else => break :direct,
});
+ continue;
}
switch (src_mcv) {
@@ -10180,7 +10677,7 @@ fn genCall(self: *Self, info: union(enum) {
.none, .unreach => {},
.indirect => |reg_off| {
const ret_ty = fn_info.return_type.toType();
- const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ret_ty, mod));
+ const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(ret_ty, mod));
try self.genSetReg(reg_off.reg, Type.usize, .{
.lea_frame = .{ .index = frame_index, .off = -reg_off.off },
});
@@ -10306,19 +10803,20 @@ fn genCall(self: *Self, info: union(enum) {
fn airRet(self: *Self, inst: Air.Inst.Index) !void {
const mod = self.bin_file.options.module.?;
const un_op = self.air.instructions.items(.data)[inst].un_op;
- const operand = try self.resolveInst(un_op);
const ret_ty = self.fn_type.fnReturnType(mod);
switch (self.ret_mcv.short) {
.none => {},
- .register, .register_pair => try self.genCopy(ret_ty, self.ret_mcv.short, operand),
+ .register,
+ .register_pair,
+ => try self.genCopy(ret_ty, self.ret_mcv.short, .{ .air_ref = un_op }),
.indirect => |reg_off| {
try self.register_manager.getReg(reg_off.reg, null);
const lock = self.register_manager.lockRegAssumeUnused(reg_off.reg);
defer self.register_manager.unlockReg(lock);
try self.genSetReg(reg_off.reg, Type.usize, self.ret_mcv.long);
- try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ret_ty, operand);
+ try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ret_ty, .{ .air_ref = un_op });
},
else => unreachable,
}
@@ -10593,7 +11091,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
const locks = self.register_manager.lockRegsAssumeUnused(2, regs);
defer for (locks) |lock| self.register_manager.unlockReg(lock);
- const limbs_len = std.math.divCeil(u16, abi_size, 8) catch unreachable;
+ const limbs_len = math.divCeil(u16, abi_size, 8) catch unreachable;
var limb_i: u16 = 0;
while (limb_i < limbs_len) : (limb_i += 1) {
const off = limb_i * 8;
@@ -10688,7 +11186,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
.{ .vp_w, .insr },
tmp1_reg,
dst_reg.to128(),
- src_mcv.mem(.word),
+ try src_mcv.mem(self, .word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.{ .vp_, .unpcklwd },
@@ -10892,8 +11390,8 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !Mir.Inst.Index {
},
.register => |reg| {
try self.spillEflagsIfOccupied();
- try self.asmRegisterImmediate(.{ ._, .@"test" }, reg, Immediate.u(1));
- return self.asmJccReloc(.e, undefined);
+ try self.asmRegisterImmediate(.{ ._, .@"test" }, reg.to8(), Immediate.u(1));
+ return self.asmJccReloc(.z, undefined);
},
.immediate,
.load_frame,
@@ -11433,12 +11931,12 @@ fn airBr(self: *Self, inst: Air.Inst.Index) !void {
if (self.reuseOperandAdvanced(inst, br.operand, 0, src_mcv, br.block_inst)) {
if (first_br) break :result src_mcv;
- if (block_tracking.getReg()) |block_reg|
+ for (block_tracking.getRegs()) |block_reg|
try self.register_manager.getReg(block_reg, br.block_inst);
// .long = .none to avoid merging operand and block result stack frames.
var current_tracking = InstTracking{ .long = .none, .short = src_mcv };
try current_tracking.materializeUnsafe(self, br.block_inst, block_tracking.*);
- if (src_mcv.getReg()) |src_reg| self.register_manager.freeReg(src_reg);
+ for (src_mcv.getRegs()) |src_reg| self.register_manager.freeReg(src_reg);
break :result block_tracking.short;
}
@@ -12177,16 +12675,87 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
.general_purpose, .segment => return .{ .move = .{ ._, .mov } },
.x87 => return .x87_load_store,
.mmx => {},
- .sse => {
- switch (ty.zigTypeTag(mod)) {
- else => {
- const classes = mem.sliceTo(&abi.classifySystemV(ty, mod, .other), .none);
- assert(std.mem.indexOfNone(abi.Class, classes, &.{
- .integer, .sse, .float, .float_combine,
- }) == null);
- const abi_size = ty.abiSize(mod);
- if (abi_size < 4 or
- std.mem.indexOfScalar(abi.Class, classes, .integer) != null) switch (abi_size) {
+ .sse => switch (ty.zigTypeTag(mod)) {
+ else => {
+ const classes = mem.sliceTo(&abi.classifySystemV(ty, mod, .other), .none);
+ assert(std.mem.indexOfNone(abi.Class, classes, &.{
+ .integer, .sse, .float, .float_combine,
+ }) == null);
+ const abi_size = ty.abiSize(mod);
+ if (abi_size < 4 or
+ std.mem.indexOfScalar(abi.Class, classes, .integer) != null) switch (abi_size) {
+ 1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
+ .insert = .{ .vp_b, .insr },
+ .extract = .{ .vp_b, .extr },
+ } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{
+ .insert = .{ .p_b, .insr },
+ .extract = .{ .p_b, .extr },
+ } },
+ 2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
+ .insert = .{ .vp_w, .insr },
+ .extract = .{ .vp_w, .extr },
+ } } else .{ .insert_extract = .{
+ .insert = .{ .p_w, .insr },
+ .extract = .{ .p_w, .extr },
+ } },
+ 3...4 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_d, .mov }
+ else
+ .{ ._d, .mov } },
+ 5...8 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_q, .mov }
+ else
+ .{ ._q, .mov } },
+ 9...16 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 17...32 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
+ else => {},
+ } else switch (abi_size) {
+ 4 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_ss, .mov }
+ else
+ .{ ._ss, .mov } },
+ 5...8 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_sd, .mov }
+ else
+ .{ ._sd, .mov } },
+ 9...16 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
+ else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
+ 17...32 => if (self.hasFeature(.avx)) return .{ .move = if (aligned)
+ .{ .v_pd, .mova }
+ else
+ .{ .v_pd, .movu } },
+ else => {},
+ }
+ },
+ .Float => switch (ty.floatBits(self.target.*)) {
+ 16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
+ .insert = .{ .vp_w, .insr },
+ .extract = .{ .vp_w, .extr },
+ } } else .{ .insert_extract = .{
+ .insert = .{ .p_w, .insr },
+ .extract = .{ .p_w, .extr },
+ } },
+ 32 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_ss, .mov }
+ else
+ .{ ._ss, .mov } },
+ 64 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_sd, .mov }
+ else
+ .{ ._sd, .mov } },
+ 128 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ else => {},
+ },
+ .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
+ .Bool => {},
+ .Int => switch (ty.childType(mod).intInfo(mod).bits) {
+ 8 => switch (ty.vectorLen(mod)) {
1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
.insert = .{ .vp_b, .insr },
.extract = .{ .vp_b, .extr },
@@ -12213,242 +12782,169 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
17...32 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
+ return .{ .move = if (aligned)
+ .{ .v_, .movdqa }
+ else
+ .{ .v_, .movdqu } },
else => {},
- } else switch (abi_size) {
- 4 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_ss, .mov }
+ },
+ 16 => switch (ty.vectorLen(mod)) {
+ 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
+ .insert = .{ .vp_w, .insr },
+ .extract = .{ .vp_w, .extr },
+ } } else .{ .insert_extract = .{
+ .insert = .{ .p_w, .insr },
+ .extract = .{ .p_w, .extr },
+ } },
+ 2 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_d, .mov }
else
- .{ ._ss, .mov } },
+ .{ ._d, .mov } },
+ 3...4 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_q, .mov }
+ else
+ .{ ._q, .mov } },
5...8 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_sd, .mov }
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 9...16 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned)
+ .{ .v_, .movdqa }
+ else
+ .{ .v_, .movdqu } },
+ else => {},
+ },
+ 32 => switch (ty.vectorLen(mod)) {
+ 1 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_d, .mov }
else
- .{ ._sd, .mov } },
- 9...16 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
- else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
- 17...32 => if (self.hasFeature(.avx)) return .{ .move = if (aligned)
- .{ .v_pd, .mova }
+ .{ ._d, .mov } },
+ 2 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_q, .mov }
else
- .{ .v_pd, .movu } },
- else => {},
- }
- },
- .Float => switch (ty.floatBits(self.target.*)) {
- 16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
- .insert = .{ .vp_w, .insr },
- .extract = .{ .vp_w, .extr },
- } } else .{ .insert_extract = .{
- .insert = .{ .p_w, .insr },
- .extract = .{ .p_w, .extr },
- } },
- 32 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_ss, .mov }
- else
- .{ ._ss, .mov } },
- 64 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_sd, .mov }
- else
- .{ ._sd, .mov } },
- 128 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
- else => {},
- },
- .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
- .Bool => return .{ .move = .{ ._, .mov } },
- .Int => switch (ty.childType(mod).intInfo(mod).bits) {
- 8 => switch (ty.vectorLen(mod)) {
- 1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
- .insert = .{ .vp_b, .insr },
- .extract = .{ .vp_b, .extr },
- } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{
- .insert = .{ .p_b, .insr },
- .extract = .{ .p_b, .extr },
- } },
- 2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
- .insert = .{ .vp_w, .insr },
- .extract = .{ .vp_w, .extr },
- } } else .{ .insert_extract = .{
- .insert = .{ .p_w, .insr },
- .extract = .{ .p_w, .extr },
- } },
- 3...4 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_d, .mov }
- else
- .{ ._d, .mov } },
- 5...8 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_q, .mov }
- else
- .{ ._q, .mov } },
- 9...16 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
- 17...32 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
- else => {},
- },
- 16 => switch (ty.vectorLen(mod)) {
- 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
- .insert = .{ .vp_w, .insr },
- .extract = .{ .vp_w, .extr },
- } } else .{ .insert_extract = .{
- .insert = .{ .p_w, .insr },
- .extract = .{ .p_w, .extr },
- } },
- 2 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_d, .mov }
- else
- .{ ._d, .mov } },
- 3...4 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_q, .mov }
- else
- .{ ._q, .mov } },
- 5...8 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
- 9...16 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
- else => {},
- },
- 32 => switch (ty.vectorLen(mod)) {
- 1 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_d, .mov }
+ .{ ._q, .mov } },
+ 3...4 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 5...8 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned)
+ .{ .v_, .movdqa }
else
- .{ ._d, .mov } },
- 2 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_q, .mov }
+ .{ .v_, .movdqu } },
+ else => {},
+ },
+ 64 => switch (ty.vectorLen(mod)) {
+ 1 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_q, .mov }
+ else
+ .{ ._q, .mov } },
+ 2 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 3...4 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned)
+ .{ .v_, .movdqa }
else
- .{ ._q, .mov } },
- 3...4 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
- 5...8 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
- else => {},
- },
- 64 => switch (ty.vectorLen(mod)) {
- 1 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_q, .mov }
+ .{ .v_, .movdqu } },
+ else => {},
+ },
+ 128 => switch (ty.vectorLen(mod)) {
+ 1 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 2 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned)
+ .{ .v_, .movdqa }
else
- .{ ._q, .mov } },
- 2 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
- 3...4 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
- else => {},
- },
- 128 => switch (ty.vectorLen(mod)) {
- 1 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
- 2 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
- else => {},
- },
- 256 => switch (ty.vectorLen(mod)) {
- 1 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
- else => {},
- },
+ .{ .v_, .movdqu } },
else => {},
},
- .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
- 16 => switch (ty.vectorLen(mod)) {
- 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
- .insert = .{ .vp_w, .insr },
- .extract = .{ .vp_w, .extr },
- } } else .{ .insert_extract = .{
- .insert = .{ .p_w, .insr },
- .extract = .{ .p_w, .extr },
- } },
- 2 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_d, .mov }
+ 256 => switch (ty.vectorLen(mod)) {
+ 1 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned)
+ .{ .v_, .movdqa }
else
- .{ ._d, .mov } },
- 3...4 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_q, .mov }
+ .{ .v_, .movdqu } },
+ else => {},
+ },
+ else => {},
+ },
+ .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
+ 16 => switch (ty.vectorLen(mod)) {
+ 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
+ .insert = .{ .vp_w, .insr },
+ .extract = .{ .vp_w, .extr },
+ } } else .{ .insert_extract = .{
+ .insert = .{ .p_w, .insr },
+ .extract = .{ .p_w, .extr },
+ } },
+ 2 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_d, .mov }
+ else
+ .{ ._d, .mov } },
+ 3...4 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_q, .mov }
+ else
+ .{ ._q, .mov } },
+ 5...8 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 9...16 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned)
+ .{ .v_, .movdqa }
else
- .{ ._q, .mov } },
- 5...8 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
- 9...16 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
- else => {},
- },
- 32 => switch (ty.vectorLen(mod)) {
- 1 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_ss, .mov }
+ .{ .v_, .movdqu } },
+ else => {},
+ },
+ 32 => switch (ty.vectorLen(mod)) {
+ 1 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_ss, .mov }
+ else
+ .{ ._ss, .mov } },
+ 2 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_sd, .mov }
+ else
+ .{ ._sd, .mov } },
+ 3...4 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
+ else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+ 5...8 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned)
+ .{ .v_ps, .mova }
else
- .{ ._ss, .mov } },
- 2 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_sd, .mov }
+ .{ .v_ps, .movu } },
+ else => {},
+ },
+ 64 => switch (ty.vectorLen(mod)) {
+ 1 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_sd, .mov }
+ else
+ .{ ._sd, .mov } },
+ 2 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
+ else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
+ 3...4 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned)
+ .{ .v_pd, .mova }
else
- .{ ._sd, .mov } },
- 3...4 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
- else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
- 5...8 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_ps, .mova }
- else
- .{ .v_ps, .movu } },
- else => {},
- },
- 64 => switch (ty.vectorLen(mod)) {
- 1 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_sd, .mov }
+ .{ .v_pd, .movu } },
+ else => {},
+ },
+ 128 => switch (ty.vectorLen(mod)) {
+ 1 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 2 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned)
+ .{ .v_, .movdqa }
else
- .{ ._sd, .mov } },
- 2 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
- else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
- 3...4 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_pd, .mova }
- else
- .{ .v_pd, .movu } },
- else => {},
- },
- 128 => switch (ty.vectorLen(mod)) {
- 1 => return .{ .move = if (self.hasFeature(.avx))
- if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
- else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
- 2 => if (self.hasFeature(.avx))
- return .{ .move = if (aligned)
- .{ .v_, .movdqa }
- else
- .{ .v_, .movdqu } },
- else => {},
- },
+ .{ .v_, .movdqu } },
else => {},
},
else => {},
},
- }
+ else => {},
+ },
},
}
return self.fail("TODO moveStrategy for {}", .{ty.fmt(mod)});
@@ -12514,32 +13010,18 @@ fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError
};
defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock);
- const classes = mem.sliceTo(&abi.classifySystemV(ty, mod, .other), .none);
- for (dst_regs, classes, 0..) |dst_reg, class, dst_reg_i| {
- const class_ty = switch (class) {
- .integer => Type.usize,
- .sse, .float, .float_combine => Type.f64,
- else => unreachable,
- };
- const off: i32 = @intCast(dst_reg_i * 8);
- switch (src_mcv) {
- .register_pair => |src_regs| try self.genSetReg(
- dst_reg,
- class_ty,
- .{ .register = src_regs[dst_reg_i] },
- ),
- .memory, .indirect, .load_frame => try self.genSetReg(
- dst_reg,
- class_ty,
- src_mcv.address().offset(off).deref(),
- ),
- .load_symbol, .load_direct, .load_got, .load_tlv => try self.genSetReg(
- dst_reg,
- class_ty,
- .{ .indirect = .{ .reg = src_info.?.addr_reg, .off = off } },
- ),
+ var part_disp: i32 = 0;
+ for (dst_regs, try self.splitType(ty), 0..) |dst_reg, dst_ty, part_i| {
+ try self.genSetReg(dst_reg, dst_ty, switch (src_mcv) {
+ .register_pair => |src_regs| .{ .register = src_regs[part_i] },
+ .memory, .indirect, .load_frame => src_mcv.address().offset(part_disp).deref(),
+ .load_symbol, .load_direct, .load_got, .load_tlv => .{ .indirect = .{
+ .reg = src_info.?.addr_reg,
+ .off = part_disp,
+ } },
else => unreachable,
- }
+ });
+ part_disp += @intCast(dst_ty.abiSize(mod));
}
},
.indirect => |reg_off| try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ty, src_mcv),
@@ -12584,6 +13066,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
if (imm == 0) {
// 32-bit moves zero-extend to 64-bit, so xoring the 32-bit
// register is the fastest way to zero a register.
+ try self.spillEflagsIfOccupied();
try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32());
} else if (abi_size > 4 and math.cast(u32, imm) != null) {
// 32-bit moves zero-extend to 64-bit.
@@ -12933,44 +13416,65 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal
.eflags => |cc| try self.asmSetccMemory(cc, .{ .base = base, .mod = .{
.rm = .{ .size = .byte, .disp = disp },
} }),
- .register => |src_reg| try (try self.moveStrategy(ty, src_reg.class(), switch (base) {
- .none => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
- .reg => |reg| switch (reg) {
- .es, .cs, .ss, .ds => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
- else => false,
- },
- .frame => |frame_index| self.getFrameAddrAlignment(
- .{ .index = frame_index, .off = disp },
- ).compare(.gte, ty.abiAlignment(mod)),
- .reloc => false,
- })).write(
- self,
- .{ .base = base, .mod = .{ .rm = .{
- .size = self.memSize(ty),
- .disp = disp,
- } } },
- registerAlias(src_reg, abi_size),
- ),
- .register_pair => |src_regs| for (src_regs, 0..) |src_reg, src_reg_i| {
- const part_size: u16 = @min(abi_size - src_reg_i * 8, 8);
- try (try self.moveStrategy(
- try mod.intType(.unsigned, part_size * 8),
- src_reg.class(),
- switch (base) {
- .none => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
- .reg => |reg| switch (reg) {
- .es, .cs, .ss, .ds => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
- else => false,
- },
- .frame => |frame_index| self.getFrameAddrAlignment(
- .{ .index = frame_index, .off = disp },
- ).compare(.gte, ty.abiAlignment(mod)),
- .reloc => false,
+ .register => |src_reg| {
+ const mem_size = switch (base) {
+ .frame => |base_fi| mem_size: {
+ assert(disp >= 0);
+ const frame_abi_size = self.frame_allocs.items(.abi_size)[@intFromEnum(base_fi)];
+ const frame_spill_pad = self.frame_allocs.items(.spill_pad)[@intFromEnum(base_fi)];
+ assert(frame_abi_size - frame_spill_pad - disp >= abi_size);
+ break :mem_size if (frame_abi_size - frame_spill_pad - disp == abi_size)
+ frame_abi_size
+ else
+ abi_size;
+ },
+ else => abi_size,
+ };
+ const src_alias = registerAlias(src_reg, abi_size);
+ const src_size: u32 = @intCast(switch (src_alias.class()) {
+ .general_purpose, .segment, .x87 => @divExact(src_alias.bitSize(), 8),
+ .mmx, .sse => abi_size,
+ });
+ if (src_size > mem_size) {
+ const frame_index = try self.allocFrameIndex(FrameAlloc.init(.{
+ .size = src_size,
+ .alignment = Alignment.fromNonzeroByteUnits(src_size),
+ }));
+ const frame_mcv: MCValue = .{ .load_frame = .{ .index = frame_index } };
+ try (try self.moveStrategy(ty, src_alias.class(), true)).write(
+ self,
+ .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{
+ .size = Memory.Size.fromSize(src_size),
+ } } },
+ src_alias,
+ );
+ try self.genSetMem(base, disp, ty, frame_mcv);
+ try self.freeValue(frame_mcv);
+ } else try (try self.moveStrategy(ty, src_alias.class(), switch (base) {
+ .none => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
+ .reg => |reg| switch (reg) {
+ .es, .cs, .ss, .ds => ty.abiAlignment(mod).check(@as(u32, @bitCast(disp))),
+ else => false,
},
- )).write(self, .{ .base = base, .mod = .{ .rm = .{
- .size = Memory.Size.fromSize(part_size),
- .disp = disp + @as(i32, @intCast(src_reg_i * 8)),
- } } }, registerAlias(src_reg, part_size));
+ .frame => |frame_index| self.getFrameAddrAlignment(
+ .{ .index = frame_index, .off = disp },
+ ).compare(.gte, ty.abiAlignment(mod)),
+ .reloc => false,
+ })).write(
+ self,
+ .{ .base = base, .mod = .{ .rm = .{
+ .size = self.memSize(ty),
+ .disp = disp,
+ } } },
+ src_alias,
+ );
+ },
+ .register_pair => |src_regs| {
+ var part_disp: i32 = disp;
+ for (try self.splitType(ty), src_regs) |src_ty, src_reg| {
+ try self.genSetMem(base, part_disp, src_ty, .{ .register = src_reg });
+ part_disp += @intCast(src_ty.abiSize(mod));
+ }
},
.register_overflow => |ro| switch (ty.zigTypeTag(mod)) {
.Struct => {
@@ -13226,50 +13730,43 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void {
const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
- const dst_mcv = if (dst_rc.supersetOf(src_rc) and
- self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
- src_mcv
- else dst: {
+ const dst_mcv = if (dst_rc.supersetOf(src_rc) and dst_ty.abiSize(mod) <= src_ty.abiSize(mod) and
+ self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
const dst_mcv = try self.allocRegOrMem(inst, true);
- try self.genCopy(
- if (!dst_mcv.isMemory() or src_mcv.isMemory()) dst_ty else src_ty,
- dst_mcv,
- src_mcv,
- );
+ try self.genCopy(switch (math.order(dst_ty.abiSize(mod), src_ty.abiSize(mod))) {
+ .lt => dst_ty,
+ .eq => if (!dst_mcv.isMemory() or src_mcv.isMemory()) dst_ty else src_ty,
+ .gt => src_ty,
+ }, dst_mcv, src_mcv);
break :dst dst_mcv;
};
if (dst_ty.isRuntimeFloat()) break :result dst_mcv;
- const dst_signedness =
- if (dst_ty.isAbiInt(mod)) dst_ty.intInfo(mod).signedness else .unsigned;
- if (!src_ty.isRuntimeFloat() or src_ty.floatBits(self.target.*) != 80) {
- const src_signedness =
- if (src_ty.isAbiInt(mod)) src_ty.intInfo(mod).signedness else .unsigned;
- if (dst_signedness == src_signedness) break :result dst_mcv;
- }
+ if (dst_ty.isAbiInt(mod) and src_ty.isAbiInt(mod) and
+ dst_ty.intInfo(mod).signedness == src_ty.intInfo(mod).signedness) break :result dst_mcv;
- const abi_size: u16 = @intCast(dst_ty.abiSize(mod));
- const bit_size: u16 = @intCast(dst_ty.bitSize(mod));
- if (abi_size * 8 <= bit_size) break :result dst_mcv;
+ const abi_size = dst_ty.abiSize(mod);
+ const bit_size = dst_ty.bitSize(mod);
+ if (abi_size * 8 <= bit_size or dst_ty.isVector(mod)) break :result dst_mcv;
- const dst_limbs_len = math.divCeil(i32, bit_size, 64) catch unreachable;
- const high_reg = if (dst_mcv.isRegister())
- dst_mcv.getReg().?
+ const dst_limbs_len = math.divCeil(i32, @intCast(bit_size), 64) catch unreachable;
+ const high_mcv: MCValue = switch (dst_mcv) {
+ .register => |dst_reg| .{ .register = dst_reg },
+ .register_pair => |dst_regs| .{ .register = dst_regs[1] },
+ else => dst_mcv.address().offset((dst_limbs_len - 1) * 8).deref(),
+ };
+ const high_reg = if (high_mcv.isRegister())
+ high_mcv.getReg().?
else
- try self.copyToTmpRegister(
- Type.usize,
- dst_mcv.address().offset((dst_limbs_len - 1) * 8).deref(),
- );
+ try self.copyToTmpRegister(Type.usize, high_mcv);
const high_lock = self.register_manager.lockReg(high_reg);
defer if (high_lock) |lock| self.register_manager.unlockReg(lock);
- const high_ty = try mod.intType(dst_signedness, bit_size % 64);
-
- try self.truncateRegister(high_ty, high_reg);
- if (!dst_mcv.isRegister()) try self.genCopy(
- Type.usize,
- dst_mcv.address().offset((dst_limbs_len - 1) * 8).deref(),
+ try self.truncateRegister(dst_ty, high_reg);
+ if (!high_mcv.isRegister()) try self.genCopy(
+ if (abi_size <= 8) dst_ty else Type.usize,
+ high_mcv,
.{ .register = high_reg },
);
break :result dst_mcv;
@@ -13287,7 +13784,7 @@ fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void {
const array_ty = ptr_ty.childType(mod);
const array_len = array_ty.arrayLen(mod);
- const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, mod));
+ const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(slice_ty, mod));
try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr);
try self.genSetMem(
.{ .frame = frame_index },
@@ -13497,7 +13994,7 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
const ptr_mcv = try self.resolveInst(extra.ptr);
const mem_size = Memory.Size.fromSize(val_abi_size);
const ptr_mem: Memory = switch (ptr_mcv) {
- .immediate, .register, .register_offset, .lea_frame => ptr_mcv.deref().mem(mem_size),
+ .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, mem_size),
else => .{
.base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
.mod = .{ .rm = .{ .size = mem_size } },
@@ -13563,7 +14060,7 @@ fn atomicOp(
const val_abi_size: u32 = @intCast(val_ty.abiSize(mod));
const mem_size = Memory.Size.fromSize(val_abi_size);
const ptr_mem: Memory = switch (ptr_mcv) {
- .immediate, .register, .register_offset, .lea_frame => ptr_mcv.deref().mem(mem_size),
+ .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, mem_size),
else => .{
.base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
.mod = .{ .rm = .{ .size = mem_size } },
@@ -13671,27 +14168,41 @@ fn atomicOp(
},
};
- try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv);
const cmov_abi_size = @max(val_abi_size, 2);
switch (val_mcv) {
- .register => |val_reg| try self.asmCmovccRegisterRegister(
- cc,
- registerAlias(tmp_reg, cmov_abi_size),
- registerAlias(val_reg, cmov_abi_size),
- ),
- .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
- cc,
- registerAlias(tmp_reg, cmov_abi_size),
- val_mcv.mem(Memory.Size.fromSize(cmov_abi_size)),
- ),
- else => {
- const val_reg = try self.copyToTmpRegister(val_ty, val_mcv);
+ .register => |val_reg| {
+ try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv);
try self.asmCmovccRegisterRegister(
cc,
registerAlias(tmp_reg, cmov_abi_size),
registerAlias(val_reg, cmov_abi_size),
);
},
+ .memory, .indirect, .load_frame => {
+ try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv);
+ try self.asmCmovccRegisterMemory(
+ cc,
+ registerAlias(tmp_reg, cmov_abi_size),
+ try val_mcv.mem(self, Memory.Size.fromSize(cmov_abi_size)),
+ );
+ },
+ else => {
+ const mat_reg = try self.copyToTmpRegister(val_ty, val_mcv);
+ const mat_lock = self.register_manager.lockRegAssumeUnused(mat_reg);
+ defer self.register_manager.unlockReg(mat_lock);
+
+ try self.genBinOpMir(
+ .{ ._, .cmp },
+ val_ty,
+ tmp_mcv,
+ .{ .register = mat_reg },
+ );
+ try self.asmCmovccRegisterRegister(
+ cc,
+ registerAlias(tmp_reg, cmov_abi_size),
+ registerAlias(mat_reg, cmov_abi_size),
+ );
+ },
}
},
};
@@ -13728,8 +14239,8 @@ fn atomicOp(
.reg = try self.copyToTmpRegister(Type.usize, val_mcv.address()),
} },
};
- const val_lo_mem = val_mem_mcv.mem(.qword);
- const val_hi_mem = val_mem_mcv.address().offset(8).deref().mem(.qword);
+ const val_lo_mem = try val_mem_mcv.mem(self, .qword);
+ const val_hi_mem = try val_mem_mcv.address().offset(8).deref().mem(self, .qword);
if (rmw_op != std.builtin.AtomicRmwOp.Xchg) {
try self.asmRegisterRegister(.{ ._, .mov }, .rbx, .rax);
try self.asmRegisterRegister(.{ ._, .mov }, .rcx, .rdx);
@@ -14000,7 +14511,7 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
try self.asmRegisterMemoryImmediate(
.{ .i_, .mul },
len_reg,
- dst_ptr.address().offset(8).deref().mem(.qword),
+ try dst_ptr.address().offset(8).deref().mem(self, .qword),
Immediate.s(@intCast(dst_ptr_ty.childType(mod).abiSize(mod))),
);
break :len .{ .register = len_reg };
@@ -14171,28 +14682,162 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
const mod = self.bin_file.options.module.?;
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
const vector_ty = self.typeOfIndex(inst);
+ const vector_len = vector_ty.vectorLen(mod);
const dst_rc = self.regClassForType(vector_ty);
- const scalar_ty = vector_ty.scalarType(mod);
+ const scalar_ty = self.typeOf(ty_op.operand);
- const src_mcv = try self.resolveInst(ty_op.operand);
const result: MCValue = result: {
switch (scalar_ty.zigTypeTag(mod)) {
else => {},
+ .Bool => {
+ const regs =
+ try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp);
+ const reg_locks = self.register_manager.lockRegsAssumeUnused(2, regs);
+ defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
+
+ try self.genSetReg(regs[0], vector_ty, .{ .immediate = 0 });
+ try self.genSetReg(
+ regs[1],
+ vector_ty,
+ .{ .immediate = @as(u64, math.maxInt(u64)) >> @intCast(64 - vector_len) },
+ );
+ const src_mcv = try self.resolveInst(ty_op.operand);
+ const abi_size = @max(math.divCeil(u32, vector_len, 8) catch unreachable, 4);
+ try self.asmCmovccRegisterRegister(
+ switch (src_mcv) {
+ .eflags => |cc| cc,
+ .register => |src_reg| cc: {
+ try self.asmRegisterImmediate(
+ .{ ._, .@"test" },
+ src_reg.to8(),
+ Immediate.u(1),
+ );
+ break :cc .nz;
+ },
+ else => cc: {
+ try self.asmMemoryImmediate(
+ .{ ._, .@"test" },
+ try src_mcv.mem(self, .byte),
+ Immediate.u(1),
+ );
+ break :cc .nz;
+ },
+ },
+ registerAlias(regs[0], abi_size),
+ registerAlias(regs[1], abi_size),
+ );
+ break :result .{ .register = regs[0] };
+ },
+ .Int => if (self.hasFeature(.avx2)) avx2: {
+ const mir_tag = @as(?Mir.Inst.FixedTag, switch (scalar_ty.intInfo(mod).bits) {
+ else => null,
+ 1...8 => switch (vector_len) {
+ else => null,
+ 1...32 => .{ .vp_b, .broadcast },
+ },
+ 9...16 => switch (vector_len) {
+ else => null,
+ 1...16 => .{ .vp_w, .broadcast },
+ },
+ 17...32 => switch (vector_len) {
+ else => null,
+ 1...8 => .{ .vp_d, .broadcast },
+ },
+ 33...64 => switch (vector_len) {
+ else => null,
+ 1...4 => .{ .vp_q, .broadcast },
+ },
+ 65...128 => switch (vector_len) {
+ else => null,
+ 1...2 => .{ .vp_i128, .broadcast },
+ },
+ }) orelse break :avx2;
+
+ const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
+ const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
+ defer self.register_manager.unlockReg(dst_lock);
+
+ const src_mcv = try self.resolveInst(ty_op.operand);
+ if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ mir_tag,
+ registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod))),
+ try src_mcv.mem(self, self.memSize(scalar_ty)),
+ ) else {
+ if (mir_tag[0] == .vp_i128) break :avx2;
+ try self.genSetReg(dst_reg, scalar_ty, src_mcv);
+ try self.asmRegisterRegister(
+ mir_tag,
+ registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod))),
+ registerAlias(dst_reg, @intCast(scalar_ty.abiSize(mod))),
+ );
+ }
+ break :result .{ .register = dst_reg };
+ } else {
+ const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
+ const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
+ defer self.register_manager.unlockReg(dst_lock);
+
+ try self.genSetReg(dst_reg, scalar_ty, .{ .air_ref = ty_op.operand });
+ if (vector_len == 1) break :result .{ .register = dst_reg };
+
+ const dst_alias = registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod)));
+ const scalar_bits = scalar_ty.intInfo(mod).bits;
+ if (switch (scalar_bits) {
+ 1...8 => true,
+ 9...128 => false,
+ else => unreachable,
+ }) if (self.hasFeature(.avx)) try self.asmRegisterRegisterRegister(
+ .{ .vp_, .unpcklbw },
+ dst_alias,
+ dst_alias,
+ dst_alias,
+ ) else try self.asmRegisterRegister(
+ .{ .p_, .unpcklbw },
+ dst_alias,
+ dst_alias,
+ );
+ if (switch (scalar_bits) {
+ 1...8 => vector_len > 2,
+ 9...16 => true,
+ 17...128 => false,
+ else => unreachable,
+ }) try self.asmRegisterRegisterImmediate(
+ .{ if (self.hasFeature(.avx)) .vp_w else .p_w, .shufl },
+ dst_alias,
+ dst_alias,
+ Immediate.u(0),
+ );
+ if (switch (scalar_bits) {
+ 1...8 => vector_len > 4,
+ 9...16 => vector_len > 2,
+ 17...64 => true,
+ 65...128 => false,
+ else => unreachable,
+ }) try self.asmRegisterRegisterImmediate(
+ .{ if (self.hasFeature(.avx)) .vp_d else .p_d, .shuf },
+ dst_alias,
+ dst_alias,
+ Immediate.u(if (scalar_bits <= 64) 0b00_00_00_00 else 0b01_00_01_00),
+ );
+ break :result .{ .register = dst_reg };
+ },
.Float => switch (scalar_ty.floatBits(self.target.*)) {
- 32 => switch (vector_ty.vectorLen(mod)) {
+ 32 => switch (vector_len) {
1 => {
+ const src_mcv = try self.resolveInst(ty_op.operand);
if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
try self.genSetReg(dst_reg, scalar_ty, src_mcv);
break :result .{ .register = dst_reg };
},
2...4 => {
+ const src_mcv = try self.resolveInst(ty_op.operand);
if (self.hasFeature(.avx)) {
const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ .v_ss, .broadcast },
dst_reg.to128(),
- src_mcv.mem(.dword),
+ try src_mcv.mem(self, .dword),
) else {
const src_reg = if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -14224,11 +14869,12 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
}
},
5...8 => if (self.hasFeature(.avx)) {
+ const src_mcv = try self.resolveInst(ty_op.operand);
const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ .v_ss, .broadcast },
dst_reg.to256(),
- src_mcv.mem(.dword),
+ try src_mcv.mem(self, .dword),
) else {
const src_reg = if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -14259,20 +14905,22 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
},
else => {},
},
- 64 => switch (vector_ty.vectorLen(mod)) {
+ 64 => switch (vector_len) {
1 => {
+ const src_mcv = try self.resolveInst(ty_op.operand);
if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
try self.genSetReg(dst_reg, scalar_ty, src_mcv);
break :result .{ .register = dst_reg };
},
2 => {
+ const src_mcv = try self.resolveInst(ty_op.operand);
const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
if (self.hasFeature(.sse3)) {
if (src_mcv.isMemory()) try self.asmRegisterMemory(
if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
dst_reg.to128(),
- src_mcv.mem(.qword),
+ try src_mcv.mem(self, .qword),
) else try self.asmRegisterRegister(
if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
dst_reg.to128(),
@@ -14292,11 +14940,12 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
);
},
3...4 => if (self.hasFeature(.avx)) {
+ const src_mcv = try self.resolveInst(ty_op.operand);
const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ .v_sd, .broadcast },
dst_reg.to256(),
- src_mcv.mem(.qword),
+ try src_mcv.mem(self, .qword),
) else {
const src_reg = if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -14325,19 +14974,21 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
},
else => {},
},
- 128 => switch (vector_ty.vectorLen(mod)) {
+ 128 => switch (vector_len) {
1 => {
+ const src_mcv = try self.resolveInst(ty_op.operand);
if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
try self.genSetReg(dst_reg, scalar_ty, src_mcv);
break :result .{ .register = dst_reg };
},
2 => if (self.hasFeature(.avx)) {
+ const src_mcv = try self.resolveInst(ty_op.operand);
const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ .v_f128, .broadcast },
dst_reg.to256(),
- src_mcv.mem(.xword),
+ try src_mcv.mem(self, .xword),
) else {
const src_reg = if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -14389,7 +15040,7 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
try self.spillEflagsIfOccupied();
const operand_mcv = try self.resolveInst(reduce.operand);
- const mask_len = (std.math.cast(u6, operand_ty.vectorLen(mod)) orelse
+ const mask_len = (math.cast(u6, operand_ty.vectorLen(mod)) orelse
return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(mod)}));
const mask = (@as(u64, 1) << mask_len) - 1;
const abi_size: u32 = @intCast(operand_ty.abiSize(mod));
@@ -14397,7 +15048,7 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
.Or => {
if (operand_mcv.isMemory()) try self.asmMemoryImmediate(
.{ ._, .@"test" },
- operand_mcv.mem(Memory.Size.fromSize(abi_size)),
+ try operand_mcv.mem(self, Memory.Size.fromSize(abi_size)),
Immediate.u(mask),
) else {
const operand_reg = registerAlias(if (operand_mcv.isRegister())
@@ -14445,8 +15096,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
const result: MCValue = result: {
switch (result_ty.zigTypeTag(mod)) {
.Struct => {
- const frame_index =
- try self.allocFrameIndex(FrameAlloc.initType(result_ty, mod));
+ const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(result_ty, mod));
if (result_ty.containerLayout(mod) == .Packed) {
const struct_type = mod.typeToStruct(result_ty).?;
try self.genInlineMemset(
@@ -14542,8 +15192,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
break :result .{ .load_frame = .{ .index = frame_index } };
},
.Array => {
- const frame_index =
- try self.allocFrameIndex(FrameAlloc.initType(result_ty, mod));
+ const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(result_ty, mod));
const elem_ty = result_ty.childType(mod);
const elem_size: u32 = @intCast(elem_ty.abiSize(mod));
@@ -14789,7 +15438,7 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
mir_tag,
mop1_reg,
mop2_reg,
- mops[2].mem(Memory.Size.fromSize(abi_size)),
+ try mops[2].mem(self, Memory.Size.fromSize(abi_size)),
);
break :result mops[0];
};
@@ -14807,7 +15456,7 @@ fn airVaStart(self: *Self, inst: Air.Inst.Index) !void {
)) {
.SysV => result: {
const info = self.va_info.sysv;
- const dst_fi = try self.allocFrameIndex(FrameAlloc.initType(va_list_ty, mod));
+ const dst_fi = try self.allocFrameIndex(FrameAlloc.initSpill(va_list_ty, mod));
var field_off: u31 = 0;
// gp_offset: c_uint,
try self.genSetMem(
@@ -15015,7 +15664,7 @@ fn airVaArg(self: *Self, inst: Air.Inst.Index) !void {
.{ .v_ss, .cvtsd2 },
dst_reg,
dst_reg,
- promote_mcv.mem(.qword),
+ try promote_mcv.mem(self, .qword),
) else try self.asmRegisterRegisterRegister(
.{ .v_ss, .cvtsd2 },
dst_reg,
@@ -15027,7 +15676,7 @@ fn airVaArg(self: *Self, inst: Air.Inst.Index) !void {
) else if (promote_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._ss, .cvtsd2 },
dst_reg,
- promote_mcv.mem(.qword),
+ try promote_mcv.mem(self, .qword),
) else try self.asmRegisterRegister(
.{ ._ss, .cvtsd2 },
dst_reg,
@@ -15473,6 +16122,33 @@ fn memSize(self: *Self, ty: Type) Memory.Size {
};
}
+fn splitType(self: *Self, ty: Type) ![2]Type {
+ const mod = self.bin_file.options.module.?;
+ const classes = mem.sliceTo(&abi.classifySystemV(ty, mod, .other), .none);
+ var parts: [2]Type = undefined;
+ if (classes.len == 2) for (&parts, classes, 0..) |*part, class, part_i| {
+ part.* = switch (class) {
+ .integer => switch (part_i) {
+ 0 => Type.u64,
+ 1 => part: {
+ const elem_size = ty.abiAlignment(mod).minStrict(.@"8").toByteUnitsOptional().?;
+ const elem_ty = try mod.intType(.unsigned, @intCast(elem_size * 8));
+ break :part switch (@divExact(ty.abiSize(mod) - 8, elem_size)) {
+ 1 => elem_ty,
+ else => |len| try mod.arrayType(.{ .len = len, .child = elem_ty.toIntern() }),
+ };
+ },
+ else => unreachable,
+ },
+ .float => Type.f32,
+ .float_combine => try mod.vectorType(.{ .len = 2, .child = .f32_type }),
+ .sse => Type.f64,
+ else => break,
+ };
+ } else if (parts[0].abiSize(mod) + parts[1].abiSize(mod) == ty.abiSize(mod)) return parts;
+ return self.fail("TODO implement splitType for {}", .{ty.fmt(mod)});
+}
+
/// Truncates the value in the register in place.
/// Clobbers any remaining bits.
fn truncateRegister(self: *Self, ty: Type, reg: Register) !void {
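
The new `splitType` helper models the SysV two-eightbyte rule used by `genCopy` and `genSetMem` above: the first integer part is always a `u64`, and the second part is sized from the bytes that remain. A small sketch (ordinary test code with hypothetical names, not compiler internals) of the sizes such a split would produce for a 12-byte, 4-byte-aligned struct:

```zig
const std = @import("std");

// 12 bytes with 4-byte alignment: two integer eightbytes under SysV
// classification, so the split would be { u64, u32 } (8 + 4 == 12).
const ThreeWords = extern struct { a: u32, b: u32, c: u32 };

test "two-eightbyte split covers the whole struct" {
    try std.testing.expectEqual(@as(usize, 12), @sizeOf(ThreeWords));
    try std.testing.expectEqual(@sizeOf(u64) + @sizeOf(u32), @sizeOf(ThreeWords));
}
```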
src/arch/x86_64/Encoding.zig
@@ -410,6 +410,8 @@ pub const Mnemonic = enum {
vfmadd132ps, vfmadd213ps, vfmadd231ps,
vfmadd132sd, vfmadd213sd, vfmadd231sd,
vfmadd132ss, vfmadd213ss, vfmadd231ss,
+ // AVX2
+ vpbroadcastb, vpbroadcastd, vpbroadcasti128, vpbroadcastq, vpbroadcastw,
// zig fmt: on
};
@@ -444,7 +446,7 @@ pub const Op = enum {
moffs,
sreg,
st, mm, mm_m64,
- xmm0, xmm, xmm_m32, xmm_m64, xmm_m128,
+ xmm0, xmm, xmm_m8, xmm_m16, xmm_m32, xmm_m64, xmm_m128,
ymm, ymm_m256,
// zig fmt: on
@@ -534,7 +536,7 @@ pub const Op = enum {
.eax, .r32, .rm32, .r32_m16 => unreachable,
.rax, .r64, .rm64, .r64_m16 => unreachable,
.st, .mm, .mm_m64 => unreachable,
- .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable,
+ .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable,
.ymm, .ymm_m256 => unreachable,
.m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable,
.unity => 1,
@@ -556,7 +558,7 @@ pub const Op = enum {
.eax, .r32, .rm32, .r32_m8, .r32_m16 => 32,
.rax, .r64, .rm64, .r64_m16, .mm, .mm_m64 => 64,
.st => 80,
- .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
+ .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
.ymm, .ymm_m256 => 256,
};
}
@@ -568,8 +570,8 @@ pub const Op = enum {
.rel8, .rel16, .rel32 => unreachable,
.al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64 => unreachable,
.st, .mm, .xmm0, .xmm, .ymm => unreachable,
- .m8, .rm8, .r32_m8 => 8,
- .m16, .rm16, .r32_m16, .r64_m16 => 16,
+ .m8, .rm8, .r32_m8, .xmm_m8 => 8,
+ .m16, .rm16, .r32_m16, .r64_m16, .xmm_m16 => 16,
.m32, .rm32, .xmm_m32 => 32,
.m64, .rm64, .mm_m64, .xmm_m64 => 64,
.m80 => 80,
@@ -600,7 +602,7 @@ pub const Op = enum {
.rm8, .rm16, .rm32, .rm64,
.r32_m8, .r32_m16, .r64_m16,
.st, .mm, .mm_m64,
- .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128,
+ .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128,
.ymm, .ymm_m256,
=> true,
else => false,
@@ -629,7 +631,7 @@ pub const Op = enum {
.m8, .m16, .m32, .m64, .m80, .m128, .m256,
.m,
.mm_m64,
- .xmm_m32, .xmm_m64, .xmm_m128,
+ .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128,
.ymm_m256,
=> true,
else => false,
@@ -654,7 +656,7 @@ pub const Op = enum {
.sreg => .segment,
.st => .x87,
.mm, .mm_m64 => .mmx,
- .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse,
+ .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128 => .sse,
.ymm, .ymm_m256 => .sse,
};
}
src/arch/x86_64/encodings.zig
@@ -1742,6 +1742,16 @@ pub const table = [_]Entry{
.{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
+ .{ .vpbroadcastb, .rm, &.{ .xmm, .xmm_m8 }, &.{ 0x66, 0x0f, 0x38, 0x78 }, 0, .vex_128_w0, .avx2 },
+ .{ .vpbroadcastb, .rm, &.{ .ymm, .xmm_m8 }, &.{ 0x66, 0x0f, 0x38, 0x78 }, 0, .vex_256_w0, .avx2 },
+ .{ .vpbroadcastw, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x79 }, 0, .vex_128_w0, .avx2 },
+ .{ .vpbroadcastw, .rm, &.{ .ymm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x79 }, 0, .vex_256_w0, .avx2 },
+ .{ .vpbroadcastd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x58 }, 0, .vex_128_w0, .avx2 },
+ .{ .vpbroadcastd, .rm, &.{ .ymm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x58 }, 0, .vex_256_w0, .avx2 },
+ .{ .vpbroadcastq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_128_w0, .avx2 },
+ .{ .vpbroadcastq, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_256_w0, .avx2 },
+ .{ .vpbroadcasti128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x5a }, 0, .vex_256_w0, .avx2 },
+
.{ .vpcmpeqb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x74 }, 0, .vex_256_wig, .avx2 },
.{ .vpcmpeqw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x75 }, 0, .vex_256_wig, .avx2 },
.{ .vpcmpeqd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x76 }, 0, .vex_256_wig, .avx2 },
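
These `vpbroadcast*` entries back the new AVX2 integer path in `airSplat`; the source-level pattern they serve is `@splat` of an integer vector. A tiny usage sketch (plain test code, assuming a Zig version where `@splat` takes a single operand and infers the vector type from the result location):

```zig
const std = @import("std");

test "splat one byte across a 16-lane vector" {
    // With AVX2 this can lower to a single vpbroadcastb; without it, the
    // backend falls back to the punpcklbw/pshuflw/pshufd sequence in airSplat.
    const v: @Vector(16, u8) = @splat(@as(u8, 0xaa));
    try std.testing.expectEqual(@as(u8, 0xaa), v[0]);
    try std.testing.expectEqual(@as(u8, 0xaa), v[15]);
}
```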
src/arch/x86_64/Mir.zig
@@ -255,6 +255,8 @@ pub const Inst = struct {
vp_q,
/// VEX-Encoded Packed ___ Double Quadword
vp_dq,
+ /// VEX-Encoded Packed ___ Integer Data
+ vp_i128,
/// VEX-Encoded ___ Scalar Single-Precision Values
v_ss,
/// VEX-Encoded ___ Packed Single-Precision Values
src/link/Coff.zig
@@ -388,6 +388,7 @@ fn populateMissingMetadata(self: *Coff) !void {
self.rdata_section_index = try self.allocateSection(".rdata", file_size, .{
.CNT_INITIALIZED_DATA = 1,
.MEM_READ = 1,
+ .MEM_WRITE = 1,
});
}
src/codegen.zig
@@ -376,7 +376,10 @@ pub fn generateSymbol(
.val = switch (aggregate.storage) {
.bytes => unreachable,
.elems => |elems| elems[@as(usize, @intCast(index))],
- .repeated_elem => |elem| elem,
+ .repeated_elem => |elem| if (index < array_type.len)
+ elem
+ else
+ array_type.sentinel,
}.toValue(),
}, code, debug_output, reloc_info)) {
.ok => {},
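
The `generateSymbol` fix above covers constants whose elements all repeat and whose array type also carries a sentinel: the element at `index == len` must be taken from the sentinel rather than the repeated element. A small sketch (ordinary Zig, hypothetical names) of such a value:

```zig
const std = @import("std");

// Every element is equal, so the value may be stored as a repeated element;
// the trailing sentinel at index 3 is still 0, not 0xff.
const filled: [3:0]u8 = .{ 0xff, 0xff, 0xff };

test "sentinel follows the repeated elements" {
    try std.testing.expectEqual(@as(u8, 0xff), filled[2]);
    try std.testing.expectEqual(@as(u8, 0), filled[3]);
}
```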
src/Compilation.zig
@@ -1121,7 +1121,9 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
const include_compiler_rt = options.want_compiler_rt orelse needs_c_symbols;
const must_single_thread = target_util.isSingleThreaded(options.target);
- const single_threaded = options.single_threaded orelse must_single_thread;
+ const single_threaded = options.single_threaded orelse must_single_thread or
+ // x86_64 codegen doesn't support TLV for most object formats
+ (!use_llvm and options.target.cpu.arch == .x86_64 and options.target.ofmt != .macho);
if (must_single_thread and !single_threaded) {
return error.TargetRequiresSingleThreaded;
}
test/behavior/vector.zig
@@ -1260,7 +1260,6 @@ test "zero multiplicand" {
test "@intCast to u0" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO