Commit 2cbd442a9d

Jacob Young <jacobly0@users.noreply.github.com>
2023-05-13 09:05:49
x86_64: implement integer vector movement
1 parent f83ebd8
Changed files (3)
src
arch
test
src/arch/x86_64/CodeGen.zig
@@ -8579,56 +8579,174 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAirResult(inst, result);
 }
 
-fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag {
+const MoveStrategy = union(enum) {
+    move: Mir.Inst.FixedTag,
+    insert_extract: InsertExtract,
+    vex_insert_extract: InsertExtract,
+
+    const InsertExtract = struct {
+        insert: Mir.Inst.FixedTag,
+        extract: Mir.Inst.FixedTag,
+        imm: Immediate,
+    };
+};
+fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
     switch (ty.zigTypeTag()) {
-        else => return .{ ._, .mov },
+        else => return .{ .move = .{ ._, .mov } },
         .Float => switch (ty.floatBits(self.target.*)) {
-            16 => unreachable, // needs special handling
-            32 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
-            64 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
-            128 => return if (self.hasFeature(.avx))
+            16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
+                .insert = .{ .vp_w, .insr },
+                .extract = .{ .vp_w, .extr },
+                .imm = Immediate.u(0),
+            } } else .{ .insert_extract = .{
+                .insert = .{ .p_w, .insr },
+                .extract = .{ .p_w, .extr },
+                .imm = Immediate.u(0),
+            } },
+            32 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov } },
+            64 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov } },
+            128 => return .{ .move = if (self.hasFeature(.avx))
                 if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
-            else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
+            else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
             else => {},
         },
         .Vector => switch (ty.childType().zigTypeTag()) {
+            .Int => switch (ty.childType().intInfo(self.target.*).bits) {
+                8 => switch (ty.vectorLen()) {
+                    1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
+                        .insert = .{ .vp_b, .insr },
+                        .extract = .{ .vp_b, .extr },
+                        .imm = Immediate.u(0),
+                    } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{
+                        .insert = .{ .p_b, .insr },
+                        .extract = .{ .p_b, .extr },
+                        .imm = Immediate.u(0),
+                    } },
+                    2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
+                        .insert = .{ .vp_w, .insr },
+                        .extract = .{ .vp_w, .extr },
+                        .imm = Immediate.u(0),
+                    } } else .{ .insert_extract = .{
+                        .insert = .{ .p_w, .insr },
+                        .extract = .{ .p_w, .extr },
+                        .imm = Immediate.u(0),
+                    } },
+                    3...4 => return .{ .move = if (self.hasFeature(.avx))
+                        .{ .v_ss, .mov }
+                    else
+                        .{ ._ss, .mov } },
+                    5...8 => return .{ .move = if (self.hasFeature(.avx))
+                        .{ .v_sd, .mov }
+                    else
+                        .{ ._sd, .mov } },
+                    else => {},
+                },
+                16 => switch (ty.vectorLen()) {
+                    1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
+                        .insert = .{ .vp_w, .insr },
+                        .extract = .{ .vp_w, .extr },
+                        .imm = Immediate.u(0),
+                    } } else .{ .insert_extract = .{
+                        .insert = .{ .p_w, .insr },
+                        .extract = .{ .p_w, .extr },
+                        .imm = Immediate.u(0),
+                    } },
+                    2 => return .{ .move = if (self.hasFeature(.avx))
+                        .{ .v_ss, .mov }
+                    else
+                        .{ ._ss, .mov } },
+                    3...4 => return .{ .move = if (self.hasFeature(.avx))
+                        .{ .v_sd, .mov }
+                    else
+                        .{ ._sd, .mov } },
+                    5...8 => return .{ .move = if (self.hasFeature(.avx))
+                        .{ .v_ps, .mov }
+                    else
+                        .{ ._ps, .mov } },
+                    else => {},
+                },
+                32 => switch (ty.vectorLen()) {
+                    1 => return .{ .move = if (self.hasFeature(.avx))
+                        .{ .v_ss, .mov }
+                    else
+                        .{ ._ss, .mov } },
+                    2 => return .{ .move = if (self.hasFeature(.avx))
+                        .{ .v_sd, .mov }
+                    else
+                        .{ ._sd, .mov } },
+                    3...4 => return .{ .move = if (self.hasFeature(.avx))
+                        if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
+                    else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+                    5...8 => if (self.hasFeature(.avx))
+                        return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
+                    else => {},
+                },
+                64 => switch (ty.vectorLen()) {
+                    1 => return .{ .move = if (self.hasFeature(.avx))
+                        .{ .v_sd, .mov }
+                    else
+                        .{ ._sd, .mov } },
+                    2 => return .{ .move = if (self.hasFeature(.avx))
+                        if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
+                    else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+                    3...4 => if (self.hasFeature(.avx))
+                        return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
+                    else => {},
+                },
+                else => {},
+            },
             .Float => switch (ty.childType().floatBits(self.target.*)) {
                 16 => switch (ty.vectorLen()) {
-                    1 => unreachable, // needs special handling
-                    2 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
-                    3...4 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
-                    5...8 => return if (self.hasFeature(.avx))
+                    1 => {},
+                    2 => return .{ .move = if (self.hasFeature(.avx))
+                        .{ .v_ss, .mov }
+                    else
+                        .{ ._ss, .mov } },
+                    3...4 => return .{ .move = if (self.hasFeature(.avx))
+                        .{ .v_sd, .mov }
+                    else
+                        .{ ._sd, .mov } },
+                    5...8 => return .{ .move = if (self.hasFeature(.avx))
                         if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
-                    else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
+                    else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
                     9...16 => if (self.hasFeature(.avx))
-                        return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
+                        return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
                     else => {},
                 },
                 32 => switch (ty.vectorLen()) {
-                    1 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
-                    2 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
-                    3...4 => return if (self.hasFeature(.avx))
+                    1 => return .{ .move = if (self.hasFeature(.avx))
+                        .{ .v_ss, .mov }
+                    else
+                        .{ ._ss, .mov } },
+                    2 => return .{ .move = if (self.hasFeature(.avx))
+                        .{ .v_sd, .mov }
+                    else
+                        .{ ._sd, .mov } },
+                    3...4 => return .{ .move = if (self.hasFeature(.avx))
                         if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
-                    else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
+                    else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
                     5...8 => if (self.hasFeature(.avx))
-                        return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
+                        return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
                     else => {},
                 },
                 64 => switch (ty.vectorLen()) {
-                    1 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
-                    2 => return if (self.hasFeature(.avx))
+                    1 => return .{ .move = if (self.hasFeature(.avx))
+                        .{ .v_sd, .mov }
+                    else
+                        .{ ._sd, .mov } },
+                    2 => return .{ .move = if (self.hasFeature(.avx))
                         if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
-                    else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
+                    else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
                     3...4 => if (self.hasFeature(.avx))
-                        return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
+                        return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
                     else => {},
                 },
                 128 => switch (ty.vectorLen()) {
-                    1 => return if (self.hasFeature(.avx))
+                    1 => return .{ .move = if (self.hasFeature(.avx))
                         if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
-                    else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
+                    else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
                     2 => if (self.hasFeature(.avx))
-                        return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
+                        return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
                     else => {},
                 },
                 else => {},
@@ -8636,7 +8754,7 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag {
             else => {},
         },
     }
-    return self.fail("TODO movMirTag for {}", .{ty.fmt(self.bin_file.options.module.?)});
+    return self.fail("TODO moveStrategy for {}", .{ty.fmt(self.bin_file.options.module.?)});
 }
 
 fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void {
@@ -8764,6 +8882,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
         .load_frame,
         .lea_frame,
         => {
+            const dst_alias = registerAlias(dst_reg, abi_size);
             const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
                 .register_offset, .indirect => |reg_off| .{
                     .base = .{ .reg = reg_off.reg },
@@ -8775,71 +8894,81 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
                 },
                 else => unreachable,
             });
-            if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
-                try self.asmRegisterMemoryImmediate(
-                    .{ .p_w, .insr },
-                    registerAlias(dst_reg, abi_size),
+            switch (@as(MoveStrategy, switch (src_mcv) {
+                .register_offset => |reg_off| switch (reg_off.off) {
+                    0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }),
+                    else => .{ .move = .{ ._, .lea } },
+                },
+                .indirect => try self.moveStrategy(ty, false),
+                .load_frame => |frame_addr| try self.moveStrategy(
+                    ty,
+                    self.getFrameAddrAlignment(frame_addr) >= ty.abiAlignment(self.target.*),
+                ),
+                .lea_frame => .{ .move = .{ ._, .lea } },
+                else => unreachable,
+            })) {
+                .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem),
+                .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
+                    ie.insert,
+                    dst_alias,
                     src_mem,
-                    Immediate.u(0),
-                )
-            else
-                try self.asmRegisterMemory(
-                    switch (src_mcv) {
-                        .register_offset => |reg_off| switch (reg_off.off) {
-                            0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }),
-                            else => .{ ._, .lea },
-                        },
-                        .indirect => try self.movMirTag(ty, false),
-                        .load_frame => |frame_addr| try self.movMirTag(
-                            ty,
-                            self.getFrameAddrAlignment(frame_addr) >= ty.abiAlignment(self.target.*),
-                        ),
-                        .lea_frame => .{ ._, .lea },
-                        else => unreachable,
-                    },
-                    registerAlias(dst_reg, abi_size),
+                    ie.imm,
+                ),
+                .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
+                    ie.insert,
+                    dst_alias,
+                    dst_alias,
                     src_mem,
-                );
+                    ie.imm,
+                ),
+            }
         },
         .memory, .load_direct, .load_got, .load_tlv => {
             switch (src_mcv) {
                 .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr| {
+                    const dst_alias = registerAlias(dst_reg, abi_size);
                     const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
                         .base = .{ .reg = .ds },
                         .disp = small_addr,
                     });
-                    return if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
-                        self.asmRegisterMemoryImmediate(
-                            .{ .p_w, .insr },
-                            registerAlias(dst_reg, abi_size),
+                    switch (try self.moveStrategy(ty, mem.isAlignedGeneric(
+                        u32,
+                        @bitCast(u32, small_addr),
+                        ty.abiAlignment(self.target.*),
+                    ))) {
+                        .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem),
+                        .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
+                            ie.insert,
+                            dst_alias,
                             src_mem,
-                            Immediate.u(0),
-                        )
-                    else
-                        self.asmRegisterMemory(
-                            try self.movMirTag(ty, mem.isAlignedGeneric(
-                                u32,
-                                @bitCast(u32, small_addr),
-                                ty.abiAlignment(self.target.*),
-                            )),
-                            registerAlias(dst_reg, abi_size),
+                            ie.imm,
+                        ),
+                        .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
+                            ie.insert,
+                            dst_alias,
+                            dst_alias,
                             src_mem,
-                        );
+                            ie.imm,
+                        ),
+                    }
                 },
-                .load_direct => |sym_index| if (!ty.isRuntimeFloat()) {
-                    const atom_index = try self.owner.getSymbolIndex(self);
-                    _ = try self.addInst(.{
-                        .tag = .mov,
-                        .ops = .direct_reloc,
-                        .data = .{ .rx = .{
-                            .r1 = dst_reg.to64(),
-                            .payload = try self.addExtra(Mir.Reloc{
-                                .atom_index = atom_index,
-                                .sym_index = sym_index,
-                            }),
-                        } },
-                    });
-                    return;
+                .load_direct => |sym_index| switch (ty.zigTypeTag()) {
+                    else => {
+                        const atom_index = try self.owner.getSymbolIndex(self);
+                        _ = try self.addInst(.{
+                            .tag = .mov,
+                            .ops = .direct_reloc,
+                            .data = .{ .rx = .{
+                                .r1 = dst_reg.to64(),
+                                .payload = try self.addExtra(Mir.Reloc{
+                                    .atom_index = atom_index,
+                                    .sym_index = sym_index,
+                                }),
+                            } },
+                        });
+                        return;
+                    },
+                    .Float, .Vector => {},
                 },
                 .load_got, .load_tlv => {},
                 else => unreachable,
@@ -8849,22 +8978,26 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
             defer self.register_manager.unlockReg(addr_lock);
 
+            const dst_alias = registerAlias(dst_reg, abi_size);
             const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
                 .base = .{ .reg = addr_reg },
             });
-            if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
-                try self.asmRegisterMemoryImmediate(
-                    .{ .p_w, .insr },
-                    registerAlias(dst_reg, abi_size),
+            switch (try self.moveStrategy(ty, false)) {
+                .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem),
+                .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
+                    ie.insert,
+                    dst_alias,
                     src_mem,
-                    Immediate.u(0),
-                )
-            else
-                try self.asmRegisterMemory(
-                    try self.movMirTag(ty, false),
-                    registerAlias(dst_reg, abi_size),
+                    ie.imm,
+                ),
+                .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
+                    ie.insert,
+                    dst_alias,
+                    dst_alias,
                     src_mem,
-                );
+                    ie.imm,
+                ),
+            }
         },
         .lea_direct, .lea_got => |sym_index| {
             const atom_index = try self.owner.getSymbolIndex(self);
@@ -8966,36 +9099,33 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal
                 Memory.PtrSize.fromSize(abi_size),
                 .{ .base = base, .disp = disp },
             );
-            if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
-                try self.asmMemoryRegisterImmediate(
-                    .{ .p_w, .extr },
-                    dst_mem,
-                    src_reg.to128(),
-                    Immediate.u(0),
-                )
-            else
-                try self.asmMemoryRegister(
-                    try self.movMirTag(ty, switch (base) {
-                        .none => mem.isAlignedGeneric(
-                            u32,
-                            @bitCast(u32, disp),
-                            ty.abiAlignment(self.target.*),
-                        ),
-                        .reg => |reg| switch (reg) {
-                            .es, .cs, .ss, .ds => mem.isAlignedGeneric(
-                                u32,
-                                @bitCast(u32, disp),
-                                ty.abiAlignment(self.target.*),
-                            ),
-                            else => false,
-                        },
-                        .frame => |frame_index| self.getFrameAddrAlignment(
-                            .{ .index = frame_index, .off = disp },
-                        ) >= ty.abiAlignment(self.target.*),
-                    }),
+            const src_alias = registerAlias(src_reg, abi_size);
+            switch (try self.moveStrategy(ty, switch (base) {
+                .none => mem.isAlignedGeneric(
+                    u32,
+                    @bitCast(u32, disp),
+                    ty.abiAlignment(self.target.*),
+                ),
+                .reg => |reg| switch (reg) {
+                    .es, .cs, .ss, .ds => mem.isAlignedGeneric(
+                        u32,
+                        @bitCast(u32, disp),
+                        ty.abiAlignment(self.target.*),
+                    ),
+                    else => false,
+                },
+                .frame => |frame_index| self.getFrameAddrAlignment(
+                    .{ .index = frame_index, .off = disp },
+                ) >= ty.abiAlignment(self.target.*),
+            })) {
+                .move => |tag| try self.asmMemoryRegister(tag, dst_mem, src_alias),
+                .insert_extract, .vex_insert_extract => |ie| try self.asmMemoryRegisterImmediate(
+                    ie.extract,
                     dst_mem,
-                    registerAlias(src_reg, abi_size),
-                );
+                    src_alias,
+                    ie.imm,
+                ),
+            }
         },
         .register_overflow => |ro| {
             try self.genSetMem(
test/behavior/maximum_minimum.zig
@@ -146,7 +146,6 @@ test "@min/@max more than two arguments" {
 
 test "@min/@max more than two vector arguments" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
test/behavior/vector.zig
@@ -1129,7 +1129,6 @@ test "loading the second vector from a slice of vectors" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
 
     @setRuntimeSafety(false);
     var small_bases = [2]@Vector(2, u8){
@@ -1219,7 +1218,6 @@ test "zero multiplicand" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
 
     const zeros = @Vector(2, u32){ 0.0, 0.0 };
     var ones = @Vector(2, u32){ 1.0, 1.0 };
@@ -1324,7 +1322,6 @@ test "store to vector in slice" {
 test "addition of vectors represented as strings" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
 
     const V = @Vector(3, u8);