Commit 5cbfd5819e

Jakub Konka <kubkon@jakubkonka.com>
2022-05-19 17:36:04
x64: check for floating-point intrinsics in codegen
1 parent 283f40e
Changed files (4)
src/arch/x86_64/abi.zig
@@ -383,11 +383,11 @@ pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8
 pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 };
 pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx };
 
-const avx_regs = [_]Register{
+const sse_avx_regs = [_]Register{
     .ymm0, .ymm1, .ymm2,  .ymm3,  .ymm4,  .ymm5,  .ymm6,  .ymm7,
     .ymm8, .ymm9, .ymm10, .ymm11, .ymm12, .ymm13, .ymm14, .ymm15,
 };
-const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ avx_regs;
+const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ sse_avx_regs;
 pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers);
 
 // Register classes
@@ -401,7 +401,7 @@ pub const RegisterClass = struct {
         }, true);
         break :blk set;
     };
-    pub const avx: RegisterBitSet = blk: {
+    pub const sse: RegisterBitSet = blk: {
         var set = RegisterBitSet.initEmpty();
         set.setRangeValue(.{
             .start = caller_preserved_regs.len + callee_preserved_regs.len,
src/arch/x86_64/CodeGen.zig
@@ -39,7 +39,7 @@ const RegisterLock = RegisterManager.RegisterLock;
 const Register = bits.Register;
 
 const gp = abi.RegisterClass.gp;
-const avx = abi.RegisterClass.avx;
+const sse = abi.RegisterClass.sse;
 
 const InnerError = error{
     OutOfMemory,
@@ -881,15 +881,18 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
         switch (elem_ty.zigTypeTag()) {
             .Vector => return self.fail("TODO allocRegOrMem for Vector type", .{}),
             .Float => {
-                // TODO check if AVX available
-                const ptr_bytes: u64 = 32;
-                if (abi_size <= ptr_bytes) {
-                    if (self.register_manager.tryAllocReg(inst, .{
-                        .selector_mask = avx,
-                    })) |reg| {
-                        return MCValue{ .register = registerAlias(reg, abi_size) };
+                if (self.intrinsicsAllowed(elem_ty)) {
+                    const ptr_bytes: u64 = 32;
+                    if (abi_size <= ptr_bytes) {
+                        if (self.register_manager.tryAllocReg(inst, .{
+                            .selector_mask = sse,
+                        })) |reg| {
+                            return MCValue{ .register = registerAlias(reg, abi_size) };
+                        }
                     }
                 }
+
+                return self.fail("TODO allocRegOrMem for Float type without SSE/AVX support", .{});
             },
             else => {
                 // Make sure the type can fit in a register before we try to allocate one.
@@ -969,8 +972,11 @@ pub fn spillRegisters(self: *Self, comptime count: comptime_int, registers: [cou
 /// allocated. A second call to `copyToTmpRegister` may return the same register.
 /// This can have a side effect of spilling instructions to the stack to free up a register.
 fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
-    const mask = switch (ty.zigTypeTag()) {
-        .Float => avx,
+    const mask: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) {
+        .Float => blk: {
+            if (self.intrinsicsAllowed(ty)) break :blk sse;
+            return self.fail("TODO copy {} to register", .{ty.fmtDebug()});
+        },
         else => gp,
     };
     const reg: Register = try self.register_manager.allocReg(null, .{
@@ -985,8 +991,11 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
 /// This can have a side effect of spilling instructions to the stack to free up a register.
 /// WARNING make sure that the allocated register matches the returned MCValue from an instruction!
 fn copyToRegisterWithInstTracking(self: *Self, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue) !MCValue {
-    const mask = switch (ty.zigTypeTag()) {
-        .Float => avx,
+    const mask: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) {
+        .Float => blk: {
+            if (self.intrinsicsAllowed(ty)) break :blk sse;
+            return self.fail("TODO copy {} to register", .{ty.fmtDebug()});
+        },
         else => gp,
     };
     const reg: Register = try self.register_manager.allocReg(reg_owner, .{
@@ -3469,27 +3478,32 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu
                 },
                 .register => |src_reg| switch (dst_ty.zigTypeTag()) {
                     .Float => {
-                        const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) {
-                            .f32 => switch (mir_tag) {
-                                .add => Mir.Inst.Tag.add_f32,
-                                .cmp => Mir.Inst.Tag.cmp_f32,
-                                else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}),
-                            },
-                            .f64 => switch (mir_tag) {
-                                .add => Mir.Inst.Tag.add_f64,
-                                .cmp => Mir.Inst.Tag.cmp_f64,
-                                else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}),
-                            },
-                            else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}),
-                        };
-                        _ = try self.addInst(.{
-                            .tag = actual_tag,
-                            .ops = Mir.Inst.Ops.encode(.{
-                                .reg1 = dst_reg.to128(),
-                                .reg2 = src_reg.to128(),
-                            }),
-                            .data = undefined,
-                        });
+                        if (self.intrinsicsAllowed(dst_ty)) {
+                            const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) {
+                                .f32 => switch (mir_tag) {
+                                    .add => Mir.Inst.Tag.add_f32_avx,
+                                    .cmp => Mir.Inst.Tag.cmp_f32_avx,
+                                    else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}),
+                                },
+                                .f64 => switch (mir_tag) {
+                                    .add => Mir.Inst.Tag.add_f64_avx,
+                                    .cmp => Mir.Inst.Tag.cmp_f64_avx,
+                                    else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}),
+                                },
+                                else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}),
+                            };
+                            _ = try self.addInst(.{
+                                .tag = actual_tag,
+                                .ops = Mir.Inst.Ops.encode(.{
+                                    .reg1 = dst_reg.to128(),
+                                    .reg2 = src_reg.to128(),
+                                }),
+                                .data = undefined,
+                            });
+                            return;
+                        }
+
+                        return self.fail("TODO genBinOpMir for float register-register and no intrinsics", .{});
                     },
                     else => {
                         _ = try self.addInst(.{
@@ -5326,24 +5340,29 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
         .register => |reg| {
             switch (ty.zigTypeTag()) {
                 .Float => {
-                    const tag: Mir.Inst.Tag = switch (ty.tag()) {
-                        .f32 => .mov_f32,
-                        .f64 => .mov_f64,
-                        else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}),
-                    };
-                    _ = try self.addInst(.{
-                        .tag = tag,
-                        .ops = Mir.Inst.Ops.encode(.{
-                            .reg1 = switch (ty.tag()) {
-                                .f32 => .esp,
-                                .f64 => .rsp,
-                                else => unreachable,
-                            },
-                            .reg2 = reg.to128(),
-                            .flags = 0b01,
-                        }),
-                        .data = .{ .imm = @bitCast(u32, -stack_offset) },
-                    });
+                    if (self.intrinsicsAllowed(ty)) {
+                        const tag: Mir.Inst.Tag = switch (ty.tag()) {
+                            .f32 => .mov_f32_avx,
+                            .f64 => .mov_f64_avx,
+                            else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}),
+                        };
+                        _ = try self.addInst(.{
+                            .tag = tag,
+                            .ops = Mir.Inst.Ops.encode(.{
+                                .reg1 = switch (ty.tag()) {
+                                    .f32 => .esp,
+                                    .f64 => .rsp,
+                                    else => unreachable,
+                                },
+                                .reg2 = reg.to128(),
+                                .flags = 0b01,
+                            }),
+                            .data = .{ .imm = @bitCast(u32, -stack_offset) },
+                        });
+                        return;
+                    }
+
+                    return self.fail("TODO genSetStackArg for register with no intrinsics", .{});
                 },
                 else => {
                     _ = try self.addInst(.{
@@ -5505,24 +5524,29 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl
 
             switch (ty.zigTypeTag()) {
                 .Float => {
-                    const tag: Mir.Inst.Tag = switch (ty.tag()) {
-                        .f32 => .mov_f32,
-                        .f64 => .mov_f64,
-                        else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}),
-                    };
-                    _ = try self.addInst(.{
-                        .tag = tag,
-                        .ops = Mir.Inst.Ops.encode(.{
-                            .reg1 = switch (ty.tag()) {
-                                .f32 => base_reg.to32(),
-                                .f64 => base_reg.to64(),
-                                else => unreachable,
-                            },
-                            .reg2 = reg.to128(),
-                            .flags = 0b01,
-                        }),
-                        .data = .{ .imm = @bitCast(u32, -stack_offset) },
-                    });
+                    if (self.intrinsicsAllowed(ty)) {
+                        const tag: Mir.Inst.Tag = switch (ty.tag()) {
+                            .f32 => .mov_f32_avx,
+                            .f64 => .mov_f64_avx,
+                            else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}),
+                        };
+                        _ = try self.addInst(.{
+                            .tag = tag,
+                            .ops = Mir.Inst.Ops.encode(.{
+                                .reg1 = switch (ty.tag()) {
+                                    .f32 => base_reg.to32(),
+                                    .f64 => base_reg.to64(),
+                                    else => unreachable,
+                                },
+                                .reg2 = reg.to128(),
+                                .flags = 0b01,
+                            }),
+                            .data = .{ .imm = @bitCast(u32, -stack_offset) },
+                        });
+                        return;
+                    }
+
+                    return self.fail("TODO genSetStack for register for type float with no intrinsics", .{});
                 },
                 else => {
                     if (!math.isPowerOfTwo(abi_size)) {
@@ -6026,21 +6050,25 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
                     },
                 },
                 .Float => {
-                    const tag: Mir.Inst.Tag = switch (ty.tag()) {
-                        .f32 => .mov_f32,
-                        .f64 => .mov_f64,
-                        else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}),
-                    };
-                    _ = try self.addInst(.{
-                        .tag = tag,
-                        .ops = Mir.Inst.Ops.encode(.{
-                            .reg1 = reg.to128(),
-                            .reg2 = src_reg.to128(),
-                            .flags = 0b10,
-                        }),
-                        .data = undefined,
-                    });
-                    return;
+                    if (self.intrinsicsAllowed(ty)) {
+                        const tag: Mir.Inst.Tag = switch (ty.tag()) {
+                            .f32 => .mov_f32_avx,
+                            .f64 => .mov_f64_avx,
+                            else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}),
+                        };
+                        _ = try self.addInst(.{
+                            .tag = tag,
+                            .ops = Mir.Inst.Ops.encode(.{
+                                .reg1 = reg.to128(),
+                                .reg2 = src_reg.to128(),
+                                .flags = 0b10,
+                            }),
+                            .data = undefined,
+                        });
+                        return;
+                    }
+
+                    return self.fail("TODO genSetReg from register for float with no intrinsics", .{});
                 },
                 else => {},
             }
@@ -6073,24 +6101,29 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
                 const base_reg = try self.register_manager.allocReg(null, .{ .selector_mask = gp });
                 try self.loadMemPtrIntoRegister(base_reg, Type.usize, mcv);
 
-                const tag: Mir.Inst.Tag = switch (ty.tag()) {
-                    .f32 => .mov_f32,
-                    .f64 => .mov_f64,
-                    else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}),
-                };
+                if (self.intrinsicsAllowed(ty)) {
+                    const tag: Mir.Inst.Tag = switch (ty.tag()) {
+                        .f32 => .mov_f32_avx,
+                        .f64 => .mov_f64_avx,
+                        else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}),
+                    };
 
-                _ = try self.addInst(.{
-                    .tag = tag,
-                    .ops = Mir.Inst.Ops.encode(.{
-                        .reg1 = reg.to128(),
-                        .reg2 = switch (ty.tag()) {
-                            .f32 => base_reg.to32(),
-                            .f64 => base_reg.to64(),
-                            else => unreachable,
-                        },
-                    }),
-                    .data = .{ .imm = 0 },
-                });
+                    _ = try self.addInst(.{
+                        .tag = tag,
+                        .ops = Mir.Inst.Ops.encode(.{
+                            .reg1 = reg.to128(),
+                            .reg2 = switch (ty.tag()) {
+                                .f32 => base_reg.to32(),
+                                .f64 => base_reg.to64(),
+                                else => unreachable,
+                            },
+                        }),
+                        .data = .{ .imm = 0 },
+                    });
+                    return;
+                }
+
+                return self.fail("TODO genSetReg from memory for float with no intrinsics", .{});
             },
             else => {
                 if (x <= math.maxInt(i32)) {
@@ -6183,24 +6216,27 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
                     },
                 },
                 .Float => {
-                    const tag: Mir.Inst.Tag = switch (ty.tag()) {
-                        .f32 => .mov_f32,
-                        .f64 => .mov_f64,
-                        else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}),
-                    };
-                    _ = try self.addInst(.{
-                        .tag = tag,
-                        .ops = Mir.Inst.Ops.encode(.{
-                            .reg1 = reg.to128(),
-                            .reg2 = switch (ty.tag()) {
-                                .f32 => .ebp,
-                                .f64 => .rbp,
-                                else => unreachable,
-                            },
-                        }),
-                        .data = .{ .imm = @bitCast(u32, -off) },
-                    });
-                    return;
+                    if (self.intrinsicsAllowed(ty)) {
+                        const tag: Mir.Inst.Tag = switch (ty.tag()) {
+                            .f32 => .mov_f32_avx,
+                            .f64 => .mov_f64_avx,
+                            else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}),
+                        };
+                        _ = try self.addInst(.{
+                            .tag = tag,
+                            .ops = Mir.Inst.Ops.encode(.{
+                                .reg1 = reg.to128(),
+                                .reg2 = switch (ty.tag()) {
+                                    .f32 => .ebp,
+                                    .f64 => .rbp,
+                                    else => unreachable,
+                                },
+                            }),
+                            .data = .{ .imm = @bitCast(u32, -off) },
+                        });
+                        return;
+                    }
+                    return self.fail("TODO genSetReg from stack offset for float with no intrinsics", .{});
                 },
                 else => {},
             }
@@ -6995,3 +7031,12 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void {
         },
     }
 }
+
+/// Reports whether a value of type `ty` may be handled with AVX instructions
+/// on the current target.
+///
+/// For `f32` and `f64` this is true when the target CPU feature set contains
+/// `avx` or `avx2`. Every other type yields `false`.
+fn intrinsicsAllowed(self: *Self, ty: Type) bool {
+    return switch (ty.tag()) {
+        .f32,
+        .f64,
+        => Target.x86.featureSetHasAny(self.target.cpu.features, .{ .avx, .avx2 }),
+        // Callers reach this for any `.Float` type, not just f32/f64, so this arm
+        // is not actually unreachable. Answer "not allowed" so that the callers'
+        // own `self.fail("TODO ...")` fallbacks report the missing support
+        // instead of trapping (or invoking undefined behaviour in release builds).
+        else => false, // TODO finish this off
+    };
+}
src/arch/x86_64/Emit.zig
@@ -183,14 +183,14 @@ pub fn lowerMir(emit: *Emit) InnerError!void {
             .nop => try emit.mirNop(),
 
             // AVX instructions
-            .mov_f64 => try emit.mirMovFloatAvx(.vmovsd, inst),
-            .mov_f32 => try emit.mirMovFloatAvx(.vmovss, inst),
+            .mov_f64_avx => try emit.mirMovFloatAvx(.vmovsd, inst),
+            .mov_f32_avx => try emit.mirMovFloatAvx(.vmovss, inst),
 
-            .add_f64 => try emit.mirAddFloatAvx(.vaddsd, inst),
-            .add_f32 => try emit.mirAddFloatAvx(.vaddss, inst),
+            .add_f64_avx => try emit.mirAddFloatAvx(.vaddsd, inst),
+            .add_f32_avx => try emit.mirAddFloatAvx(.vaddss, inst),
 
-            .cmp_f64 => try emit.mirCmpFloatAvx(.vucomisd, inst),
-            .cmp_f32 => try emit.mirCmpFloatAvx(.vucomiss, inst),
+            .cmp_f64_avx => try emit.mirCmpFloatAvx(.vucomisd, inst),
+            .cmp_f32_avx => try emit.mirCmpFloatAvx(.vucomiss, inst),
 
             // Pseudo-instructions
             .call_extern => try emit.mirCallExtern(inst),
src/arch/x86_64/Mir.zig
@@ -350,18 +350,18 @@ pub const Inst = struct {
         ///       0b00  reg1, qword ptr [reg2 + imm32]
         ///       0b01  qword ptr [reg1 + imm32], reg2
         ///       0b10  reg1, reg2
-        mov_f64,
-        mov_f32,
+        mov_f64_avx,
+        mov_f32_avx,
 
         /// ops flags:  form:
         ///       0b00  reg1, reg1, reg2
-        add_f64,
-        add_f32,
+        add_f64_avx,
+        add_f32_avx,
 
         /// ops flags:  form:
         ///
-        cmp_f64,
-        cmp_f32,
+        cmp_f64_avx,
+        cmp_f32_avx,
 
         /// Pseudo-instructions
         /// call extern function