Commit 6d3c7bd436

Jakub Konka <kubkon@jakubkonka.com>
2022-02-17 16:38:05
x64: pass all args on stack in debug and if not extern fn
If the function is not extern and we are compiling in Debug mode, always pass function parameters on the stack. This improves the debugging experience since the parameters are then not held in volatile registers that the procedure's code could clobber. Also finish the implementation of `imul_complex`.
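A rough illustration of the motivation (hypothetical snippet, not part of the commit): in a Debug build, the parameters of a non-extern function like the one below now each get a dedicated stack slot for the whole body, so a debugger can still print `a`, `b` and `c` late in the function, after later instructions have reused the scratch registers that would previously have held them.

fn addMul(a: u64, b: u64, c: u64) u64 {
    // With this change, in Debug builds the three parameters live at fixed
    // stack offsets instead of in parameter registers for the whole body.
    const sum = a + b;
    return sum * c;
}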
1 parent 56e9575
Changed files (2)
src/arch/x86_64/CodeGen.zig
@@ -1737,6 +1737,10 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo
         .immediate => |imm| {
             try self.setRegOrMem(elem_ty, dst_mcv, .{ .memory = imm });
         },
+        .stack_offset => {
+            const reg = try self.copyToTmpRegister(ptr_ty, ptr);
+            try self.load(dst_mcv, .{ .register = reg }, ptr_ty);
+        },
         .ptr_stack_offset => |off| {
             try self.setRegOrMem(elem_ty, dst_mcv, .{ .stack_offset = off });
         },
@@ -1787,9 +1791,6 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo
             const reg = try self.copyToTmpRegister(ptr_ty, ptr);
             try self.load(dst_mcv, .{ .register = reg }, ptr_ty);
         },
-        .stack_offset => {
-            return self.fail("TODO implement loading from MCValue.stack_offset", .{});
-        },
     }
 }
 
@@ -1832,6 +1833,10 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
         .immediate => |imm| {
             try self.setRegOrMem(value_ty, .{ .memory = imm }, value);
         },
+        .stack_offset => {
+            const reg = try self.copyToTmpRegister(ptr_ty, ptr);
+            try self.store(.{ .register = reg }, value, ptr_ty, value_ty);
+        },
         .ptr_stack_offset => |off| {
             try self.genSetStack(value_ty, off, value);
         },
@@ -1909,6 +1914,10 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                         .data = .{ .imm = 0 },
                     });
                 },
+                .stack_offset => {
+                    const tmp_reg = try self.copyToTmpRegister(value_ty, value);
+                    return self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty);
+                },
                 else => |other| {
                     return self.fail("TODO implement set pointee with {}", .{other});
                 },
@@ -2020,9 +2029,6 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                 else => return self.fail("TODO implement storing {} to MCValue.memory", .{value}),
             }
         },
-        .stack_offset => {
-            return self.fail("TODO implement storing to MCValue.stack_offset", .{});
-        },
     }
 }
 
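To see how the new `.stack_offset` arms of `load` and `store` get exercised (hypothetical example, assuming the Debug calling-convention change further down applies): when a pointer parameter itself lives in a stack slot, dereferencing it first reloads the pointer into a temporary register via `copyToTmpRegister` and then recurses with a `.register` pointer.

fn readThrough(p: *u64) u64 {
    // In a Debug build `p` is passed on the stack, so its MCValue is a
    // `.stack_offset`; `p.*` therefore takes the new arm in `load` above.
    return p.*;
}

fn writeThrough(p: *u64, v: u64) void {
    // Likewise, this store goes through the new `.stack_offset` arm of `store`.
    p.* = v;
}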
@@ -2452,7 +2458,18 @@ fn genIMulOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !
                         return self.genIMulOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg });
                     }
                 },
-                .embedded_in_code, .memory, .stack_offset => {
+                .stack_offset => |off| {
+                    _ = try self.addInst(.{
+                        .tag = .imul_complex,
+                        .ops = (Mir.Ops{
+                            .reg1 = dst_reg,
+                            .reg2 = .rbp,
+                            .flags = 0b01,
+                        }).encode(),
+                        .data = .{ .imm = @bitCast(u32, -off) },
+                    });
+                },
+                .embedded_in_code, .memory => {
                     return self.fail("TODO implement x86 multiply source memory", .{});
                 },
                 .got_load, .direct_load => {
@@ -3520,6 +3537,13 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
         .dead => unreachable,
         .ptr_embedded_in_code => unreachable,
         .unreach, .none => return,
+        .undef => {
+            if (abi_size <= 8) {
+                const reg = try self.copyToTmpRegister(ty, mcv);
+                return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
+            }
+            try self.genInlineMemset(stack_offset, .rsp, ty, .{ .immediate = 0xaa });
+        },
         .compare_flags_unsigned,
         .compare_flags_signed,
         => {
@@ -3598,7 +3622,6 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
 
             try self.genInlineMemcpy(stack_offset, .rsp, ty, mcv);
         },
-        else => return self.fail("TODO implement args on stack for {}", .{mcv}),
     }
 }
 
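The new `.undef` arm in `genSetStackArg` follows the backend's usual convention of filling undefined memory with the 0xaa byte pattern: values of 8 bytes or less go through a temporary register, larger ones through `genInlineMemset` relative to `.rsp`. A hypothetical call that would hit the memset path:

fn takesBuf(buf: [32]u8) u8 {
    return buf[0];
}

fn caller() u8 {
    // A comptime-known `undefined` argument would typically resolve to MCValue
    // `.undef`; the 32-byte argument slot is then 0xaa-filled instead of
    // hitting the removed "TODO implement args on stack" error.
    return takesBuf(undefined);
}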
@@ -3617,7 +3640,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerErro
                 2 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaa }),
                 4 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaa }),
                 8 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
-                else => return self.genInlineMemset(ty, stack_offset, .{ .immediate = 0xaa }),
+                else => return self.genInlineMemset(stack_offset, .rbp, ty, .{ .immediate = 0xaa }),
             }
         },
         .compare_flags_unsigned,
@@ -3943,12 +3966,20 @@ fn genInlineMemcpy(self: *Self, stack_offset: i32, stack_reg: Register, ty: Type
     try self.performReloc(loop_reloc);
 }
 
-fn genInlineMemset(self: *Self, ty: Type, stack_offset: i32, value: MCValue) InnerError!void {
+fn genInlineMemset(
+    self: *Self,
+    stack_offset: i32,
+    stack_register: Register,
+    ty: Type,
+    value: MCValue,
+) InnerError!void {
     try self.register_manager.getReg(.rax, null);
+
     const abi_size = ty.abiSize(self.target.*);
     if (stack_offset > 128) {
         return self.fail("TODO inline memset with large stack offset", .{});
     }
+
     const negative_offset = @bitCast(u32, -stack_offset);
 
     // We are actually counting `abi_size` bytes; however, we reuse the index register
@@ -4005,7 +4036,7 @@ fn genInlineMemset(self: *Self, ty: Type, stack_offset: i32, value: MCValue) Inn
             _ = try self.addInst(.{
                 .tag = .mov_mem_index_imm,
                 .ops = (Mir.Ops{
-                    .reg1 = .rbp,
+                    .reg1 = stack_register.to64(),
                 }).encode(),
                 .data = .{ .payload = payload },
             });
@@ -4731,20 +4762,40 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues {
             var next_int_reg: usize = 0;
             var by_reg = std.AutoHashMap(usize, usize).init(self.bin_file.allocator);
             defer by_reg.deinit();
-            for (param_types) |ty, i| {
-                if (!ty.hasRuntimeBits()) continue;
-                const param_size = @intCast(u32, ty.abiSize(self.target.*));
-                const pass_in_reg = switch (ty.zigTypeTag()) {
-                    .Bool => true,
-                    .Int, .Enum => param_size <= 8,
-                    .Pointer => ty.ptrSize() != .Slice,
-                    .Optional => ty.isPtrLikeOptional(),
-                    else => false,
-                };
-                if (pass_in_reg) {
-                    if (next_int_reg >= c_abi_int_param_regs.len) break;
-                    try by_reg.putNoClobber(i, next_int_reg);
-                    next_int_reg += 1;
+
+            // If we want debug output, we store all args on stack for better liveness of args
+            // in debugging contexts such as previewing the args in the debugger anywhere in
+            // the procedure. Passing the args via registers can lead to reusing the register
+            // for local ops thus clobbering the input arg forever.
+            // This of course excludes C ABI calls.
+            const omit_args_in_registers = blk: {
+                if (cc == .C) break :blk false;
+                switch (self.bin_file.options.optimize_mode) {
+                    .Debug => break :blk true,
+                    else => break :blk false,
+                }
+            };
+            if (!omit_args_in_registers) {
+                for (param_types) |ty, i| {
+                    if (!ty.hasRuntimeBits()) continue;
+                    const param_size = @intCast(u32, ty.abiSize(self.target.*));
+                    // For simplicity of codegen, slices and other types are always pushed onto the stack.
+                    // TODO: look into optimizing this by passing things as registers sometimes,
+                    // such as ptr and len of slices as separate registers.
+                    // TODO: also we need to honor the C ABI for relevant types rather than passing on
+                    // the stack here.
+                    const pass_in_reg = switch (ty.zigTypeTag()) {
+                        .Bool => true,
+                        .Int, .Enum => param_size <= 8,
+                        .Pointer => ty.ptrSize() != .Slice,
+                        .Optional => ty.isPtrLikeOptional(),
+                        else => false,
+                    };
+                    if (pass_in_reg) {
+                        if (next_int_reg >= c_abi_int_param_regs.len) break;
+                        try by_reg.putNoClobber(i, next_int_reg);
+                        next_int_reg += 1;
+                    }
                 }
             }
 
@@ -4765,11 +4816,6 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues {
                     result.args[i] = .{ .register = aliased_reg };
                     next_int_reg += 1;
                 } else {
-                    // For simplicity of codegen, slices and other types are always pushed onto the stack.
-                    // TODO: look into optimizing this by passing things as registers sometimes,
-                    // such as ptr and len of slices as separate registers.
-                    // TODO: also we need to honor the C ABI for relevant types rather than passing on
-                    // the stack here.
                     const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align);
                     result.args[i] = .{ .stack_offset = @intCast(i32, offset) };
                     next_stack_offset = offset;
src/arch/x86_64/Emit.zig
@@ -691,11 +691,26 @@ fn mirIMulComplex(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
         0b00 => {
             return lowerToRmEnc(.imul, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code);
         },
+        0b01 => {
+            const imm = emit.mir.instructions.items(.data)[inst].imm;
+            const src_reg: ?Register = if (ops.reg2 == .none) null else ops.reg2;
+            return lowerToRmEnc(.imul, ops.reg1, RegisterOrMemory.mem(.qword_ptr, .{
+                .disp = imm,
+                .base = src_reg,
+            }), emit.code);
+        },
         0b10 => {
             const imm = emit.mir.instructions.items(.data)[inst].imm;
             return lowerToRmiEnc(.imul, ops.reg1, RegisterOrMemory.reg(ops.reg2), imm, emit.code);
         },
-        else => return emit.fail("TODO implement imul", .{}),
+        0b11 => {
+            const payload = emit.mir.instructions.items(.data)[inst].payload;
+            const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data;
+            return lowerToRmiEnc(.imul, ops.reg1, RegisterOrMemory.mem(.qword_ptr, .{
+                .disp = imm_pair.dest_off,
+                .base = ops.reg2,
+            }), imm_pair.operand, emit.code);
+        },
     }
 }
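To tie the two halves of the commit together (hypothetical source, not from the commit): with the 0b01 (register * memory) and 0b11 (memory * immediate, via `Mir.ImmPair`) forms now lowered, a multiply whose source operand sits in a stack slot can be emitted as `imul reg, qword ptr [rbp - off]` instead of failing with the old "TODO implement x86 multiply source memory".

fn mulSpilled(a: u64, b: u64) u64 {
    // In a Debug build both parameters live in stack slots, so `genIMulOpMir`
    // sees a `.stack_offset` source and emits the new 0b01 `imul_complex` form.
    return a * b;
}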