Commit f83ebd8e6c

Jacob Young <jacobly0@users.noreply.github.com>
2023-05-12 08:11:37
x86_64: implement stack probing
1 parent 3681da2
Changed files (4)
src
test
behavior
src/arch/x86_64/CodeGen.zig
@@ -1550,7 +1550,9 @@ fn gen(self: *Self) InnerError!void {
         const backpatch_push_callee_preserved_regs = try self.asmPlaceholder();
         try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp);
         const backpatch_frame_align = try self.asmPlaceholder();
+        const backpatch_frame_align_extra = try self.asmPlaceholder();
         const backpatch_stack_alloc = try self.asmPlaceholder();
+        const backpatch_stack_alloc_extra = try self.asmPlaceholder();
 
         switch (self.ret_mcv.long) {
             .none, .unreach => {},
@@ -1599,24 +1601,67 @@ fn gen(self: *Self) InnerError!void {
         const need_stack_adjust = frame_layout.stack_adjust > 0;
         const need_save_reg = frame_layout.save_reg_list.count() > 0;
         if (need_frame_align) {
+            const page_align = @as(u32, math.maxInt(u32)) << 12;
             self.mir_instructions.set(backpatch_frame_align, .{
                 .tag = .@"and",
                 .ops = .ri_s,
                 .data = .{ .ri = .{
                     .r1 = .rsp,
-                    .i = frame_layout.stack_mask,
+                    .i = @max(frame_layout.stack_mask, page_align),
                 } },
             });
+            if (frame_layout.stack_mask < page_align) {
+                self.mir_instructions.set(backpatch_frame_align_extra, .{
+                    .tag = .pseudo,
+                    .ops = .pseudo_probe_align_ri_s,
+                    .data = .{ .ri = .{
+                        .r1 = .rsp,
+                        .i = ~frame_layout.stack_mask & page_align,
+                    } },
+                });
+            }
         }
         if (need_stack_adjust) {
-            self.mir_instructions.set(backpatch_stack_alloc, .{
-                .tag = .sub,
-                .ops = .ri_s,
-                .data = .{ .ri = .{
-                    .r1 = .rsp,
-                    .i = frame_layout.stack_adjust,
-                } },
-            });
+            const page_size: u32 = 1 << 12;
+            if (frame_layout.stack_adjust <= page_size) {
+                self.mir_instructions.set(backpatch_stack_alloc, .{
+                    .tag = .sub,
+                    .ops = .ri_s,
+                    .data = .{ .ri = .{
+                        .r1 = .rsp,
+                        .i = frame_layout.stack_adjust,
+                    } },
+                });
+            } else if (frame_layout.stack_adjust <
+                page_size * Lower.pseudo_probe_adjust_unrolled_max_insts)
+            {
+                self.mir_instructions.set(backpatch_stack_alloc, .{
+                    .tag = .pseudo,
+                    .ops = .pseudo_probe_adjust_unrolled_ri_s,
+                    .data = .{ .ri = .{
+                        .r1 = .rsp,
+                        .i = frame_layout.stack_adjust,
+                    } },
+                });
+            } else {
+                self.mir_instructions.set(backpatch_stack_alloc, .{
+                    .tag = .pseudo,
+                    .ops = .pseudo_probe_adjust_setup_rri_s,
+                    .data = .{ .rri = .{
+                        .r1 = .rsp,
+                        .r2 = .rax,
+                        .i = frame_layout.stack_adjust,
+                    } },
+                });
+                self.mir_instructions.set(backpatch_stack_alloc_extra, .{
+                    .tag = .pseudo,
+                    .ops = .pseudo_probe_adjust_loop_rr,
+                    .data = .{ .rr = .{
+                        .r1 = .rsp,
+                        .r2 = .rax,
+                    } },
+                });
+            }
         }
         if (need_frame_align or need_stack_adjust) {
             self.mir_instructions.set(backpatch_stack_dealloc, .{
src/arch/x86_64/Lower.zig
@@ -9,19 +9,33 @@ result_insts_len: u8 = undefined,
 result_relocs_len: u8 = undefined,
 result_insts: [
     std.mem.max(usize, &.{
+        1, // non-pseudo instructions
         2, // cmovcc: cmovcc \ cmovcc
         3, // setcc: setcc \ setcc \ logicop
         2, // jcc: jcc \ jcc
+        pseudo_probe_align_insts, // probe_align: test \ jcc \ lea \ probe \ jmp
+        pseudo_probe_adjust_unrolled_max_insts, // probe_adjust_unrolled: probe \ ... \ probe \ sub
+        pseudo_probe_adjust_setup_insts, // probe_adjust_setup: mov \ sub
+        pseudo_probe_adjust_loop_insts, // probe_adjust_loop: probe \ sub \ jcc
         abi.Win64.callee_preserved_regs.len, // push_regs/pop_regs
         abi.SysV.callee_preserved_regs.len, // push_regs/pop_regs
     })
 ]Instruction = undefined,
 result_relocs: [
     std.mem.max(usize, &.{
+        1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea
         2, // jcc: jcc \ jcc
+        2, // probe_align: test \ jcc \ lea \ probe \ jmp (relocs on jcc and jmp)
+        1, // probe_adjust_loop: probe \ sub \ jcc (reloc on jcc)
     })
 ]Reloc = undefined,
 
+pub const pseudo_probe_align_insts = 5; // test \ jcc \ lea \ probe \ jmp
+pub const pseudo_probe_adjust_unrolled_max_insts =
+    pseudo_probe_adjust_setup_insts + pseudo_probe_adjust_loop_insts; // unrolled form: probe \ ... \ probe \ sub
+pub const pseudo_probe_adjust_setup_insts = 2; // mov \ sub
+pub const pseudo_probe_adjust_loop_insts = 3; // probe \ sub \ jcc
+
 pub const Error = error{
     OutOfMemory,
     LowerFail,
@@ -62,6 +76,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
         else => try lower.generic(inst),
         .pseudo => switch (inst.ops) {
             .pseudo_cmov_z_and_np_rr => {
+                assert(inst.data.rr.fixes == ._);
                 try lower.emit(.none, .cmovnz, &.{
                     .{ .reg = inst.data.rr.r2 },
                     .{ .reg = inst.data.rr.r1 },
@@ -72,6 +87,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
                 });
             },
             .pseudo_cmov_nz_or_p_rr => {
+                assert(inst.data.rr.fixes == ._);
                 try lower.emit(.none, .cmovnz, &.{
                     .{ .reg = inst.data.rr.r1 },
                     .{ .reg = inst.data.rr.r2 },
@@ -84,6 +100,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
             .pseudo_cmov_nz_or_p_rm_sib,
             .pseudo_cmov_nz_or_p_rm_rip,
             => {
+                assert(inst.data.rx.fixes == ._);
                 try lower.emit(.none, .cmovnz, &.{
                     .{ .reg = inst.data.rx.r1 },
                     .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
@@ -94,6 +111,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
                 });
             },
             .pseudo_set_z_and_np_r => {
+                assert(inst.data.rr.fixes == ._);
                 try lower.emit(.none, .setz, &.{
                     .{ .reg = inst.data.rr.r1 },
                 });
@@ -108,6 +126,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
             .pseudo_set_z_and_np_m_sib,
             .pseudo_set_z_and_np_m_rip,
             => {
+                assert(inst.data.rx.fixes == ._);
                 try lower.emit(.none, .setz, &.{
                     .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
                 });
@@ -120,6 +139,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
                 });
             },
             .pseudo_set_nz_or_p_r => {
+                assert(inst.data.rr.fixes == ._);
                 try lower.emit(.none, .setnz, &.{
                     .{ .reg = inst.data.rr.r1 },
                 });
@@ -134,6 +154,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
             .pseudo_set_nz_or_p_m_sib,
             .pseudo_set_nz_or_p_m_rip,
             => {
+                assert(inst.data.rx.fixes == ._);
                 try lower.emit(.none, .setnz, &.{
                     .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
                 });
@@ -146,6 +167,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
                 });
             },
             .pseudo_j_z_and_np_inst => {
+                assert(inst.data.inst.fixes == ._);
                 try lower.emit(.none, .jnz, &.{
                     .{ .imm = lower.reloc(.{ .inst = index + 1 }) },
                 });
@@ -154,6 +176,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
                 });
             },
             .pseudo_j_nz_or_p_inst => {
+                assert(inst.data.inst.fixes == ._);
                 try lower.emit(.none, .jnz, &.{
                     .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) },
                 });
@@ -162,6 +185,78 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
                 });
             },
 
+            .pseudo_probe_align_ri_s => {
+                try lower.emit(.none, .@"test", &.{
+                    .{ .reg = inst.data.ri.r1 },
+                    .{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) },
+                });
+                try lower.emit(.none, .jz, &.{
+                    .{ .imm = lower.reloc(.{ .inst = index + 1 }) },
+                });
+                try lower.emit(.none, .lea, &.{
+                    .{ .reg = inst.data.ri.r1 },
+                    .{ .mem = Memory.sib(.qword, .{
+                        .base = .{ .reg = inst.data.ri.r1 },
+                        .disp = -page_size,
+                    }) },
+                });
+                try lower.emit(.none, .@"test", &.{
+                    .{ .mem = Memory.sib(.dword, .{
+                        .base = .{ .reg = inst.data.ri.r1 },
+                    }) },
+                    .{ .reg = inst.data.ri.r1.to32() },
+                });
+                try lower.emit(.none, .jmp, &.{
+                    .{ .imm = lower.reloc(.{ .inst = index }) },
+                });
+                assert(lower.result_insts_len == pseudo_probe_align_insts);
+            },
+            .pseudo_probe_adjust_unrolled_ri_s => {
+                var offset = page_size;
+                while (offset < @bitCast(i32, inst.data.ri.i)) : (offset += page_size) {
+                    try lower.emit(.none, .@"test", &.{
+                        .{ .mem = Memory.sib(.dword, .{
+                            .base = .{ .reg = inst.data.ri.r1 },
+                            .disp = -offset,
+                        }) },
+                        .{ .reg = inst.data.ri.r1.to32() },
+                    });
+                }
+                try lower.emit(.none, .sub, &.{
+                    .{ .reg = inst.data.ri.r1 },
+                    .{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) },
+                });
+                assert(lower.result_insts_len <= pseudo_probe_adjust_unrolled_max_insts);
+            },
+            .pseudo_probe_adjust_setup_rri_s => {
+                try lower.emit(.none, .mov, &.{
+                    .{ .reg = inst.data.rri.r2.to32() },
+                    .{ .imm = Immediate.s(@bitCast(i32, inst.data.rri.i)) },
+                });
+                try lower.emit(.none, .sub, &.{
+                    .{ .reg = inst.data.rri.r1 },
+                    .{ .reg = inst.data.rri.r2 },
+                });
+                assert(lower.result_insts_len == pseudo_probe_adjust_setup_insts);
+            },
+            .pseudo_probe_adjust_loop_rr => {
+                try lower.emit(.none, .@"test", &.{
+                    .{ .mem = Memory.sib(.dword, .{
+                        .base = .{ .reg = inst.data.rr.r1 },
+                        .scale_index = .{ .scale = 1, .index = inst.data.rr.r2 },
+                        .disp = -page_size,
+                    }) },
+                    .{ .reg = inst.data.rr.r1.to32() },
+                });
+                try lower.emit(.none, .sub, &.{
+                    .{ .reg = inst.data.rr.r2 },
+                    .{ .imm = Immediate.s(page_size) },
+                });
+                try lower.emit(.none, .jae, &.{
+                    .{ .imm = lower.reloc(.{ .inst = index }) },
+                });
+                assert(lower.result_insts_len == pseudo_probe_adjust_loop_insts);
+            },
             .pseudo_push_reg_list => try lower.pushPopRegList(.push, inst),
             .pseudo_pop_reg_list => try lower.pushPopRegList(.pop, inst),
 
@@ -440,6 +535,8 @@ fn pushPopRegList(lower: *Lower, comptime mnemonic: Mnemonic, inst: Mir.Inst) Er
     }});
 }
 
+const page_size: i32 = 1 << 12; // 4096 bytes: displacement/stride used by the stack probe lowerings
+
 const abi = @import("abi.zig");
 const assert = std.debug.assert;
 const bits = @import("bits.zig");
src/arch/x86_64/Mir.zig
@@ -740,6 +740,18 @@ pub const Inst = struct {
         /// Uses `inst` payload.
         pseudo_j_nz_or_p_inst,
 
+        /// Probe alignment: while `r1 & i` is nonzero, move `r1` down one page and touch that page.
+        /// Uses `ri` payload.
+        pseudo_probe_align_ri_s,
+        /// Probe adjust unrolled: touch each page within `i` bytes below `r1`, then subtract `i` from `r1`.
+        /// Uses `ri` payload.
+        pseudo_probe_adjust_unrolled_ri_s,
+        /// Probe adjust setup: load `i` into `r2`, then subtract `r2` from `r1`.
+        /// Uses `rri` payload.
+        pseudo_probe_adjust_setup_rri_s,
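+        /// Probe adjust loop: touch the page at `r1 + r2` minus one page, subtract one page from `r2`, and repeat until that subtraction borrows.
+        /// Uses `rr` payload.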
+        pseudo_probe_adjust_loop_rr,
         /// Push registers
         /// Uses `reg_list` payload.
         pseudo_push_reg_list,
test/behavior/memset.zig
@@ -120,7 +120,6 @@ test "memset with large array element, runtime known" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) return error.SkipZigTest;
 
     const A = [128]u64;
     var buf: [5]A = undefined;
@@ -139,7 +138,6 @@ test "memset with large array element, comptime known" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) return error.SkipZigTest;
 
     const A = [128]u64;
     var buf: [5]A = undefined;