Commit f83ebd8e6c
Changed files (4)
src
arch
test
behavior
src/arch/x86_64/CodeGen.zig
@@ -1550,7 +1550,9 @@ fn gen(self: *Self) InnerError!void {
const backpatch_push_callee_preserved_regs = try self.asmPlaceholder();
try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp);
const backpatch_frame_align = try self.asmPlaceholder();
+ const backpatch_frame_align_extra = try self.asmPlaceholder();
const backpatch_stack_alloc = try self.asmPlaceholder();
+ const backpatch_stack_alloc_extra = try self.asmPlaceholder();
switch (self.ret_mcv.long) {
.none, .unreach => {},
@@ -1599,24 +1601,67 @@ fn gen(self: *Self) InnerError!void {
const need_stack_adjust = frame_layout.stack_adjust > 0;
const need_save_reg = frame_layout.save_reg_list.count() > 0;
if (need_frame_align) {
+ const page_align = @as(u32, math.maxInt(u32)) << 12;
self.mir_instructions.set(backpatch_frame_align, .{
.tag = .@"and",
.ops = .ri_s,
.data = .{ .ri = .{
.r1 = .rsp,
- .i = frame_layout.stack_mask,
+ .i = @max(frame_layout.stack_mask, page_align),
} },
});
+ if (frame_layout.stack_mask < page_align) {
+ self.mir_instructions.set(backpatch_frame_align_extra, .{
+ .tag = .pseudo,
+ .ops = .pseudo_probe_align_ri_s,
+ .data = .{ .ri = .{
+ .r1 = .rsp,
+ .i = ~frame_layout.stack_mask & page_align,
+ } },
+ });
+ }
}
if (need_stack_adjust) {
- self.mir_instructions.set(backpatch_stack_alloc, .{
- .tag = .sub,
- .ops = .ri_s,
- .data = .{ .ri = .{
- .r1 = .rsp,
- .i = frame_layout.stack_adjust,
- } },
- });
+ const page_size: u32 = 1 << 12;
+ if (frame_layout.stack_adjust <= page_size) {
+ self.mir_instructions.set(backpatch_stack_alloc, .{
+ .tag = .sub,
+ .ops = .ri_s,
+ .data = .{ .ri = .{
+ .r1 = .rsp,
+ .i = frame_layout.stack_adjust,
+ } },
+ });
+ } else if (frame_layout.stack_adjust <
+ page_size * Lower.pseudo_probe_adjust_unrolled_max_insts)
+ {
+ self.mir_instructions.set(backpatch_stack_alloc, .{
+ .tag = .pseudo,
+ .ops = .pseudo_probe_adjust_unrolled_ri_s,
+ .data = .{ .ri = .{
+ .r1 = .rsp,
+ .i = frame_layout.stack_adjust,
+ } },
+ });
+ } else {
+ self.mir_instructions.set(backpatch_stack_alloc, .{
+ .tag = .pseudo,
+ .ops = .pseudo_probe_adjust_setup_rri_s,
+ .data = .{ .rri = .{
+ .r1 = .rsp,
+ .r2 = .rax,
+ .i = frame_layout.stack_adjust,
+ } },
+ });
+ self.mir_instructions.set(backpatch_stack_alloc_extra, .{
+ .tag = .pseudo,
+ .ops = .pseudo_probe_adjust_loop_rr,
+ .data = .{ .rr = .{
+ .r1 = .rsp,
+ .r2 = .rax,
+ } },
+ });
+ }
}
if (need_frame_align or need_stack_adjust) {
self.mir_instructions.set(backpatch_stack_dealloc, .{
src/arch/x86_64/Lower.zig
@@ -9,19 +9,33 @@ result_insts_len: u8 = undefined,
result_relocs_len: u8 = undefined,
result_insts: [
std.mem.max(usize, &.{
+ 1, // non-pseudo instructions
2, // cmovcc: cmovcc \ cmovcc
3, // setcc: setcc \ setcc \ logicop
2, // jcc: jcc \ jcc
+ pseudo_probe_align_insts,
+ pseudo_probe_adjust_unrolled_max_insts,
+ pseudo_probe_adjust_setup_insts,
+ pseudo_probe_adjust_loop_insts,
abi.Win64.callee_preserved_regs.len, // push_regs/pop_regs
abi.SysV.callee_preserved_regs.len, // push_regs/pop_regs
})
]Instruction = undefined,
result_relocs: [
std.mem.max(usize, &.{
+ 1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea
2, // jcc: jcc \ jcc
+ 2, // test \ jcc \ lea \ probe \ jmp (jcc and jmp each take a reloc)
+ 1, // probe \ sub \ jcc
})
]Reloc = undefined,
+pub const pseudo_probe_align_insts = 5; // test \ jcc \ lea \ probe \ jmp
+pub const pseudo_probe_adjust_unrolled_max_insts =
+ pseudo_probe_adjust_setup_insts + pseudo_probe_adjust_loop_insts;
+pub const pseudo_probe_adjust_setup_insts = 2; // mov \ sub
+pub const pseudo_probe_adjust_loop_insts = 3; // probe \ sub \ jcc
+
pub const Error = error{
OutOfMemory,
LowerFail,
@@ -62,6 +76,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
else => try lower.generic(inst),
.pseudo => switch (inst.ops) {
.pseudo_cmov_z_and_np_rr => {
+ assert(inst.data.rr.fixes == ._);
try lower.emit(.none, .cmovnz, &.{
.{ .reg = inst.data.rr.r2 },
.{ .reg = inst.data.rr.r1 },
@@ -72,6 +87,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_cmov_nz_or_p_rr => {
+ assert(inst.data.rr.fixes == ._);
try lower.emit(.none, .cmovnz, &.{
.{ .reg = inst.data.rr.r1 },
.{ .reg = inst.data.rr.r2 },
@@ -84,6 +100,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.pseudo_cmov_nz_or_p_rm_sib,
.pseudo_cmov_nz_or_p_rm_rip,
=> {
+ assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .cmovnz, &.{
.{ .reg = inst.data.rx.r1 },
.{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
@@ -94,6 +111,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_set_z_and_np_r => {
+ assert(inst.data.rr.fixes == ._);
try lower.emit(.none, .setz, &.{
.{ .reg = inst.data.rr.r1 },
});
@@ -108,6 +126,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.pseudo_set_z_and_np_m_sib,
.pseudo_set_z_and_np_m_rip,
=> {
+ assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .setz, &.{
.{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
});
@@ -120,6 +139,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_set_nz_or_p_r => {
+ assert(inst.data.rr.fixes == ._);
try lower.emit(.none, .setnz, &.{
.{ .reg = inst.data.rr.r1 },
});
@@ -134,6 +154,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.pseudo_set_nz_or_p_m_sib,
.pseudo_set_nz_or_p_m_rip,
=> {
+ assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .setnz, &.{
.{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
});
@@ -146,6 +167,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_j_z_and_np_inst => {
+ assert(inst.data.inst.fixes == ._);
try lower.emit(.none, .jnz, &.{
.{ .imm = lower.reloc(.{ .inst = index + 1 }) },
});
@@ -154,6 +176,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_j_nz_or_p_inst => {
+ assert(inst.data.inst.fixes == ._);
try lower.emit(.none, .jnz, &.{
.{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) },
});
@@ -162,6 +185,78 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
+ .pseudo_probe_align_ri_s => {
+ try lower.emit(.none, .@"test", &.{
+ .{ .reg = inst.data.ri.r1 },
+ .{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) },
+ });
+ try lower.emit(.none, .jz, &.{
+ .{ .imm = lower.reloc(.{ .inst = index + 1 }) },
+ });
+ try lower.emit(.none, .lea, &.{
+ .{ .reg = inst.data.ri.r1 },
+ .{ .mem = Memory.sib(.qword, .{
+ .base = .{ .reg = inst.data.ri.r1 },
+ .disp = -page_size,
+ }) },
+ });
+ try lower.emit(.none, .@"test", &.{
+ .{ .mem = Memory.sib(.dword, .{
+ .base = .{ .reg = inst.data.ri.r1 },
+ }) },
+ .{ .reg = inst.data.ri.r1.to32() },
+ });
+ try lower.emit(.none, .jmp, &.{
+ .{ .imm = lower.reloc(.{ .inst = index }) },
+ });
+ assert(lower.result_insts_len == pseudo_probe_align_insts);
+ },
+ .pseudo_probe_adjust_unrolled_ri_s => {
+ var offset = page_size;
+ while (offset < @bitCast(i32, inst.data.ri.i)) : (offset += page_size) {
+ try lower.emit(.none, .@"test", &.{
+ .{ .mem = Memory.sib(.dword, .{
+ .base = .{ .reg = inst.data.ri.r1 },
+ .disp = -offset,
+ }) },
+ .{ .reg = inst.data.ri.r1.to32() },
+ });
+ }
+ try lower.emit(.none, .sub, &.{
+ .{ .reg = inst.data.ri.r1 },
+ .{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) },
+ });
+ assert(lower.result_insts_len <= pseudo_probe_adjust_unrolled_max_insts);
+ },
+ .pseudo_probe_adjust_setup_rri_s => {
+ try lower.emit(.none, .mov, &.{
+ .{ .reg = inst.data.rri.r2.to32() },
+ .{ .imm = Immediate.s(@bitCast(i32, inst.data.rri.i)) },
+ });
+ try lower.emit(.none, .sub, &.{
+ .{ .reg = inst.data.rri.r1 },
+ .{ .reg = inst.data.rri.r2 },
+ });
+ assert(lower.result_insts_len == pseudo_probe_adjust_setup_insts);
+ },
+ .pseudo_probe_adjust_loop_rr => {
+ try lower.emit(.none, .@"test", &.{
+ .{ .mem = Memory.sib(.dword, .{
+ .base = .{ .reg = inst.data.rr.r1 },
+ .scale_index = .{ .scale = 1, .index = inst.data.rr.r2 },
+ .disp = -page_size,
+ }) },
+ .{ .reg = inst.data.rr.r1.to32() },
+ });
+ try lower.emit(.none, .sub, &.{
+ .{ .reg = inst.data.rr.r2 },
+ .{ .imm = Immediate.s(page_size) },
+ });
+ try lower.emit(.none, .jae, &.{
+ .{ .imm = lower.reloc(.{ .inst = index }) },
+ });
+ assert(lower.result_insts_len == pseudo_probe_adjust_loop_insts);
+ },
.pseudo_push_reg_list => try lower.pushPopRegList(.push, inst),
.pseudo_pop_reg_list => try lower.pushPopRegList(.pop, inst),
@@ -440,6 +535,8 @@ fn pushPopRegList(lower: *Lower, comptime mnemonic: Mnemonic, inst: Mir.Inst) Er
}});
}
+const page_size: i32 = 1 << 12;
+
const abi = @import("abi.zig");
const assert = std.debug.assert;
const bits = @import("bits.zig");
src/arch/x86_64/Mir.zig
@@ -740,6 +740,18 @@ pub const Inst = struct {
/// Uses `inst` payload.
pseudo_j_nz_or_p_inst,
+ /// Probe alignment: while `r1 & i` is nonzero, lower `r1` by one page and probe the memory at `r1`.
+ /// Uses `ri` payload.
+ pseudo_probe_align_ri_s,
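+ /// Probe adjust unrolled: probe `r1 - offset` for each positive multiple of the page size `offset` below `i`, then subtract `i` from `r1`.
+ /// Uses `ri` payload.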
+ pseudo_probe_adjust_unrolled_ri_s,
+ /// Probe adjust setup: move `i` into the 32-bit form of `r2`, then subtract `r2` from `r1`.
+ /// Uses `rri` payload.
+ pseudo_probe_adjust_setup_rri_s,
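+ /// Probe adjust loop: probe `r1 + r2 - page_size`, subtract the page size from `r2`, and repeat until that subtraction borrows.
+ /// Uses `rr` payload.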
+ pseudo_probe_adjust_loop_rr,
/// Push registers
/// Uses `reg_list` payload.
pseudo_push_reg_list,
test/behavior/memset.zig
@@ -120,7 +120,6 @@ test "memset with large array element, runtime known" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) return error.SkipZigTest;
const A = [128]u64;
var buf: [5]A = undefined;
@@ -139,7 +138,6 @@ test "memset with large array element, comptime known" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) return error.SkipZigTest;
const A = [128]u64;
var buf: [5]A = undefined;