Commit 8c8dfb35f3
Changed files (8):
  lib/std/crypto/aes/aesni.zig
  lib/std/Thread/Pool.zig
  lib/std/Thread.zig
  src/arch/x86_64/abi.zig
  src/arch/x86_64/CodeGen.zig
  src/main.zig
  src/register_manager.zig
  test/behavior/x86_64/math.zig
lib/std/crypto/aes/aesni.zig
@@ -4,7 +4,7 @@ const mem = std.mem;
const debug = std.debug;
const has_vaes = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .vaes);
-const has_avx512f = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);
+const has_avx512f = builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);
/// A single AES block.
pub const Block = struct {
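The added `builtin.zig_backend != .stage2_x86_64` clause keeps the AVX-512 code paths out of builds made with the self-hosted x86_64 backend, even when the target CPU reports the feature. A minimal sketch of how such a comptime gate is consumed; the `encryptWide` helper is hypothetical, not part of this file:

```zig
const std = @import("std");
const builtin = @import("builtin");

// Same comptime gate as in the hunk above: true only on x86_64, only when the
// build is not using the self-hosted x86_64 backend, and only when the target
// CPU advertises AVX-512F.
const has_avx512f = builtin.cpu.arch == .x86_64 and
    builtin.zig_backend != .stage2_x86_64 and
    std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);

// Hypothetical consumer: the gated branch is compiled out entirely when the
// gate is false, so the excluded backend never has to lower it.
fn encryptWide(blocks: []u8) void {
    if (comptime has_avx512f) {
        // 512-bit wide path.
    } else {
        // 128/256-bit fallback path.
    }
    _ = blocks;
}
```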
lib/std/Thread/Pool.zig
@@ -27,6 +27,7 @@ pub const Options = struct {
allocator: std.mem.Allocator,
n_jobs: ?usize = null,
track_ids: bool = false,
+ stack_size: usize = std.Thread.SpawnConfig.default_stack_size,
};
pub fn init(pool: *Pool, options: Options) !void {
@@ -54,7 +55,10 @@ pub fn init(pool: *Pool, options: Options) !void {
errdefer pool.join(spawned);
for (pool.threads) |*thread| {
- thread.* = try std.Thread.spawn(.{}, worker, .{pool});
+ thread.* = try std.Thread.spawn(.{
+ .stack_size = options.stack_size,
+ .allocator = allocator,
+ }, worker, .{pool});
spawned += 1;
}
}
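The new `stack_size` option lets `Pool` callers size worker stacks explicitly instead of inheriting the spawn default. A minimal usage sketch:

```zig
const std = @import("std");

pub fn main() !void {
    var gpa_state: std.heap.GeneralPurposeAllocator(.{}) = .{};
    defer _ = gpa_state.deinit();

    var pool: std.Thread.Pool = undefined;
    try pool.init(.{
        .allocator = gpa_state.allocator(),
        // Request 32 MiB worker stacks instead of the 16 MiB default.
        .stack_size = 32 << 20,
    });
    defer pool.deinit();
}
```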
lib/std/Thread.zig
@@ -372,9 +372,11 @@ pub const SpawnConfig = struct {
// https://github.com/ziglang/zig/issues/157
/// Size in bytes of the Thread's stack
- stack_size: usize = 16 * 1024 * 1024,
+ stack_size: usize = default_stack_size,
/// The allocator to be used to allocate memory for the to-be-spawned thread
allocator: ?std.mem.Allocator = null,
+
+ pub const default_stack_size = 16 * 1024 * 1024;
};
pub const SpawnError = error{
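Exposing the default as `SpawnConfig.default_stack_size` lets callers derive from it rather than repeat the magic 16 MiB constant. A minimal sketch:

```zig
const std = @import("std");

fn worker() void {}

pub fn main() !void {
    // Twice the library default, without hard-coding 32 MiB.
    const thread = try std.Thread.spawn(.{
        .stack_size = 2 * std.Thread.SpawnConfig.default_stack_size,
    }, worker, .{});
    thread.join();
}
```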
src/arch/x86_64/abi.zig
@@ -540,8 +540,12 @@ pub fn getCAbiSseReturnRegs(cc: std.builtin.CallingConvention.Tag) []const Regis
}
pub fn getCAbiLinkerScratchReg(cc: std.builtin.CallingConvention.Tag) Register {
- const int_return_regs = getCAbiIntReturnRegs(cc);
- return int_return_regs[int_return_regs.len - 1];
+ return switch (cc) {
+ .auto => zigcc.int_return_regs[zigcc.int_return_regs.len - 1],
+ .x86_64_sysv => SysV.c_abi_int_return_regs[0],
+ .x86_64_win => Win64.c_abi_int_return_regs[0],
+ else => unreachable,
+ };
}
const gp_regs = [_]Register{
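The linker scratch register is now chosen per calling convention. Under the SysV C ABI the integer return registers are { rax, rdx }, so the old `int_return_regs[int_return_regs.len - 1]` picked rdx while the new code picks rax; under Win64 the only integer return register is rax, so that case is unchanged, and `.auto` keeps the previous last-register behavior for the Zig calling convention.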
src/arch/x86_64/CodeGen.zig
@@ -634,42 +634,14 @@ const InstTracking = struct {
}
fn reuseFrame(self: *InstTracking) void {
- switch (self.long) {
- .reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } },
- else => {},
- }
- self.short = switch (self.long) {
- .none,
- .unreach,
- .undef,
- .immediate,
- .memory,
- .load_direct,
- .lea_direct,
- .load_got,
- .lea_got,
- .load_tlv,
- .lea_tlv,
- .load_frame,
- .lea_frame,
- .load_symbol,
- .lea_symbol,
- => self.long,
- .dead,
- .eflags,
- .register,
- .register_pair,
- .register_triple,
- .register_quadruple,
- .register_offset,
- .register_overflow,
- .register_mask,
- .indirect,
- .elementwise_regs_then_frame,
- .reserved_frame,
- .air_ref,
- => unreachable,
- };
+ self.* = .init(switch (self.long) {
+ .none => switch (self.short) {
+ .dead => .none,
+ else => |short| short,
+ },
+ .reserved_frame => |index| .{ .load_frame = .{ .index = index } },
+ else => |long| long,
+ });
}
fn trackSpill(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void {
@@ -681,6 +653,15 @@ const InstTracking = struct {
fn verifyMaterialize(self: InstTracking, target: InstTracking) void {
switch (self.long) {
.none,
+ .load_frame,
+ .reserved_frame,
+ => switch (target.long) {
+ .none,
+ .load_frame,
+ .reserved_frame,
+ => {},
+ else => unreachable,
+ },
.unreach,
.undef,
.immediate,
@@ -695,15 +676,6 @@ const InstTracking = struct {
.load_symbol,
.lea_symbol,
=> assert(std.meta.eql(self.long, target.long)),
- .load_frame,
- .reserved_frame,
- => switch (target.long) {
- .none,
- .load_frame,
- .reserved_frame,
- => {},
- else => unreachable,
- },
.dead,
.eflags,
.register,
@@ -754,10 +726,11 @@ const InstTracking = struct {
tracking_log.debug("{} => {} (materialize)", .{ inst, self.* });
}
- fn resurrect(self: *InstTracking, inst: Air.Inst.Index, scope_generation: u32) void {
+ fn resurrect(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index, scope_generation: u32) !void {
switch (self.short) {
.dead => |die_generation| if (die_generation >= scope_generation) {
self.reuseFrame();
+ try function.getValue(self.short, inst);
tracking_log.debug("{} => {} (resurrect)", .{ inst, self.* });
},
else => {},
@@ -767,6 +740,7 @@ const InstTracking = struct {
fn die(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void {
if (self.short == .dead) return;
try function.freeValue(self.short);
+ if (self.long == .none) self.long = self.short;
self.short = .{ .dead = function.scope_generation };
tracking_log.debug("{} => {} (death)", .{ inst, self.* });
}
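Net effect of the `InstTracking` changes: `die` now records the dying value's last location in `long` when it was previously untracked, and `resurrect` both restores that location via `reuseFrame` and re-acquires it through `function.getValue`, so resources freed at death are properly re-reserved when a value comes back to life.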
@@ -2359,7 +2333,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}
fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
- @setEvalBranchQuota(1_600);
+ @setEvalBranchQuota(1_700);
const pt = cg.pt;
const zcu = pt.zcu;
const ip = &zcu.intern_pool;
@@ -2520,7 +2494,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
try ops[0].toSlicePtr(cg);
var res: [1]Temp = undefined;
- cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{
+ if (ty_pl.ty.toType().elemType2(zcu).hasRuntimeBitsIgnoreComptime(zcu)) cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32 } },
},
@@ -2625,7 +2599,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
ops[1].tracking(cg),
}),
else => |e| return e,
- };
+ } else { // hack around Sema OPV bugs
+ res[0] = ops[0];
+ }
for (ops) |op| for (res) |r| {
if (op.index == r.index) break;
} else try op.die(cg);
@@ -2637,7 +2613,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
try ops[0].toSlicePtr(cg);
var res: [1]Temp = undefined;
- cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{
+ if (ty_pl.ty.toType().elemType2(zcu).hasRuntimeBitsIgnoreComptime(zcu)) cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32 } },
},
@@ -2757,7 +2733,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
ops[1].tracking(cg),
}),
else => |e| return e,
- };
+ } else {
+ // hack around Sema OPV bugs
+ res[0] = ops[0];
+ }
for (ops) |op| for (res) |r| {
if (op.index == r.index) break;
} else try op.die(cg);
@@ -2799,79 +2778,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
})) {
else => unreachable,
inline .@"and", .@"or", .xor => |mir_tag| comptime &.{ .{
- .required_features = .{ .avx2, null, null, null },
- .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } },
- .patterns = &.{
- .{ .src = .{ .to_ymm, .mem } },
- .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_ymm, .to_ymm } },
- },
- .dst_temps = .{.{ .rc = .sse }},
- .each = .{ .once = &.{
- .{ ._, .vp_, mir_tag, .dst0y, .src0y, .src1y, ._ },
- } },
- }, .{
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } },
- .patterns = &.{
- .{ .src = .{ .to_ymm, .mem } },
- .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_ymm, .to_ymm } },
- },
- .dst_temps = .{.{ .rc = .sse }},
- .each = .{ .once = &.{
- .{ ._, .v_pd, mir_tag, .dst0y, .src0y, .src1y, ._ },
- } },
- }, .{
- .required_features = .{ .avx, null, null, null },
- .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } },
- .patterns = &.{
- .{ .src = .{ .to_xmm, .mem } },
- .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_xmm, .to_xmm } },
- },
- .dst_temps = .{.{ .rc = .sse }},
- .each = .{ .once = &.{
- .{ ._, .vp_, mir_tag, .dst0x, .src0x, .src1x, ._ },
- } },
- }, .{
- .required_features = .{ .sse2, null, null, null },
- .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } },
- .patterns = &.{
- .{ .src = .{ .to_mut_xmm, .mem } },
- .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_mut_xmm, .to_xmm } },
- },
- .dst_temps = .{.{ .ref = .src0 }},
- .each = .{ .once = &.{
- .{ ._, .p_, mir_tag, .dst0x, .src1x, ._, ._ },
- } },
- }, .{
- .required_features = .{ .sse, null, null, null },
- .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } },
- .patterns = &.{
- .{ .src = .{ .to_mut_xmm, .mem } },
- .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_mut_xmm, .to_xmm } },
- },
- .dst_temps = .{.{ .ref = .src0 }},
- .each = .{ .once = &.{
- .{ ._, ._ps, mir_tag, .dst0x, .src1x, ._, ._ },
- } },
- }, .{
- .required_features = .{ .mmx, null, null, null },
- .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } },
- .patterns = &.{
- .{ .src = .{ .to_mut_mm, .mem } },
- .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
- .{ .src = .{ .to_mut_mm, .to_mm } },
- },
- .dst_temps = .{.{ .ref = .src0 }},
- .each = .{ .once = &.{
- .{ ._, .p_, mir_tag, .dst0q, .src1q, ._, ._ },
- } },
- }, .{
- .src_constraints = .{ .{ .int_or_vec = .byte }, .{ .int_or_vec = .byte } },
+ .src_constraints = .{ .{ .size = .byte }, .{ .size = .byte } },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8 } },
.{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } },
@@ -2889,7 +2796,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, mir_tag, .dst0b, .src1b, ._, ._ },
} },
}, .{
- .src_constraints = .{ .{ .int_or_vec = .word }, .{ .int_or_vec = .word } },
+ .src_constraints = .{ .{ .size = .word }, .{ .size = .word } },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm16 } },
.{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } },
@@ -2907,7 +2814,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, mir_tag, .dst0w, .src1w, ._, ._ },
} },
}, .{
- .src_constraints = .{ .{ .int_or_vec = .dword }, .{ .int_or_vec = .dword } },
+ .src_constraints = .{ .{ .size = .dword }, .{ .size = .dword } },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm32 } },
.{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } },
@@ -2926,7 +2833,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
- .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } },
+ .src_constraints = .{ .{ .size = .qword }, .{ .size = .qword } },
.patterns = &.{
.{ .src = .{ .mut_mem, .simm32 } },
.{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } },
@@ -2943,12 +2850,81 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.each = .{ .once = &.{
.{ ._, ._, mir_tag, .dst0q, .src1q, ._, ._ },
} },
+ }, .{
+ .required_features = .{ .mmx, null, null, null },
+ .src_constraints = .{ .{ .size = .qword }, .{ .size = .qword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_mm, .mem } },
+ .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_mm, .to_mm } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .each = .{ .once = &.{
+ .{ ._, .p_, mir_tag, .dst0q, .src1q, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .size = .xword }, .{ .size = .xword } },
+ .patterns = &.{
+ .{ .src = .{ .to_xmm, .mem } },
+ .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_xmm, .to_xmm } },
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, .vp_, mir_tag, .dst0x, .src0x, .src1x, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse2, null, null, null },
+ .src_constraints = .{ .{ .size = .xword }, .{ .size = .xword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_xmm, .mem } },
+ .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_xmm, .to_xmm } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .each = .{ .once = &.{
+ .{ ._, .p_, mir_tag, .dst0x, .src1x, ._, ._ },
+ } },
+ }, .{
+ .required_features = .{ .sse, null, null, null },
+ .src_constraints = .{ .{ .size = .xword }, .{ .size = .xword } },
+ .patterns = &.{
+ .{ .src = .{ .to_mut_xmm, .mem } },
+ .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_mut_xmm, .to_xmm } },
+ },
+ .dst_temps = .{.{ .ref = .src0 }},
+ .each = .{ .once = &.{
+ .{ ._, ._ps, mir_tag, .dst0x, .src1x, ._, ._ },
+ } },
}, .{
.required_features = .{ .avx2, null, null, null },
- .src_constraints = .{
- .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } },
- .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } },
+ .src_constraints = .{ .{ .size = .yword }, .{ .size = .yword } },
+ .patterns = &.{
+ .{ .src = .{ .to_ymm, .mem } },
+ .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_ymm, .to_ymm } },
},
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, .vp_, mir_tag, .dst0y, .src0y, .src1y, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx, null, null, null },
+ .src_constraints = .{ .{ .size = .yword }, .{ .size = .yword } },
+ .patterns = &.{
+ .{ .src = .{ .to_ymm, .mem } },
+ .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
+ .{ .src = .{ .to_ymm, .to_ymm } },
+ },
+ .dst_temps = .{.{ .rc = .sse }},
+ .each = .{ .once = &.{
+ .{ ._, .v_pd, mir_tag, .dst0y, .src0y, .src1y, ._ },
+ } },
+ }, .{
+ .required_features = .{ .avx2, null, null, null },
+ .src_constraints = .{ .{ .multiple_size = .yword }, .{ .multiple_size = .yword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -2972,10 +2948,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.required_features = .{ .avx, null, null, null },
- .src_constraints = .{
- .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } },
- .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } },
- },
+ .src_constraints = .{ .{ .multiple_size = .yword }, .{ .multiple_size = .yword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -2999,10 +2972,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.required_features = .{ .avx, null, null, null },
- .src_constraints = .{
- .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
- .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
- },
+ .src_constraints = .{ .{ .multiple_size = .xword }, .{ .multiple_size = .xword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -3026,10 +2996,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.required_features = .{ .sse2, null, null, null },
- .src_constraints = .{
- .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
- .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
- },
+ .src_constraints = .{ .{ .multiple_size = .xword }, .{ .multiple_size = .xword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -3053,10 +3020,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.required_features = .{ .sse, null, null, null },
- .src_constraints = .{
- .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
- .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } },
- },
+ .src_constraints = .{ .{ .multiple_size = .xword }, .{ .multiple_size = .xword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -3080,10 +3044,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} },
}, .{
.required_features = .{ .mmx, null, null, null },
- .src_constraints = .{
- .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } },
- .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } },
- },
+ .src_constraints = .{ .{ .multiple_size = .qword }, .{ .multiple_size = .qword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -3106,10 +3067,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
- .src_constraints = .{
- .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } },
- .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } },
- },
+ .src_constraints = .{ .{ .multiple_size = .qword }, .{ .multiple_size = .qword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem } },
},
@@ -6983,7 +6941,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data;
switch (extra.compareOperator()) {
- .eq, .neq => {},
+ .eq, .neq => if (cg.typeOf(extra.lhs).scalarType(zcu).isRuntimeFloat())
+ break :fallback try cg.airCmpVector(inst),
else => break :fallback try cg.airCmpVector(inst),
}
var ops = try cg.tempsFromOperands(inst, .{ extra.lhs, extra.rhs });
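Restricting the fast `eq`/`neq` path to non-float element types is required by IEEE semantics, which differ from bit equality: `NaN == NaN` must be false even when the bit patterns match, and `+0.0 == -0.0` must be true although the bits differ, so runtime-float vectors now take the `airCmpVector` fallback like every other comparison operator.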
@@ -9763,22 +9722,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.auto, .@"extern" => @intCast(agg_ty.structFieldOffset(extra.field_index, zcu)),
.@"packed" => break :fallback try cg.airStructFieldVal(inst),
};
- if (field_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
- var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand});
- var res = try ops[0].read(field_ty, .{ .disp = field_off }, cg);
- for (ops) |op| if (op.index != res.index) try op.die(cg);
- try res.moveTo(inst, cg);
- } else {
- // hack around Sema OPV bugs
- const res = try cg.tempInit(field_ty, .none);
- try res.moveTo(inst, cg);
- }
+ var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand});
+ // hack around Sema OPV bugs
+ var res = if (field_ty.hasRuntimeBitsIgnoreComptime(zcu))
+ try ops[0].read(field_ty, .{ .disp = field_off }, cg)
+ else
+ try cg.tempInit(field_ty, .none);
+ for (ops) |op| if (op.index != res.index) try op.die(cg);
+ try res.moveTo(inst, cg);
},
.set_union_tag => if (use_old) try cg.airSetUnionTag(inst) else {
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
const union_ty = cg.typeOf(bin_op.lhs).childType(zcu);
- var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
const union_layout = union_ty.unionGetLayout(zcu);
+ var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
// hack around Sema OPV bugs
if (union_layout.tag_size > 0) try ops[0].store(&ops[1], .{
.disp = @intCast(union_layout.tagOffset()),
@@ -9834,11 +9791,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.ptr_elem_val => try cg.airPtrElemVal(inst),
} else {
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
+ const res_ty = cg.typeOf(bin_op.lhs).elemType2(zcu);
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
try ops[0].toSlicePtr(cg);
var res: [1]Temp = undefined;
- const res_ty = cg.typeOf(bin_op.lhs).elemType2(zcu);
- cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{
+ if (res_ty.hasRuntimeBitsIgnoreComptime(zcu)) cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{
.dst_constraints = .{.{ .int = .byte }},
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32 } },
@@ -9912,51 +9869,51 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .dst0q, .leasi(.qword, .src0, .@"8", .src1), ._, ._ },
} },
} }) catch |err| switch (err) {
- error.SelectFailed => switch (res_ty.abiSize(zcu)) {
- // hack around Sema OPV bugs
- 0 => res[0] = try cg.tempInit(res_ty, .none),
- else => |elem_size| {
- while (true) for (&ops) |*op| {
- if (try op.toRegClass(true, .general_purpose, cg)) break;
- } else break;
- const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64();
- const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64();
- if (!std.math.isPowerOfTwo(elem_size)) {
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterRegisterImmediate(
- .{ .i_, .mul },
- rhs_reg,
- rhs_reg,
- .u(elem_size),
- );
- try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
- .base = .{ .reg = lhs_reg },
- .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
- });
- } else if (elem_size > 8) {
- try cg.spillEflagsIfOccupied();
- try cg.asmRegisterImmediate(
- .{ ._l, .sh },
- rhs_reg,
- .u(std.math.log2_int(u64, elem_size)),
- );
- try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
- .base = .{ .reg = lhs_reg },
- .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
- });
- } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
+ error.SelectFailed => {
+ const elem_size = res_ty.abiSize(zcu);
+ while (true) for (&ops) |*op| {
+ if (try op.toRegClass(true, .general_purpose, cg)) break;
+ } else break;
+ const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64();
+ const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64();
+ if (!std.math.isPowerOfTwo(elem_size)) {
+ try cg.spillEflagsIfOccupied();
+ try cg.asmRegisterRegisterImmediate(
+ .{ .i_, .mul },
+ rhs_reg,
+ rhs_reg,
+ .u(elem_size),
+ );
+ try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
.base = .{ .reg = lhs_reg },
- .mod = .{ .rm = .{
- .size = .qword,
- .index = rhs_reg,
- .scale = .fromFactor(@intCast(elem_size)),
- } },
+ .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
});
- res[0] = try ops[0].load(res_ty, .{}, cg);
- },
+ } else if (elem_size > 8) {
+ try cg.spillEflagsIfOccupied();
+ try cg.asmRegisterImmediate(
+ .{ ._l, .sh },
+ rhs_reg,
+ .u(std.math.log2_int(u64, elem_size)),
+ );
+ try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
+ .base = .{ .reg = lhs_reg },
+ .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } },
+ });
+ } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{
+ .base = .{ .reg = lhs_reg },
+ .mod = .{ .rm = .{
+ .size = .qword,
+ .index = rhs_reg,
+ .scale = .fromFactor(@intCast(elem_size)),
+ } },
+ });
+ res[0] = try ops[0].load(res_ty, .{}, cg);
},
else => |e| return e,
- };
+ } else {
+ // hack around Sema OPV bugs
+ res[0] = try cg.tempInit(res_ty, .none);
+ }
for (ops) |op| for (res) |r| {
if (op.index == r.index) break;
} else try op.die(cg);
@@ -10499,7 +10456,7 @@ fn restoreState(self: *CodeGen, state: State, deaths: []const Air.Inst.Index, co
if (opts.resurrect) for (
self.inst_tracking.keys()[Temp.Index.max..state.inst_tracking_len],
self.inst_tracking.values()[Temp.Index.max..state.inst_tracking_len],
- ) |inst, *tracking| tracking.resurrect(inst, state.scope_generation);
+ ) |inst, *tracking| try tracking.resurrect(self, inst, state.scope_generation);
for (deaths) |death| try self.processDeath(death);
const ExpectedContents = [@typeInfo(RegisterManager.TrackedRegisters).array.len]RegisterLock;
@@ -10879,7 +10836,8 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void {
const dst_ty = self.typeOfIndex(inst);
const result = @as(?MCValue, result: {
- const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
+ const src_abi_size: u31 = @intCast(src_ty.abiSize(zcu));
+ const dst_abi_size: u31 = @intCast(dst_ty.abiSize(zcu));
const src_int_info = src_ty.intInfo(zcu);
const dst_int_info = dst_ty.intInfo(zcu);
@@ -10890,7 +10848,6 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void {
const src_mcv = try self.resolveInst(ty_op.operand);
if (dst_ty.isVector(zcu)) {
- const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu));
const max_abi_size = @max(dst_abi_size, src_abi_size);
if (max_abi_size > self.vectorSize(.int)) break :result null;
const has_avx = self.hasFeature(.avx);
@@ -11060,7 +11017,8 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void {
else => src_int_info.bits,
};
- const dst_mcv = if (dst_int_info.bits <= src_storage_bits and
+ const dst_mcv = if ((if (src_mcv.getReg()) |src_reg| src_reg.class() == .general_purpose else src_abi_size > 8) and
+ dst_int_info.bits <= src_storage_bits and
std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable ==
std.math.divCeil(u32, src_storage_bits, 64) catch unreachable and
self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
@@ -11079,8 +11037,8 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void {
break :result .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) };
}
- const src_limbs_len = std.math.divCeil(u16, src_int_info.bits, 64) catch unreachable;
- const dst_limbs_len = std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable;
+ const src_limbs_len = std.math.divCeil(u31, src_abi_size, 8) catch unreachable;
+ const dst_limbs_len = @divExact(dst_abi_size, 8);
const high_mcv: MCValue = if (dst_mcv.isBase())
dst_mcv.address().offset((src_limbs_len - 1) * 8).deref()
@@ -12067,6 +12025,7 @@ fn genSetFrameTruncatedOverflowCompare(
defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
const ty = tuple_ty.fieldType(0, zcu);
+ const ty_size = ty.abiSize(zcu);
const int_info = ty.intInfo(zcu);
const hi_bits = (int_info.bits - 1) % 64 + 1;
@@ -12100,6 +12059,13 @@ fn genSetFrameTruncatedOverflowCompare(
try self.asmSetccRegister(.ne, eq_reg.to8());
try self.genBinOpMir(.{ ._, .@"or" }, .u8, .{ .register = overflow_reg }, .{ .register = eq_reg });
}
+ try self.genSetMem(
+ .{ .frame = frame_index },
+ @intCast(tuple_ty.structFieldOffset(1, zcu)),
+ tuple_ty.fieldType(1, zcu),
+ if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne },
+ .{},
+ );
const payload_off: i32 = @intCast(tuple_ty.structFieldOffset(0, zcu));
if (hi_limb_off > 0) try self.genSetMem(
@@ -12116,13 +12082,20 @@ fn genSetFrameTruncatedOverflowCompare(
.{ .register = scratch_reg },
.{},
);
- try self.genSetMem(
- .{ .frame = frame_index },
- @intCast(tuple_ty.structFieldOffset(1, zcu)),
- tuple_ty.fieldType(1, zcu),
- if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne },
- .{},
- );
+ var ext_off: i32 = hi_limb_off + 8;
+ if (ext_off < ty_size) {
+ switch (int_info.signedness) {
+ .signed => try self.asmRegisterImmediate(.{ ._r, .sa }, scratch_reg.to64(), .s(63)),
+ .unsigned => try self.asmRegisterRegister(.{ ._, .xor }, scratch_reg.to32(), scratch_reg.to32()),
+ }
+ while (ext_off < ty_size) : (ext_off += 8) try self.genSetMem(
+ .{ .frame = frame_index },
+ payload_off + ext_off,
+ limb_ty,
+ .{ .register = scratch_reg },
+ .{},
+ );
+ }
}
fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
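The reordered epilogue of `genSetFrameTruncatedOverflowCompare` also extends wide results: when the payload is larger than the limb holding the high bits (`ext_off < ty_size`), the remaining limbs are now filled explicitly, with the sign replicated via an arithmetic right shift by 63 for signed types or zeroed via `xor` for unsigned ones, instead of being left stale.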
@@ -13581,9 +13554,12 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
.{ ._, .bt },
.{
.base = .{ .frame = frame_index },
- .mod = .{ .rm = .{ .size = .qword } },
+ .mod = .{ .rm = .{
+ .size = .qword,
+ .disp = @intCast(index_imm / 64 * 8),
+ } },
},
- .u(index_imm),
+ .u(index_imm % 64),
),
else => try self.asmMemoryRegister(
.{ ._, .bt },
@@ -13603,8 +13579,11 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
.load_frame => switch (index_mcv) {
.immediate => |index_imm| try self.asmMemoryImmediate(
.{ ._, .bt },
- try array_mat_mcv.mem(self, .{ .size = .qword }),
- .u(index_imm),
+ try array_mat_mcv.mem(self, .{
+ .size = .qword,
+ .disp = @intCast(index_imm / 64 * 8),
+ }),
+ .u(index_imm % 64),
),
else => try self.asmMemoryRegister(
.{ ._, .bt },
@@ -13622,9 +13601,12 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void {
.base = .{
.reg = try self.copyToTmpRegister(.usize, array_mat_mcv.address()),
},
- .mod = .{ .rm = .{ .size = .qword } },
+ .mod = .{ .rm = .{
+ .size = .qword,
+ .disp = @intCast(index_imm / 64 * 8),
+ } },
},
- .u(index_imm),
+ .u(index_imm % 64),
),
else => try self.asmMemoryRegister(
.{ ._, .bt },
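The `bt` fixes are needed because x86 `bt` with an immediate operand reduces the bit offset modulo the operand size (64 for a qword), so passing the raw index silently wrapped for indices of 64 and up. The high part of the index is now folded into the displacement: testing bit 130, for example, addresses the qword at byte offset 130 / 64 * 8 = 16 and tests bit 130 % 64 = 2.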
@@ -14451,13 +14433,18 @@ fn genByteSwap(
return src_mcv;
},
9...16 => {
- switch (src_mcv) {
+ const mat_src_mcv: MCValue = mat_src_mcv: switch (src_mcv) {
+ .register => {
+ const frame_index = try self.allocFrameIndex(.initSpill(src_ty, zcu));
+ try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, src_mcv, .{});
+ break :mat_src_mcv .{ .load_frame = .{ .index = frame_index } };
+ },
.register_pair => |src_regs| if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
for (src_regs) |src_reg| try self.asmRegister(.{ ._, .bswap }, src_reg.to64());
return .{ .register_pair = .{ src_regs[1], src_regs[0] } };
- },
- else => {},
- }
+ } else src_mcv,
+ else => src_mcv,
+ };
const dst_regs =
try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp);
@@ -14465,18 +14452,18 @@ fn genByteSwap(
defer for (dst_locks) |lock| self.register_manager.unlockReg(lock);
for (dst_regs, 0..) |dst_reg, limb_index| {
- if (src_mcv.isBase()) {
+ if (mat_src_mcv.isBase()) {
try self.asmRegisterMemory(
.{ ._, if (has_movbe) .movbe else .mov },
dst_reg.to64(),
- try src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .{ .size = .qword }),
+ try mat_src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .{ .size = .qword }),
);
if (!has_movbe) try self.asmRegister(.{ ._, .bswap }, dst_reg.to64());
} else {
try self.asmRegisterRegister(
.{ ._, .mov },
dst_reg.to64(),
- src_mcv.register_pair[limb_index].to64(),
+ mat_src_mcv.register_pair[limb_index].to64(),
);
try self.asmRegister(.{ ._, .bswap }, dst_reg.to64());
}
@@ -15680,6 +15667,15 @@ fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue)
const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
defer self.register_manager.unlockReg(ptr_lock);
+ const mat_src_mcv: MCValue = mat_src_mcv: switch (src_mcv) {
+ .register => if (src_bit_size > 64) {
+ const frame_index = try self.allocFrameIndex(.initSpill(src_ty, self.pt.zcu));
+ try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, src_mcv, .{});
+ break :mat_src_mcv .{ .load_frame = .{ .index = frame_index } };
+ } else src_mcv,
+ else => src_mcv,
+ };
+
var limb_i: u16 = 0;
while (limb_i * limb_abi_bits < src_bit_off + src_bit_size) : (limb_i += 1) {
const part_bit_off = if (limb_i == 0) src_bit_off else 0;
@@ -15712,7 +15708,7 @@ fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue)
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
- try self.genSetReg(tmp_reg, limb_ty, src_mcv, .{});
+ try self.genSetReg(tmp_reg, limb_ty, mat_src_mcv, .{});
switch (limb_i) {
0 => try self.genShiftBinOpMir(
.{ ._l, .sh },
@@ -15743,8 +15739,8 @@ fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue)
defer self.register_manager.unlockReg(tmp_lock);
try self.genSetReg(tmp_reg, limb_ty, switch (limb_i) {
- 0 => src_mcv,
- else => src_mcv.address().offset(limb_i * limb_abi_size).deref(),
+ 0 => mat_src_mcv,
+ else => mat_src_mcv.address().offset(limb_i * limb_abi_size).deref(),
}, .{});
try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
try self.asmMemoryRegister(
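`genByteSwap` and `packedStore` gain the same materialization step: a plain `.register` source wider than 64 bits is first spilled to a dedicated frame slot, so the limb-by-limb loops that follow can address it as memory rather than assuming a register pair.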
@@ -17228,7 +17224,7 @@ fn genMulDivBinOp(
dst_mcv.address(),
lhs_mcv.address(),
rhs_mcv.address(),
- .{ .immediate = src_info.bits },
+ .{ .immediate = 8 * src_abi_size },
}, .{});
return dst_mcv;
},
@@ -17246,7 +17242,8 @@ fn genMulDivBinOp(
const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx });
defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
- const signedness = ty.intInfo(zcu).signedness;
+ const int_info = ty.intInfo(zcu);
+ const signedness = int_info.signedness;
switch (tag) {
.mul,
.mul_wrap,
@@ -17279,6 +17276,15 @@ fn genMulDivBinOp(
},
}, ty, lhs_mcv, rhs_mcv);
+ switch (tag) {
+ .mul, .rem, .div_trunc, .div_exact => {},
+ .mul_wrap => if (dst_ty.intInfo(zcu).bits < 8 * dst_abi_size) try self.truncateRegister(
+ dst_ty,
+ if (dst_abi_size <= 8) .rax else .rdx,
+ ),
+ else => unreachable,
+ }
+
if (dst_abi_size <= 8) return .{ .register = registerAlias(switch (tag) {
.mul, .mul_wrap, .div_trunc, .div_exact => .rax,
.rem => .rdx,
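`mul_wrap` results are now also masked back to the declared width: when the destination's bit count is smaller than its ABI size in bits (a `u24` in a 4-byte slot, say), `truncateRegister` clears the garbage high bits of rax (or rdx for wide results) before the value is returned.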
@@ -21954,8 +21960,9 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
break :arg .{ .indirect = .{ .reg = try self.copyToTmpRegister(.usize, ptr_mcv) } };
};
};
- if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |_| {
- _ = self.register_manager.lockReg(reg);
+ if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |tracked_index| {
+ try self.register_manager.getRegIndex(tracked_index, if (output == .none) inst else null);
+ _ = self.register_manager.lockRegIndexAssumeUnused(tracked_index);
};
if (!std.mem.eql(u8, name, "_"))
arg_map.putAssumeCapacityNoClobber(name, @intCast(args.items.len));
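Inline-asm register operands are now spilled and reserved through `getRegIndex` (tracking the instruction only when there is no explicit output) before being locked, where the old code merely attempted `lockReg` and silently ignored failure.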
@@ -22881,7 +22888,7 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C
try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[0].to64(), src_reg.to128());
try self.asmRegisterRegister(.{ ._ps, .movhl }, tmp_reg.to128(), src_reg.to128());
- try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[1].to64(), src_reg.to128());
+ try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[1].to64(), tmp_reg.to128());
}
return;
} else unreachable,
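The `genCopy` hunk fixes a copy-paste bug: `movhlps` parks the high 64 bits of `src_reg` in `tmp_reg`, so the second `movq` must read from `tmp_reg`; reading `src_reg` again duplicated the low half into both destination registers.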
@@ -23831,10 +23838,12 @@ fn airBitCast(self: *CodeGen, inst: Air.Inst.Index) !void {
const dst_rc = self.regSetForType(dst_ty);
const src_rc = self.regSetForType(src_ty);
- const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
+ const src_lock = if (src_mcv.getReg()) |src_reg| self.register_manager.lockReg(src_reg) else null;
defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
- const dst_mcv = if (dst_rc.supersetOf(src_rc) and dst_ty.abiSize(zcu) <= src_ty.abiSize(zcu) and
+ const dst_mcv = if ((if (src_mcv.getReg()) |src_reg| src_reg.class() == .general_purpose else true) and
+ dst_rc.supersetOf(src_rc) and dst_ty.abiSize(zcu) <= src_ty.abiSize(zcu) and
+ dst_ty.abiAlignment(zcu).order(src_ty.abiAlignment(zcu)).compare(.lte) and
self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
const dst_mcv = try self.allocRegOrMem(inst, true);
try self.genCopy(switch (std.math.order(dst_ty.abiSize(zcu), src_ty.abiSize(zcu))) {
@@ -27702,7 +27711,7 @@ fn registerAlias(reg: Register, size_bytes: u32) Register {
reg
else
unreachable,
- .x87 => if (size_bytes == 16)
+ .x87 => if (size_bytes >= 10 and size_bytes <= 16)
reg
else
unreachable,
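The `registerAlias` relaxation accounts for x87 values: an `f80` carries 10 bytes of payload inside a 16-byte ABI slot, so any size from 10 through 16 bytes must alias the same `st` register rather than only the exact 16-byte case.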
@@ -28574,23 +28583,19 @@ const Temp = struct {
try ptr.tracking(cg).short.deref().mem(cg, .{ .size = .byte }),
);
},
- .register => |val_reg| try ptr.storeReg(val_ty, registerAlias(
+ .register => |val_reg| try ptr.storeRegs(val_ty, &.{registerAlias(
val_reg,
@intCast(val_ty.abiSize(cg.pt.zcu)),
- ), cg),
+ )}, cg),
inline .register_pair,
.register_triple,
.register_quadruple,
- => |val_regs| for (val_regs) |val_reg| {
- try ptr.storeReg(val_ty, val_reg, cg);
- try ptr.toOffset(@divExact(val_reg.bitSize(), 8), cg);
- while (try ptr.toLea(cg)) {}
- },
+ => |val_regs| try ptr.storeRegs(val_ty, &val_regs, cg),
.register_offset => |val_reg_off| switch (val_reg_off.off) {
- 0 => try ptr.storeReg(val_ty, registerAlias(
+ 0 => try ptr.storeRegs(val_ty, &.{registerAlias(
val_reg_off.reg,
@intCast(val_ty.abiSize(cg.pt.zcu)),
- ), cg),
+ )}, cg),
else => continue :val_to_gpr,
},
.register_overflow => |val_reg_ov| {
@@ -28608,7 +28613,7 @@ const Temp = struct {
else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }),
});
const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu));
- try ptr.storeReg(first_ty, registerAlias(val_reg_ov.reg, first_size), cg);
+ try ptr.storeRegs(first_ty, &.{registerAlias(val_reg_ov.reg, first_size)}, cg);
try ptr.toOffset(first_size, cg);
try cg.asmSetccMemory(
val_reg_ov.eflags,
@@ -28675,6 +28680,15 @@ const Temp = struct {
const val_mcv = val.tracking(cg).short;
switch (val_mcv) {
else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
+ .undef => if (opts.safe) {
+ var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(opts.disp));
+ var pat = try cg.tempInit(.u8, .{ .immediate = 0xaa });
+ var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
+ try dst_ptr.memset(&pat, &len, cg);
+ try dst_ptr.die(cg);
+ try pat.die(cg);
+ try len.die(cg);
+ },
.immediate => |val_imm| {
const val_op: Immediate = if (std.math.cast(u31, val_imm)) |val_uimm31|
.u(val_uimm31)
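Storing an `undef` value with runtime safety enabled now memsets the destination with the 0xaa pattern, matching the byte fill Zig uses elsewhere for undefined memory, instead of hitting the panic in the `else` arm.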
@@ -28691,24 +28705,52 @@ const Temp = struct {
val_op,
);
},
- .register => |val_reg| try dst.writeReg(opts.disp, val_ty, registerAlias(
+ .eflags => |cc| try cg.asmSetccMemory(
+ cc,
+ try dst.tracking(cg).short.mem(cg, .{
+ .size = .byte,
+ .disp = opts.disp,
+ }),
+ ),
+ .register => |val_reg| try dst.writeRegs(opts.disp, val_ty, &.{registerAlias(
val_reg,
@intCast(val_ty.abiSize(cg.pt.zcu)),
- ), cg),
- inline .register_pair, .register_triple, .register_quadruple => |val_regs| {
- var disp = opts.disp;
- for (val_regs) |val_reg| {
- try dst.writeReg(disp, val_ty, val_reg, cg);
- disp += @divExact(val_reg.bitSize(), 8);
- }
- },
+ )}, cg),
+ inline .register_pair,
+ .register_triple,
+ .register_quadruple,
+ => |val_regs| try dst.writeRegs(opts.disp, val_ty, &val_regs, cg),
.register_offset => |val_reg_off| switch (val_reg_off.off) {
- 0 => try dst.writeReg(opts.disp, val_ty, registerAlias(
+ 0 => try dst.writeRegs(opts.disp, val_ty, &.{registerAlias(
val_reg_off.reg,
@intCast(val_ty.abiSize(cg.pt.zcu)),
- ), cg),
+ )}, cg),
else => continue :val_to_gpr,
},
+ .register_overflow => |val_reg_ov| {
+ const ip = &cg.pt.zcu.intern_pool;
+ const first_ty: Type = .fromInterned(first_ty: switch (ip.indexToKey(val_ty.toIntern())) {
+ .tuple_type => |tuple_type| {
+ const tuple_field_types = tuple_type.types.get(ip);
+ assert(tuple_field_types.len == 2 and tuple_field_types[1] == .u1_type);
+ break :first_ty tuple_field_types[0];
+ },
+ .opt_type => |opt_child| {
+ assert(!val_ty.optionalReprIsPayload(cg.pt.zcu));
+ break :first_ty opt_child;
+ },
+ else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }),
+ });
+ const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu));
+ try dst.writeRegs(opts.disp, first_ty, &.{registerAlias(val_reg_ov.reg, first_size)}, cg);
+ try cg.asmSetccMemory(
+ val_reg_ov.eflags,
+ try dst.tracking(cg).short.mem(cg, .{
+ .size = .byte,
+ .disp = opts.disp + first_size,
+ }),
+ );
+ },
.lea_frame, .lea_symbol => continue :val_to_gpr,
.memory, .indirect, .load_frame, .load_symbol => {
var dst_ptr =
@@ -28739,33 +28781,47 @@ const Temp = struct {
}));
}
- fn storeReg(ptr: *Temp, src_ty: Type, src_reg: Register, cg: *CodeGen) !void {
- const src_rc = src_reg.class();
- const src_abi_size = src_ty.abiSize(cg.pt.zcu);
- const strat = try cg.moveStrategy(src_ty, src_rc, false);
- // hack around linker relocation bugs
- switch (ptr.tracking(cg).short) {
- else => {},
- .lea_symbol => |sym_off| if (src_rc != .general_purpose or sym_off.off != 0)
- while (try ptr.toRegClass(false, .general_purpose, cg)) {},
- }
- if (src_rc == .x87 or std.math.isPowerOfTwo(src_abi_size)) {
- try strat.write(cg, try ptr.tracking(cg).short.deref().mem(cg, .{
- .size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())),
- }), src_reg);
- } else {
- const frame_alloc: FrameAlloc = .initSpill(src_ty, cg.pt.zcu);
- const frame_index = try cg.allocFrameIndex(frame_alloc);
- const frame_size: Memory.Size = .fromSize(frame_alloc.abi_size);
- try strat.write(cg, .{
- .base = .{ .frame = frame_index },
- .mod = .{ .rm = .{ .size = frame_size } },
- }, src_reg);
- var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
- var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
- try ptr.memcpy(&src_ptr, &len, cg);
- try src_ptr.die(cg);
- try len.die(cg);
+ fn storeRegs(ptr: *Temp, src_ty: Type, src_regs: []const Register, cg: *CodeGen) !void {
+ var part_disp: u31 = 0;
+ var deferred_disp: u31 = 0;
+ var src_abi_size: u32 = @intCast(src_ty.abiSize(cg.pt.zcu));
+ for (src_regs) |src_reg| {
+ const src_rc = src_reg.class();
+ const part_bit_size = @min(8 * src_abi_size, src_reg.bitSize());
+ const part_size = @divExact(part_bit_size, 8);
+ if (src_rc == .x87 or std.math.isPowerOfTwo(part_size)) {
+ // hack around linker relocation bugs
+ switch (ptr.tracking(cg).short) {
+ else => {},
+ .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {},
+ }
+ const strat = try cg.moveStrategy(src_ty, src_rc, false);
+ try strat.write(cg, try ptr.tracking(cg).short.deref().mem(cg, .{
+ .size = .fromBitSize(part_bit_size),
+ .disp = part_disp,
+ }), registerAlias(src_reg, part_size));
+ } else {
+ const frame_size = std.math.ceilPowerOfTwoAssert(u32, part_size);
+ const frame_index = try cg.allocFrameIndex(.init(.{
+ .size = frame_size,
+ .alignment = .fromNonzeroByteUnits(frame_size),
+ }));
+ const strat = try cg.moveStrategy(src_ty, src_rc, true);
+ try strat.write(cg, .{
+ .base = .{ .frame = frame_index },
+ .mod = .{ .rm = .{ .size = .fromSize(frame_size) } },
+ }, registerAlias(src_reg, frame_size));
+ try ptr.toOffset(deferred_disp, cg);
+ deferred_disp = 0;
+ var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
+ var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
+ try ptr.memcpy(&src_ptr, &len, cg);
+ try src_ptr.die(cg);
+ try len.die(cg);
+ }
+ part_disp += part_size;
+ deferred_disp += part_size;
+ src_abi_size -= part_size;
}
}
@@ -28777,30 +28833,41 @@ const Temp = struct {
}));
}
- fn writeReg(dst: Temp, disp: i32, src_ty: Type, src_reg: Register, cg: *CodeGen) !void {
- const src_rc = src_reg.class();
- const src_abi_size = src_ty.abiSize(cg.pt.zcu);
- const strat = try cg.moveStrategy(src_ty, src_rc, false);
- if (src_rc == .x87 or std.math.isPowerOfTwo(src_abi_size)) {
- try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{
- .size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())),
- .disp = disp,
- }), src_reg);
- } else {
- const frame_alloc: FrameAlloc = .initSpill(src_ty, cg.pt.zcu);
- const frame_index = try cg.allocFrameIndex(frame_alloc);
- const frame_size: Memory.Size = .fromSize(frame_alloc.abi_size);
- try strat.write(cg, .{
- .base = .{ .frame = frame_index },
- .mod = .{ .rm = .{ .size = frame_size } },
- }, src_reg);
- var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address());
- var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
- var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
- try dst_ptr.memcpy(&src_ptr, &len, cg);
- try dst_ptr.die(cg);
- try src_ptr.die(cg);
- try len.die(cg);
+ fn writeRegs(dst: Temp, disp: i32, src_ty: Type, src_regs: []const Register, cg: *CodeGen) !void {
+ var part_disp = disp;
+ var src_abi_size: u32 = @intCast(src_ty.abiSize(cg.pt.zcu));
+ for (src_regs) |src_reg| {
+ const src_rc = src_reg.class();
+ const part_bit_size = @min(8 * src_abi_size, src_reg.bitSize());
+ const part_size = @divExact(part_bit_size, 8);
+ if (src_rc == .x87 or std.math.isPowerOfTwo(part_size)) {
+ const strat = try cg.moveStrategy(src_ty, src_rc, false);
+ try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{
+ .size = .fromBitSize(part_bit_size),
+ .disp = part_disp,
+ }), registerAlias(src_reg, part_size));
+ } else {
+ const frame_size = std.math.ceilPowerOfTwoAssert(u32, part_size);
+ const frame_index = try cg.allocFrameIndex(.init(.{
+ .size = frame_size,
+ .alignment = .fromNonzeroByteUnits(frame_size),
+ }));
+ const strat = try cg.moveStrategy(src_ty, src_rc, true);
+ try strat.write(cg, .{
+ .base = .{ .frame = frame_index },
+ .mod = .{ .rm = .{ .size = .fromSize(frame_size) } },
+ }, registerAlias(src_reg, frame_size));
+ var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address());
+ try dst_ptr.toOffset(part_disp, cg);
+ var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
+ var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
+ try dst_ptr.memcpy(&src_ptr, &len, cg);
+ try dst_ptr.die(cg);
+ try src_ptr.die(cg);
+ try len.die(cg);
+ }
+ part_disp += part_size;
+ src_abi_size -= part_size;
}
}
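`storeReg`/`writeReg` are generalized into `storeRegs`/`writeRegs`: a single loop walks the value's registers while tracking a running displacement and the remaining ABI size, so pairs, triples, and quadruples share one path, and odd-sized final parts are still routed through a power-of-two spill frame plus `memcpy`.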
@@ -29123,8 +29190,8 @@ const Select = struct {
signed_int_vec: Memory.Size,
signed_int_or_full_vec: Memory.Size,
unsigned_int_vec: Memory.Size,
- int_or_vec: Memory.Size,
- exact_remainder_int_or_vec: struct { of: Memory.Size, is: Memory.Size },
+ size: Memory.Size,
+ multiple_size: Memory.Size,
int: Memory.Size,
scalar_int: Memory.Size,
scalar_signed_int: Memory.Size,
@@ -29170,15 +29237,8 @@ const Select = struct {
} else false,
.unsigned_int_vec => |size| ty.isVector(zcu) and size.bitSize(cg.target) >= 8 * ty.abiSize(zcu) and
if (intInfo(ty.childType(zcu), cg)) |int_info| int_info.signedness == .unsigned else false,
- .int_or_vec => |size| if (intInfo(ty, cg)) |int_info|
- size.bitSize(cg.target) >= int_info.bits
- else
- ty.isVector(zcu) and size.bitSize(cg.target) >= 8 * ty.abiSize(zcu),
- .exact_remainder_int_or_vec => |of_is| if (intInfo(ty, cg)) |int_info|
- of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1
- else
- ty.isVector(zcu) and ty.childType(zcu).toIntern() != .bool_type and
- of_is.is.bitSize(cg.target) == (8 * ty.abiSize(zcu) - 1) % of_is.of.bitSize(cg.target) + 1,
+ .size => |size| size.bitSize(cg.target) >= 8 * ty.abiSize(zcu),
+            .multiple_size => |size| 8 * ty.abiSize(zcu) % size.bitSize(cg.target) == 0,
.int => |size| if (intInfo(ty, cg)) |int_info| size.bitSize(cg.target) >= int_info.bits else false,
.scalar_int => |size| if (intInfo(ty.scalarType(zcu), cg)) |int_info|
size.bitSize(cg.target) >= int_info.bits
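With the rename, `.{ .size = .xword }` accepts any operand whose ABI size fits in 128 bits, integer or vector alike (a `u64` or a `@Vector(4, u32)`), and `.{ .multiple_size = .yword }` accepts operands whose bit size is an exact multiple of 256 (a `u512`, say), replacing the more convoluted `exact_remainder_int_or_vec` formulation.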
src/main.zig
@@ -39,6 +39,11 @@ test {
_ = Package;
}
+const thread_stack_size = switch (builtin.zig_backend) {
+ else => std.Thread.SpawnConfig.default_stack_size,
+ .stage2_x86_64 => 32 << 20,
+};
+
pub const std_options: std.Options = .{
.wasiCwd = wasi_cwd,
.logFn = log,
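`32 << 20` is 32 MiB, double `default_stack_size` (16 MiB); only compilers built with the self-hosted x86_64 backend request the larger worker stacks.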
@@ -3320,6 +3325,7 @@ fn buildOutputType(
.allocator = gpa,
.n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)),
.track_ids = true,
+ .stack_size = thread_stack_size,
});
defer thread_pool.deinit();
@@ -5024,6 +5030,7 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void {
.allocator = gpa,
.n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)),
.track_ids = true,
+ .stack_size = thread_stack_size,
});
defer thread_pool.deinit();
@@ -5460,6 +5467,7 @@ fn jitCmd(
.allocator = gpa,
.n_jobs = @min(@max(std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)),
.track_ids = true,
+ .stack_size = thread_stack_size,
});
defer thread_pool.deinit();
src/register_manager.zig
@@ -58,11 +58,6 @@ pub fn RegisterManager(
return @alignCast(@fieldParentPtr("register_manager", self));
}
- fn excludeRegister(reg: Register, register_class: RegisterBitSet) bool {
- const index = indexOfRegIntoTracked(reg) orelse return true;
- return !register_class.isSet(index);
- }
-
fn markRegIndexAllocated(self: *Self, tracked_index: TrackedIndex) void {
self.allocated_registers.set(tracked_index);
}
@@ -234,28 +229,20 @@ pub fn RegisterManager(
) ?[count]Register {
comptime assert(count > 0 and count <= tracked_registers.len);
- var free_and_not_locked_registers = self.free_registers;
- free_and_not_locked_registers.setIntersection(register_class);
-
- var unlocked_registers = self.locked_registers;
- unlocked_registers.toggleAll();
-
- free_and_not_locked_registers.setIntersection(unlocked_registers);
-
- if (free_and_not_locked_registers.count() < count) return null;
+ var free_and_unlocked_registers = self.locked_registers;
+ free_and_unlocked_registers.toggleAll();
+ free_and_unlocked_registers.setIntersection(self.free_registers);
+ free_and_unlocked_registers.setIntersection(register_class);
var regs: [count]Register = undefined;
var i: usize = 0;
- for (tracked_registers) |reg| {
- if (i >= count) break;
- if (excludeRegister(reg, register_class)) continue;
- if (self.isRegLocked(reg)) continue;
- if (!self.isRegFree(reg)) continue;
-
- regs[i] = reg;
+ var it = free_and_unlocked_registers.iterator(.{});
+ while (it.next()) |reg_index| {
+ regs[i] = regAtTrackedIndex(@intCast(reg_index));
i += 1;
+ if (i >= count) break;
}
- assert(i == count);
+ if (i < count) return null;
for (regs, insts) |reg, inst| {
log.debug("tryAllocReg {} for inst {?}", .{ reg, inst });
@@ -290,46 +277,27 @@ pub fn RegisterManager(
) AllocationError![count]Register {
comptime assert(count > 0 and count <= tracked_registers.len);
- var locked_registers = self.locked_registers;
- locked_registers.setIntersection(register_class);
-
- if (count > register_class.count() - locked_registers.count()) return error.OutOfRegisters;
-
const result = self.tryAllocRegs(count, insts, register_class) orelse blk: {
+ var unlocked_registers = self.locked_registers;
+ unlocked_registers.toggleAll();
+ unlocked_registers.setIntersection(register_class);
+
// We'll take over the first count registers. Spill
            // the instructions that were previously there to
// stack allocations.
var regs: [count]Register = undefined;
var i: usize = 0;
- for (tracked_registers) |reg| {
- if (i >= count) break;
- if (excludeRegister(reg, register_class)) break;
- if (self.isRegLocked(reg)) continue;
-
- log.debug("allocReg {} for inst {?}", .{ reg, insts[i] });
- regs[i] = reg;
- self.markRegAllocated(reg);
- const index = indexOfRegIntoTracked(reg).?; // indexOfReg() on a callee-preserved reg should never return null
- if (insts[i]) |inst| {
- // Track the register
- if (self.isRegFree(reg)) {
- self.markRegUsed(reg);
- } else {
- const spilled_inst = self.registers[index];
- try self.getFunction().spillInstruction(reg, spilled_inst);
- }
- self.registers[index] = inst;
- } else {
- // Don't track the register
- if (!self.isRegFree(reg)) {
- const spilled_inst = self.registers[index];
- try self.getFunction().spillInstruction(reg, spilled_inst);
- self.freeReg(reg);
- }
- }
-
+ var it = unlocked_registers.iterator(.{});
+ while (it.next()) |reg_index| {
+ const tracked_index: TrackedIndex = @intCast(reg_index);
+ if (!self.isRegIndexFree(tracked_index) and
+ self.registers[tracked_index].unwrap() == .target) continue;
+ try self.getRegIndex(tracked_index, insts[i]);
+ regs[i] = regAtTrackedIndex(tracked_index);
i += 1;
+ if (i >= count) break;
}
+ if (i < count) return error.OutOfRegisters;
break :blk regs;
};
@@ -351,7 +319,7 @@ pub fn RegisterManager(
/// Spills the register if it is currently allocated. If a
/// corresponding instruction is passed, will also track this
/// register.
- fn getRegIndex(
+ pub fn getRegIndex(
self: *Self,
tracked_index: TrackedIndex,
inst: ?Air.Inst.Index,
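Both allocation paths now derive candidates from one composed bit set instead of scanning `tracked_registers` with per-register lock/free checks. A minimal self-contained sketch of the same composition using `std.bit_set`; the register count and class layout are made up for illustration:

```zig
const std = @import("std");

test "bitset-driven register selection (illustrative)" {
    const Set = std.StaticBitSet(16); // pretend there are 16 tracked registers
    var locked = Set.initEmpty();
    locked.set(0); // pretend register 0 is locked
    const free = Set.initFull(); // all registers currently free
    var class = Set.initEmpty();
    class.setRangeValue(.{ .start = 0, .end = 8 }, true); // requested register class

    // Same composition as the new tryAllocRegs: unlocked, free, and in class.
    var candidates = locked;
    candidates.toggleAll();
    candidates.setIntersection(free);
    candidates.setIntersection(class);

    var it = candidates.iterator(.{});
    // Register 0 is locked, so the first candidate is register 1.
    try std.testing.expectEqual(@as(?usize, 1), it.next());
}
```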
test/behavior/x86_64/math.zig
@@ -742,6 +742,16 @@ fn testBinary(comptime op: anytype) !void {
0xb7935f5c2f3b1ae7a422c0a7c446884294b7d5370bada307d2fe5a4c4284a999,
0x310e6e196ba4f143b8d285ca6addf7f3bb3344224aff221b27607a31e148be08,
);
+ try testType(
+ u258,
+ 0x186d5ddaab8cb8cb04e5b41e36f812e039d008baf49f12894c39e29a07796d800,
+ 0x2072daba6ffad168826163eb136f6d28ca4360c8e7e5e41e29755e19e4753a4f5,
+ );
+ try testType(
+ u495,
+ 0x6eaf4e252b3bf74b75bac59e0b43ca5326bad2a25b3fdb74a67ef132ac5e47d72eebc3316fb2351ee66c50dc5afb92a75cea9b0e35160652c7db39eeb158,
+ 0x49fbed744a92b549d8c05bb3512c617d24dd824f3f69bdf3923bc326a75674b85f5b828d2566fab9c86f571d12c2a63c9164feb0d191d27905533d09622a,
+ );
try testType(
u512,
0xe5b1fedca3c77db765e517aabd05ffc524a3a8aff1784bbf67c45b894447ede32b65b9940e78173c591e56e078932d465f235aece7ad47b7f229df7ba8f12295,