Commit 54ed62755f

Jacob Young <jacobly0@users.noreply.github.com>
2025-03-08 03:04:24
x86_64: implement write register splitting
1 parent f8f2a3e
Changed files (3)
src
arch
codegen
src/arch/x86_64/abi.zig
@@ -100,7 +100,7 @@ pub const Context = enum { ret, arg, field, other };
 
 /// There are a maximum of 8 possible return slots. Returned values are in
 /// the beginning of the array; unused slots are filled with .none.
-pub fn classifySystemV(ty: Type, zcu: *Zcu, target: std.Target, ctx: Context) [8]Class {
+pub fn classifySystemV(ty: Type, zcu: *Zcu, target: *const std.Target, ctx: Context) [8]Class {
     const memory_class = [_]Class{
         .memory, .none, .none, .none,
         .none,   .none, .none, .none,
@@ -148,7 +148,7 @@ pub fn classifySystemV(ty: Type, zcu: *Zcu, target: std.Target, ctx: Context) [8
             result[0] = .integer;
             return result;
         },
-        .float => switch (ty.floatBits(target)) {
+        .float => switch (ty.floatBits(target.*)) {
             16 => {
                 if (ctx == .field) {
                     result[0] = .memory;
@@ -330,7 +330,7 @@ fn classifySystemVStruct(
     starting_byte_offset: u64,
     loaded_struct: InternPool.LoadedStructType,
     zcu: *Zcu,
-    target: std.Target,
+    target: *const std.Target,
 ) u64 {
     const ip = &zcu.intern_pool;
     var byte_offset = starting_byte_offset;
@@ -379,7 +379,7 @@ fn classifySystemVUnion(
     starting_byte_offset: u64,
     loaded_union: InternPool.LoadedUnionType,
     zcu: *Zcu,
-    target: std.Target,
+    target: *const std.Target,
 ) u64 {
     const ip = &zcu.intern_pool;
     for (0..loaded_union.field_types.len) |field_index| {
src/arch/x86_64/CodeGen.zig
@@ -95074,7 +95074,7 @@ fn moveStrategy(cg: *CodeGen, ty: Type, class: Register.Class, aligned: bool) !M
         .mmx => {},
         .sse => switch (ty.zigTypeTag(zcu)) {
             else => {
-                const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, cg.target.*, .other), .none);
+                const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, cg.target, .other), .none);
                 assert(std.mem.indexOfNone(abi.Class, classes, &.{
                     .integer, .sse, .sseup, .memory, .float, .float_combine,
                 }) == null);
@@ -99706,7 +99706,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void {
             const overflow_arg_area: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 8 } };
             const reg_save_area: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 16 } };
 
-            const classes = std.mem.sliceTo(&abi.classifySystemV(promote_ty, zcu, self.target.*, .arg), .none);
+            const classes = std.mem.sliceTo(&abi.classifySystemV(promote_ty, zcu, self.target, .arg), .none);
             switch (classes[0]) {
                 .integer => {
                     assert(classes.len == 1);
@@ -100051,7 +100051,7 @@ fn resolveCallingConventionValues(
                 var ret_tracking_i: usize = 0;
 
                 const classes = switch (cc) {
-                    .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ret_ty, zcu, self.target.*, .ret), .none),
+                    .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ret_ty, zcu, self.target, .ret), .none),
                     .x86_64_win => &.{abi.classifyWindows(ret_ty, zcu)},
                     else => unreachable,
                 };
@@ -100140,7 +100140,7 @@ fn resolveCallingConventionValues(
                 var arg_mcv_i: usize = 0;
 
                 const classes = switch (cc) {
-                    .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .arg), .none),
+                    .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target, .arg), .none),
                     .x86_64_win => &.{abi.classifyWindows(ty, zcu)},
                     else => unreachable,
                 };
@@ -100444,7 +100444,7 @@ fn splitType(self: *CodeGen, comptime parts_len: usize, ty: Type) ![parts_len]Ty
         error.DivisionByZero => unreachable,
         error.UnexpectedRemainder => {},
     };
-    const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .other), .none);
+    const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target, .other), .none);
     if (classes.len == parts_len) for (&parts, classes, 0..) |*part, class, part_i| {
         part.* = switch (class) {
             .integer => if (part_i < parts_len - 1)
@@ -101443,19 +101443,19 @@ const Temp = struct {
                         .disp = opts.disp,
                     }),
                 ),
-                .register => |val_reg| try dst.writeRegs(opts.disp, val_ty, &.{registerAlias(
+                .register => |val_reg| try dst.writeReg(opts.disp, val_ty, registerAlias(
                     val_reg,
                     @intCast(val_ty.abiSize(cg.pt.zcu)),
-                )}, cg),
+                ), cg),
                 inline .register_pair,
                 .register_triple,
                 .register_quadruple,
                 => |val_regs| try dst.writeRegs(opts.disp, val_ty, &val_regs, cg),
                 .register_offset => |val_reg_off| switch (val_reg_off.off) {
-                    0 => try dst.writeRegs(opts.disp, val_ty, &.{registerAlias(
+                    0 => try dst.writeReg(opts.disp, val_ty, registerAlias(
                         val_reg_off.reg,
                         @intCast(val_ty.abiSize(cg.pt.zcu)),
-                    )}, cg),
+                    ), cg),
                     else => continue :val_to_gpr,
                 },
                 .register_overflow => |val_reg_ov| {
@@ -101473,7 +101473,7 @@ const Temp = struct {
                         else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }),
                     });
                     const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu));
-                    try dst.writeRegs(opts.disp, first_ty, &.{registerAlias(val_reg_ov.reg, first_size)}, cg);
+                    try dst.writeReg(opts.disp, first_ty, registerAlias(val_reg_ov.reg, first_size), cg);
                     try cg.asmSetccMemory(
                         val_reg_ov.eflags,
                         try dst.tracking(cg).short.mem(cg, .{
@@ -101564,17 +101564,79 @@ const Temp = struct {
         }));
     }
 
+    fn writeReg(dst: Temp, disp: i32, src_ty: Type, src_reg: Register, cg: *CodeGen) InnerError!void {
+        const src_abi_size: u31 = @intCast(src_ty.abiSize(cg.pt.zcu));
+        const src_rc = src_reg.class();
+        if (src_rc == .x87 or std.math.isPowerOfTwo(src_abi_size)) {
+            const strat = try cg.moveStrategy(src_ty, src_rc, false);
+            try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{
+                .size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())),
+                .disp = disp,
+            }), registerAlias(src_reg, src_abi_size));
+        } else {
+            const frame_size = std.math.ceilPowerOfTwoAssert(u32, src_abi_size);
+            const frame_index = try cg.allocFrameIndex(.init(.{
+                .size = frame_size,
+                .alignment = .fromNonzeroByteUnits(frame_size),
+            }));
+            const strat = try cg.moveStrategy(src_ty, src_rc, true);
+            try strat.write(cg, .{
+                .base = .{ .frame = frame_index },
+                .mod = .{ .rm = .{ .size = .fromSize(frame_size) } },
+            }, registerAlias(src_reg, frame_size));
+            var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address());
+            try dst_ptr.toOffset(disp, cg);
+            var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
+            var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
+            try dst_ptr.memcpy(&src_ptr, &len, cg);
+            try dst_ptr.die(cg);
+            try src_ptr.die(cg);
+            try len.die(cg);
+        }
+    }
+
     fn writeRegs(dst: Temp, disp: i32, src_ty: Type, src_regs: []const Register, cg: *CodeGen) InnerError!void {
+        const zcu = cg.pt.zcu;
+        const classes = std.mem.sliceTo(&abi.classifySystemV(src_ty, zcu, cg.target, .other), .none);
+        var next_class_index: u4 = 0;
         var part_disp = disp;
-        var src_abi_size: u32 = @intCast(src_ty.abiSize(cg.pt.zcu));
+        var remaining_abi_size = src_ty.abiSize(zcu);
         for (src_regs) |src_reg| {
-            const src_rc = src_reg.class();
-            const part_bit_size = @min(8 * src_abi_size, src_reg.bitSize());
-            const part_size = @divExact(part_bit_size, 8);
-            if (src_rc == .x87 or std.math.isPowerOfTwo(part_size)) {
-                const strat = try cg.moveStrategy(src_ty, src_rc, false);
+            const class_index = next_class_index;
+            const class = classes[class_index];
+            next_class_index = @intCast(switch (class) {
+                .integer, .memory, .float, .float_combine => class_index + 1,
+                .sse => std.mem.indexOfNonePos(abi.Class, classes, class_index + 1, &.{.sseup}) orelse classes.len,
+                .x87 => std.mem.indexOfNonePos(abi.Class, classes, class_index + 1, &.{.x87up}) orelse classes.len,
+                .sseup, .x87up, .complex_x87, .none, .win_i128, .integer_per_element => unreachable,
+            });
+            const part_size = switch (class) {
+                .integer, .sse, .memory => @min(8 * @as(u7, next_class_index - class_index), remaining_abi_size),
+                .x87 => 16,
+                .float => 4,
+                .float_combine => 8,
+                .sseup, .x87up, .complex_x87, .none, .win_i128, .integer_per_element => unreachable,
+            };
+            const part_ty: Type = switch (class) {
+                .integer => .u64,
+                .sse => switch (part_size) {
+                    else => unreachable,
+                    8 => .f64,
+                    16 => .vector_2_f64,
+                    32 => .vector_4_f64,
+                },
+                .x87 => .f80,
+                .float => .f32,
+                .float_combine => .vector_2_f32,
+                .sseup, .x87up, .complex_x87, .memory, .none, .win_i128, .integer_per_element => unreachable,
+            };
+            if (class == .x87 or std.math.isPowerOfTwo(part_size)) {
+                const strat = try cg.moveStrategy(part_ty, src_reg.class(), false);
                 try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{
-                    .size = .fromBitSize(part_bit_size),
+                    .size = switch (class) {
+                        else => .fromSize(part_size),
+                        .x87 => .tbyte,
+                    },
                     .disp = part_disp,
                 }), registerAlias(src_reg, part_size));
             } else {
@@ -101583,7 +101645,7 @@ const Temp = struct {
                     .size = frame_size,
                     .alignment = .fromNonzeroByteUnits(frame_size),
                 }));
-                const strat = try cg.moveStrategy(src_ty, src_rc, true);
+                const strat = try cg.moveStrategy(part_ty, src_reg.class(), true);
                 try strat.write(cg, .{
                     .base = .{ .frame = frame_index },
                     .mod = .{ .rm = .{ .size = .fromSize(frame_size) } },
@@ -101591,15 +101653,16 @@ const Temp = struct {
                 var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address());
                 try dst_ptr.toOffset(part_disp, cg);
                 var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
-                var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
+                var len = try cg.tempInit(.usize, .{ .immediate = part_size });
                 try dst_ptr.memcpy(&src_ptr, &len, cg);
                 try dst_ptr.die(cg);
                 try src_ptr.die(cg);
                 try len.die(cg);
             }
             part_disp += part_size;
-            src_abi_size -= part_size;
+            remaining_abi_size -= part_size;
         }
+        assert(next_class_index == classes.len);
     }
 
     fn memcpy(dst: *Temp, src: *Temp, len: *Temp, cg: *CodeGen) InnerError!void {
src/codegen/llvm.zig
@@ -12071,7 +12071,7 @@ fn firstParamSRet(fn_info: InternPool.Key.FuncType, zcu: *Zcu, target: std.Targe
 }
 
 fn firstParamSRetSystemV(ty: Type, zcu: *Zcu, target: std.Target) bool {
-    const class = x86_64_abi.classifySystemV(ty, zcu, target, .ret);
+    const class = x86_64_abi.classifySystemV(ty, zcu, &target, .ret);
     if (class[0] == .memory) return true;
     if (class[0] == .x87 and class[2] != .none) return true;
     return false;
@@ -12181,7 +12181,7 @@ fn lowerSystemVFnRetTy(o: *Object, fn_info: InternPool.Key.FuncType) Allocator.E
         return o.lowerType(return_type);
     }
     const target = zcu.getTarget();
-    const classes = x86_64_abi.classifySystemV(return_type, zcu, target, .ret);
+    const classes = x86_64_abi.classifySystemV(return_type, zcu, &target, .ret);
     if (classes[0] == .memory) return .void;
     var types_index: u32 = 0;
     var types_buffer: [8]Builder.Type = undefined;
@@ -12459,7 +12459,7 @@ const ParamTypeIterator = struct {
         const zcu = it.object.pt.zcu;
         const ip = &zcu.intern_pool;
         const target = zcu.getTarget();
-        const classes = x86_64_abi.classifySystemV(ty, zcu, target, .arg);
+        const classes = x86_64_abi.classifySystemV(ty, zcu, &target, .arg);
         if (classes[0] == .memory) {
             it.zig_index += 1;
             it.llvm_index += 1;