Commit c1a2da6b78

Jacob Young <jacobly0@users.noreply.github.com>
2023-04-15 07:59:43
x86_64: implement packed load and store
1 parent c03771e
Changed files (4)
src/arch/x86_64/CodeGen.zig
@@ -1692,7 +1692,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void {
             .unsigned => .int_unsigned,
         } }, .data = switch (tag) {
             else => unreachable,
-            .mul, .mulwrap => std.math.max3(
+            .mul, .mulwrap => math.max3(
                 self.activeIntBits(bin_op.lhs),
                 self.activeIntBits(bin_op.rhs),
                 dst_info.bits / 2,
@@ -1745,7 +1745,7 @@ fn airAddSat(self: *Self, inst: Air.Inst.Index) !void {
             break :cc .o;
         } else cc: {
             try self.genSetReg(ty, limit_reg, .{
-                .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(u6, 64 - reg_bits),
+                .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - reg_bits),
             });
             break :cc .c;
         };
@@ -1852,7 +1852,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void {
             break :cc .o;
         } else cc: {
             try self.genSetReg(ty, limit_reg, .{
-                .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(u6, 64 - reg_bits),
+                .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - reg_bits),
             });
             break :cc .c;
         };
@@ -2069,7 +2069,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
                     var src_pl = Type.Payload.Bits{ .base = .{ .tag = switch (dst_info.signedness) {
                         .signed => .int_signed,
                         .unsigned => .int_unsigned,
-                    } }, .data = std.math.max3(
+                    } }, .data = math.max3(
                         self.activeIntBits(bin_op.lhs),
                         self.activeIntBits(bin_op.rhs),
                         dst_info.bits / 2,
@@ -3569,6 +3569,62 @@ fn reuseOperand(
     return true;
 }
 
+fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_mcv: MCValue, ptr_ty: Type) InnerError!void {
+    const ptr_info = ptr_ty.ptrInfo().data;
+
+    const val_ty = ptr_info.pointee_type;
+    const val_abi_size = @intCast(u32, val_ty.abiSize(self.target.*));
+    const limb_abi_size = @min(val_abi_size, 8);
+    const limb_abi_bits = limb_abi_size * 8;
+    const val_byte_off = @intCast(i32, ptr_info.bit_offset / limb_abi_bits * limb_abi_size);
+    const val_bit_off = ptr_info.bit_offset % limb_abi_bits;
+    const val_extra_bits = self.regExtraBits(val_ty);
+
+    if (val_abi_size > 8) return self.fail("TODO implement packed load of {}", .{
+        val_ty.fmt(self.bin_file.options.module.?),
+    });
+
+    const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
+    const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
+    defer self.register_manager.unlockReg(ptr_lock);
+
+    const dst_reg = switch (dst_mcv) {
+        .register => |reg| reg,
+        else => try self.register_manager.allocReg(null, gp),
+    };
+    const dst_lock = self.register_manager.lockReg(dst_reg);
+    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+
+    const load_abi_size =
+        if (val_bit_off < val_extra_bits) val_abi_size else val_abi_size * 2;
+    if (load_abi_size <= 8) {
+        const load_reg = registerAlias(dst_reg, load_abi_size);
+        try self.asmRegisterMemory(.mov, load_reg, Memory.sib(
+            Memory.PtrSize.fromSize(load_abi_size),
+            .{ .base = ptr_reg, .disp = val_byte_off },
+        ));
+        try self.asmRegisterImmediate(.shr, load_reg, Immediate.u(val_bit_off));
+    } else {
+        const tmp_reg = registerAlias(try self.register_manager.allocReg(null, gp), val_abi_size);
+        const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+        defer self.register_manager.unlockReg(tmp_lock);
+
+        const dst_alias = registerAlias(dst_reg, val_abi_size);
+        try self.asmRegisterMemory(.mov, dst_alias, Memory.sib(
+            Memory.PtrSize.fromSize(val_abi_size),
+            .{ .base = ptr_reg, .disp = val_byte_off },
+        ));
+        try self.asmRegisterMemory(.mov, tmp_reg, Memory.sib(
+            Memory.PtrSize.fromSize(val_abi_size),
+            .{ .base = ptr_reg, .disp = val_byte_off + 1 },
+        ));
+        try self.asmRegisterRegisterImmediate(.shrd, dst_alias, tmp_reg, Immediate.u(val_bit_off));
+    }
+
+    if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg);
+    try self.setRegOrMem(val_ty, dst_mcv, .{ .register = dst_reg });
+}
+
 fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!void {
     const elem_ty = ptr_ty.elemType();
     const abi_size = @intCast(u32, elem_ty.abiSize(self.target.*));
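
packedLoad converts the pointer's bit_offset into a limb-relative byte offset plus bit offset, then chooses between a single widened mov+shr and a mov/mov/shrd pair when the value spans two loads. A minimal standalone sketch of that arithmetic (not part of the commit), assuming a hypothetical u7 field at bit offset 13 and regExtraBits(u7) == 1, with the Type/ABI queries replaced by literal stand-ins:

const std = @import("std");

test "packedLoad offset arithmetic (sketch)" {
    const val_abi_size: u32 = 1; // stand-in for abiSize(u7)
    const bit_offset: u32 = 13; // stand-in for ptr_info.bit_offset
    const val_extra_bits: u32 = 1; // assumed value of self.regExtraBits(u7)

    const limb_abi_size = @min(val_abi_size, 8);
    const limb_abi_bits = limb_abi_size * 8;
    const val_byte_off = bit_offset / limb_abi_bits * limb_abi_size;
    const val_bit_off = bit_offset % limb_abi_bits;

    // The mov reads starting at byte 1 and the shr drops 5 low bits.
    try std.testing.expectEqual(@as(u32, 1), val_byte_off);
    try std.testing.expectEqual(@as(u32, 5), val_bit_off);

    // 5 + 7 bits do not fit in one byte, so the load is widened to 2 bytes;
    // the mov/mov/shrd path is only taken once load_abi_size exceeds 8.
    const load_abi_size =
        if (val_bit_off < val_extra_bits) val_abi_size else val_abi_size * 2;
    try std.testing.expectEqual(@as(u32, 2), load_abi_size);
}
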
@@ -3657,12 +3713,84 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void {
             ptr
         else
             try self.allocRegOrMem(inst, true);
-        try self.load(dst_mcv, ptr, self.air.typeOf(ty_op.operand));
+
+        const ptr_ty = self.air.typeOf(ty_op.operand);
+        if (ptr_ty.ptrInfo().data.host_size > 0) {
+            try self.packedLoad(dst_mcv, ptr, ptr_ty);
+        } else {
+            try self.load(dst_mcv, ptr, ptr_ty);
+        }
         break :result dst_mcv;
     };
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
+fn packedStore(
+    self: *Self,
+    ptr_mcv: MCValue,
+    val_mcv: MCValue,
+    ptr_ty: Type,
+    val_ty: Type,
+) InnerError!void {
+    const ptr_info = ptr_ty.ptrInfo().data;
+
+    const limb_abi_size = @min(ptr_info.host_size, 8);
+    const limb_abi_bits = limb_abi_size * 8;
+
+    const val_bit_size = val_ty.bitSize(self.target.*);
+    const val_byte_off = @intCast(i32, ptr_info.bit_offset / limb_abi_bits * limb_abi_size);
+    const val_bit_off = ptr_info.bit_offset % limb_abi_bits;
+
+    const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
+    const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
+    defer self.register_manager.unlockReg(ptr_lock);
+
+    var limb_i: u16 = 0;
+    while (limb_i * limb_abi_bits < val_bit_off + val_bit_size) : (limb_i += 1) {
+        const part_bit_off = if (limb_i == 0) val_bit_off else 0;
+        const part_bit_size =
+            @min(val_bit_off + val_bit_size - limb_i * limb_abi_bits, limb_abi_bits) - part_bit_off;
+        const limb_mem = Memory.sib(
+            Memory.PtrSize.fromSize(limb_abi_size),
+            .{ .base = ptr_reg, .disp = val_byte_off + limb_i * limb_abi_bits },
+        );
+
+        const part_mask = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - part_bit_size)) <<
+            @intCast(u6, part_bit_off);
+        const part_mask_not = part_mask ^
+            (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - limb_abi_bits));
+        if (limb_abi_size <= 4) {
+            try self.asmMemoryImmediate(.@"and", limb_mem, Immediate.u(part_mask_not));
+        } else if (math.cast(i32, @bitCast(i64, part_mask_not))) |small| {
+            try self.asmMemoryImmediate(.@"and", limb_mem, Immediate.s(small));
+        } else {
+            const part_mask_reg = try self.register_manager.allocReg(null, gp);
+            try self.asmRegisterImmediate(.mov, part_mask_reg, Immediate.u(part_mask_not));
+            try self.asmMemoryRegister(.@"and", limb_mem, part_mask_reg);
+        }
+
+        if (val_bit_size <= 64) {
+            const tmp_reg = try self.register_manager.allocReg(null, gp);
+            const tmp_mcv = MCValue{ .register = tmp_reg };
+            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+            defer self.register_manager.unlockReg(tmp_lock);
+
+            try self.genSetReg(val_ty, tmp_reg, val_mcv);
+            switch (limb_i) {
+                0 => try self.genShiftBinOpMir(.shl, val_ty, tmp_mcv, .{ .immediate = val_bit_off }),
+                1 => try self.genShiftBinOpMir(.shr, val_ty, tmp_mcv, .{
+                    .immediate = limb_abi_bits - val_bit_off,
+                }),
+                else => unreachable,
+            }
+            try self.genBinOpMir(.@"and", val_ty, tmp_mcv, .{ .immediate = part_mask });
+            try self.asmMemoryRegister(.@"or", limb_mem, registerAlias(tmp_reg, limb_abi_size));
+        } else return self.fail("TODO: implement packed store of {}", .{
+            val_ty.fmt(self.bin_file.options.module.?),
+        });
+    }
+}
+
 fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type) InnerError!void {
     const abi_size = @intCast(u32, value_ty.abiSize(self.target.*));
     switch (ptr) {
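
packedStore rewrites each affected limb with a read-modify-write: and the limb with the inverted field mask, shift the value into position, mask it, then or it back into memory. A minimal standalone sketch of the mask computation (not part of the commit), assuming a hypothetical u5 value at bit offset 6 of a 2-byte host integer, with the Type/ABI queries replaced by literal stand-ins:

const std = @import("std");
const math = std.math;

test "packedStore mask arithmetic (sketch)" {
    const limb_abi_size = 2; // stand-in for @min(ptr_info.host_size, 8)
    const limb_abi_bits = limb_abi_size * 8; // 16
    const val_bit_size = 5; // stand-in for bitSize(u5)
    const val_bit_off = 6; // stand-in for ptr_info.bit_offset % limb_abi_bits

    // 6 + 5 <= 16, so the while loop touches a single limb (limb_i == 0).
    const part_bit_off = val_bit_off;
    const part_bit_size =
        @min(val_bit_off + val_bit_size, limb_abi_bits) - part_bit_off;

    // part_mask selects the field's bit range inside the limb; part_mask_not
    // is its complement within the limb and is and-ed into memory first.
    const part_mask: u64 =
        (math.maxInt(u64) >> (64 - part_bit_size)) << part_bit_off;
    const part_mask_not: u64 =
        part_mask ^ (math.maxInt(u64) >> (64 - limb_abi_bits));

    try std.testing.expectEqual(@as(u64, 0b0000_0111_1100_0000), part_mask);
    try std.testing.expectEqual(@as(u64, 0b1111_1000_0011_1111), part_mask_not);

    // The emitted sequence is then, in effect:
    //   and word ptr [host], 0xf83f  ; clear bits 6..10
    //   mov tmp, value               ; genSetReg
    //   shl tmp, 6                   ; move the value into position
    //   and tmp, 0x07c0              ; keep only the field's bits
    //   or  word ptr [host], tmp     ; merge back into the host integer
}
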
@@ -3854,7 +3982,11 @@ fn airStore(self: *Self, inst: Air.Inst.Index) !void {
     const value = try self.resolveInst(bin_op.rhs);
     const value_ty = self.air.typeOf(bin_op.rhs);
     log.debug("airStore(%{d}): {} <- {}", .{ inst, ptr, value });
-    try self.store(ptr, value, ptr_ty, value_ty);
+    if (ptr_ty.ptrInfo().data.host_size > 0) {
+        try self.packedStore(ptr, value, ptr_ty, value_ty);
+    } else {
+        try self.store(ptr, value, ptr_ty, value_ty);
+    }
     return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
@@ -5218,7 +5350,7 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M
                     registerAlias(src_reg, abi_size),
                 ),
                 .immediate => |imm| {
-                    if (std.math.cast(i32, imm)) |small| {
+                    if (math.cast(i32, imm)) |small| {
                         try self.asmRegisterRegisterImmediate(
                             .imul,
                             dst_alias,
@@ -6824,7 +6956,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
                 } else if (mem.startsWith(u8, op_str, "$")) {
                     if (std.fmt.parseInt(i32, op_str["$".len..], 0)) |s| {
                         if (mnem_size) |size| {
-                            const max = @as(u64, std.math.maxInt(u64)) >>
+                            const max = @as(u64, math.maxInt(u64)) >>
                                 @intCast(u6, 64 - (size.bitSize() - 1));
                             if ((if (s < 0) ~s else s) > max)
                                 return self.fail("Invalid immediate size: '{s}'", .{op_str});
@@ -6832,7 +6964,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
                         op.* = .{ .imm = Immediate.s(s) };
                     } else |_| if (std.fmt.parseInt(u64, op_str["$".len..], 0)) |u| {
                         if (mnem_size) |size| {
-                            const max = @as(u64, std.math.maxInt(u64)) >>
+                            const max = @as(u64, math.maxInt(u64)) >>
                                 @intCast(u6, 64 - size.bitSize());
                             if (u > max)
                                 return self.fail("Invalid immediate size: '{s}'", .{op_str});
@@ -7171,7 +7303,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl
                 else => {
                     // 64 bit write to memory would take two mov's anyway so we
                     // instead just use two 32 bit writes to avoid register allocation
-                    if (std.math.cast(i32, @bitCast(i64, imm))) |small| {
+                    if (math.cast(i32, @bitCast(i64, imm))) |small| {
                         try self.asmMemoryImmediate(.mov, Memory.sib(
                             Memory.PtrSize.fromSize(abi_size),
                             .{ .base = base_reg, .disp = -stack_offset },
test/behavior/bugs/1851.zig
@@ -4,7 +4,6 @@ const expect = std.testing.expect;
 
 test "allocation and looping over 3-byte integer" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
test/behavior/enum.zig
@@ -1071,7 +1071,6 @@ const bit_field_1 = BitFieldOfEnums{
 
 test "bit field access with enum fields" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
test/behavior/struct.zig
@@ -456,7 +456,6 @@ test "packed struct 24bits" {
 test "runtime struct initialization of bitfield" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
 
     const s1 = Nibbles{
@@ -577,7 +576,6 @@ const bit_field_1 = BitField1{
 test "bit field access" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
 
     var data = bit_field_1;
@@ -696,7 +694,6 @@ const FooArrayOfAligned = packed struct {
 };
 
 test "pointer to packed struct member in a stack variable" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -1259,7 +1256,6 @@ test "packed struct aggregate init" {
 }
 
 test "packed struct field access via pointer" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
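
The newly enabled behavior tests all revolve around bit-aligned pointers into packed structs. A minimal sketch of the source pattern they exercise (an illustration with a hypothetical Flags type, not taken from the test files), which now lowers through packedLoad and packedStore on the self-hosted x86_64 backend:

const std = @import("std");

// Hypothetical packed struct for illustration; backed by a u16 host integer.
const Flags = packed struct {
    a: u3,
    b: u6,
    c: u7,
};

test "pointer to a packed struct field (sketch)" {
    var flags = Flags{ .a = 5, .b = 42, .c = 100 };
    const ptr = &flags.b; // bit-aligned pointer: host_size 2 bytes, bit_offset 3
    try std.testing.expectEqual(@as(u6, 42), ptr.*); // packed load
    ptr.* = 7; // packed store: read-modify-write of the u16 host
    try std.testing.expectEqual(@as(u6, 7), flags.b);
    try std.testing.expectEqual(@as(u3, 5), flags.a); // neighboring fields untouched
    try std.testing.expectEqual(@as(u7, 100), flags.c);
}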