Commit 881e931ee1

Andrew Kelley <andrew@ziglang.org>
2023-04-24 05:52:43
x86_64 backend: implement `@memset` for element ABI size > 1
* make memset, memset_safe, and memcpy guarantee that if the length is comptime-known then it will be nonzero.
1 parent 7c56145
Changed files (4)
src
test
behavior
src/arch/x86_64/CodeGen.zig
@@ -8175,23 +8175,62 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
     };
     defer if (src_val_lock) |lock| self.register_manager.unlockReg(lock);
 
-    if (elem_ty.abiSize(self.target.*) != 1) {
-        return self.fail("TODO implement airMemset when element ABI size > 1", .{});
+    if (elem_ty.abiSize(self.target.*) == 1) {
+        const len = switch (dst_ptr_ty.ptrSize()) {
+            // TODO: this only handles slices stored in the stack
+            .Slice => @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 8 }),
+            .One => @as(MCValue, .{ .immediate = dst_ptr_ty.childType().arrayLen() }),
+            .C, .Many => unreachable,
+        };
+        const len_lock: ?RegisterLock = switch (len) {
+            .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
+            else => null,
+        };
+        defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
+
+        // TODO: dst_ptr could be a slice rather than raw pointer
+        try self.genInlineMemset(dst_ptr, src_val, len, .{});
+        return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
     }
 
-    const len = switch (dst_ptr_ty.ptrSize()) {
-        .Slice => @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 8 }),
-        .One => @as(MCValue, .{ .immediate = dst_ptr_ty.childType().arrayLen() }),
-        .C, .Many => unreachable,
-    };
-    const len_lock: ?RegisterLock = switch (len) {
-        .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
-        else => null,
-    };
-    defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
+    // Store the first element, and then rely on memcpy copying forwards.
+    // Length zero requires a runtime check - so we handle arrays specially
+    // here to elide it.
+    switch (dst_ptr_ty.ptrSize()) {
+        .Slice => {
+            // TODO: this only handles slices stored in the stack
+            const ptr = @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 0 });
+            const len = @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 8 });
+            _ = ptr;
+            _ = len;
+            return self.fail("TODO implement airMemset for x86_64 with ABI size > 1 using a slice", .{});
+        },
+        .One => {
+            const len = dst_ptr_ty.childType().arrayLen();
+            assert(len != 0); // prevented by Sema
+            try self.store(dst_ptr, src_val, dst_ptr_ty, elem_ty);
+
+            const second_elem_ptr_reg = try self.register_manager.allocReg(null, gp);
+            const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg };
+            const second_elem_ptr_lock = self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg);
+            defer self.register_manager.unlockReg(second_elem_ptr_lock);
 
-    // TODO: dst_ptr could be a slice rather than raw pointer
-    try self.genInlineMemset(dst_ptr, src_val, len, .{});
+            const elem_abi_size = @intCast(u31, elem_ty.abiSize(self.target.*));
+
+            try self.asmRegisterMemory(
+                .lea,
+                second_elem_ptr_reg,
+                Memory.sib(.qword, .{
+                    .base = try self.copyToTmpRegister(Type.usize, dst_ptr),
+                    .disp = elem_abi_size,
+                }),
+            );
+
+            const bytes_to_copy: MCValue = .{ .immediate = elem_abi_size * (len - 1) };
+            try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, bytes_to_copy, .{});
+        },
+        .C, .Many => unreachable,
+    }
 
     return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
 }
src/Air.zig
@@ -641,6 +641,8 @@ pub const Inst = struct {
         /// The element value may be undefined, in which case the destination
         /// memory region has undefined bytes after this function executes. In
         /// such case ignoring this instruction is legal lowering.
+        /// If the length is compile-time known (due to the destination being a
+        /// pointer-to-array), then it is guaranteed to be greater than zero.
         memset,
         /// Same as `memset`, except if the element value is undefined, the memory region
         /// should be filled with 0xaa bytes, and any other safety metadata such as Valgrind
@@ -654,6 +656,9 @@ pub const Inst = struct {
         /// The two memory regions must not overlap.
         /// Result type is always void.
         /// Uses the `bin_op` field. LHS is the dest slice. RHS is the source pointer.
+        /// If the length is compile-time known (due to the destination or
+        /// source being a pointer-to-array), then it is guaranteed to be
+        /// greater than zero.
         memcpy,
 
         /// Uses the `ty_pl` field with payload `Cmpxchg`.
src/Sema.zig
@@ -21918,8 +21918,6 @@ fn zirMemcpy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void
         } else break :rs src_src;
     } else dest_src;
 
-    try sema.requireRuntimeBlock(block, src, runtime_src);
-
     const dest_ty = sema.typeOf(dest_ptr);
     const src_ty = sema.typeOf(src_ptr);
 
@@ -21946,10 +21944,16 @@ fn zirMemcpy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void
     var new_src_ptr = src_ptr;
     if (len_val) |val| {
         const len = val.toUnsignedInt(target);
+        if (len == 0) {
+            // This AIR instruction guarantees length > 0 if it is comptime-known.
+            return;
+        }
         new_dest_ptr = try upgradeToArrayPtr(sema, block, dest_ptr, len);
         new_src_ptr = try upgradeToArrayPtr(sema, block, src_ptr, len);
     }
 
+    try sema.requireRuntimeBlock(block, src, runtime_src);
+
     // Aliasing safety check.
     if (block.wantSafety()) {
         const dest_int = try block.addUnOp(.ptrtoint, new_dest_ptr);
@@ -21995,13 +21999,18 @@ fn zirMemset(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void
     const target = sema.mod.getTarget();
 
     const runtime_src = if (try sema.resolveDefinedValue(block, dest_src, dest_ptr)) |ptr_val| rs: {
+        const len_air_ref = try sema.fieldVal(block, src, dest_ptr, "len", dest_src);
+        const len_val = (try sema.resolveDefinedValue(block, dest_src, len_air_ref)) orelse
+            break :rs dest_src;
+        const len_u64 = (try len_val.getUnsignedIntAdvanced(target, sema)).?;
+        const len = try sema.usizeCast(block, dest_src, len_u64);
+        if (len == 0) {
+            // This AIR instruction guarantees length > 0 if it is comptime-known.
+            return;
+        }
+
         if (!ptr_val.isComptimeMutablePtr()) break :rs dest_src;
         if (try sema.resolveMaybeUndefVal(uncoerced_elem)) |_| {
-            const len_air_ref = try sema.fieldVal(block, src, dest_ptr, "len", dest_src);
-            const len_val = (try sema.resolveDefinedValue(block, dest_src, len_air_ref)) orelse
-                break :rs dest_src;
-            const len_u64 = (try len_val.getUnsignedIntAdvanced(target, sema)).?;
-            const len = try sema.usizeCast(block, dest_src, len_u64);
             for (0..len) |i| {
                 const elem_index = try sema.addIntUnsigned(Type.usize, i);
                 const elem_ptr = try sema.elemPtr(
test/behavior/basic.zig
@@ -361,10 +361,6 @@ test "@memset on array pointers" {
         // TODO: implement memset when element ABI size > 1
         return error.SkipZigTest;
     }
-    if (builtin.zig_backend == .stage2_x86_64) {
-        // TODO: implement memset when element ABI size > 1
-        return error.SkipZigTest;
-    }
 
     try testMemsetArray();
     try comptime testMemsetArray();