Commit 057c950093

Andrew Kelley <andrew@ziglang.org>
2023-04-22 08:05:36
LLVM backend: support non-byte-sized memset
Also introduce memset_safe AIR tag and support it in C backend and LLVM backend.
1 parent 25d1128
Changed files (14)
src/arch/aarch64/CodeGen.zig
@@ -775,7 +775,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .atomic_rmw      => try self.airAtomicRmw(inst),
             .atomic_load     => try self.airAtomicLoad(inst),
             .memcpy          => try self.airMemcpy(inst),
-            .memset          => try self.airMemset(inst),
+            .memset          => try self.airMemset(inst, false),
+            .memset_safe     => try self.airMemset(inst, true),
             .set_union_tag   => try self.airSetUnionTag(inst),
             .get_union_tag   => try self.airGetUnionTag(inst),
             .clz             => try self.airClz(inst),
@@ -5975,8 +5976,13 @@ fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOr
     return self.fail("TODO implement airAtomicStore for {}", .{self.target.cpu.arch});
 }
 
-fn airMemset(self: *Self, inst: Air.Inst.Index) !void {
+fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
     _ = inst;
+    if (safety) {
+        // TODO if the value is undef, write 0xaa bytes to dest
+    } else {
+        // TODO if the value is undef, don't lower this instruction
+    }
     return self.fail("TODO implement airMemset for {}", .{self.target.cpu.arch});
 }
 
src/arch/arm/CodeGen.zig
@@ -759,7 +759,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .atomic_rmw      => try self.airAtomicRmw(inst),
             .atomic_load     => try self.airAtomicLoad(inst),
             .memcpy          => try self.airMemcpy(inst),
-            .memset          => try self.airMemset(inst),
+            .memset          => try self.airMemset(inst, false),
+            .memset_safe     => try self.airMemset(inst, true),
             .set_union_tag   => try self.airSetUnionTag(inst),
             .get_union_tag   => try self.airGetUnionTag(inst),
             .clz             => try self.airClz(inst),
@@ -5921,7 +5922,12 @@ fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOr
     return self.fail("TODO implement airAtomicStore for {}", .{self.target.cpu.arch});
 }
 
-fn airMemset(self: *Self, inst: Air.Inst.Index) !void {
+fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
+    if (safety) {
+        // TODO if the value is undef, write 0xaa bytes to dest
+    } else {
+        // TODO if the value is undef, don't lower this instruction
+    }
     _ = inst;
     return self.fail("TODO implement airMemset for {}", .{self.target.cpu.arch});
 }
src/arch/riscv64/CodeGen.zig
@@ -589,7 +589,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .atomic_rmw      => try self.airAtomicRmw(inst),
             .atomic_load     => try self.airAtomicLoad(inst),
             .memcpy          => try self.airMemcpy(inst),
-            .memset          => try self.airMemset(inst),
+            .memset          => try self.airMemset(inst, false),
+            .memset_safe     => try self.airMemset(inst, true),
             .set_union_tag   => try self.airSetUnionTag(inst),
             .get_union_tag   => try self.airGetUnionTag(inst),
             .clz             => try self.airClz(inst),
@@ -2421,8 +2422,13 @@ fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOr
     return self.fail("TODO implement airAtomicStore for {}", .{self.target.cpu.arch});
 }
 
-fn airMemset(self: *Self, inst: Air.Inst.Index) !void {
+fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
     _ = inst;
+    if (safety) {
+        // TODO if the value is undef, write 0xaa bytes to dest
+    } else {
+        // TODO if the value is undef, don't lower this instruction
+    }
     return self.fail("TODO implement airMemset for {}", .{self.target.cpu.arch});
 }
 
src/arch/sparc64/CodeGen.zig
@@ -605,7 +605,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .atomic_rmw      => try self.airAtomicRmw(inst),
             .atomic_load     => try self.airAtomicLoad(inst),
             .memcpy          => @panic("TODO try self.airMemcpy(inst)"),
-            .memset          => try self.airMemset(inst),
+            .memset          => try self.airMemset(inst, false),
+            .memset_safe     => try self.airMemset(inst, true),
             .set_union_tag   => try self.airSetUnionTag(inst),
             .get_union_tag   => try self.airGetUnionTag(inst),
             .clz             => try self.airClz(inst),
@@ -1764,7 +1765,12 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAirBookkeeping();
 }
 
-fn airMemset(self: *Self, inst: Air.Inst.Index) !void {
+fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
+    if (safety) {
+        // TODO if the value is undef, write 0xaa bytes to dest
+    } else {
+        // TODO if the value is undef, don't lower this instruction
+    }
     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
     const extra = self.air.extraData(Air.Bin, pl_op.payload);
 
src/arch/wasm/CodeGen.zig
@@ -1883,7 +1883,8 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 
         .load => func.airLoad(inst),
         .loop => func.airLoop(inst),
-        .memset => func.airMemset(inst),
+        // TODO: elide memset when writing undef without safety
+        .memset, .memset_safe => func.airMemset(inst),
         .not => func.airNot(inst),
         .optional_payload => func.airOptionalPayload(inst),
         .optional_payload_ptr => func.airOptionalPayloadPtr(inst),
src/arch/x86_64/CodeGen.zig
@@ -1046,7 +1046,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .atomic_rmw      => try self.airAtomicRmw(inst),
             .atomic_load     => try self.airAtomicLoad(inst),
             .memcpy          => try self.airMemcpy(inst),
-            .memset          => try self.airMemset(inst),
+            .memset          => try self.airMemset(inst, false),
+            .memset_safe     => try self.airMemset(inst, true),
             .set_union_tag   => try self.airSetUnionTag(inst),
             .get_union_tag   => try self.airGetUnionTag(inst),
             .clz             => try self.airClz(inst),
@@ -8149,7 +8150,13 @@ fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOr
     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
-fn airMemset(self: *Self, inst: Air.Inst.Index) !void {
+fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
+    if (safety) {
+        // TODO if the value is undef, write 0xaa bytes to dest
+    } else {
+        // TODO if the value is undef, don't lower this instruction
+    }
+
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
 
     const dst_ptr = try self.resolveInst(bin_op.lhs);
src/codegen/c.zig
@@ -2925,7 +2925,8 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail,
             .cmpxchg_strong   => try airCmpxchg(f, inst, "strong"),
             .atomic_rmw       => try airAtomicRmw(f, inst),
             .atomic_load      => try airAtomicLoad(f, inst),
-            .memset           => try airMemset(f, inst),
+            .memset           => try airMemset(f, inst, false),
+            .memset_safe      => try airMemset(f, inst, true),
             .memcpy           => try airMemcpy(f, inst),
             .set_union_tag    => try airSetUnionTag(f, inst),
             .get_union_tag    => try airGetUnionTag(f, inst),
@@ -6189,7 +6190,7 @@ fn writeSliceOrPtr(f: *Function, writer: anytype, ptr: CValue, ptr_ty: Type) !vo
     }
 }
 
-fn airMemset(f: *Function, inst: Air.Inst.Index) !CValue {
+fn airMemset(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue {
     const bin_op = f.air.instructions.items(.data)[inst].bin_op;
     const dest_ty = f.air.typeOf(bin_op.lhs);
     const dest_slice = try f.resolveInst(bin_op.lhs);
@@ -6201,6 +6202,11 @@ fn airMemset(f: *Function, inst: Air.Inst.Index) !CValue {
     const writer = f.object.writer();
 
     if (val_is_undef) {
+        if (!safety) {
+            try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
+            return .none;
+        }
+
         try writer.writeAll("memset(");
         switch (dest_ty.ptrSize()) {
             .Slice => {
@@ -6242,8 +6248,7 @@ fn airMemset(f: *Function, inst: Air.Inst.Index) !CValue {
             },
             .One => {
                 const array_ty = dest_ty.childType();
-                const len = array_ty.arrayLen() * elem_abi_size;
-                try writer.print("{d}", .{len});
+                try writer.print("{d}", .{array_ty.arrayLen()});
             },
             .Many, .C => unreachable,
         }
src/codegen/llvm.zig
@@ -4672,7 +4672,8 @@ pub const FuncGen = struct {
                 .fence          => try self.airFence(inst),
                 .atomic_rmw     => try self.airAtomicRmw(inst),
                 .atomic_load    => try self.airAtomicLoad(inst),
-                .memset         => try self.airMemset(inst),
+                .memset         => try self.airMemset(inst, false),
+                .memset_safe    => try self.airMemset(inst, true),
                 .memcpy         => try self.airMemcpy(inst),
                 .set_union_tag  => try self.airSetUnionTag(inst),
                 .get_union_tag  => try self.airGetUnionTag(inst),
@@ -8405,29 +8406,95 @@ pub const FuncGen = struct {
         return null;
     }
 
-    fn airMemset(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
+    fn airMemset(self: *FuncGen, inst: Air.Inst.Index, safety: bool) !?*llvm.Value {
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const dest_slice = try self.resolveInst(bin_op.lhs);
         const ptr_ty = self.air.typeOf(bin_op.lhs);
-        const value = try self.resolveInst(bin_op.rhs);
         const elem_ty = self.air.typeOf(bin_op.rhs);
         const target = self.dg.module.getTarget();
         const val_is_undef = if (self.air.value(bin_op.rhs)) |val| val.isUndefDeep() else false;
-        const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
-        const dest_ptr = self.sliceOrArrayPtr(dest_slice, ptr_ty);
+        const dest_ptr_align = ptr_ty.ptrAlignment(target);
         const u8_llvm_ty = self.context.intType(8);
-        const fill_byte = if (val_is_undef) u8_llvm_ty.constInt(0xaa, .False) else b: {
-            if (elem_ty.abiSize(target) != 1) {
-                return self.dg.todo("implement @memset for non-byte-sized element type", .{});
+        const dest_ptr = self.sliceOrArrayPtr(dest_slice, ptr_ty);
+
+        if (val_is_undef) {
+            // Even if safety is disabled, we still emit a memset since it conveys
+            // extra information to LLVM. However, safety makes the difference between
+            // using 0xaa or an actual `undef` value for the fill byte.
+            const fill_byte = if (safety)
+                u8_llvm_ty.constInt(0xaa, .False)
+            else
+                u8_llvm_ty.getUndef();
+            const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
+            _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr());
+
+            if (safety and self.dg.module.comp.bin_file.options.valgrind) {
+                self.valgrindMarkUndef(dest_ptr, len);
             }
-            break :b self.builder.buildBitCast(value, u8_llvm_ty, "");
-        };
-        const dest_ptr_align = ptr_ty.ptrAlignment(target);
-        _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr());
+            return null;
+        }
+
+        const value = try self.resolveInst(bin_op.rhs);
+        const elem_abi_size = elem_ty.abiSize(target);
 
-        if (val_is_undef and self.dg.module.comp.bin_file.options.valgrind) {
-            self.valgrindMarkUndef(dest_ptr, len);
+        if (elem_abi_size == 1) {
+            // In this case we can take advantage of LLVM's intrinsic.
+            const fill_byte = self.builder.buildBitCast(value, u8_llvm_ty, "");
+            const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
+            _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr());
+            return null;
         }
+
+        // non-byte-sized element. lower with a loop. something like this:
+
+        // entry:
+        //   ...
+        //   %end_ptr = getelementptr %ptr, %len
+        //   br loop
+        // loop:
+        //   %it_ptr = phi body %next_ptr, entry %ptr
+        //   %end = cmp ne %it_ptr, %end_ptr
+        //   cond_br %end body, end
+        // body:
+        //   store %it_ptr, %value
+        //   %next_ptr = getelementptr %it_ptr, 1
+        //   br loop
+        // end:
+        //   ...
+        const entry_block = self.builder.getInsertBlock();
+        const loop_block = self.context.appendBasicBlock(self.llvm_func, "InlineMemsetLoop");
+        const body_block = self.context.appendBasicBlock(self.llvm_func, "InlineMemsetBody");
+        const end_block = self.context.appendBasicBlock(self.llvm_func, "InlineMemsetEnd");
+
+        const llvm_usize_ty = self.context.intType(target.cpu.arch.ptrBitWidth());
+        const len = switch (ptr_ty.ptrSize()) {
+            .Slice => self.builder.buildExtractValue(dest_slice, 1, ""),
+            .One => llvm_usize_ty.constInt(ptr_ty.childType().arrayLen(), .False),
+            .Many, .C => unreachable,
+        };
+        const elem_llvm_ty = try self.dg.lowerType(elem_ty);
+        const len_gep = [_]*llvm.Value{len};
+        const end_ptr = self.builder.buildInBoundsGEP(elem_llvm_ty, dest_ptr, &len_gep, len_gep.len, "");
+        _ = self.builder.buildBr(loop_block);
+
+        self.builder.positionBuilderAtEnd(loop_block);
+        const it_ptr = self.builder.buildPhi(self.context.pointerType(0), "");
+        const end = self.builder.buildICmp(.NE, it_ptr, end_ptr, "");
+        _ = self.builder.buildCondBr(end, body_block, end_block);
+
+        self.builder.positionBuilderAtEnd(body_block);
+        const store_inst = self.builder.buildStore(value, it_ptr);
+        store_inst.setAlignment(@min(elem_ty.abiAlignment(target), dest_ptr_align));
+        const one_gep = [_]*llvm.Value{llvm_usize_ty.constInt(1, .False)};
+        const next_ptr = self.builder.buildInBoundsGEP(elem_llvm_ty, it_ptr, &one_gep, one_gep.len, "");
+        _ = self.builder.buildBr(loop_block);
+
+        self.builder.positionBuilderAtEnd(end_block);
+
+        const incoming_values: [2]*llvm.Value = .{ next_ptr, dest_ptr };
+        const incoming_blocks: [2]*llvm.BasicBlock = .{ body_block, entry_block };
+        it_ptr.addIncoming(&incoming_values, &incoming_blocks, 2);
+
         return null;
     }
 
src/Liveness/Verify.zig
@@ -255,6 +255,7 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
             .min,
             .max,
             .memset,
+            .memset_safe,
             .memcpy,
             => {
                 const bin_op = data[inst].bin_op;
src/Air.zig
@@ -638,7 +638,14 @@ pub const Inst = struct {
         /// The element type may be any type, and the slice may have any alignment.
         /// Result type is always void.
         /// Uses the `bin_op` field. LHS is the dest slice. RHS is the element value.
+        /// The element value may be undefined, in which case the destination
+        /// memory region has undefined bytes after this function executes. In
+        /// such case ignoring this instruction is legal lowering.
         memset,
+        /// Same as `memset`, except if the element value is undefined, the memory region
+        /// should be filled with 0xaa bytes, and any other safety metadata such as Valgrind
+        /// integrations should be notified of this memory region being undefined.
+        memset_safe,
         /// Given dest pointer and source pointer, copy elements from source to dest.
         /// Dest pointer is either a slice or a pointer to array.
         /// The dest element type may be any type.
@@ -1236,6 +1243,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
         .atomic_store_release,
         .atomic_store_seq_cst,
         .memset,
+        .memset_safe,
         .memcpy,
         .set_union_tag,
         .prefetch,
@@ -1415,6 +1423,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index) bool {
         .errunion_payload_ptr_set,
         .set_union_tag,
         .memset,
+        .memset_safe,
         .memcpy,
         .cmpxchg_weak,
         .cmpxchg_strong,
src/Liveness.zig
@@ -305,6 +305,7 @@ pub fn categorizeOperand(
         .atomic_store_seq_cst,
         .set_union_tag,
         .memset,
+        .memset_safe,
         .memcpy,
         => {
             const o = air_datas[inst].bin_op;
@@ -980,6 +981,7 @@ fn analyzeInst(
         .min,
         .max,
         .memset,
+        .memset_safe,
         .memcpy,
         => {
             const o = inst_datas[inst].bin_op;
src/print_air.zig
@@ -171,6 +171,7 @@ const Writer = struct {
             .cmp_neq_optimized,
             .memcpy,
             .memset,
+            .memset_safe,
             => try w.writeBinOp(s, inst),
 
             .is_null,
src/Sema.zig
@@ -21972,7 +21972,7 @@ fn zirMemset(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void
 
     try sema.requireRuntimeBlock(block, src, runtime_src);
     _ = try block.addInst(.{
-        .tag = .memset,
+        .tag = if (block.wantSafety()) .memset_safe else .memset,
         .data = .{ .bin_op = .{
             .lhs = dest_ptr,
             .rhs = elem,
test/behavior/basic.zig
@@ -353,6 +353,50 @@ fn f2(x: bool) []const u8 {
     return (if (x) &fA else &fB)();
 }
 
+test "@memset on array pointers" {
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest;
+
+    try testMemsetArray();
+    // TODO this doesn't pass yet
+    // try comptime testMemsetArray();
+}
+
+fn testMemsetArray() !void {
+    {
+        // memset array to non-undefined, ABI size == 1
+        var foo: [20]u8 = undefined;
+        @memset(&foo, 'A');
+        try expect(foo[0] == 'A');
+        try expect(foo[11] == 'A');
+        try expect(foo[19] == 'A');
+
+        // memset array to undefined, ABI size == 1
+        @setRuntimeSafety(true);
+        @memset(&foo, undefined);
+        try expect(foo[0] == 0xaa);
+        try expect(foo[11] == 0xaa);
+        try expect(foo[19] == 0xaa);
+    }
+
+    {
+        // memset array to non-undefined, ABI size > 1
+        var foo: [20]u32 = undefined;
+        @memset(&foo, 1234);
+        try expect(foo[0] == 1234);
+        try expect(foo[11] == 1234);
+        try expect(foo[19] == 1234);
+
+        // memset array to undefined, ABI size > 1
+        @setRuntimeSafety(true);
+        @memset(&foo, undefined);
+        try expect(foo[0] == 0xaaaaaaaa);
+        try expect(foo[11] == 0xaaaaaaaa);
+        try expect(foo[19] == 0xaaaaaaaa);
+    }
+}
+
 test "memcpy and memset intrinsics" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;