Commit 5378fdffdc

Andrew Kelley <andrew@ziglang.org>
2023-04-25 02:58:23
stage2: introduce store_safe AIR instruction
store: The value to store may be undefined, in which case the destination memory region has undefined bytes after this instruction is evaluated. In such case ignoring this instruction is legal lowering. store_safe: Same as `store`, except if the value to store is undefined, the memory region should be filled with 0xaa bytes, and any other safety metadata such as Valgrind integrations should be notified of this memory region being undefined.
1 parent d604553
src/arch/aarch64/CodeGen.zig
@@ -764,7 +764,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .ptrtoint        => try self.airPtrToInt(inst),
             .ret             => try self.airRet(inst),
             .ret_load        => try self.airRetLoad(inst),
-            .store           => try self.airStore(inst),
+            .store           => try self.airStore(inst, false),
+            .store_safe      => try self.airStore(inst, true),
             .struct_field_ptr=> try self.airStructFieldPtr(inst),
             .struct_field_val=> try self.airStructFieldVal(inst),
             .array_to_slice  => try self.airArrayToSlice(inst),
@@ -4036,7 +4037,12 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
     }
 }
 
-fn airStore(self: *Self, inst: Air.Inst.Index) !void {
+fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
+    if (safety) {
+        // TODO if the value is undef, write 0xaa bytes to dest
+    } else {
+        // TODO if the value is undef, don't lower this instruction
+    }
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
     const ptr = try self.resolveInst(bin_op.lhs);
     const value = try self.resolveInst(bin_op.rhs);
src/arch/arm/CodeGen.zig
@@ -748,7 +748,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .ptrtoint        => try self.airPtrToInt(inst),
             .ret             => try self.airRet(inst),
             .ret_load        => try self.airRetLoad(inst),
-            .store           => try self.airStore(inst),
+            .store           => try self.airStore(inst, false),
+            .store_safe      => try self.airStore(inst, true),
             .struct_field_ptr=> try self.airStructFieldPtr(inst),
             .struct_field_val=> try self.airStructFieldVal(inst),
             .array_to_slice  => try self.airArrayToSlice(inst),
@@ -2836,7 +2837,12 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
     }
 }
 
-fn airStore(self: *Self, inst: Air.Inst.Index) !void {
+fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
+    if (safety) {
+        // TODO if the value is undef, write 0xaa bytes to dest
+    } else {
+        // TODO if the value is undef, don't lower this instruction
+    }
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
     const ptr = try self.resolveInst(bin_op.lhs);
     const value = try self.resolveInst(bin_op.rhs);
src/arch/riscv64/CodeGen.zig
@@ -578,7 +578,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .ptrtoint        => try self.airPtrToInt(inst),
             .ret             => try self.airRet(inst),
             .ret_load        => try self.airRetLoad(inst),
-            .store           => try self.airStore(inst),
+            .store           => try self.airStore(inst, false),
+            .store_safe      => try self.airStore(inst, true),
             .struct_field_ptr=> try self.airStructFieldPtr(inst),
             .struct_field_val=> try self.airStructFieldVal(inst),
             .array_to_slice  => try self.airArrayToSlice(inst),
@@ -1573,7 +1574,12 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
     }
 }
 
-fn airStore(self: *Self, inst: Air.Inst.Index) !void {
+fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
+    if (safety) {
+        // TODO if the value is undef, write 0xaa bytes to dest
+    } else {
+        // TODO if the value is undef, don't lower this instruction
+    }
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
     const ptr = try self.resolveInst(bin_op.lhs);
     const value = try self.resolveInst(bin_op.rhs);
src/arch/sparc64/CodeGen.zig
@@ -593,7 +593,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .ptrtoint        => try self.airPtrToInt(inst),
             .ret             => try self.airRet(inst),
             .ret_load        => try self.airRetLoad(inst),
-            .store           => try self.airStore(inst),
+            .store           => try self.airStore(inst, false),
+            .store_safe      => try self.airStore(inst, true),
             .struct_field_ptr=> @panic("TODO try self.airStructFieldPtr(inst)"),
             .struct_field_val=> try self.airStructFieldVal(inst),
             .array_to_slice  => try self.airArrayToSlice(inst),
@@ -2407,7 +2408,12 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
-fn airStore(self: *Self, inst: Air.Inst.Index) !void {
+fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
+    if (safety) {
+        // TODO if the value is undef, write 0xaa bytes to dest
+    } else {
+        // TODO if the value is undef, don't lower this instruction
+    }
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
     const ptr = try self.resolveInst(bin_op.lhs);
     const value = try self.resolveInst(bin_op.rhs);
src/arch/wasm/CodeGen.zig
@@ -1883,8 +1883,8 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 
         .load => func.airLoad(inst),
         .loop => func.airLoop(inst),
-        // TODO: elide memset when writing undef without safety
-        .memset, .memset_safe => func.airMemset(inst),
+        .memset => func.airMemset(inst, false),
+        .memset_safe => func.airMemset(inst, true),
         .not => func.airNot(inst),
         .optional_payload => func.airOptionalPayload(inst),
         .optional_payload_ptr => func.airOptionalPayloadPtr(inst),
@@ -1914,7 +1914,8 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
         .slice_ptr => func.airSlicePtr(inst),
         .ptr_slice_len_ptr => func.airPtrSliceFieldPtr(inst, func.ptrSize()),
         .ptr_slice_ptr_ptr => func.airPtrSliceFieldPtr(inst, 0),
-        .store => func.airStore(inst),
+        .store => func.airStore(inst, false),
+        .store_safe => func.airStore(inst, true),
 
         .set_union_tag => func.airSetUnionTag(inst),
         .struct_field_ptr => func.airStructFieldPtr(inst),
@@ -2222,7 +2223,12 @@ fn airAlloc(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     func.finishAir(inst, value, &.{});
 }
 
-fn airStore(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
+fn airStore(func: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void {
+    if (safety) {
+        // TODO if the value is undef, write 0xaa bytes to dest
+    } else {
+        // TODO if the value is undef, don't lower this instruction
+    }
     const bin_op = func.air.instructions.items(.data)[inst].bin_op;
 
     const lhs = try func.resolveInst(bin_op.lhs);
@@ -4384,7 +4390,12 @@ fn airPtrBinOp(func: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void {
     func.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs });
 }
 
-fn airMemset(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
+fn airMemset(func: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void {
+    if (safety) {
+        // TODO if the value is undef, write 0xaa bytes to dest
+    } else {
+        // TODO if the value is undef, don't lower this instruction
+    }
     const bin_op = func.air.instructions.items(.data)[inst].bin_op;
 
     const ptr = try func.resolveInst(bin_op.lhs);
src/arch/x86_64/CodeGen.zig
@@ -1035,7 +1035,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .ptrtoint        => try self.airPtrToInt(inst),
             .ret             => try self.airRet(inst),
             .ret_load        => try self.airRetLoad(inst),
-            .store           => try self.airStore(inst),
+            .store           => try self.airStore(inst, false),
+            .store_safe      => try self.airStore(inst, true),
             .struct_field_ptr=> try self.airStructFieldPtr(inst),
             .struct_field_val=> try self.airStructFieldVal(inst),
             .array_to_slice  => try self.airArrayToSlice(inst),
@@ -3936,7 +3937,12 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
     }
 }
 
-fn airStore(self: *Self, inst: Air.Inst.Index) !void {
+fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
+    if (safety) {
+        // TODO if the value is undef, write 0xaa bytes to dest
+    } else {
+        // TODO if the value is undef, don't lower this instruction
+    }
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
     const ptr = try self.resolveInst(bin_op.lhs);
     const ptr_ty = self.air.typeOf(bin_op.lhs);
src/codegen/c.zig
@@ -2914,7 +2914,8 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail,
             .load             => try airLoad(f, inst),
             .ret              => try airRet(f, inst, false),
             .ret_load         => try airRet(f, inst, true),
-            .store            => try airStore(f, inst),
+            .store            => try airStore(f, inst, false),
+            .store_safe       => try airStore(f, inst, true),
             .loop             => try airLoop(f, inst),
             .cond_br          => try airCondBr(f, inst),
             .br               => try airBr(f, inst),
@@ -3565,19 +3566,7 @@ fn airBoolToInt(f: *Function, inst: Air.Inst.Index) !CValue {
     return local;
 }
 
-fn storeUndefined(f: *Function, lhs_child_ty: Type, dest_ptr: CValue) !CValue {
-    if (f.wantSafety()) {
-        const writer = f.object.writer();
-        try writer.writeAll("memset(");
-        try f.writeCValue(writer, dest_ptr, .FunctionArgument);
-        try writer.print(", {x}, sizeof(", .{try f.fmtIntLiteral(Type.u8, Value.undef)});
-        try f.renderType(writer, lhs_child_ty);
-        try writer.writeAll("));\n");
-    }
-    return .none;
-}
-
-fn airStore(f: *Function, inst: Air.Inst.Index) !CValue {
+fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue {
     // *a = b;
     const bin_op = f.air.instructions.items(.data)[inst].bin_op;
 
@@ -3588,18 +3577,19 @@ fn airStore(f: *Function, inst: Air.Inst.Index) !CValue {
     const ptr_val = try f.resolveInst(bin_op.lhs);
     const src_ty = f.air.typeOf(bin_op.rhs);
 
-    // TODO Sema should emit a different instruction when the store should
-    // possibly do the safety 0xaa bytes for undefined.
-    const src_val_is_undefined =
-        if (f.air.value(bin_op.rhs)) |v| v.isUndefDeep() else false;
-    if (src_val_is_undefined) {
-        if (ptr_info.host_size == 0) {
-            try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
-            return try storeUndefined(f, ptr_info.pointee_type, ptr_val);
-        } else if (!f.wantSafety()) {
-            try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
-            return .none;
+    const val_is_undef = if (f.air.value(bin_op.rhs)) |v| v.isUndefDeep() else false;
+
+    if (val_is_undef) {
+        try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
+        if (safety and ptr_info.host_size == 0) {
+            const writer = f.object.writer();
+            try writer.writeAll("memset(");
+            try f.writeCValue(writer, ptr_val, .FunctionArgument);
+            try writer.writeAll(", 0xaa, sizeof(");
+            try f.renderType(writer, ptr_info.pointee_type);
+            try writer.writeAll("));\n");
         }
+        return .none;
     }
 
     const target = f.object.dg.module.getTarget();
src/codegen/llvm.zig
@@ -4649,7 +4649,8 @@ pub const FuncGen = struct {
                 .not            => try self.airNot(inst),
                 .ret            => try self.airRet(inst),
                 .ret_load       => try self.airRetLoad(inst),
-                .store          => try self.airStore(inst),
+                .store          => try self.airStore(inst, false),
+                .store_safe     => try self.airStore(inst, true),
                 .assembly       => try self.airAssembly(inst),
                 .slice_ptr      => try self.airSliceField(inst, 0),
                 .slice_len      => try self.airSliceField(inst, 1),
@@ -8115,48 +8116,36 @@ pub const FuncGen = struct {
         return buildAllocaInner(self.context, self.builder, self.llvm_func, self.di_scope != null, llvm_ty, alignment, self.dg.module.getTarget());
     }
 
-    fn airStore(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
+    fn airStore(self: *FuncGen, inst: Air.Inst.Index, safety: bool) !?*llvm.Value {
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const dest_ptr = try self.resolveInst(bin_op.lhs);
         const ptr_ty = self.air.typeOf(bin_op.lhs);
         const operand_ty = ptr_ty.childType();
 
-        // TODO Sema should emit a different instruction when the store should
-        // possibly do the safety 0xaa bytes for undefined.
         const val_is_undef = if (self.air.value(bin_op.rhs)) |val| val.isUndefDeep() else false;
         if (val_is_undef) {
-            {
-                // TODO let's handle this in AIR rather than by having each backend
-                // check the optimization mode of the compilation because the plan is
-                // to support setting the optimization mode at finer grained scopes
-                // which happens in Sema. Codegen should not be aware of this logic.
-                // I think this comment is basically the same as the other TODO comment just
-                // above but I'm leaving them both here to make it look super messy and
-                // thereby bait contributors (or let's be honest, probably myself) into
-                // fixing this instead of letting it rot.
-                const safety = switch (self.dg.module.comp.bin_file.options.optimize_mode) {
-                    .ReleaseSmall, .ReleaseFast => false,
-                    .Debug, .ReleaseSafe => true,
-                };
-                if (!safety) {
-                    return null;
-                }
-            }
+            // Even if safety is disabled, we still emit a memset to undefined since it conveys
+            // extra information to LLVM. However, safety makes the difference between using
+            // 0xaa or actual undefined for the fill byte.
+            const u8_llvm_ty = self.context.intType(8);
+            const fill_byte = if (safety)
+                u8_llvm_ty.constInt(0xaa, .False)
+            else
+                u8_llvm_ty.getUndef();
             const target = self.dg.module.getTarget();
             const operand_size = operand_ty.abiSize(target);
-            const u8_llvm_ty = self.context.intType(8);
-            const fill_char = u8_llvm_ty.constInt(0xaa, .False);
-            const dest_ptr_align = ptr_ty.ptrAlignment(target);
             const usize_llvm_ty = try self.dg.lowerType(Type.usize);
             const len = usize_llvm_ty.constInt(operand_size, .False);
-            _ = self.builder.buildMemSet(dest_ptr, fill_char, len, dest_ptr_align, ptr_ty.isVolatilePtr());
-            if (self.dg.module.comp.bin_file.options.valgrind) {
+            const dest_ptr_align = ptr_ty.ptrAlignment(target);
+            _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr());
+            if (safety and self.dg.module.comp.bin_file.options.valgrind) {
                 self.valgrindMarkUndef(dest_ptr, len);
             }
-        } else {
-            const src_operand = try self.resolveInst(bin_op.rhs);
-            try self.store(dest_ptr, ptr_ty, src_operand, .NotAtomic);
+            return null;
         }
+
+        const src_operand = try self.resolveInst(bin_op.rhs);
+        try self.store(dest_ptr, ptr_ty, src_operand, .NotAtomic);
         return null;
     }
 
src/Liveness/Verify.zig
@@ -239,6 +239,7 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
             .bool_and,
             .bool_or,
             .store,
+            .store_safe,
             .array_elem_val,
             .slice_elem_val,
             .ptr_elem_val,
src/Air.zig
@@ -485,7 +485,16 @@ pub const Inst = struct {
         /// Write a value to a pointer. LHS is pointer, RHS is value.
         /// Result type is always void.
         /// Uses the `bin_op` field.
+        /// The value to store may be undefined, in which case the destination
+        /// memory region has undefined bytes after this instruction is
+        /// evaluated. In such case ignoring this instruction is legal
+        /// lowering.
         store,
+        /// Same as `store`, except if the value to store is undefined, the
+        /// memory region should be filled with 0xaa bytes, and any other
+        /// safety metadata such as Valgrind integrations should be notified of
+        /// this memory region being undefined.
+        store_safe,
         /// Indicates the program counter will never get to this instruction.
         /// Result type is always noreturn; no instructions in a block follow this one.
         unreach,
@@ -639,8 +648,9 @@ pub const Inst = struct {
         /// Result type is always void.
         /// Uses the `bin_op` field. LHS is the dest slice. RHS is the element value.
         /// The element value may be undefined, in which case the destination
-        /// memory region has undefined bytes after this function executes. In
-        /// such case ignoring this instruction is legal lowering.
+        /// memory region has undefined bytes after this instruction is
+        /// evaluated. In such case ignoring this instruction is legal
+        /// lowering.
         /// If the length is compile-time known (due to the destination being a
         /// pointer-to-array), then it is guaranteed to be greater than zero.
         memset,
@@ -1242,6 +1252,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
         .dbg_var_ptr,
         .dbg_var_val,
         .store,
+        .store_safe,
         .fence,
         .atomic_store_unordered,
         .atomic_store_monotonic,
@@ -1423,6 +1434,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index) bool {
         .ret,
         .ret_load,
         .store,
+        .store_safe,
         .unreach,
         .optional_payload_ptr_set,
         .errunion_payload_ptr_set,
src/Liveness.zig
@@ -299,6 +299,7 @@ pub fn categorizeOperand(
         },
 
         .store,
+        .store_safe,
         .atomic_store_unordered,
         .atomic_store_monotonic,
         .atomic_store_release,
@@ -965,6 +966,7 @@ fn analyzeInst(
         .bool_and,
         .bool_or,
         .store,
+        .store_safe,
         .array_elem_val,
         .slice_elem_val,
         .ptr_elem_val,
src/print_air.zig
@@ -140,6 +140,7 @@ const Writer = struct {
             .bool_and,
             .bool_or,
             .store,
+            .store_safe,
             .array_elem_val,
             .slice_elem_val,
             .ptr_elem_val,
src/Sema.zig
@@ -2500,7 +2500,7 @@ fn coerceResultPtr(
 
         // The last one is always `store`.
         const trash_inst = trash_block.instructions.items[trash_block.instructions.items.len - 1];
-        if (air_tags[trash_inst] != .store) {
+        if (air_tags[trash_inst] != .store and air_tags[trash_inst] != .store_safe) {
             // no store instruction is generated for zero sized types
             assert((try sema.typeHasOnePossibleValue(pointee_ty)) != null);
         } else {
@@ -3524,7 +3524,7 @@ fn zirMakePtrConst(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErro
             const candidate = block.instructions.items[search_index];
             switch (air_tags[candidate]) {
                 .dbg_stmt, .dbg_block_begin, .dbg_block_end => continue,
-                .store => break candidate,
+                .store, .store_safe => break candidate,
                 else => break :ct,
             }
         };
@@ -3750,7 +3750,7 @@ fn zirResolveInferredAlloc(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Com
                     const candidate = block.instructions.items[search_index];
                     switch (air_tags[candidate]) {
                         .dbg_stmt, .dbg_block_begin, .dbg_block_end => continue,
-                        .store => break candidate,
+                        .store, .store_safe => break candidate,
                         else => break :ct,
                     }
                 };
@@ -3860,7 +3860,7 @@ fn zirResolveInferredAlloc(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Com
                         assert(replacement_block.instructions.items.len > 0);
                         break :result sub_ptr;
                     },
-                    .store => result: {
+                    .store, .store_safe => result: {
                         const bin_op = sema.air_instructions.items(.data)[placeholder_inst].bin_op;
                         try sema.storePtr2(&replacement_block, src, bin_op.lhs, src, bin_op.rhs, src, .bitcast);
                         break :result .void_value;
@@ -4242,7 +4242,10 @@ fn validateUnionInit(
     while (block_index > 0) : (block_index -= 1) {
         const store_inst = block.instructions.items[block_index];
         if (store_inst == field_ptr_air_inst) break;
-        if (air_tags[store_inst] != .store) continue;
+        switch (air_tags[store_inst]) {
+            .store, .store_safe => {},
+            else => continue,
+        }
         const bin_op = air_datas[store_inst].bin_op;
         var lhs = bin_op.lhs;
         if (Air.refToIndex(lhs)) |lhs_index| {
@@ -4454,7 +4457,10 @@ fn validateStructInit(
                     struct_is_comptime = false;
                     continue :field;
                 }
-                if (air_tags[store_inst] != .store) continue;
+                switch (air_tags[store_inst]) {
+                    .store, .store_safe => {},
+                    else => continue,
+                }
                 const bin_op = air_datas[store_inst].bin_op;
                 var lhs = bin_op.lhs;
                 {
@@ -4682,7 +4688,10 @@ fn zirValidateArrayInit(
                 array_is_comptime = false;
                 continue :outer;
             }
-            if (air_tags[store_inst] != .store) continue;
+            switch (air_tags[store_inst]) {
+                .store, .store_safe => {},
+                else => continue,
+            }
             const bin_op = air_datas[store_inst].bin_op;
             var lhs = bin_op.lhs;
             {
@@ -5025,7 +5034,12 @@ fn zirStoreNode(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!v
 
     const ptr_src: LazySrcLoc = .{ .node_offset_store_ptr = inst_data.src_node };
     const operand_src: LazySrcLoc = .{ .node_offset_store_operand = inst_data.src_node };
-    const air_tag: Air.Inst.Tag = if (is_ret) .ret_ptr else .store;
+    const air_tag: Air.Inst.Tag = if (is_ret)
+        .ret_ptr
+    else if (block.wantSafety())
+        .store_safe
+    else
+        .store;
     return sema.storePtr2(block, src, ptr, ptr_src, operand, operand_src, air_tag);
 }
 
@@ -26704,7 +26718,8 @@ fn storePtr(
     ptr: Air.Inst.Ref,
     uncasted_operand: Air.Inst.Ref,
 ) CompileError!void {
-    return sema.storePtr2(block, src, ptr, src, uncasted_operand, src, .store);
+    const air_tag: Air.Inst.Tag = if (block.wantSafety()) .store_safe else .store;
+    return sema.storePtr2(block, src, ptr, src, uncasted_operand, src, air_tag);
 }
 
 fn storePtr2(