Commit 043b1adb8d

David Rubin <87927264+Rexicon226@users.noreply.github.com>
2024-10-05 00:21:27
remove `@fence` (#21585)
closes #11650
1 parent 163d505
doc/langref.html.in
@@ -4218,11 +4218,10 @@ pub fn print(self: *Writer, arg0: []const u8, arg1: i32) !void {
       {#header_close#}
 
       {#header_open|Atomics#}
-      <p>TODO: @fence()</p>
       <p>TODO: @atomic rmw</p>
       <p>TODO: builtin atomic memory ordering enum</p>
 
-      {#see_also|@atomicLoad|@atomicStore|@atomicRmw|@fence|@cmpxchgWeak|@cmpxchgStrong#}
+      {#see_also|@atomicLoad|@atomicStore|@atomicRmw|@cmpxchgWeak|@cmpxchgStrong#}
 
       {#header_close#}
 
@@ -4307,7 +4306,7 @@ comptime {
       an integer or an enum.
       </p>
       <p>{#syntax#}AtomicOrder{#endsyntax#} can be found with {#syntax#}@import("std").builtin.AtomicOrder{#endsyntax#}.</p>
-      {#see_also|@atomicStore|@atomicRmw|@fence|@cmpxchgWeak|@cmpxchgStrong#}
+      {#see_also|@atomicStore|@atomicRmw|@cmpxchgWeak|@cmpxchgStrong#}
       {#header_close#}
 
       {#header_open|@atomicRmw#}
@@ -4322,7 +4321,7 @@ comptime {
       </p>
       <p>{#syntax#}AtomicOrder{#endsyntax#} can be found with {#syntax#}@import("std").builtin.AtomicOrder{#endsyntax#}.</p>
       <p>{#syntax#}AtomicRmwOp{#endsyntax#} can be found with {#syntax#}@import("std").builtin.AtomicRmwOp{#endsyntax#}.</p>
-      {#see_also|@atomicStore|@atomicLoad|@fence|@cmpxchgWeak|@cmpxchgStrong#}
+      {#see_also|@atomicStore|@atomicLoad|@cmpxchgWeak|@cmpxchgStrong#}
       {#header_close#}
 
       {#header_open|@atomicStore#}
@@ -4335,7 +4334,7 @@ comptime {
       an integer or an enum.
       </p>
       <p>{#syntax#}AtomicOrder{#endsyntax#} can be found with {#syntax#}@import("std").builtin.AtomicOrder{#endsyntax#}.</p>
-      {#see_also|@atomicLoad|@atomicRmw|@fence|@cmpxchgWeak|@cmpxchgStrong#}
+      {#see_also|@atomicLoad|@atomicRmw|@cmpxchgWeak|@cmpxchgStrong#}
       {#header_close#}
 
       {#header_open|@bitCast#}
@@ -4568,7 +4567,7 @@ comptime {
       </p>
       <p>{#syntax#}@typeInfo(@TypeOf(ptr)).pointer.alignment{#endsyntax#} must be {#syntax#}>= @sizeOf(T).{#endsyntax#}</p>
       <p>{#syntax#}AtomicOrder{#endsyntax#} can be found with {#syntax#}@import("std").builtin.AtomicOrder{#endsyntax#}.</p>
-      {#see_also|@atomicStore|@atomicLoad|@atomicRmw|@fence|@cmpxchgWeak#}
+      {#see_also|@atomicStore|@atomicLoad|@atomicRmw|@cmpxchgWeak#}
       {#header_close#}
 
       {#header_open|@cmpxchgWeak#}
@@ -4600,7 +4599,7 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
       </p>
       <p>{#syntax#}@typeInfo(@TypeOf(ptr)).pointer.alignment{#endsyntax#} must be {#syntax#}>= @sizeOf(T).{#endsyntax#}</p>
       <p>{#syntax#}AtomicOrder{#endsyntax#} can be found with {#syntax#}@import("std").builtin.AtomicOrder{#endsyntax#}.</p>
-      {#see_also|@atomicStore|@atomicLoad|@atomicRmw|@fence|@cmpxchgStrong#}
+      {#see_also|@atomicStore|@atomicLoad|@atomicRmw|@cmpxchgStrong#}
       {#header_close#}
 
       {#header_open|@compileError#}
@@ -4857,15 +4856,6 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
       {#see_also|@export#}
       {#header_close#}
 
-      {#header_open|@fence#}
-      <pre>{#syntax#}@fence(order: AtomicOrder) void{#endsyntax#}</pre>
-      <p>
-      The {#syntax#}fence{#endsyntax#} function is used to introduce happens-before edges between operations.
-      </p>
-      <p>{#syntax#}AtomicOrder{#endsyntax#} can be found with {#syntax#}@import("std").builtin.AtomicOrder{#endsyntax#}.</p>
-      {#see_also|@atomicStore|@atomicLoad|@atomicRmw|@cmpxchgWeak|@cmpxchgStrong#}
-      {#header_close#}
-
       {#header_open|@field#}
       <pre>{#syntax#}@field(lhs: anytype, comptime field_name: []const u8) (field){#endsyntax#}</pre>
       <p>Performs field access by a compile-time string. Works on both fields and declarations.
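Migration note: code that used `@fence` can usually fold the ordering into the atomic accesses themselves. A minimal sketch of the before/after shape (the `flag` and `data` names are illustrative, not from this commit):

    const std = @import("std");

    var flag = std.atomic.Value(u32).init(0);
    var data: u32 = 0;

    fn publish() void {
        data = 42;
        // previously: plain store, @fence(.release), then a monotonic store;
        // the release store now carries the ordering by itself.
        flag.store(1, .release);
    }

    fn consume() ?u32 {
        // previously: a monotonic load followed by @fence(.acquire);
        // the acquire load pairs directly with the release store above.
        if (flag.load(.acquire) == 1) return data;
        return null;
    }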
lib/std/Thread/Futex.zig
@@ -794,9 +794,8 @@ const PosixImpl = struct {
         // - T1: bumps pending waiters (was reordered after the ptr == expect check)
         // - T1: goes to sleep and misses both the ptr change and T2's wake up
         //
-        // seq_cst as Acquire barrier to ensure the announcement happens before the ptr check below.
-        // seq_cst as shared modification order to form a happens-before edge with the fence(.seq_cst)+load() in wake().
-        var pending = bucket.pending.fetchAdd(1, .seq_cst);
+        // acquire barrier to ensure the announcement happens before the ptr check below.
+        var pending = bucket.pending.fetchAdd(1, .acquire);
         assert(pending < std.math.maxInt(usize));
 
         // If the wait gets cancelled, remove the pending count we previously added.
@@ -858,15 +857,8 @@ const PosixImpl = struct {
         //
         // What we really want here is a Release load, but that doesn't exist under the C11 memory model.
         // We could instead do `bucket.pending.fetchAdd(0, Release) == 0` which achieves effectively the same thing,
-        // but the RMW operation unconditionally marks the cache-line as modified for others causing unnecessary fetching/contention.
-        //
-        // Instead we opt to do a full-fence + load instead which avoids taking ownership of the cache-line.
-        // fence(seq_cst) effectively converts the ptr update to seq_cst and the pending load to seq_cst: creating a Store-Load barrier.
-        //
-        // The pending count increment in wait() must also now use seq_cst for the update + this pending load
-        // to be in the same modification order as our load isn't using release/acquire to guarantee it.
-        bucket.pending.fence(.seq_cst);
-        if (bucket.pending.load(.monotonic) == 0) {
+        // LLVM lowers the fetchAdd(0, .release) into an mfence+load which avoids gaining ownership of the cache-line.
+        if (bucket.pending.fetchAdd(0, .release) == 0) {
             return;
         }
 
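The C11 model has no release load, so the no-op RMW above stands in for one. The idiom in isolation, as a sketch (the `pending` counter here is illustrative):

    const std = @import("std");

    var pending = std.atomic.Value(usize).init(0);

    fn anyWaiters() bool {
        // fetchAdd(0, .release) reads the counter while ordering the
        // preceding release-sequenced writes before it; per the comment
        // above, LLVM lowers this to mfence+load on x86-64 rather than a
        // locked RMW, so the cache line is not taken for ownership.
        return pending.fetchAdd(0, .release) != 0;
    }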
@@ -979,15 +971,14 @@ test "broadcasting" {
         fn wait(self: *@This()) !void {
             // Decrement the counter.
             // Release ensures stuff before this barrier.wait() happens before the last one.
-            const count = self.count.fetchSub(1, .release);
+            // Acquire for the last counter ensures stuff before previous barrier.wait()s happened before it.
+            const count = self.count.fetchSub(1, .acq_rel);
             try testing.expect(count <= num_threads);
             try testing.expect(count > 0);
 
             // First counter to reach zero wakes all other threads.
-            // Acquire for the last counter ensures stuff before previous barrier.wait()s happened before it.
             // Release on futex update ensures stuff before all barrier.wait()'s happens before they all return.
             if (count - 1 == 0) {
-                _ = self.count.load(.acquire); // TODO: could be fence(acquire) if not for TSAN
                 self.futex.store(1, .release);
                 Futex.wake(&self.futex, num_threads - 1);
                 return;
lib/std/Thread/ResetEvent.zig
@@ -112,9 +112,9 @@ const FutexImpl = struct {
         // Try to set the state from `unset` to `waiting` to indicate
         // to the set() thread that others are blocked on the ResetEvent.
         // We avoid using any strict barriers until the end when we know the ResetEvent is set.
-        var state = self.state.load(.monotonic);
+        var state = self.state.load(.acquire);
         if (state == unset) {
-            state = self.state.cmpxchgStrong(state, waiting, .monotonic, .monotonic) orelse waiting;
+            state = self.state.cmpxchgStrong(state, waiting, .acquire, .acquire) orelse waiting;
         }
 
         // Wait until the ResetEvent is set since the state is waiting.
@@ -124,7 +124,7 @@ const FutexImpl = struct {
                 const wait_result = futex_deadline.wait(&self.state, waiting);
 
                 // Check if the ResetEvent was set before possibly reporting error.Timeout below.
-                state = self.state.load(.monotonic);
+                state = self.state.load(.acquire);
                 if (state != waiting) {
                     break;
                 }
@@ -133,9 +133,7 @@ const FutexImpl = struct {
             }
         }
 
-        // Acquire barrier ensures memory accesses before set() happen before we return.
         assert(state == is_set);
-        self.state.fence(.acquire);
     }
 
     fn set(self: *Impl) void {
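With the trailing `fence(.acquire)` removed, every load that can observe the set state carries acquire ordering itself and pairs with the release store in `set()`. Condensed shape, as a sketch (the state constants are illustrative):

    const std = @import("std");

    const unset: u32 = 0;
    const is_set: u32 = 2;

    var state = std.atomic.Value(u32).init(unset);

    fn isSet() bool {
        // load(.acquire) replaces load(.monotonic) + fence(.acquire):
        // once is_set is observed, everything before the matching
        // state.store(is_set, .release) in set() is visible.
        return state.load(.acquire) == is_set;
    }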
lib/std/Thread/WaitGroup.zig
@@ -15,11 +15,10 @@ pub fn start(self: *WaitGroup) void {
 }
 
 pub fn finish(self: *WaitGroup) void {
-    const state = self.state.fetchSub(one_pending, .release);
+    const state = self.state.fetchSub(one_pending, .acq_rel);
     assert((state / one_pending) > 0);
 
     if (state == (one_pending | is_waiting)) {
-        self.state.fence(.acquire);
         self.event.set();
     }
 }
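Folding the conditional acquire fence into the decrement makes the final `finish()` both release its own work and acquire every earlier finisher's work in one operation. A sketch of the pattern (the names and plain counter are illustrative; the real WaitGroup packs an is_waiting bit into `state`):

    const std = @import("std");

    var pending = std.atomic.Value(usize).init(0);

    fn finishOne(event: *std.Thread.ResetEvent) void {
        // acq_rel: release our writes to the waiter, and acquire the
        // writes made before every earlier finishOne().
        if (pending.fetchSub(1, .acq_rel) == 1) {
            event.set();
        }
    }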
lib/std/zig/AstGen.zig
@@ -2901,7 +2901,6 @@ fn addEnsureResult(gz: *GenZir, maybe_unused_result: Zir.Inst.Ref, statement: As
             .extended => switch (gz.astgen.instructions.items(.data)[@intFromEnum(inst)].extended.opcode) {
                 .breakpoint,
                 .disable_instrumentation,
-                .fence,
                 .set_float_mode,
                 .set_align_stack,
                 .branch_hint,
@@ -9307,15 +9306,6 @@ fn builtinCall(
             });
             return rvalue(gz, ri, result, node);
         },
-        .fence => {
-            const atomic_order_ty = try gz.addBuiltinValue(node, .atomic_order);
-            const order = try expr(gz, scope, .{ .rl = .{ .coerced_ty = atomic_order_ty } }, params[0]);
-            _ = try gz.addExtendedPayload(.fence, Zir.Inst.UnNode{
-                .node = gz.nodeIndexToRelative(node),
-                .operand = order,
-            });
-            return rvalue(gz, ri, .void_value, node);
-        },
         .set_float_mode => {
             const float_mode_ty = try gz.addBuiltinValue(node, .float_mode);
             const order = try expr(gz, scope, .{ .rl = .{ .coerced_ty = float_mode_ty } }, params[0]);
lib/std/zig/AstRlAnnotate.zig
@@ -908,7 +908,6 @@ fn builtinCall(astrl: *AstRlAnnotate, block: ?*Block, ri: ResultInfo, node: Ast.
         .c_include,
         .wasm_memory_size,
         .splat,
-        .fence,
         .set_float_mode,
         .set_align_stack,
         .type_info,
lib/std/zig/BuiltinFn.zig
@@ -48,7 +48,6 @@ pub const Tag = enum {
     error_cast,
     @"export",
     @"extern",
-    fence,
     field,
     field_parent_ptr,
     float_cast,
@@ -500,13 +499,6 @@ pub const list = list: {
                 .param_count = 2,
             },
         },
-        .{
-            "@fence",
-            .{
-                .tag = .fence,
-                .param_count = 1,
-            },
-        },
         .{
             "@field",
             .{
lib/std/zig/Zir.zig
@@ -1575,7 +1575,7 @@ pub const Inst = struct {
                 => false,
 
                 .extended => switch (data.extended.opcode) {
-                    .fence, .branch_hint, .breakpoint, .disable_instrumentation => true,
+                    .branch_hint, .breakpoint, .disable_instrumentation => true,
                     else => false,
                 },
             };
@@ -1979,9 +1979,6 @@ pub const Inst = struct {
         /// The `@prefetch` builtin.
         /// `operand` is payload index to `BinNode`.
         prefetch,
-        /// Implements the `@fence` builtin.
-        /// `operand` is payload index to `UnNode`.
-        fence,
         /// Implement builtin `@setFloatMode`.
         /// `operand` is payload index to `UnNode`.
         set_float_mode,
@@ -4014,7 +4011,6 @@ fn findDeclsInner(
                 .wasm_memory_size,
                 .wasm_memory_grow,
                 .prefetch,
-                .fence,
                 .set_float_mode,
                 .set_align_stack,
                 .error_cast,
lib/std/atomic.zig
@@ -10,31 +10,7 @@ pub fn Value(comptime T: type) type {
             return .{ .raw = value };
         }
 
-        /// Perform an atomic fence which uses the atomic value as a hint for
-        /// the modification order. Use this when you want to imply a fence on
-        /// an atomic variable without necessarily performing a memory access.
-        pub inline fn fence(self: *Self, comptime order: AtomicOrder) void {
-            // LLVM's ThreadSanitizer doesn't support the normal fences so we specialize for it.
-            if (builtin.sanitize_thread) {
-                const tsan = struct {
-                    extern "c" fn __tsan_acquire(addr: *anyopaque) void;
-                    extern "c" fn __tsan_release(addr: *anyopaque) void;
-                };
-
-                const addr: *anyopaque = self;
-                return switch (order) {
-                    .unordered, .monotonic => @compileError(@tagName(order) ++ " only applies to atomic loads and stores"),
-                    .acquire => tsan.__tsan_acquire(addr),
-                    .release => tsan.__tsan_release(addr),
-                    .acq_rel, .seq_cst => {
-                        tsan.__tsan_acquire(addr);
-                        tsan.__tsan_release(addr);
-                    },
-                };
-            }
-
-            return @fence(order);
-        }
+        pub const fence = @compileError("@fence is deprecated, use other atomics to establish ordering");
 
         pub inline fn load(self: *const Self, comptime order: AtomicOrder) T {
             return @atomicLoad(T, &self.raw, order);
@@ -148,21 +124,19 @@ test Value {
         const RefCount = @This();
 
         fn ref(rc: *RefCount) void {
-            // No ordering necessary; just updating a counter.
+            // no synchronization necessary; just updating a counter.
             _ = rc.count.fetchAdd(1, .monotonic);
         }
 
         fn unref(rc: *RefCount) void {
-            // Release ensures code before unref() happens-before the
+            // release ensures code before unref() happens-before the
             // count is decremented as dropFn could be called by then.
             if (rc.count.fetchSub(1, .release) == 1) {
-                // acquire ensures count decrement and code before
-                // previous unrefs()s happens-before we call dropFn
-                // below.
-                // Another alternative is to use .acq_rel on the
-                // fetchSub count decrement but it's extra barrier in
-                // possibly hot path.
-                rc.count.fence(.acquire);
+                // seeing 1 in the counter means that other unref()s have happened,
+                // but it doesn't mean that uses before each unref() are visible.
+                // The load acquires the release-sequence created by previous unref()s
+                // in order to ensure visibility of uses before dropping.
+                _ = rc.count.load(.acquire);
                 (rc.dropFn)(rc);
             }
         }
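The acquire load joins the release sequence formed by the earlier `fetchSub(1, .release)` calls, which is what makes prior uses of the object visible before teardown. The same idiom outside the test, as a sketch (the `Object` and `deinit` names are illustrative):

    const std = @import("std");

    const Object = struct {
        refs: std.atomic.Value(usize) = std.atomic.Value(usize).init(1),

        fn unref(o: *Object) void {
            if (o.refs.fetchSub(1, .release) == 1) {
                // Acquire the release sequence of all prior unref()s so
                // their uses of `o` happen-before the teardown below.
                _ = o.refs.load(.acquire);
                o.deinit();
            }
        }

        fn deinit(o: *Object) void {
            _ = o; // release resources here
        }
    };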
lib/zig.h
@@ -3610,7 +3610,6 @@ typedef enum memory_order zig_memory_order;
 #define zig_atomicrmw_add_float zig_atomicrmw_add
 #undef  zig_atomicrmw_sub_float
 #define zig_atomicrmw_sub_float zig_atomicrmw_sub
-#define zig_fence(order) atomic_thread_fence(order)
 #elif defined(__GNUC__)
 typedef int zig_memory_order;
 #define zig_memory_order_relaxed __ATOMIC_RELAXED
@@ -3634,7 +3633,6 @@ typedef int zig_memory_order;
 #define    zig_atomic_load(res, obj,      order, Type, ReprType)       __atomic_load      (obj, &(res), order)
 #undef  zig_atomicrmw_xchg_float
 #define zig_atomicrmw_xchg_float zig_atomicrmw_xchg
-#define zig_fence(order) __atomic_thread_fence(order)
 #elif _MSC_VER && (_M_IX86 || _M_X64)
 #define zig_memory_order_relaxed 0
 #define zig_memory_order_acquire 2
@@ -3655,11 +3653,6 @@ typedef int zig_memory_order;
 #define  zig_atomicrmw_max(res, obj, arg, order, Type, ReprType) res = zig_msvc_atomicrmw_max_ ##Type(obj, arg)
 #define   zig_atomic_store(     obj, arg, order, Type, ReprType)       zig_msvc_atomic_store_  ##Type(obj, arg)
 #define    zig_atomic_load(res, obj,      order, Type, ReprType) res = zig_msvc_atomic_load_   ##order##_##Type(obj)
-#if _M_X64
-#define zig_fence(order) __faststorefence()
-#else
-#define zig_fence(order) zig_msvc_atomic_barrier()
-#endif
 /* TODO: _MSC_VER && (_M_ARM || _M_ARM64) */
 #else
 #define zig_memory_order_relaxed 0
@@ -3681,7 +3674,6 @@ typedef int zig_memory_order;
 #define  zig_atomicrmw_max(res, obj, arg, order, Type, ReprType) zig_atomics_unavailable
 #define   zig_atomic_store(     obj, arg, order, Type, ReprType) zig_atomics_unavailable
 #define    zig_atomic_load(res, obj,      order, Type, ReprType) zig_atomics_unavailable
-#define zig_fence(order) zig_fence_unavailable
 #endif
 
 #if _MSC_VER && (_M_IX86 || _M_X64)
src/Air/types_resolved.zig
@@ -416,7 +416,6 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool {
             .work_item_id,
             .work_group_size,
             .work_group_id,
-            .fence,
             .dbg_stmt,
             .err_return_trace,
             .save_err_return_trace_index,
src/arch/aarch64/CodeGen.zig
@@ -739,7 +739,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .breakpoint      => try self.airBreakpoint(),
             .ret_addr        => try self.airRetAddr(inst),
             .frame_addr      => try self.airFrameAddress(inst),
-            .fence           => try self.airFence(),
             .cond_br         => try self.airCondBr(inst),
             .fptrunc         => try self.airFptrunc(inst),
             .fpext           => try self.airFpext(inst),
@@ -4264,11 +4263,6 @@ fn airFrameAddress(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, result, .{ .none, .none, .none });
 }
 
-fn airFence(self: *Self) !void {
-    return self.fail("TODO implement fence() for {}", .{self.target.cpu.arch});
-    //return self.finishAirBookkeeping();
-}
-
 fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void {
     if (modifier == .always_tail) return self.fail("TODO implement tail calls for aarch64", .{});
     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
src/arch/arm/CodeGen.zig
@@ -726,7 +726,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .breakpoint      => try self.airBreakpoint(),
             .ret_addr        => try self.airRetAddr(inst),
             .frame_addr      => try self.airFrameAddress(inst),
-            .fence           => try self.airFence(),
             .cond_br         => try self.airCondBr(inst),
             .fptrunc         => try self.airFptrunc(inst),
             .fpext           => try self.airFpext(inst),
@@ -4244,11 +4243,6 @@ fn airFrameAddress(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, result, .{ .none, .none, .none });
 }
 
-fn airFence(self: *Self) !void {
-    return self.fail("TODO implement fence() for {}", .{self.target.cpu.arch});
-    //return self.finishAirBookkeeping();
-}
-
 fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void {
     if (modifier == .always_tail) return self.fail("TODO implement tail calls for arm", .{});
     const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
src/arch/riscv64/CodeGen.zig
@@ -1593,7 +1593,6 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void {
             .breakpoint      => try func.airBreakpoint(),
             .ret_addr        => try func.airRetAddr(inst),
             .frame_addr      => try func.airFrameAddress(inst),
-            .fence           => try func.airFence(inst),
             .cond_br         => try func.airCondBr(inst),
             .dbg_stmt        => try func.airDbgStmt(inst),
             .fptrunc         => try func.airFptrunc(inst),
@@ -4833,26 +4832,6 @@ fn airFrameAddress(func: *Func, inst: Air.Inst.Index) !void {
     return func.finishAir(inst, dst_mcv, .{ .none, .none, .none });
 }
 
-fn airFence(func: *Func, inst: Air.Inst.Index) !void {
-    const order = func.air.instructions.items(.data)[@intFromEnum(inst)].fence;
-    const pred: Mir.Barrier, const succ: Mir.Barrier = switch (order) {
-        .unordered, .monotonic => unreachable,
-        .acquire => .{ .r, .rw },
-        .release => .{ .rw, .r },
-        .acq_rel => .{ .rw, .rw },
-        .seq_cst => .{ .rw, .rw },
-    };
-
-    _ = try func.addInst(.{
-        .tag = if (order == .acq_rel) .fencetso else .fence,
-        .data = .{ .fence = .{
-            .pred = pred,
-            .succ = succ,
-        } },
-    });
-    return func.finishAirBookkeeping();
-}
-
 fn airCall(func: *Func, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void {
     if (modifier == .always_tail) return func.fail("TODO implement tail calls for riscv64", .{});
     const pl_op = func.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
src/arch/sparc64/CodeGen.zig
@@ -581,7 +581,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .breakpoint      => try self.airBreakpoint(),
             .ret_addr        => @panic("TODO try self.airRetAddr(inst)"),
             .frame_addr      => @panic("TODO try self.airFrameAddress(inst)"),
-            .fence           => try self.airFence(inst),
             .cond_br         => try self.airCondBr(inst),
             .fptrunc         => @panic("TODO try self.airFptrunc(inst)"),
             .fpext           => @panic("TODO try self.airFpext(inst)"),
@@ -1693,29 +1692,6 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
-fn airFence(self: *Self, inst: Air.Inst.Index) !void {
-    // TODO weaken this as needed, currently this implements the strongest membar form
-    const fence = self.air.instructions.items(.data)[@intFromEnum(inst)].fence;
-    _ = fence;
-
-    // membar #StoreStore | #LoadStore | #StoreLoad | #LoadLoad
-    _ = try self.addInst(.{
-        .tag = .membar,
-        .data = .{
-            .membar_mask = .{
-                .mmask = .{
-                    .store_store = true,
-                    .store_load = true,
-                    .load_store = true,
-                    .load_load = true,
-                },
-            },
-        },
-    });
-
-    return self.finishAir(inst, .dead, .{ .none, .none, .none });
-}
-
 fn airIntFromFloat(self: *Self, inst: Air.Inst.Index) !void {
     const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
     const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airIntFromFloat for {}", .{
src/arch/wasm/CodeGen.zig
@@ -2040,7 +2040,6 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
         .atomic_rmw => func.airAtomicRmw(inst),
         .cmpxchg_weak => func.airCmpxchg(inst),
         .cmpxchg_strong => func.airCmpxchg(inst),
-        .fence => func.airFence(inst),
 
         .add_optimized,
         .sub_optimized,
@@ -7742,20 +7741,6 @@ fn airAtomicRmw(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     }
 }
 
-fn airFence(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
-    const pt = func.pt;
-    const zcu = pt.zcu;
-    // Only when the atomic feature is enabled, and we're not building
-    // for a single-threaded build, can we emit the `fence` instruction.
-    // In all other cases, we emit no instructions for a fence.
-    const single_threaded = zcu.navFileScope(func.owner_nav).mod.single_threaded;
-    if (func.useAtomicFeature() and !single_threaded) {
-        try func.addAtomicTag(.atomic_fence);
-    }
-
-    return func.finishAir(inst, .none, &.{});
-}
-
 fn airAtomicStore(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     const pt = func.pt;
     const zcu = pt.zcu;
src/arch/x86_64/CodeGen.zig
@@ -2294,7 +2294,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .breakpoint      => try self.airBreakpoint(),
             .ret_addr        => try self.airRetAddr(inst),
             .frame_addr      => try self.airFrameAddress(inst),
-            .fence           => try self.airFence(inst),
             .cond_br         => try self.airCondBr(inst),
             .fptrunc         => try self.airFptrunc(inst),
             .fpext           => try self.airFpext(inst),
@@ -12251,16 +12250,6 @@ fn airFrameAddress(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, dst_mcv, .{ .none, .none, .none });
 }
 
-fn airFence(self: *Self, inst: Air.Inst.Index) !void {
-    const order = self.air.instructions.items(.data)[@intFromEnum(inst)].fence;
-    switch (order) {
-        .unordered, .monotonic => unreachable,
-        .acquire, .release, .acq_rel => {},
-        .seq_cst => try self.asmOpOnly(.{ ._, .mfence }),
-    }
-    self.finishAirBookkeeping();
-}
-
 fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void {
     if (modifier == .always_tail) return self.fail("TODO implement tail calls for x86_64", .{});
 
src/codegen/c.zig
@@ -3144,7 +3144,6 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail,
             .breakpoint => try airBreakpoint(f.object.writer()),
             .ret_addr   => try airRetAddr(f, inst),
             .frame_addr => try airFrameAddress(f, inst),
-            .fence      => try airFence(f, inst),
 
             .ptr_add => try airPtrAddSub(f, inst, '+'),
             .ptr_sub => try airPtrAddSub(f, inst, '-'),
@@ -4988,17 +4987,6 @@ fn airFrameAddress(f: *Function, inst: Air.Inst.Index) !CValue {
     return local;
 }
 
-fn airFence(f: *Function, inst: Air.Inst.Index) !CValue {
-    const atomic_order = f.air.instructions.items(.data)[@intFromEnum(inst)].fence;
-    const writer = f.object.writer();
-
-    try writer.writeAll("zig_fence(");
-    try writeMemoryOrder(writer, atomic_order);
-    try writer.writeAll(");\n");
-
-    return .none;
-}
-
 fn airUnreach(f: *Function) !void {
     // Not even allowed to call unreachable in a naked function.
     if (f.object.dg.is_naked_fn) return;
src/codegen/llvm.zig
@@ -5139,7 +5139,6 @@ pub const FuncGen = struct {
                 .float_from_int => try self.airFloatFromInt(inst),
                 .cmpxchg_weak   => try self.airCmpxchg(inst, .weak),
                 .cmpxchg_strong => try self.airCmpxchg(inst, .strong),
-                .fence          => try self.airFence(inst),
                 .atomic_rmw     => try self.airAtomicRmw(inst),
                 .atomic_load    => try self.airAtomicLoad(inst),
                 .memset         => try self.airMemset(inst, false),
@@ -9666,13 +9665,6 @@ pub const FuncGen = struct {
         return self.wip.cast(.ptrtoint, result, try o.lowerType(Type.usize), "");
     }
 
-    fn airFence(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
-        const atomic_order = self.air.instructions.items(.data)[@intFromEnum(inst)].fence;
-        const ordering = toLlvmAtomicOrdering(atomic_order);
-        _ = try self.wip.fence(self.sync_scope, ordering);
-        return .none;
-    }
-
     fn airCmpxchg(
         self: *FuncGen,
         inst: Air.Inst.Index,
src/Liveness/Verify.zig
@@ -56,7 +56,6 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
             .ret_ptr,
             .breakpoint,
             .dbg_stmt,
-            .fence,
             .ret_addr,
             .frame_addr,
             .wasm_memory_size,
src/Air.zig
@@ -734,10 +734,6 @@ pub const Inst = struct {
         cmpxchg_weak,
         /// Uses the `ty_pl` field with payload `Cmpxchg`.
         cmpxchg_strong,
-        /// Lowers to a memory fence instruction.
-        /// Result type is always void.
-        /// Uses the `fence` field.
-        fence,
         /// Atomically load from a pointer.
         /// Result type is the element type of the pointer.
         /// Uses the `atomic_load` field.
@@ -1066,7 +1062,6 @@ pub const Inst = struct {
             line: u32,
             column: u32,
         },
-        fence: std.builtin.AtomicOrder,
         atomic_load: struct {
             ptr: Ref,
             order: std.builtin.AtomicOrder,
@@ -1478,7 +1473,6 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
         .dbg_arg_inline,
         .store,
         .store_safe,
-        .fence,
         .atomic_store_unordered,
         .atomic_store_monotonic,
         .atomic_store_release,
@@ -1653,7 +1647,6 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool {
         .memcpy,
         .cmpxchg_weak,
         .cmpxchg_strong,
-        .fence,
         .atomic_store_unordered,
         .atomic_store_monotonic,
         .atomic_store_release,
src/Liveness.zig
@@ -346,8 +346,6 @@ pub fn categorizeOperand(
         .work_group_id,
         => return .none,
 
-        .fence => return .write,
-
         .not,
         .bitcast,
         .load,
@@ -975,7 +973,6 @@ fn analyzeInst(
         .ret_ptr,
         .breakpoint,
         .dbg_stmt,
-        .fence,
         .ret_addr,
         .frame_addr,
         .wasm_memory_size,
src/print_air.zig
@@ -303,7 +303,6 @@ const Writer = struct {
             .try_ptr, .try_ptr_cold => try w.writeTryPtr(s, inst),
             .loop_switch_br, .switch_br => try w.writeSwitchBr(s, inst),
             .cmpxchg_weak, .cmpxchg_strong => try w.writeCmpxchg(s, inst),
-            .fence => try w.writeFence(s, inst),
             .atomic_load => try w.writeAtomicLoad(s, inst),
             .prefetch => try w.writePrefetch(s, inst),
             .atomic_store_unordered => try w.writeAtomicStore(s, inst, .unordered),
@@ -552,12 +551,6 @@ const Writer = struct {
         try w.writeOperand(s, inst, 2, extra.rhs);
     }
 
-    fn writeFence(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
-        const atomic_order = w.air.instructions.items(.data)[@intFromEnum(inst)].fence;
-
-        try s.print("{s}", .{@tagName(atomic_order)});
-    }
-
     fn writeAtomicLoad(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
         const atomic_load = w.air.instructions.items(.data)[@intFromEnum(inst)].atomic_load;
 
src/print_zir.zig
@@ -566,7 +566,6 @@ const Writer = struct {
             .await_nosuspend,
             .c_undef,
             .c_include,
-            .fence,
             .set_float_mode,
             .set_align_stack,
             .wasm_memory_size,
src/Sema.zig
@@ -1321,11 +1321,6 @@ fn analyzeBodyInner(
                     .closure_get        => try sema.zirClosureGet(        block, extended),
                     // zig fmt: on
 
-                    .fence => {
-                        try sema.zirFence(block, extended);
-                        i += 1;
-                        continue;
-                    },
                     .set_float_mode => {
                         try sema.zirSetFloatMode(block, extended);
                         i += 1;
@@ -6556,25 +6551,6 @@ fn zirSetRuntimeSafety(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Compile
     });
 }
 
-fn zirFence(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!void {
-    if (block.is_comptime) return;
-
-    const extra = sema.code.extraData(Zir.Inst.UnNode, extended.operand).data;
-    const order_src = block.builtinCallArgSrc(extra.node, 0);
-    const order = try sema.resolveAtomicOrder(block, order_src, extra.operand, .{
-        .needed_comptime_reason = "atomic order of @fence must be comptime-known",
-    });
-
-    if (@intFromEnum(order) < @intFromEnum(std.builtin.AtomicOrder.acquire)) {
-        return sema.fail(block, order_src, "atomic ordering must be acquire or stricter", .{});
-    }
-
-    _ = try block.addInst(.{
-        .tag = .fence,
-        .data = .{ .fence = order },
-    });
-}
-
 fn zirBreak(sema: *Sema, start_block: *Block, inst: Zir.Inst.Index) CompileError!void {
     const tracy = trace(@src());
     defer tracy.end();
test/behavior/atomics.zig
@@ -37,16 +37,6 @@ fn testCmpxchg() !void {
     try expect(x == 42);
 }
 
-test "fence" {
-    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
-
-    var x: i32 = 1234;
-    @fence(.seq_cst);
-    x = 5678;
-}
-
 test "atomicrmw and atomicload" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
test/behavior/builtin_functions_returning_void_or_noreturn.zig
@@ -16,7 +16,6 @@ test {
     try testing.expectEqual({}, @atomicStore(u8, &val, 0, .unordered));
     try testing.expectEqual(void, @TypeOf(@breakpoint()));
     try testing.expectEqual({}, @export(&x, .{ .name = "x" }));
-    try testing.expectEqual({}, @fence(.acquire));
     try testing.expectEqual({}, @memcpy(@as([*]u8, @ptrFromInt(1))[0..0], @as([*]u8, @ptrFromInt(1))[0..0]));
     try testing.expectEqual({}, @memset(@as([*]u8, @ptrFromInt(1))[0..0], undefined));
     try testing.expectEqual(noreturn, @TypeOf(if (true) @panic("") else {}));
test/cases/compile_errors/atomic_orderings_of_fence_Acquire_or_stricter.zig
@@ -1,9 +0,0 @@
-export fn entry() void {
-    @fence(.monotonic);
-}
-
-// error
-// backend=stage2
-// target=native
-//
-// :2:13: error: atomic ordering must be acquire or stricter