Commit 5fb4a7df38

mlugg <mlugg@mlugg.co.uk>
2024-04-25 04:46:10
Air: add explicit `repeat` instruction to repeat loops
This commit introduces a new AIR instruction, `repeat`, which causes control flow to move back to the start of a given AIR loop. `loop` instructions will no longer automatically perform this operation after control flow reaches the end of the body. The motivation for making this change now was really just consistency with the upcoming implementation of #8220: it wouldn't make sense to have this feature work significantly differently. However, there were already some TODOs kicking around which wanted this feature. It's useful for two key reasons: * It allows loops over AIR instruction bodies to loop precisely until they reach a `noreturn` instruction. This allows for tail calling a few things, and avoiding a range check on each iteration of a hot path, plus gives a nice assertion that validates AIR structure a little. This is a very minor benefit, which this commit does apply to the LLVM and C backends. * It should allow for more compact ZIR and AIR to be emitted by having AstGen emit `repeat` instructions more often rather than having `continue` statements `break` to a `block` which is *followed* by a `repeat`. This is done in status quo because `repeat` instructions only ever cause the direct parent block to repeat. Now that AIR is more flexible, this flexibility can be pretty trivially extended to ZIR, and we can then emit better ZIR. This commit does not implement this. Support for this feature is currently regressed on all self-hosted native backends, including x86_64. This support will be added where necessary before this branch is merged.
1 parent 1b000b9
src/Air/types_resolved.zig
@@ -420,6 +420,7 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool {
             .dbg_stmt,
             .err_return_trace,
             .save_err_return_trace_index,
+            .repeat,
             => {},
         }
     }
src/arch/aarch64/CodeGen.zig
@@ -734,6 +734,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .bitcast         => try self.airBitCast(inst),
             .block           => try self.airBlock(inst),
             .br              => try self.airBr(inst),
+            .repeat          => return self.fail("TODO implement `repeat`", .{}),
             .trap            => try self.airTrap(),
             .breakpoint      => try self.airBreakpoint(),
             .ret_addr        => try self.airRetAddr(inst),
src/arch/arm/CodeGen.zig
@@ -721,6 +721,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .bitcast         => try self.airBitCast(inst),
             .block           => try self.airBlock(inst),
             .br              => try self.airBr(inst),
+            .repeat          => return self.fail("TODO implement `repeat`", .{}),
             .trap            => try self.airTrap(),
             .breakpoint      => try self.airBreakpoint(),
             .ret_addr        => try self.airRetAddr(inst),
src/arch/riscv64/CodeGen.zig
@@ -1579,6 +1579,7 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void {
             .bitcast         => try func.airBitCast(inst),
             .block           => try func.airBlock(inst),
             .br              => try func.airBr(inst),
+            .repeat          => return func.fail("TODO implement `repeat`", .{}),
             .trap            => try func.airTrap(),
             .breakpoint      => try func.airBreakpoint(),
             .ret_addr        => try func.airRetAddr(inst),
src/arch/sparc64/CodeGen.zig
@@ -576,6 +576,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .bitcast         => try self.airBitCast(inst),
             .block           => try self.airBlock(inst),
             .br              => try self.airBr(inst),
+            .repeat          => return self.fail("TODO implement `repeat`", .{}),
             .trap            => try self.airTrap(),
             .breakpoint      => try self.airBreakpoint(),
             .ret_addr        => @panic("TODO try self.airRetAddr(inst)"),
src/arch/wasm/CodeGen.zig
@@ -1903,6 +1903,7 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
         .trap => func.airTrap(inst),
         .breakpoint => func.airBreakpoint(inst),
         .br => func.airBr(inst),
+        .repeat => return func.fail("TODO implement `repeat`", .{}),
         .int_from_bool => func.airIntFromBool(inst),
         .cond_br => func.airCondBr(inst),
         .intcast => func.airIntcast(inst),
src/arch/x86_64/CodeGen.zig
@@ -2247,6 +2247,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .bitcast         => try self.airBitCast(inst),
             .block           => try self.airBlock(inst),
             .br              => try self.airBr(inst),
+            .repeat          => return self.fail("TODO implement `repeat`", .{}),
             .trap            => try self.airTrap(),
             .breakpoint      => try self.airBreakpoint(),
             .ret_addr        => try self.airRetAddr(inst),
src/codegen/c.zig
@@ -3137,11 +3137,9 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail,
 
             .arg      => try airArg(f, inst),
 
-            .trap       => try airTrap(f, f.object.writer()),
             .breakpoint => try airBreakpoint(f.object.writer()),
             .ret_addr   => try airRetAddr(f, inst),
             .frame_addr => try airFrameAddress(f, inst),
-            .unreach    => try airUnreach(f),
             .fence      => try airFence(f, inst),
 
             .ptr_add => try airPtrAddSub(f, inst, '+'),
@@ -3248,21 +3246,13 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail,
             .alloc            => try airAlloc(f, inst),
             .ret_ptr          => try airRetPtr(f, inst),
             .assembly         => try airAsm(f, inst),
-            .block            => try airBlock(f, inst),
             .bitcast          => try airBitcast(f, inst),
             .intcast          => try airIntCast(f, inst),
             .trunc            => try airTrunc(f, inst),
             .int_from_bool      => try airIntFromBool(f, inst),
             .load             => try airLoad(f, inst),
-            .ret              => try airRet(f, inst, false),
-            .ret_safe         => try airRet(f, inst, false), // TODO
-            .ret_load         => try airRet(f, inst, true),
             .store            => try airStore(f, inst, false),
             .store_safe       => try airStore(f, inst, true),
-            .loop             => try airLoop(f, inst),
-            .cond_br          => try airCondBr(f, inst),
-            .br               => try airBr(f, inst),
-            .switch_br        => try airSwitchBr(f, inst),
             .struct_field_ptr => try airStructFieldPtr(f, inst),
             .array_to_slice   => try airArrayToSlice(f, inst),
             .cmpxchg_weak     => try airCmpxchg(f, inst, "weak"),
@@ -3296,14 +3286,8 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail,
             .try_ptr_cold => try airTryPtr(f, inst),
 
             .dbg_stmt => try airDbgStmt(f, inst),
-            .dbg_inline_block => try airDbgInlineBlock(f, inst),
             .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => try airDbgVar(f, inst),
 
-            .call              => try airCall(f, inst, .auto),
-            .call_always_tail  => .none,
-            .call_never_tail   => try airCall(f, inst, .never_tail),
-            .call_never_inline => try airCall(f, inst, .never_inline),
-
             .float_from_int,
             .int_from_float,
             .fptrunc,
@@ -3390,6 +3374,39 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail,
             .work_group_size,
             .work_group_id,
             => unreachable,
+
+            // Instructions that are known to always be `noreturn` based on their tag.
+            .br         => return airBr(f, inst),
+            .repeat     => return airRepeat(f, inst),
+            .cond_br    => return airCondBr(f, inst),
+            .switch_br  => return airSwitchBr(f, inst),
+            .loop       => return airLoop(f, inst),
+            .ret        => return airRet(f, inst, false),
+            .ret_safe   => return airRet(f, inst, false), // TODO
+            .ret_load   => return airRet(f, inst, true),
+            .trap       => return airTrap(f, f.object.writer()),
+            .unreach    => return airUnreach(f),
+
+            // Instructions which may be `noreturn`.
+            .block => res: {
+                const res = try airBlock(f, inst);
+                if (f.typeOfIndex(inst).isNoReturn(zcu)) return;
+                break :res res;
+            },
+            .dbg_inline_block => res: {
+                const res = try airDbgInlineBlock(f, inst);
+                if (f.typeOfIndex(inst).isNoReturn(zcu)) return;
+                break :res res;
+            },
+            // TODO: calls should be in this category! The AIR we emit for them is a bit weird.
+            // The instruction has type `noreturn`, but there are instructions (and maybe a safety
+            // check) following nonetheless. The `unreachable` or safety check should be emitted by
+            // backends instead.
+            .call              => try airCall(f, inst, .auto),
+            .call_always_tail  => .none,
+            .call_never_tail   => try airCall(f, inst, .never_tail),
+            .call_never_inline => try airCall(f, inst, .never_inline),
+
             // zig fmt: on
         };
         if (result_value == .new_local) {
@@ -3401,6 +3418,7 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail,
             else => result_value,
         });
     }
+    unreachable;
 }
 
 fn airSliceField(f: *Function, inst: Air.Inst.Index, is_ptr: bool, field_name: []const u8) !CValue {
@@ -3718,7 +3736,7 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue {
     return local;
 }
 
-fn airRet(f: *Function, inst: Air.Inst.Index, is_ptr: bool) !CValue {
+fn airRet(f: *Function, inst: Air.Inst.Index, is_ptr: bool) !void {
     const pt = f.object.dg.pt;
     const zcu = pt.zcu;
     const un_op = f.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
@@ -3769,7 +3787,6 @@ fn airRet(f: *Function, inst: Air.Inst.Index, is_ptr: bool) !CValue {
         // Not even allowed to return void in a naked function.
         if (!f.object.dg.is_naked_fn) try writer.writeAll("return;\n");
     }
-    return .none;
 }
 
 fn airIntCast(f: *Function, inst: Air.Inst.Index) !CValue {
@@ -4741,7 +4758,7 @@ fn lowerTry(
     return local;
 }
 
-fn airBr(f: *Function, inst: Air.Inst.Index) !CValue {
+fn airBr(f: *Function, inst: Air.Inst.Index) !void {
     const branch = f.air.instructions.items(.data)[@intFromEnum(inst)].br;
     const block = f.blocks.get(branch.block_inst).?;
     const result = block.result;
@@ -4761,7 +4778,12 @@ fn airBr(f: *Function, inst: Air.Inst.Index) !CValue {
     }
 
     try writer.print("goto zig_block_{d};\n", .{block.block_id});
-    return .none;
+}
+
+fn airRepeat(f: *Function, inst: Air.Inst.Index) !void {
+    const repeat = f.air.instructions.items(.data)[@intFromEnum(inst)].repeat;
+    const writer = f.object.writer();
+    try writer.print("goto zig_loop_{d};\n", .{@intFromEnum(repeat.loop_inst)});
 }
 
 fn airBitcast(f: *Function, inst: Air.Inst.Index) !CValue {
@@ -4889,12 +4911,10 @@ fn bitcast(f: *Function, dest_ty: Type, operand: CValue, operand_ty: Type) !CVal
     return local;
 }
 
-fn airTrap(f: *Function, writer: anytype) !CValue {
+fn airTrap(f: *Function, writer: anytype) !void {
     // Not even allowed to call trap in a naked function.
-    if (f.object.dg.is_naked_fn) return .none;
-
+    if (f.object.dg.is_naked_fn) return;
     try writer.writeAll("zig_trap();\n");
-    return .none;
 }
 
 fn airBreakpoint(writer: anytype) !CValue {
@@ -4933,28 +4953,27 @@ fn airFence(f: *Function, inst: Air.Inst.Index) !CValue {
     return .none;
 }
 
-fn airUnreach(f: *Function) !CValue {
+fn airUnreach(f: *Function) !void {
     // Not even allowed to call unreachable in a naked function.
-    if (f.object.dg.is_naked_fn) return .none;
-
+    if (f.object.dg.is_naked_fn) return;
     try f.object.writer().writeAll("zig_unreachable();\n");
-    return .none;
 }
 
-fn airLoop(f: *Function, inst: Air.Inst.Index) !CValue {
+fn airLoop(f: *Function, inst: Air.Inst.Index) !void {
     const ty_pl = f.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
     const loop = f.air.extraData(Air.Block, ty_pl.payload);
     const body: []const Air.Inst.Index = @ptrCast(f.air.extra[loop.end..][0..loop.data.body_len]);
     const writer = f.object.writer();
 
-    try writer.writeAll("for (;;) ");
-    try genBody(f, body); // no need to restore state, we're noreturn
-    try writer.writeByte('\n');
-
-    return .none;
+    // `repeat` instructions matching this loop will branch to
+    // this label. Since we need a label for arbitrary `repeat`
+    // anyway, there's actually no need to use a "real" looping
+    // construct at all!
+    try writer.print("zig_loop_{d}:\n", .{@intFromEnum(inst)});
+    try genBodyInner(f, body); // no need to restore state, we're noreturn
 }
 
-fn airCondBr(f: *Function, inst: Air.Inst.Index) !CValue {
+fn airCondBr(f: *Function, inst: Air.Inst.Index) !void {
     const pl_op = f.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
     const cond = try f.resolveInst(pl_op.operand);
     try reap(f, inst, &.{pl_op.operand});
@@ -4983,11 +5002,9 @@ fn airCondBr(f: *Function, inst: Air.Inst.Index) !CValue {
     // instance) `br` to a block (label).
 
     try genBodyInner(f, else_body);
-
-    return .none;
 }
 
-fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue {
+fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !void {
     const pt = f.object.dg.pt;
     const zcu = pt.zcu;
     const switch_br = f.air.unwrapSwitch(inst);
@@ -5097,7 +5114,6 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue {
 
     f.object.indent_writer.popIndent();
     try writer.writeAll("}\n");
-    return .none;
 }
 
 fn asmInputNeedsLocal(f: *Function, constraint: []const u8, value: CValue) bool {
src/codegen/llvm.zig
@@ -1720,6 +1720,7 @@ pub const Object = struct {
             .arg_inline_index = 0,
             .func_inst_table = .{},
             .blocks = .{},
+            .loops = .{},
             .sync_scope = if (owner_mod.single_threaded) .singlethread else .system,
             .file = file,
             .scope = subprogram,
@@ -4841,6 +4842,9 @@ pub const FuncGen = struct {
         breaks: *BreakList,
     }),
 
+    /// Maps `loop` instructions to the bb to branch to to repeat the loop.
+    loops: std.AutoHashMapUnmanaged(Air.Inst.Index, Builder.Function.Block.Index),
+
     sync_scope: Builder.SyncScope,
 
     const Fuzz = struct {
@@ -4867,6 +4871,7 @@ pub const FuncGen = struct {
         self.wip.deinit();
         self.func_inst_table.deinit(gpa);
         self.blocks.deinit(gpa);
+        self.loops.deinit(gpa);
     }
 
     fn todo(self: *FuncGen, comptime format: []const u8, args: anytype) Error {
@@ -5058,14 +5063,9 @@ pub const FuncGen = struct {
                 .arg            => try self.airArg(inst),
                 .bitcast        => try self.airBitCast(inst),
                 .int_from_bool  => try self.airIntFromBool(inst),
-                .block          => try self.airBlock(inst),
-                .br             => try self.airBr(inst),
-                .switch_br      => try self.airSwitchBr(inst),
-                .trap           => try self.airTrap(inst),
                 .breakpoint     => try self.airBreakpoint(inst),
                 .ret_addr       => try self.airRetAddr(inst),
                 .frame_addr     => try self.airFrameAddress(inst),
-                .cond_br        => try self.airCondBr(inst),
                 .@"try"         => try self.airTry(body[i..], false),
                 .try_cold       => try self.airTry(body[i..], true),
                 .try_ptr        => try self.airTryPtr(inst, false),
@@ -5076,22 +5076,13 @@ pub const FuncGen = struct {
                 .fpext          => try self.airFpext(inst),
                 .int_from_ptr   => try self.airIntFromPtr(inst),
                 .load           => try self.airLoad(body[i..]),
-                .loop           => try self.airLoop(inst),
                 .not            => try self.airNot(inst),
-                .ret            => try self.airRet(inst, false),
-                .ret_safe       => try self.airRet(inst, true),
-                .ret_load       => try self.airRetLoad(inst),
                 .store          => try self.airStore(inst, false),
                 .store_safe     => try self.airStore(inst, true),
                 .assembly       => try self.airAssembly(inst),
                 .slice_ptr      => try self.airSliceField(inst, 0),
                 .slice_len      => try self.airSliceField(inst, 1),
 
-                .call              => try self.airCall(inst, .auto),
-                .call_always_tail  => try self.airCall(inst, .always_tail),
-                .call_never_tail   => try self.airCall(inst, .never_tail),
-                .call_never_inline => try self.airCall(inst, .never_inline),
-
                 .ptr_slice_ptr_ptr => try self.airPtrSliceFieldPtr(inst, 0),
                 .ptr_slice_len_ptr => try self.airPtrSliceFieldPtr(inst, 1),
 
@@ -5176,9 +5167,7 @@ pub const FuncGen = struct {
 
                 .inferred_alloc, .inferred_alloc_comptime => unreachable,
 
-                .unreach  => try self.airUnreach(inst),
                 .dbg_stmt => try self.airDbgStmt(inst),
-                .dbg_inline_block => try self.airDbgInlineBlock(inst),
                 .dbg_var_ptr => try self.airDbgVarPtr(inst),
                 .dbg_var_val => try self.airDbgVarVal(inst, false),
                 .dbg_arg_inline => try self.airDbgVarVal(inst, true),
@@ -5191,10 +5180,50 @@ pub const FuncGen = struct {
                 .work_item_id => try self.airWorkItemId(inst),
                 .work_group_size => try self.airWorkGroupSize(inst),
                 .work_group_id => try self.airWorkGroupId(inst),
+
+                // Instructions that are known to always be `noreturn` based on their tag.
+                .br        => return self.airBr(inst),
+                .repeat    => return self.airRepeat(inst),
+                .cond_br   => return self.airCondBr(inst),
+                .switch_br => return self.airSwitchBr(inst),
+                .loop      => return self.airLoop(inst),
+                .ret       => return self.airRet(inst, false),
+                .ret_safe  => return self.airRet(inst, true),
+                .ret_load  => return self.airRetLoad(inst),
+                .trap      => return self.airTrap(inst),
+                .unreach   => return self.airUnreach(inst),
+
+                // Instructions which may be `noreturn`.
+                .block => res: {
+                    const res = try self.airBlock(inst);
+                    if (self.typeOfIndex(inst).isNoReturn(zcu)) return;
+                    break :res res;
+                },
+                .dbg_inline_block => res: {
+                    const res = try self.airDbgInlineBlock(inst);
+                    if (self.typeOfIndex(inst).isNoReturn(zcu)) return;
+                    break :res res;
+                },
+                .call, .call_always_tail, .call_never_tail, .call_never_inline => |tag| res: {
+                    const res = try self.airCall(inst, switch (tag) {
+                        .call              => .auto,
+                        .call_always_tail  => .always_tail,
+                        .call_never_tail   => .never_tail,
+                        .call_never_inline => .never_inline,
+                        else               => unreachable,
+                    });
+                    // TODO: the AIR we emit for calls is a bit weird - the instruction has
+                    // type `noreturn`, but there are instructions (and maybe a safety check) following
+                    // nonetheless. The `unreachable` or safety check should be emitted by backends instead.
+                    //if (self.typeOfIndex(inst).isNoReturn(mod)) return;
+                    break :res res;
+                },
+
                 // zig fmt: on
             };
             if (val != .none) try self.func_inst_table.putNoClobber(self.gpa, inst.toRef(), val);
         }
+        unreachable;
     }
 
     fn genBodyDebugScope(
@@ -5640,7 +5669,7 @@ pub const FuncGen = struct {
         _ = try fg.wip.@"unreachable"();
     }
 
-    fn airRet(self: *FuncGen, inst: Air.Inst.Index, safety: bool) !Builder.Value {
+    fn airRet(self: *FuncGen, inst: Air.Inst.Index, safety: bool) !void {
         const o = self.ng.object;
         const pt = o.pt;
         const zcu = pt.zcu;
@@ -5675,7 +5704,7 @@ pub const FuncGen = struct {
                     try self.valgrindMarkUndef(self.ret_ptr, len);
                 }
                 _ = try self.wip.retVoid();
-                return .none;
+                return;
             }
 
             const unwrapped_operand = operand.unwrap();
@@ -5684,12 +5713,12 @@ pub const FuncGen = struct {
             // Return value was stored previously
             if (unwrapped_operand == .instruction and unwrapped_ret == .instruction and unwrapped_operand.instruction == unwrapped_ret.instruction) {
                 _ = try self.wip.retVoid();
-                return .none;
+                return;
             }
 
             try self.store(self.ret_ptr, ptr_ty, operand, .none);
             _ = try self.wip.retVoid();
-            return .none;
+            return;
         }
         const fn_info = zcu.typeToFunc(Type.fromInterned(ip.getNav(self.ng.nav_index).typeOf(ip))).?;
         if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
@@ -5701,7 +5730,7 @@ pub const FuncGen = struct {
             } else {
                 _ = try self.wip.retVoid();
             }
-            return .none;
+            return;
         }
 
         const abi_ret_ty = try lowerFnRetTy(o, fn_info);
@@ -5725,29 +5754,29 @@ pub const FuncGen = struct {
                 try self.valgrindMarkUndef(rp, len);
             }
             _ = try self.wip.ret(try self.wip.load(.normal, abi_ret_ty, rp, alignment, ""));
-            return .none;
+            return;
         }
 
         if (isByRef(ret_ty, zcu)) {
             // operand is a pointer however self.ret_ptr is null so that means
             // we need to return a value.
             _ = try self.wip.ret(try self.wip.load(.normal, abi_ret_ty, operand, alignment, ""));
-            return .none;
+            return;
         }
 
         const llvm_ret_ty = operand.typeOfWip(&self.wip);
         if (abi_ret_ty == llvm_ret_ty) {
             _ = try self.wip.ret(operand);
-            return .none;
+            return;
         }
 
         const rp = try self.buildAlloca(llvm_ret_ty, alignment);
         _ = try self.wip.store(.normal, operand, rp, alignment);
         _ = try self.wip.ret(try self.wip.load(.normal, abi_ret_ty, rp, alignment, ""));
-        return .none;
+        return;
     }
 
-    fn airRetLoad(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
+    fn airRetLoad(self: *FuncGen, inst: Air.Inst.Index) !void {
         const o = self.ng.object;
         const pt = o.pt;
         const zcu = pt.zcu;
@@ -5765,17 +5794,17 @@ pub const FuncGen = struct {
             } else {
                 _ = try self.wip.retVoid();
             }
-            return .none;
+            return;
         }
         if (self.ret_ptr != .none) {
             _ = try self.wip.retVoid();
-            return .none;
+            return;
         }
         const ptr = try self.resolveInst(un_op);
         const abi_ret_ty = try lowerFnRetTy(o, fn_info);
         const alignment = ret_ty.abiAlignment(zcu).toLlvm();
         _ = try self.wip.ret(try self.wip.load(.normal, abi_ret_ty, ptr, alignment, ""));
-        return .none;
+        return;
     }
 
     fn airCVaArg(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
@@ -6039,7 +6068,7 @@ pub const FuncGen = struct {
         }
     }
 
-    fn airBr(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
+    fn airBr(self: *FuncGen, inst: Air.Inst.Index) !void {
         const o = self.ng.object;
         const zcu = o.pt.zcu;
         const branch = self.air.instructions.items(.data)[@intFromEnum(inst)].br;
@@ -6055,10 +6084,16 @@ pub const FuncGen = struct {
             try block.breaks.list.append(self.gpa, .{ .bb = self.wip.cursor.block, .val = val });
         } else block.breaks.len += 1;
         _ = try self.wip.br(block.parent_bb);
-        return .none;
     }
 
-    fn airCondBr(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
+    fn airRepeat(self: *FuncGen, inst: Air.Inst.Index) !void {
+        const repeat = self.air.instructions.items(.data)[@intFromEnum(inst)].repeat;
+        const loop_bb = self.loops.get(repeat.loop_inst).?;
+        loop_bb.ptr(&self.wip).incoming += 1;
+        _ = try self.wip.br(loop_bb);
+    }
+
+    fn airCondBr(self: *FuncGen, inst: Air.Inst.Index) !void {
         const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
         const cond = try self.resolveInst(pl_op.operand);
         const extra = self.air.extraData(Air.CondBr, pl_op.payload);
@@ -6117,7 +6152,6 @@ pub const FuncGen = struct {
         try self.genBodyDebugScope(null, else_body, extra.data.branch_hints.else_cov);
 
         // No need to reset the insert cursor since this instruction is noreturn.
-        return .none;
     }
 
     fn airTry(self: *FuncGen, body_tail: []const Air.Inst.Index, err_cold: bool) !Builder.Value {
@@ -6223,7 +6257,7 @@ pub const FuncGen = struct {
         return fg.wip.extractValue(err_union, &.{offset}, "");
     }
 
-    fn airSwitchBr(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
+    fn airSwitchBr(self: *FuncGen, inst: Air.Inst.Index) !void {
         const o = self.ng.object;
 
         const switch_br = self.air.unwrapSwitch(inst);
@@ -6371,31 +6405,20 @@ pub const FuncGen = struct {
         }
 
         // No need to reset the insert cursor since this instruction is noreturn.
-        return .none;
     }
 
-    fn airLoop(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
-        const o = self.ng.object;
-        const zcu = o.pt.zcu;
+    fn airLoop(self: *FuncGen, inst: Air.Inst.Index) !void {
         const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
         const loop = self.air.extraData(Air.Block, ty_pl.payload);
         const body: []const Air.Inst.Index = @ptrCast(self.air.extra[loop.end..][0..loop.data.body_len]);
-        const loop_block = try self.wip.block(2, "Loop");
+        const loop_block = try self.wip.block(1, "Loop"); // `airRepeat` will increment incoming each time
         _ = try self.wip.br(loop_block);
 
+        try self.loops.putNoClobber(self.gpa, inst, loop_block);
+        defer assert(self.loops.remove(inst));
+
         self.wip.cursor = .{ .block = loop_block };
         try self.genBodyDebugScope(null, body, .none);
-
-        // TODO instead of this logic, change AIR to have the property that
-        // every block is guaranteed to end with a noreturn instruction.
-        // Then we can simply rely on the fact that a repeat or break instruction
-        // would have been emitted already. Also the main loop in genBody can
-        // be while(true) instead of for(body), which will eliminate 1 branch on
-        // a hot path.
-        if (body.len == 0 or !self.typeOfIndex(body[body.len - 1]).isNoReturn(zcu)) {
-            _ = try self.wip.br(loop_block);
-        }
-        return .none;
     }
 
     fn airArrayToSlice(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
@@ -6890,10 +6913,9 @@ pub const FuncGen = struct {
         return self.wip.not(operand, "");
     }
 
-    fn airUnreach(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
+    fn airUnreach(self: *FuncGen, inst: Air.Inst.Index) !void {
         _ = inst;
         _ = try self.wip.@"unreachable"();
-        return .none;
     }
 
     fn airDbgStmt(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
@@ -9296,11 +9318,10 @@ pub const FuncGen = struct {
         return fg.load(ptr, ptr_ty);
     }
 
-    fn airTrap(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
+    fn airTrap(self: *FuncGen, inst: Air.Inst.Index) !void {
         _ = inst;
         _ = try self.wip.callIntrinsic(.normal, .none, .trap, &.{}, &.{}, "");
         _ = try self.wip.@"unreachable"();
-        return .none;
     }
 
     fn airBreakpoint(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
src/codegen/spirv.zig
@@ -3340,6 +3340,7 @@ const NavGen = struct {
             .store, .store_safe => return self.airStore(inst),
 
             .br             => return self.airBr(inst),
+            .repeat         => return self.fail("TODO implement `repeat`", .{}),
             .breakpoint     => return,
             .cond_br        => return self.airCondBr(inst),
             .loop           => return self.airLoop(inst),
src/Liveness/Verify.zig
@@ -1,28 +1,38 @@
-//! Verifies that liveness information is valid.
+//! Verifies that Liveness information is valid.
 
 gpa: std.mem.Allocator,
 air: Air,
 liveness: Liveness,
 live: LiveMap = .{},
 blocks: std.AutoHashMapUnmanaged(Air.Inst.Index, LiveMap) = .{},
+loops: std.AutoHashMapUnmanaged(Air.Inst.Index, LiveMap) = .{},
 intern_pool: *const InternPool,
 
 pub const Error = error{ LivenessInvalid, OutOfMemory };
 
 pub fn deinit(self: *Verify) void {
     self.live.deinit(self.gpa);
-    var block_it = self.blocks.valueIterator();
-    while (block_it.next()) |block| block.deinit(self.gpa);
-    self.blocks.deinit(self.gpa);
+    {
+        var it = self.blocks.valueIterator();
+        while (it.next()) |block| block.deinit(self.gpa);
+        self.blocks.deinit(self.gpa);
+    }
+    {
+        var it = self.loops.valueIterator();
+        while (it.next()) |block| block.deinit(self.gpa);
+        self.loops.deinit(self.gpa);
+    }
     self.* = undefined;
 }
 
 pub fn verify(self: *Verify) Error!void {
     self.live.clearRetainingCapacity();
     self.blocks.clearRetainingCapacity();
+    self.loops.clearRetainingCapacity();
     try self.verifyBody(self.air.getMainBody());
     // We don't care about `self.live` now, because the loop body was noreturn - everything being dead was checked on `ret` etc
     assert(self.blocks.count() == 0);
+    assert(self.loops.count() == 0);
 }
 
 const LiveMap = std.AutoHashMapUnmanaged(Air.Inst.Index, void);
@@ -430,6 +440,13 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
                 }
                 try self.verifyInst(inst);
             },
+            .repeat => {
+                const repeat = data[@intFromEnum(inst)].repeat;
+                const expected_live = self.loops.get(repeat.loop_inst) orelse
+                    return invalid("%{}: loop %{} not in scope", .{ @intFromEnum(inst), @intFromEnum(repeat.loop_inst) });
+
+                try self.verifyMatchingLiveness(repeat.loop_inst, expected_live);
+            },
             .block, .dbg_inline_block => |tag| {
                 const ty_pl = data[@intFromEnum(inst)].ty_pl;
                 const block_ty = ty_pl.ty.toType();
@@ -475,14 +492,17 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
                 const extra = self.air.extraData(Air.Block, ty_pl.payload);
                 const loop_body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]);
 
-                var live = try self.live.clone(self.gpa);
-                defer live.deinit(self.gpa);
+                // The same stuff should be alive after the loop as before it.
+                const gop = try self.loops.getOrPut(self.gpa, inst);
+                defer {
+                    var live = self.loops.fetchRemove(inst).?;
+                    live.value.deinit(self.gpa);
+                }
+                if (gop.found_existing) return invalid("%{}: loop already exists", .{@intFromEnum(inst)});
+                gop.value_ptr.* = try self.live.clone(self.gpa);
 
                 try self.verifyBody(loop_body);
 
-                // The same stuff should be alive after the loop as before it
-                try self.verifyMatchingLiveness(inst, live);
-
                 try self.verifyInstOperands(inst, .{ .none, .none, .none });
             },
             .cond_br => {
src/Air.zig
@@ -274,13 +274,15 @@ pub const Inst = struct {
         /// is to encounter a `br` that targets this `block`.  If the `block` type is `noreturn`,
         /// then there do not exist any `br` instructions targeting this `block`.
         block,
-        /// A labeled block of code that loops forever. At the end of the body it is implied
-        /// to repeat; no explicit "repeat" instruction terminates loop bodies.
+        /// A labeled block of code that loops forever. The body must be `noreturn`: loops
+        /// occur through an explicit `repeat` instruction pointing back to this one.
         /// Result type is always `noreturn`; no instructions in a block follow this one.
-        /// The body never ends with a `noreturn` instruction, so the "repeat" operation
-        /// is always statically reachable.
+        /// There is always at least one `repeat` instruction referencing the loop.
         /// Uses the `ty_pl` field. Payload is `Block`.
         loop,
+        /// Sends control flow back to the beginning of a parent `loop` body.
+        /// Uses the `repeat` field.
+        repeat,
         /// Return from a block with a result.
         /// Result type is always noreturn; no instructions in a block follow this one.
         /// Uses the `br` field.
@@ -1045,6 +1047,9 @@ pub const Inst = struct {
             block_inst: Index,
             operand: Ref,
         },
+        repeat: struct {
+            loop_inst: Index,
+        },
         pl_op: struct {
             operand: Ref,
             payload: u32,
@@ -1445,6 +1450,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
         => return datas[@intFromEnum(inst)].ty_op.ty.toType(),
 
         .loop,
+        .repeat,
         .br,
         .cond_br,
         .switch_br,
@@ -1602,6 +1608,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool {
         .arg,
         .block,
         .loop,
+        .repeat,
         .br,
         .trap,
         .breakpoint,
src/Liveness.zig
@@ -70,7 +70,8 @@ pub const Block = struct {
 const LivenessPass = enum {
     /// In this pass, we perform some basic analysis of loops to gain information the main pass
     /// needs. In particular, for every `loop`, we track the following information:
-    /// * Every block which the loop body contains a `br` to.
+    /// * Every outer block which the loop body contains a `br` to.
+    /// * Every outer loop which the loop body contains a `repeat` to.
     /// * Every operand referenced within the loop body but created outside the loop.
     /// This gives the main analysis pass enough information to determine the full set of
     /// instructions which need to be alive when a loop repeats. This data is TEMPORARILY stored in
@@ -89,7 +90,8 @@ fn LivenessPassData(comptime pass: LivenessPass) type {
     return switch (pass) {
         .loop_analysis => struct {
             /// The set of blocks which are exited with a `br` instruction at some point within this
-            /// body and which we are currently within.
+            /// body and which we are currently within. Also includes `loop`s which are the target
+            /// of a `repeat` instruction.
             breaks: std.AutoHashMapUnmanaged(Air.Inst.Index, void) = .{},
 
             /// The set of operands for which we have seen at least one usage but not their birth.
@@ -102,7 +104,7 @@ fn LivenessPassData(comptime pass: LivenessPass) type {
         },
 
         .main_analysis => struct {
-            /// Every `block` currently under analysis.
+            /// Every `block` and `loop` currently under analysis.
             block_scopes: std.AutoHashMapUnmanaged(Air.Inst.Index, BlockScope) = .{},
 
             /// The set of instructions currently alive in the current control
@@ -114,7 +116,8 @@ fn LivenessPassData(comptime pass: LivenessPass) type {
             old_extra: std.ArrayListUnmanaged(u32) = .{},
 
             const BlockScope = struct {
-                /// The set of instructions which are alive upon a `br` to this block.
+                /// If this is a `block`, these instructions are alive upon a `br` to this block.
+                /// If this is a `loop`, these instructions are alive upon a `repeat` to this block.
                 live_set: std.AutoHashMapUnmanaged(Air.Inst.Index, void),
             };
 
@@ -326,6 +329,7 @@ pub fn categorizeOperand(
         .ret_ptr,
         .trap,
         .breakpoint,
+        .repeat,
         .dbg_stmt,
         .unreach,
         .ret_addr,
@@ -1201,6 +1205,7 @@ fn analyzeInst(
         },
 
         .br => return analyzeInstBr(a, pass, data, inst),
+        .repeat => return analyzeInstRepeat(a, pass, data, inst),
 
         .assembly => {
             const extra = a.air.extraData(Air.Asm, inst_datas[@intFromEnum(inst)].ty_pl.payload);
@@ -1380,6 +1385,33 @@ fn analyzeInstBr(
     return analyzeOperands(a, pass, data, inst, .{ br.operand, .none, .none });
 }
 
+fn analyzeInstRepeat(
+    a: *Analysis,
+    comptime pass: LivenessPass,
+    data: *LivenessPassData(pass),
+    inst: Air.Inst.Index,
+) !void {
+    const inst_datas = a.air.instructions.items(.data);
+    const repeat = inst_datas[@intFromEnum(inst)].repeat;
+    const gpa = a.gpa;
+
+    switch (pass) {
+        .loop_analysis => {
+            try data.breaks.put(gpa, repeat.loop_inst, {});
+        },
+
+        .main_analysis => {
+            const block_scope = data.block_scopes.get(repeat.loop_inst).?; // we should always be repeating an enclosing loop
+
+            const new_live_set = try block_scope.live_set.clone(gpa);
+            data.live_set.deinit(gpa);
+            data.live_set = new_live_set;
+        },
+    }
+
+    return analyzeOperands(a, pass, data, inst, .{ .none, .none, .none });
+}
+
 fn analyzeInstBlock(
     a: *Analysis,
     comptime pass: LivenessPass,
@@ -1402,8 +1434,10 @@ fn analyzeInstBlock(
 
         .main_analysis => {
             log.debug("[{}] %{}: block live set is {}", .{ pass, inst, fmtInstSet(&data.live_set) });
+            // We can move the live set because the body should have a noreturn
+            // instruction which overrides the set.
             try data.block_scopes.put(gpa, inst, .{
-                .live_set = try data.live_set.clone(gpa),
+                .live_set = data.live_set.move(),
             });
             defer {
                 log.debug("[{}] %{}: popped block scope", .{ pass, inst });
@@ -1471,10 +1505,15 @@ fn analyzeInstLoop(
 
             try analyzeBody(a, pass, data, body);
 
+            // `loop`s are guaranteed to have at least one matching `repeat`.
+            // However, we no longer care about repeats of this loop itself.
+            assert(data.breaks.remove(inst));
+
+            const extra_index: u32 = @intCast(a.extra.items.len);
+
             const num_breaks = data.breaks.count();
             try a.extra.ensureUnusedCapacity(gpa, 1 + num_breaks);
 
-            const extra_index = @as(u32, @intCast(a.extra.items.len));
             a.extra.appendAssumeCapacity(num_breaks);
 
             var it = data.breaks.keyIterator();
@@ -1543,6 +1582,17 @@ fn analyzeInstLoop(
                 }
             }
 
+            // Now, `data.live_set` is the operands which must be alive when the loop repeats.
+            // Move them into a block scope for corresponding `repeat` instructions to notice.
+            log.debug("[{}] %{}: loop live set is {}", .{ pass, inst, fmtInstSet(&data.live_set) });
+            try data.block_scopes.putNoClobber(gpa, inst, .{
+                .live_set = data.live_set.move(),
+            });
+            defer {
+                log.debug("[{}] %{}: popped loop block scop", .{ pass, inst });
+                var scope = data.block_scopes.fetchRemove(inst).?.value;
+                scope.live_set.deinit(gpa);
+            }
             try analyzeBody(a, pass, data, body);
         },
     }
src/print_air.zig
@@ -296,6 +296,7 @@ const Writer = struct {
             .aggregate_init => try w.writeAggregateInit(s, inst),
             .union_init => try w.writeUnionInit(s, inst),
             .br => try w.writeBr(s, inst),
+            .repeat => try w.writeRepeat(s, inst),
             .cond_br => try w.writeCondBr(s, inst),
             .@"try", .try_cold => try w.writeTry(s, inst),
             .try_ptr, .try_ptr_cold => try w.writeTryPtr(s, inst),
@@ -708,6 +709,11 @@ const Writer = struct {
         try w.writeOperand(s, inst, 0, br.operand);
     }
 
+    fn writeRepeat(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
+        const repeat = w.air.instructions.items(.data)[@intFromEnum(inst)].repeat;
+        try w.writeInstIndex(s, repeat.loop_inst, false);
+    }
+
     fn writeTry(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
         const pl_op = w.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
         const extra = w.air.extraData(Air.Try, pl_op.payload);
src/Sema.zig
@@ -1559,6 +1559,8 @@ fn analyzeBodyInner(
                     // We are definitely called by `zirLoop`, which will treat the
                     // fact that this body does not terminate `noreturn` as an
                     // implicit repeat.
+                    // TODO: since AIR has `repeat` now, we could change ZIR to generate
+                    // more optimal code utilizing `repeat` instructions across blocks!
                     break;
                 }
             },
@@ -5811,17 +5813,30 @@ fn zirLoop(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileError
     // Use `analyzeBodyInner` directly to push any comptime control flow up the stack.
     try sema.analyzeBodyInner(&loop_block, body);
 
+    // TODO: since AIR has `repeat` now, we could change ZIR to generate
+    // more optimal code utilizing `repeat` instructions across blocks!
+    // For now, if the generated loop body does not terminate `noreturn`,
+    // then `analyzeBodyInner` is signalling that it ended with `repeat`.
+
     const loop_block_len = loop_block.instructions.items.len;
     if (loop_block_len > 0 and sema.typeOf(loop_block.instructions.items[loop_block_len - 1].toRef()).isNoReturn(zcu)) {
         // If the loop ended with a noreturn terminator, then there is no way for it to loop,
         // so we can just use the block instead.
         try child_block.instructions.appendSlice(gpa, loop_block.instructions.items);
     } else {
+        _ = try loop_block.addInst(.{
+            .tag = .repeat,
+            .data = .{ .repeat = .{
+                .loop_inst = loop_inst,
+            } },
+        });
+        // Note that `loop_block_len` is now off by one.
+
         try child_block.instructions.append(gpa, loop_inst);
 
-        try sema.air_extra.ensureUnusedCapacity(gpa, @typeInfo(Air.Block).@"struct".fields.len + loop_block_len);
+        try sema.air_extra.ensureUnusedCapacity(gpa, @typeInfo(Air.Block).@"struct".fields.len + loop_block_len + 1);
         sema.air_instructions.items(.data)[@intFromEnum(loop_inst)].ty_pl.payload = sema.addExtraAssumeCapacity(
-            Air.Block{ .body_len = @intCast(loop_block_len) },
+            Air.Block{ .body_len = @intCast(loop_block_len + 1) },
         );
         sema.air_extra.appendSliceAssumeCapacity(@ptrCast(loop_block.instructions.items));
     }