Commit 70894d5c2f

Andrew Kelley <andrew@ziglang.org>
2021-12-27 23:26:56
AstGen: fix loop result locations
The main problem was that the loop body was treated as an expression that was one of the peer result values of a loop, when in reality the loop body is noreturn and only the `break` operands are the result values of loops. This was solved by introducing an override that prevents rvalue() from emitting a store to result location instruction for loop bodies. An orthogonal change also included in this commit is switching `elem_val` index expressions to using `coerced_ty` and doing the coercion to `usize` inside `Sema`, resulting in smaller ZIR (since the cast becomes implied). I also changed the break operand expression to use `reachableExpr`, introducing a new compile error for double break. This makes a few more behavior tests pass for `while` and `for` loops.
1 parent 2c23699
src/AstGen.zig
@@ -1709,23 +1709,42 @@ fn breakExpr(parent_gz: *GenZir, parent_scope: *Scope, node: Ast.Node.Index) Inn
                     return Zir.Inst.Ref.unreachable_value;
                 }
                 block_gz.break_count += 1;
-                const operand = try expr(parent_gz, parent_scope, block_gz.break_result_loc, rhs);
-                // if list grew as much as rvalue_rl_count, then a break inside operand already saved the store_to_block_ptr
-                const have_store_to_block = block_gz.rvalue_rl_count > block_gz.labeled_store_to_block_ptr_list.items.len;
-
-                const br = try parent_gz.addBreak(.@"break", block_inst, operand);
-
-                if (block_gz.break_result_loc == .block_ptr) {
-                    try block_gz.labeled_breaks.append(astgen.gpa, br);
-
-                    if (have_store_to_block) {
-                        const zir_tags = parent_gz.astgen.instructions.items(.tag);
-                        const zir_datas = parent_gz.astgen.instructions.items(.data);
-                        const store_inst = @intCast(u32, zir_tags.len - 2);
-                        assert(zir_tags[store_inst] == .store_to_block_ptr);
-                        assert(zir_datas[store_inst].bin.lhs == block_gz.rl_ptr);
-                        try block_gz.labeled_store_to_block_ptr_list.append(astgen.gpa, store_inst);
-                    }
+
+                // The loop scope has a mechanism to prevent rvalue() from emitting a
+                // store to the result location for the loop body (since it is continues
+                // rather than returning a result from the loop) but here is a `break`
+                // which needs to override this behavior.
+                const prev_rvalue_noresult = parent_gz.rvalue_noresult;
+                parent_gz.rvalue_noresult = .none;
+                const operand = try reachableExpr(parent_gz, parent_scope, block_gz.break_result_loc, rhs, node);
+                parent_gz.rvalue_noresult = prev_rvalue_noresult;
+
+                switch (block_gz.break_result_loc) {
+                    .block_ptr => {
+                        const br = try parent_gz.addBreak(.@"break", block_inst, operand);
+                        try block_gz.labeled_breaks.append(astgen.gpa, br);
+
+                        // if list grew as much as rvalue_rl_count, then a break
+                        // inside operand already saved the store_to_block_ptr
+                        const have_store_to_block = block_gz.rvalue_rl_count >
+                            block_gz.labeled_store_to_block_ptr_list.items.len;
+                        if (have_store_to_block) {
+                            const zir_tags = parent_gz.astgen.instructions.items(.tag);
+                            const zir_datas = parent_gz.astgen.instructions.items(.data);
+                            const store_inst = @intCast(u32, zir_tags.len - 2);
+                            assert(zir_tags[store_inst] == .store_to_block_ptr);
+                            assert(zir_datas[store_inst].bin.lhs == block_gz.rl_ptr);
+                            try block_gz.labeled_store_to_block_ptr_list.append(astgen.gpa, store_inst);
+                        }
+                    },
+                    .ptr => {
+                        // In this case we don't have any mechanism to intercept it;
+                        // we assume the result location is written, and we break with void.
+                        _ = try parent_gz.addBreak(.@"break", block_inst, .void_value);
+                    },
+                    else => {
+                        _ = try parent_gz.addBreak(.@"break", block_inst, operand);
+                    },
                 }
                 return Zir.Inst.Ref.unreachable_value;
             },
@@ -4776,7 +4795,7 @@ fn arrayAccess(
         else => return rvalue(gz, rl, try gz.addBin(
             .elem_val,
             try expr(gz, scope, .none, node_datas[node].lhs),
-            try expr(gz, scope, .{ .ty = .usize_type }, node_datas[node].rhs),
+            try expr(gz, scope, .{ .coerced_ty = .usize_type }, node_datas[node].rhs),
         ), node),
     }
 }
@@ -5183,6 +5202,7 @@ fn whileExpr(
     // make scope now but don't stack on parent_gz until loop_scope
     // gets unstacked after cont_expr is emitted and added below
     var then_scope = parent_gz.makeSubBlock(&continue_scope.base);
+    then_scope.markAsLoopBody(loop_scope);
     then_scope.instructions_top = GenZir.unstacked_top;
     defer then_scope.unstack();
 
@@ -5267,9 +5287,6 @@ fn whileExpr(
     then_scope.instructions_top = then_scope.instructions.items.len;
     if (payload_inst != 0) try then_scope.instructions.append(astgen.gpa, payload_inst);
     const then_result = try expr(&then_scope, then_sub_scope, loop_scope.break_result_loc, while_full.ast.then_expr);
-    if (!then_scope.endsWithNoReturn()) {
-        loop_scope.break_count += 1;
-    }
     try checkUsed(parent_gz, &then_scope.base, then_sub_scope);
 
     var else_scope = parent_gz.makeSubBlock(&continue_scope.base);
@@ -5426,6 +5443,7 @@ fn forExpr(
     }
 
     var then_scope = parent_gz.makeSubBlock(&cond_scope.base);
+    then_scope.markAsLoopBody(loop_scope);
     defer then_scope.unstack();
 
     var payload_val_scope: Scope.LocalVal = undefined;
@@ -5482,9 +5500,6 @@ fn forExpr(
     };
 
     const then_result = try expr(&then_scope, then_sub_scope, loop_scope.break_result_loc, for_full.ast.then_expr);
-    if (!then_scope.endsWithNoReturn()) {
-        loop_scope.break_count += 1;
-    }
     try checkUsed(parent_gz, &then_scope.base, then_sub_scope);
 
     var else_scope = parent_gz.makeSubBlock(&cond_scope.base);
@@ -8369,19 +8384,25 @@ fn rvalue(
             }
         },
         .ptr => |ptr_inst| {
-            _ = try gz.addPlNode(.store_node, src_node, Zir.Inst.Bin{
-                .lhs = ptr_inst,
-                .rhs = result,
-            });
+            if (gz.rvalue_noresult != ptr_inst) {
+                _ = try gz.addPlNode(.store_node, src_node, Zir.Inst.Bin{
+                    .lhs = ptr_inst,
+                    .rhs = result,
+                });
+            }
             return result;
         },
         .inferred_ptr => |alloc| {
-            _ = try gz.addBin(.store_to_inferred_ptr, alloc, result);
+            if (gz.rvalue_noresult != alloc) {
+                _ = try gz.addBin(.store_to_inferred_ptr, alloc, result);
+            }
             return result;
         },
         .block_ptr => |block_scope| {
-            block_scope.rvalue_rl_count += 1;
-            _ = try gz.addBin(.store_to_block_ptr, block_scope.rl_ptr, result);
+            if (gz.rvalue_noresult != block_scope.rl_ptr) {
+                block_scope.rvalue_rl_count += 1;
+                _ = try gz.addBin(.store_to_block_ptr, block_scope.rl_ptr, result);
+            }
             return result;
         },
     }
@@ -8890,6 +8911,7 @@ const GenZir = struct {
     rl_ptr: Zir.Inst.Ref = .none,
     /// When a block has a type result location, here it is.
     rl_ty_inst: Zir.Inst.Ref = .none,
+    rvalue_noresult: Zir.Inst.Ref = .none,
     /// Keeps track of how many branches of a block did not actually
     /// consume the result location. astgen uses this to figure out
     /// whether to rely on break instructions or writing to the result
@@ -10096,6 +10118,17 @@ const GenZir = struct {
             }
         }
     }
+
+    /// Control flow does not fall through the "then" block of a loop; it continues
+    /// back to the while condition. This prevents `rvalue` from
+    /// adding an invalid store to the result location of `then_scope`.
+    fn markAsLoopBody(gz: *GenZir, loop_scope: GenZir) void {
+        gz.rvalue_noresult = switch (loop_scope.break_result_loc) {
+            .ptr, .inferred_ptr => |ptr| ptr,
+            .block_ptr => |block| block.rl_ptr,
+            else => .none,
+        };
+    }
 };
 
 /// This can only be for short-lived references; the memory becomes invalidated
src/Sema.zig
@@ -12528,7 +12528,7 @@ fn elemVal(
     block: *Block,
     src: LazySrcLoc,
     array: Air.Inst.Ref,
-    elem_index: Air.Inst.Ref,
+    elem_index_uncasted: Air.Inst.Ref,
     elem_index_src: LazySrcLoc,
 ) CompileError!Air.Inst.Ref {
     const array_src = src; // TODO better source location
@@ -12538,6 +12538,8 @@ fn elemVal(
         return sema.fail(block, src, "array access of non-indexable type '{}'", .{array_ty});
     }
 
+    const elem_index = try sema.coerce(block, Type.usize, elem_index_uncasted, elem_index_src);
+
     switch (array_ty.zigTypeTag()) {
         .Pointer => switch (array_ty.ptrSize()) {
             .Slice => {
test/behavior/bugs/9967.zig
@@ -1,8 +0,0 @@
-const std = @import("std");
-
-test "nested breaks to same labeled block" {
-    const a = blk: {
-        break :blk break :blk @as(u32, 1);
-    };
-    try std.testing.expectEqual(a, 1);
-}
test/behavior/for.zig
@@ -116,3 +116,20 @@ test "for with null and T peer types and inferred result location type" {
     try S.doTheTest(&[_]u8{ 1, 2 });
     comptime try S.doTheTest(&[_]u8{ 1, 2 });
 }
+
+test "2 break statements and an else" {
+    const S = struct {
+        fn entry(t: bool, f: bool) !void {
+            var buf: [10]u8 = undefined;
+            var ok = false;
+            ok = for (buf) |item| {
+                _ = item;
+                if (f) break false;
+                if (t) break true;
+            } else false;
+            try expect(ok);
+        }
+    };
+    try S.entry(true, false);
+    comptime try S.entry(true, false);
+}
test/behavior/for_stage1.zig
@@ -26,23 +26,6 @@ fn mangleString(s: []u8) void {
     }
 }
 
-test "2 break statements and an else" {
-    const S = struct {
-        fn entry(t: bool, f: bool) !void {
-            var buf: [10]u8 = undefined;
-            var ok = false;
-            ok = for (buf) |item| {
-                _ = item;
-                if (f) break false;
-                if (t) break true;
-            } else false;
-            try expect(ok);
-        }
-    };
-    try S.entry(true, false);
-    comptime try S.entry(true, false);
-}
-
 test "for copies its payload" {
     const S = struct {
         fn doTheTest() !void {
test/behavior/while.zig
@@ -236,3 +236,33 @@ test "while on error union with else result follow break prong" {
     } else |_| @as(i32, 2);
     try expect(result == 10);
 }
+
+test "while bool 2 break statements and an else" {
+    const S = struct {
+        fn entry(t: bool, f: bool) !void {
+            var ok = false;
+            ok = while (t) {
+                if (f) break false;
+                if (t) break true;
+            } else false;
+            try expect(ok);
+        }
+    };
+    try S.entry(true, false);
+    comptime try S.entry(true, false);
+}
+
+test "while optional 2 break statements and an else" {
+    const S = struct {
+        fn entry(opt_t: ?bool, f: bool) !void {
+            var ok = false;
+            ok = while (opt_t) |t| {
+                if (f) break false;
+                if (t) break true;
+            } else false;
+            try expect(ok);
+        }
+    };
+    try S.entry(true, false);
+    comptime try S.entry(true, false);
+}
test/behavior/while_stage1.zig
@@ -1,36 +1,6 @@
 const std = @import("std");
 const expect = std.testing.expect;
 
-test "while bool 2 break statements and an else" {
-    const S = struct {
-        fn entry(t: bool, f: bool) !void {
-            var ok = false;
-            ok = while (t) {
-                if (f) break false;
-                if (t) break true;
-            } else false;
-            try expect(ok);
-        }
-    };
-    try S.entry(true, false);
-    comptime try S.entry(true, false);
-}
-
-test "while optional 2 break statements and an else" {
-    const S = struct {
-        fn entry(opt_t: ?bool, f: bool) !void {
-            var ok = false;
-            ok = while (opt_t) |t| {
-                if (f) break false;
-                if (t) break true;
-            } else false;
-            try expect(ok);
-        }
-    };
-    try S.entry(true, false);
-    comptime try S.entry(true, false);
-}
-
 test "while error 2 break statements and an else" {
     const S = struct {
         fn entry(opt_t: anyerror!bool, f: bool) !void {
test/behavior.zig
@@ -145,7 +145,6 @@ test {
                 _ = @import("behavior/bugs/7027.zig");
                 _ = @import("behavior/bugs/7047.zig");
                 _ = @import("behavior/bugs/9584.zig");
-                _ = @import("behavior/bugs/9967.zig");
                 _ = @import("behavior/bugs/10147.zig");
                 _ = @import("behavior/byteswap.zig");
                 _ = @import("behavior/call_stage1.zig");
test/compile_errors.zig
@@ -5042,6 +5042,17 @@ pub fn addCases(ctx: *TestContext) !void {
         "tmp.zig:2:12: note: control flow is diverted here",
     });
 
+    ctx.objErrStage1("unreachable code - double break",
+        \\export fn a() void {
+        \\    const b = blk: {
+        \\        break :blk break :blk @as(u32, 1);
+        \\    };
+        \\}
+    , &[_][]const u8{
+        "tmp.zig:3:9: error: unreachable code",
+        "tmp.zig:3:20: note: control flow is diverted here",
+    });
+
     ctx.objErrStage1("chained comparison operators",
         \\export fn a(value: u32) bool {
         \\    return 1 < value < 1000;