Commit b4da8eef2a

mlugg <mlugg@mlugg.co.uk>
2025-01-03 08:54:53
Zir: split up start and end of range in `for_len`
The old lowering was kind of neat, but it unintentionally allowed the syntax `for (123) |_| { ... }`, and there wasn't really a way to fix that. So, instead, we include both the start and the end of the range in the `for_len` instruction (each operand to `for` now has *two* entries in this multi-op instruction). This slightly increases the size of ZIR for loops of predominantly indexables, but the difference is small enough that it's not worth complicating ZIR to try and fix it.
1 parent 252c203
Changed files (4)
lib
src
test
cases
compile_errors
lib/std/zig/AstGen.zig
@@ -7020,7 +7020,7 @@ fn forExpr(
     const indexables = try gpa.alloc(Zir.Inst.Ref, for_full.ast.inputs.len);
     defer gpa.free(indexables);
     // elements of this array can be `none`, indicating no length check.
-    const lens = try gpa.alloc(Zir.Inst.Ref, for_full.ast.inputs.len);
+    const lens = try gpa.alloc([2]Zir.Inst.Ref, for_full.ast.inputs.len);
     defer gpa.free(lens);
 
     // We will use a single zero-based counter no matter how many indexables there are.
@@ -7039,7 +7039,7 @@ fn forExpr(
 
     {
         var capture_token = for_full.payload_token;
-        for (for_full.ast.inputs, indexables, lens) |input, *indexable_ref, *len_ref| {
+        for (for_full.ast.inputs, indexables, lens) |input, *indexable_ref, *len_refs| {
             const capture_is_ref = token_tags[capture_token] == .asterisk;
             const ident_tok = capture_token + @intFromBool(capture_is_ref);
             const is_discard = mem.eql(u8, tree.tokenSlice(ident_tok), "_");
@@ -7068,24 +7068,21 @@ fn forExpr(
                     try astgen.appendErrorTok(ident_tok, "discard of unbounded counter", .{});
                 }
 
-                const start_is_zero = nodeIsTriviallyZero(tree, start_node);
-                const range_len = if (end_val == .none or start_is_zero)
-                    end_val
-                else
-                    try parent_gz.addPlNode(.sub, input, Zir.Inst.Bin{
-                        .lhs = end_val,
-                        .rhs = start_val,
-                    });
+                if (end_val == .none) {
+                    len_refs.* = .{ .none, .none };
+                } else {
+                    any_len_checks = true;
+                    len_refs.* = .{ start_val, end_val };
+                }
 
-                any_len_checks = any_len_checks or range_len != .none;
+                const start_is_zero = nodeIsTriviallyZero(tree, start_node);
                 indexable_ref.* = if (start_is_zero) .none else start_val;
-                len_ref.* = range_len;
             } else {
                 const indexable = try expr(parent_gz, scope, .{ .rl = .none }, input);
 
                 any_len_checks = true;
                 indexable_ref.* = indexable;
-                len_ref.* = indexable;
+                len_refs.* = .{ indexable, .none };
             }
         }
     }
@@ -7097,12 +7094,13 @@ fn forExpr(
     // We use a dedicated ZIR instruction to assert the lengths to assist with
     // nicer error reporting as well as fewer ZIR bytes emitted.
     const len: Zir.Inst.Ref = len: {
-        const lens_len: u32 = @intCast(lens.len);
+        const all_lens = @as([*]Zir.Inst.Ref, @ptrCast(lens))[0 .. lens.len * 2];
+        const lens_len: u32 = @intCast(all_lens.len);
         try astgen.extra.ensureUnusedCapacity(gpa, @typeInfo(Zir.Inst.MultiOp).@"struct".fields.len + lens_len);
         const len = try parent_gz.addPlNode(.for_len, node, Zir.Inst.MultiOp{
             .operands_len = lens_len,
         });
-        appendRefsAssumeCapacity(astgen, lens);
+        appendRefsAssumeCapacity(astgen, all_lens);
         break :len len;
     };
 
lib/std/zig/Zir.zig
@@ -526,8 +526,10 @@ pub const Inst = struct {
         /// Asserts that all the lengths provided match. Used to build a for loop.
         /// Return value is the length as a usize.
         /// Uses the `pl_node` field with payload `MultiOp`.
-        /// There is exactly one item corresponding to each AST node inside the for
-        /// loop condition. Any item may be `none`, indicating an unbounded range.
+        /// There are two items for each AST node inside the for loop condition.
+        /// If both items in a pair are `.none`, then this node is an unbounded range.
+        /// If only the second item in a pair is `.none`, then the first is an indexable.
+        /// Otherwise, the node is a bounded range `a..b`, with the items being `a` and `b`.
         /// Illegal behaviors:
         ///  * If all lengths are unbounded ranges (always a compile error).
         ///  * If any two lengths do not match each other.
src/Sema.zig
@@ -4356,7 +4356,8 @@ fn zirForLen(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
     const ip = &zcu.intern_pool;
     const inst_data = sema.code.instructions.items(.data)[@intFromEnum(inst)].pl_node;
     const extra = sema.code.extraData(Zir.Inst.MultiOp, inst_data.payload_index);
-    const args = sema.code.refSlice(extra.end, extra.data.operands_len);
+    const all_args = sema.code.refSlice(extra.end, extra.data.operands_len);
+    const arg_pairs: []const [2]Zir.Inst.Ref = @as([*]const [2]Zir.Inst.Ref, @ptrCast(all_args))[0..@divExact(all_args.len, 2)];
     const src = block.nodeOffset(inst_data.src_node);
 
     var len: Air.Inst.Ref = .none;
@@ -4364,27 +4365,24 @@ fn zirForLen(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
     var len_idx: u32 = undefined;
     var any_runtime = false;
 
-    const runtime_arg_lens = try gpa.alloc(Air.Inst.Ref, args.len);
+    const runtime_arg_lens = try gpa.alloc(Air.Inst.Ref, arg_pairs.len);
     defer gpa.free(runtime_arg_lens);
 
     // First pass to look for comptime values.
-    for (args, 0..) |zir_arg, i_usize| {
+    for (arg_pairs, 0..) |zir_arg_pair, i_usize| {
         const i: u32 = @intCast(i_usize);
         runtime_arg_lens[i] = .none;
-        if (zir_arg == .none) continue;
-        const object = try sema.resolveInst(zir_arg);
-        const object_ty = sema.typeOf(object);
-        // Each arg could be an indexable, or a range, in which case the length
-        // is passed directly as an integer.
-        const is_int = switch (object_ty.zigTypeTag(zcu)) {
-            .int, .comptime_int => true,
-            else => false,
-        };
+        if (zir_arg_pair[0] == .none) continue;
+
         const arg_src = block.src(.{ .for_input = .{
             .for_node_offset = inst_data.src_node,
             .input_index = i,
         } });
-        const arg_len_uncoerced = if (is_int) object else l: {
+
+        const arg_len_uncoerced = if (zir_arg_pair[1] == .none) l: {
+            // This argument is an indexable.
+            const object = try sema.resolveInst(zir_arg_pair[0]);
+            const object_ty = sema.typeOf(object);
             if (!object_ty.isIndexable(zcu)) {
                 // Instead of using checkIndexable we customize this error.
                 const msg = msg: {
@@ -4401,8 +4399,12 @@ fn zirForLen(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
                 return sema.failWithOwnedErrorMsg(block, msg);
             }
             if (!object_ty.indexableHasLen(zcu)) continue;
-
             break :l try sema.fieldVal(block, arg_src, object, try ip.getOrPutString(gpa, pt.tid, "len", .no_embedded_nulls), arg_src);
+        } else l: {
+            // This argument is a range.
+            const range_start = try sema.resolveInst(zir_arg_pair[0]);
+            const range_end = try sema.resolveInst(zir_arg_pair[1]);
+            break :l try sema.analyzeArithmetic(block, .sub, range_end, range_start, arg_src, arg_src, arg_src, true);
         };
         const arg_len = try sema.coerce(block, Type.usize, arg_len_uncoerced, arg_src);
         if (len == .none) {
@@ -4444,17 +4446,12 @@ fn zirForLen(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
         const msg = msg: {
             const msg = try sema.errMsg(src, "unbounded for loop", .{});
             errdefer msg.destroy(gpa);
-            for (args, 0..) |zir_arg, i_usize| {
+            for (arg_pairs, 0..) |zir_arg_pair, i_usize| {
                 const i: u32 = @intCast(i_usize);
-                if (zir_arg == .none) continue;
-                const object = try sema.resolveInst(zir_arg);
+                if (zir_arg_pair[0] == .none) continue;
+                if (zir_arg_pair[1] != .none) continue;
+                const object = try sema.resolveInst(zir_arg_pair[0]);
                 const object_ty = sema.typeOf(object);
-                // Each arg could be an indexable, or a range, in which case the length
-                // is passed directly as an integer.
-                switch (object_ty.zigTypeTag(zcu)) {
-                    .int, .comptime_int => continue,
-                    else => {},
-                }
                 const arg_src = block.src(.{ .for_input = .{
                     .for_node_offset = inst_data.src_node,
                     .input_index = i,
test/cases/compile_errors/for.zig
@@ -28,10 +28,11 @@ export fn d() void {
         _ = x3;
     }
 }
+export fn e() void {
+    for (123) |_| {}
+}
 
 // error
-// backend=stage2
-// target=native
 //
 // :2:5: error: non-matching for loop lengths
 // :2:11: note: length 10 here
@@ -43,3 +44,5 @@ export fn d() void {
 // :25:5: error: unbounded for loop
 // :25:10: note: type '[*]const u8' has no upper bound
 // :25:18: note: type '[*]const u8' has no upper bound
+// :32:10: error: type 'comptime_int' is not indexable and not a range
+// :32:10: note: for loop operand must be a range, array, slice, tuple, or vector