Commit 97d7fddfb7

Andrew Kelley <andrew@ziglang.org>
2021-04-03 04:11:51
stage2: progress towards basic structs
Introduce `ResultLoc.none_or_ref` which is used by field access expressions to avoid unnecessary loads when the field access itself will do the load. This turns: ```zig p.y - p.x - p.x ``` from ```zir %14 = load(%4) node_offset:8:12 %15 = field_val(%14, "y") node_offset:8:13 %16 = load(%4) node_offset:8:18 %17 = field_val(%16, "x") node_offset:8:19 %18 = sub(%15, %17) node_offset:8:16 %19 = load(%4) node_offset:8:24 %20 = field_val(%19, "x") node_offset:8:25 ``` to ```zir %14 = field_val(%4, "y") node_offset:8:13 %15 = field_val(%4, "x") node_offset:8:19 %16 = sub(%14, %15) node_offset:8:16 %17 = field_val(%4, "x") node_offset:8:25 ``` Much more compact. This requires `Sema.zirFieldVal` to support both pointers and non-pointers. C backend: Implement typedefs for struct types, as well as the following TZIR instructions: * mul * mulwrap * addwrap * subwrap * ref * struct_field_ptr Note that add, addwrap, sub, subwrap, mul, mulwrap instructions are all incorrect currently and need to be updated to properly handle wrapping and non wrapping for signed and unsigned. C backend: change indentation delta to 1, to make the output smaller and to process fewer bytes. I promise I will add a test case as soon as I fix those warnings that are being printed for my test case.
1 parent 43d364a
Changed files (6)
src/codegen/c.zig
@@ -44,22 +44,34 @@ fn formatTypeAsCIdentifier(
     var buffer = [1]u8{0} ** 128;
     // We don't care if it gets cut off, it's still more unique than a number
     var buf = std.fmt.bufPrint(&buffer, "{}", .{data}) catch &buffer;
+    return formatIdent(buf, "", .{}, writer);
+}
+
+/// Wraps `t` in a `std.fmt.Formatter` that prints it through
+/// `formatTypeAsCIdentifier`.
+pub fn typeToCIdentifier(t: Type) std.fmt.Formatter(formatTypeAsCIdentifier) {
+    const Wrapper = std.fmt.Formatter(formatTypeAsCIdentifier);
+    return Wrapper{ .data = t };
+}
 
-    for (buf) |c, i| {
+/// Writes `ident` to `writer`, escaping every byte that is not kept verbatim.
+///
+/// ASCII letters and `_` are copied through, as are digits that are not the
+/// first byte. Every other byte, and a digit in first position, is written as
+/// `$` followed by exactly two lowercase hex digits.
+///
+/// `fmt` and `options` are unused; they are present so this function has the
+/// signature expected by `std.fmt.Formatter`.
+fn formatIdent(
+    ident: []const u8,
+    comptime fmt: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    for (ident) |c, i| {
         switch (c) {
-            0 => return writer.writeAll(buf[0..i]),
-            'a'...'z', 'A'...'Z', '_', '$' => {},
+            'a'...'z', 'A'...'Z', '_' => try writer.writeByte(c),
             '0'...'9' => if (i == 0) {
-                buf[i] = '_';
+                // A leading digit is escaped rather than copied.
+                try writer.print("${x:0>2}", .{c});
+            } else {
+                try writer.writeByte(c);
             },
-            else => buf[i] = '_',
+            // Zero-fill the escape: the default fill is a space, which would
+            // put a blank into the output for bytes below 0x10.
+            else => try writer.print("${x:0>2}", .{c}),
         }
     }
-    return writer.writeAll(buf);
 }
 
-pub fn typeToCIdentifier(t: Type) std.fmt.Formatter(formatTypeAsCIdentifier) {
-    return .{ .data = t };
+/// Wraps `ident` in a `std.fmt.Formatter` that prints it through `formatIdent`.
+pub fn fmtIdent(ident: []const u8) std.fmt.Formatter(formatIdent) {
+    const Wrapper = std.fmt.Formatter(formatIdent);
+    return Wrapper{ .data = ident };
 }
 
 /// This data is available when outputting .c code for a Module.
@@ -430,6 +442,36 @@ pub const DeclGen = struct {
                 try w.writeAll(name);
                 dg.typedefs.putAssumeCapacityNoClobber(t, .{ .name = name, .rendered = rendered });
             },
+            .Struct => { // Renders a struct type as a named C typedef and writes that name to `w`.
+                if (dg.typedefs.get(t)) |some| { // Already recorded for this type: just emit the stored name.
+                    return w.writeAll(some.name);
+                }
+                const struct_obj = t.castTag(.@"struct").?.data; // Handle 0 bit types elsewhere.
+                const fqn = try struct_obj.getFullyQualifiedName(dg.typedefs.allocator);
+                defer dg.typedefs.allocator.free(fqn);
+
+                var buffer = std.ArrayList(u8).init(dg.typedefs.allocator); // Scratch buffer holding the typedef text.
+                defer buffer.deinit();
+
+                try buffer.appendSlice("typedef struct {\n");
+                for (struct_obj.fields.entries.items) |entry| { // One member per field: space, C type, escaped name.
+                    try buffer.append(' ');
+                    try dg.renderType(buffer.writer(), entry.value.ty);
+                    try buffer.writer().print(" {s};\n", .{fmtIdent(entry.key)});
+                }
+                try buffer.appendSlice("} ");
+
+                const name_start = buffer.items.len; // Offset of the typedef name within the rendered text.
+                try buffer.writer().print("zig_S_{s};\n", .{fmtIdent(fqn)});
+
+                const rendered = buffer.toOwnedSlice();
+                errdefer dg.typedefs.allocator.free(rendered);
+                const name = rendered[name_start .. rendered.len - 2]; // Slice of `rendered` without the trailing ";\n".
+
+                try dg.typedefs.ensureCapacity(dg.typedefs.capacity() + 1); // Reserve first; the insertion below has no error path.
+                try w.writeAll(name);
+                dg.typedefs.putAssumeCapacityNoClobber(t, .{ .name = name, .rendered = rendered });
+            },
             .Null, .Undefined => unreachable, // must be const or comptime
             else => |e| return dg.fail(.{ .node_offset = 0 }, "TODO: C backend: implement type {s}", .{
                 @tagName(e),
@@ -525,8 +567,23 @@ pub fn genBody(o: *Object, body: ir.Body) error{ AnalysisFail, OutOfMemory }!voi
 
     for (body.instructions) |inst| {
         const result_value = switch (inst.tag) {
-            .constant => unreachable, // excluded from function bodies
+            // Each arithmetic arm passes its own tag to `castTag` and unwraps
+            // the result with `.?`, so the tag named here must match the arm.
+            // TODO use a different strategy for add that communicates to the optimizer
+            // that wrapping is UB.
             .add => try genBinOp(o, inst.castTag(.add).?, " + "),
+            // TODO make this do wrapping arithmetic for signed ints
+            .addwrap => try genBinOp(o, inst.castTag(.addwrap).?, " + "),
+            // TODO use a different strategy for sub that communicates to the optimizer
+            // that wrapping is UB.
+            .sub => try genBinOp(o, inst.castTag(.sub).?, " - "),
+            // TODO make this do wrapping arithmetic for signed ints
+            .subwrap => try genBinOp(o, inst.castTag(.subwrap).?, " - "),
+            // TODO use a different strategy for mul that communicates to the optimizer
+            // that wrapping is UB.
+            .mul => try genBinOp(o, inst.castTag(.mul).?, " * "),
+            // TODO make this do wrapping multiplication for signed ints
+            .mulwrap => try genBinOp(o, inst.castTag(.mulwrap).?, " * "),
+
+            .constant => unreachable, // excluded from function bodies
             .alloc => try genAlloc(o, inst.castTag(.alloc).?),
             .arg => genArg(o),
             .assembly => try genAsm(o, inst.castTag(.assembly).?),
@@ -546,7 +603,6 @@ pub fn genBody(o: *Object, body: ir.Body) error{ AnalysisFail, OutOfMemory }!voi
             .ret => try genRet(o, inst.castTag(.ret).?),
             .retvoid => try genRetVoid(o),
             .store => try genStore(o, inst.castTag(.store).?),
-            .sub => try genBinOp(o, inst.castTag(.sub).?, " - "),
             .unreach => try genUnreach(o, inst.castTag(.unreach).?),
             .loop => try genLoop(o, inst.castTag(.loop).?),
             .condbr => try genCondBr(o, inst.castTag(.condbr).?),
@@ -567,17 +623,24 @@ pub fn genBody(o: *Object, body: ir.Body) error{ AnalysisFail, OutOfMemory }!voi
             .wrap_optional => try genWrapOptional(o, inst.castTag(.wrap_optional).?),
             .optional_payload => try genOptionalPayload(o, inst.castTag(.optional_payload).?),
             .optional_payload_ptr => try genOptionalPayload(o, inst.castTag(.optional_payload_ptr).?),
+            .ref => try genRef(o, inst.castTag(.ref).?),
+            .struct_field_ptr => try genStructFieldPtr(o, inst.castTag(.struct_field_ptr).?),
+
             .is_err => try genIsErr(o, inst.castTag(.is_err).?),
             .is_err_ptr => try genIsErr(o, inst.castTag(.is_err_ptr).?),
             .error_to_int => try genErrorToInt(o, inst.castTag(.error_to_int).?),
             .int_to_error => try genIntToError(o, inst.castTag(.int_to_error).?),
+
             .unwrap_errunion_payload => try genUnwrapErrUnionPay(o, inst.castTag(.unwrap_errunion_payload).?),
             .unwrap_errunion_err => try genUnwrapErrUnionErr(o, inst.castTag(.unwrap_errunion_err).?),
             .unwrap_errunion_payload_ptr => try genUnwrapErrUnionPay(o, inst.castTag(.unwrap_errunion_payload_ptr).?),
             .unwrap_errunion_err_ptr => try genUnwrapErrUnionErr(o, inst.castTag(.unwrap_errunion_err_ptr).?),
             .wrap_errunion_payload => try genWrapErrUnionPay(o, inst.castTag(.wrap_errunion_payload).?),
             .wrap_errunion_err => try genWrapErrUnionErr(o, inst.castTag(.wrap_errunion_err).?),
-            else => |e| return o.dg.fail(.{ .node_offset = 0 }, "TODO: C backend: implement codegen for {}", .{e}),
+            .br_block_flat => return o.dg.fail(.{ .node_offset = 0 }, "TODO: C backend: implement codegen for br_block_flat", .{}),
+            .ptrtoint => return o.dg.fail(.{ .node_offset = 0 }, "TODO: C backend: implement codegen for ptrtoint", .{}),
+            .varptr => return o.dg.fail(.{ .node_offset = 0 }, "TODO: C backend: implement codegen for varptr", .{}),
+            .floatcast => return o.dg.fail(.{ .node_offset = 0 }, "TODO: C backend: implement codegen for floatcast", .{}),
         };
         switch (result_value) {
             .none => {},
@@ -996,6 +1059,37 @@ fn genOptionalPayload(o: *Object, inst: *Inst.UnOp) !CValue {
     return local;
 }
 
+/// Emits a new const local of the instruction's result type, initialized from
+/// the resolved operand, and returns that local.
+/// NOTE(review): the operand is written as-is with no explicit `&`; confirm
+/// this is the intended C output for a `ref` instruction.
+fn genRef(o: *Object, inst: *Inst.UnOp) !CValue {
+    const w = o.writer();
+    const source_value = try o.resolveInst(inst.operand);
+
+    // The initializer text follows directly after the local is allocated.
+    const result = try o.allocLocal(inst.base.ty, .Const);
+    try w.writeAll(" = ");
+    try o.writeCValue(w, source_value);
+    try w.writeAll(";\n");
+    return result;
+}
+
+/// Emits a const local initialized with the address of one field of the struct
+/// that `inst.struct_ptr` points to, and returns that local. The field is
+/// selected by `inst.field_index` and its name is escaped with `fmtIdent`.
+fn genStructFieldPtr(o: *Object, inst: *Inst.StructFieldPtr) !CValue {
+    const w = o.writer();
+    const base_ptr = try o.resolveInst(inst.struct_ptr);
+    const container = inst.struct_ptr.ty.elemType().castTag(.@"struct").?.data;
+    const member = fmtIdent(container.fields.entries.items[inst.field_index].key);
+
+    const result = try o.allocLocal(inst.base.ty, .Const);
+    try w.writeAll(" = &");
+    switch (base_ptr) {
+        // A `.local_ref` carries the index of local `t<N>`; reach the field with `.`.
+        .local_ref => |i| try w.print("t{d}.", .{i}),
+        // Any other value is written out and dereferenced with `->`.
+        else => {
+            try o.writeCValue(w, base_ptr);
+            try w.writeAll("->");
+        },
+    }
+    try w.print("{};\n", .{member});
+    return result;
+}
+
 // *(E!T) -> E NOT *E
 fn genUnwrapErrUnionErr(o: *Object, inst: *Inst.UnOp) !CValue {
     const writer = o.writer();
@@ -1088,7 +1182,7 @@ fn IndentWriter(comptime UnderlyingWriter: type) type {
         pub const Error = UnderlyingWriter.Error;
         pub const Writer = std.io.Writer(*Self, Error, write);
 
-        pub const indent_delta = 4;
+        pub const indent_delta = 1;
 
         underlying_writer: UnderlyingWriter,
         indent_count: usize = 0,
src/AstGen.zig
@@ -124,6 +124,9 @@ pub const ResultLoc = union(enum) {
     /// The expression must generate a pointer rather than a value. For example, the left hand side
     /// of an assignment uses this kind of result location.
     ref,
+    /// The callee will accept a ref, but it is not necessary, and the `ResultLoc`
+    /// may be treated as `none` instead.
+    none_or_ref,
     /// The expression will be coerced into this type, but it will be evaluated as an rvalue.
     ty: zir.Inst.Ref,
     /// The expression must store its result into this typed pointer. The result instruction
@@ -157,7 +160,7 @@ pub const ResultLoc = union(enum) {
         var elide_store_to_block_ptr_instructions = false;
         switch (rl) {
             // In this branch there will not be any store_to_block_ptr instructions.
-            .discard, .none, .ty, .ref => return .{
+            .discard, .none, .none_or_ref, .ty, .ref => return .{
                 .tag = .break_operand,
                 .elide_store_to_block_ptr_instructions = false,
             },
@@ -606,8 +609,13 @@ pub fn expr(gz: *GenZir, scope: *Scope, rl: ResultLoc, node: ast.Node.Index) Inn
 
         .deref => {
             const lhs = try expr(gz, scope, .none, node_datas[node].lhs);
-            const result = try gz.addUnNode(.load, lhs, node);
-            return rvalue(gz, scope, rl, result, node);
+            switch (rl) {
+                .ref, .none_or_ref => return lhs,
+                else => {
+                    const result = try gz.addUnNode(.load, lhs, node);
+                    return rvalue(gz, scope, rl, result, node);
+                },
+            }
         },
         .address_of => {
             const result = try expr(gz, scope, .ref, node_datas[node].lhs);
@@ -816,7 +824,7 @@ pub fn structInitExpr(
     }
     switch (rl) {
         .discard => return mod.failNode(scope, node, "TODO implement structInitExpr discard", .{}),
-        .none => return mod.failNode(scope, node, "TODO implement structInitExpr none", .{}),
+        .none, .none_or_ref => return mod.failNode(scope, node, "TODO implement structInitExpr none", .{}),
         .ref => unreachable, // struct literal not valid as l-value
         .ty => |ty_inst| {
             return mod.failNode(scope, node, "TODO implement structInitExpr ty", .{});
@@ -1980,7 +1988,7 @@ fn orelseCatchExpr(
     // TODO handle catch
     const operand_rl: ResultLoc = switch (block_scope.break_result_loc) {
         .ref => .ref,
-        .discard, .none, .block_ptr, .inferred_ptr => .none,
+        .discard, .none, .none_or_ref, .block_ptr, .inferred_ptr => .none,
         .ty => |elem_ty| blk: {
             const wrapped_ty = try block_scope.addUnNode(.optional_type, elem_ty, node);
             break :blk .{ .ty = wrapped_ty };
@@ -2156,7 +2164,7 @@ pub fn fieldAccess(
             .field_name_start = str_index,
         }),
         else => return rvalue(gz, scope, rl, try gz.addPlNode(.field_val, node, zir.Inst.Field{
-            .lhs = try expr(gz, scope, .none, object_node),
+            .lhs = try expr(gz, scope, .none_or_ref, object_node),
             .field_name_start = str_index,
         }), node),
     }
@@ -3474,9 +3482,13 @@ fn identifier(
             .local_ptr => {
                 const local_ptr = s.cast(Scope.LocalPtr).?;
                 if (mem.eql(u8, local_ptr.name, ident_name)) {
-                    if (rl == .ref) return local_ptr.ptr;
-                    const loaded = try gz.addUnNode(.load, local_ptr.ptr, ident);
-                    return rvalue(gz, scope, rl, loaded, ident);
+                    switch (rl) {
+                        .ref, .none_or_ref => return local_ptr.ptr,
+                        else => {
+                            const loaded = try gz.addUnNode(.load, local_ptr.ptr, ident);
+                            return rvalue(gz, scope, rl, loaded, ident);
+                        },
+                    }
                 }
                 s = local_ptr.parent;
             },
@@ -3493,7 +3505,7 @@ fn identifier(
     }
     const decl_index = @intCast(u32, gop.index);
     switch (rl) {
-        .ref => return gz.addDecl(.decl_ref, decl_index, ident),
+        .ref, .none_or_ref => return gz.addDecl(.decl_ref, decl_index, ident),
         else => return rvalue(gz, scope, rl, try gz.addDecl(.decl_val, decl_index, ident), ident),
     }
 }
@@ -3697,7 +3709,7 @@ fn as(
 ) InnerError!zir.Inst.Ref {
     const dest_type = try typeExpr(gz, scope, lhs);
     switch (rl) {
-        .none, .discard, .ref, .ty => {
+        .none, .none_or_ref, .discard, .ref, .ty => {
             const result = try expr(gz, scope, .{ .ty = dest_type }, rhs);
             return rvalue(gz, scope, rl, result, node);
         },
@@ -3781,7 +3793,7 @@ fn bitCast(
             });
             return rvalue(gz, scope, rl, result, node);
         },
-        .ref => unreachable, // `@bitCast` is not allowed as an r-value.
+        .ref, .none_or_ref => unreachable, // `@bitCast` is not allowed as an r-value.
         .ptr => |result_ptr| {
             const casted_result_ptr = try gz.addUnNode(.bitcast_result_ptr, result_ptr, node);
             return expr(gz, scope, .{ .ptr = casted_result_ptr }, rhs);
@@ -4354,7 +4366,7 @@ fn rvalue(
     src_node: ast.Node.Index,
 ) InnerError!zir.Inst.Ref {
     switch (rl) {
-        .none => return result,
+        .none, .none_or_ref => return result,
         .discard => {
             // Emit a compile error for discarding error values.
             _ = try gz.addUnNode(.ensure_result_non_error, result, src_node);
src/Module.zig
@@ -373,6 +373,11 @@ pub const Struct = struct {
         /// Uses `unreachable_value` to indicate no default.
         default_val: Value,
     };
+
+    /// Returns a copy of the owner decl's name, allocated with `gpa`.
+    /// Caller owns the returned slice.
+    pub fn getFullyQualifiedName(struct_obj: *Struct, gpa: *Allocator) ![]u8 {
+        // TODO this should return e.g. "std.fs.Dir.OpenOptions"
+        const decl_name = mem.spanZ(struct_obj.owner_decl.name);
+        return gpa.dupe(u8, decl_name);
+    }
 };
 
 /// Some Fn struct memory is owned by the Decl's TypedValue.Managed arena allocator.
@@ -1048,6 +1053,9 @@ pub const Scope = struct {
                     gz.rl_ty_inst = ty_inst;
                     gz.break_result_loc = parent_rl;
                 },
+                .none_or_ref => {
+                    gz.break_result_loc = .ref;
+                },
                 .discard, .none, .ptr, .ref => {
                     gz.break_result_loc = parent_rl;
                 },
src/Sema.zig
@@ -600,6 +600,7 @@ fn zirStructDecl(
 
     const struct_obj = try new_decl_arena.allocator.create(Module.Struct);
     const struct_ty = try Type.Tag.@"struct".create(&new_decl_arena.allocator, struct_obj);
+    const struct_val = try Value.Tag.ty.create(&new_decl_arena.allocator, struct_ty);
     struct_obj.* = .{
         .owner_decl = sema.owner_decl,
         .fields = fields_map,
@@ -611,7 +612,7 @@ fn zirStructDecl(
     };
     const new_decl = try sema.mod.createAnonymousDecl(&block.base, &new_decl_arena, .{
         .ty = Type.initTag(.type),
-        .val = try Value.Tag.ty.create(gpa, struct_ty),
+        .val = struct_val,
     });
     return sema.analyzeDeclVal(block, src, new_decl);
 }
@@ -2139,7 +2140,10 @@ fn zirFieldVal(sema: *Sema, block: *Scope.Block, inst: zir.Inst.Index) InnerErro
     const extra = sema.code.extraData(zir.Inst.Field, inst_data.payload_index).data;
     const field_name = sema.code.nullTerminatedString(extra.field_name_start);
     const object = try sema.resolveInst(extra.lhs);
-    const object_ptr = try sema.analyzeRef(block, src, object);
+    const object_ptr = if (object.ty.zigTypeTag() == .Pointer)
+        object
+    else
+        try sema.analyzeRef(block, src, object);
     const result_ptr = try sema.namedFieldPtr(block, src, object_ptr, field_name, field_name_src);
     return sema.analyzeLoad(block, src, result_ptr, result_ptr.src);
 }
src/zir.zig
@@ -338,6 +338,7 @@ pub const Inst = struct {
         field_ptr,
         /// Given a struct or object that contains virtual fields, returns the named field.
         /// The field name is stored in string_bytes. Used by a.b syntax.
+        /// This instruction also accepts a pointer.
         /// Uses `pl_node` field. The AST node is the a.b syntax. Payload is Field.
         field_val,
         /// Given a pointer to a struct or object that contains virtual fields, returns a pointer
test/stage2/cbe.zig
@@ -489,8 +489,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\ZIG_EXTERN_C zig_noreturn void _start(void);
         \\
         \\zig_noreturn void _start(void) {
-        \\    zig_breakpoint();
-        \\    zig_unreachable();
+        \\ zig_breakpoint();
+        \\ zig_unreachable();
         \\}
         \\
     );