Commit 078037ab9b

Andrew Kelley <andrew@ziglang.org>
2022-03-11 08:04:42
stage2: passing threadlocal tests for x86_64-linux
* use the real start code for the LLVM backend with x86_64-linux - there is
  still a check for zig_backend after initializing the TLS area to skip some
  stuff.
* introduce new AIR instructions and implement them for the LLVM backend.
  They are the same as `call` except with a modifier.
  - call_always_tail
  - call_never_tail
  - call_never_inline
* LLVM backend calls hasRuntimeBitsIgnoreComptime in more places to avoid
  unnecessarily depending on comptimeOnly being resolved for some types.
* LLVM backend: remove duplicate code for setting linkage and value name.
  The canonical place for this is in `updateDeclExports`.
* LLVM backend: do some assembly template massaging to render `%%` as `%`.
  More hacks will be needed to make inline assembly catch up with stage1.
1 parent b28b3f6
lib/std/start.zig
@@ -22,7 +22,16 @@ comptime {
     // The self-hosted compiler is not fully capable of handling all of this start.zig file.
     // Until then, we have simplified logic here for self-hosted. TODO remove this once
     // self-hosted is capable enough to handle all of the real start.zig logic.
-    if (builtin.zig_backend != .stage1) {
+    if (builtin.zig_backend == .stage2_wasm or
+        builtin.zig_backend == .stage2_c or
+        builtin.zig_backend == .stage2_x86_64 or
+        builtin.zig_backend == .stage2_x86 or
+        builtin.zig_backend == .stage2_aarch64 or
+        builtin.zig_backend == .stage2_arm or
+        builtin.zig_backend == .stage2_riscv64 or
+        (builtin.zig_backend == .stage2_llvm and native_os != .linux) or
+        (builtin.zig_backend == .stage2_llvm and native_arch != .x86_64))
+    {
         if (builtin.output_mode == .Exe) {
             if ((builtin.link_libc or builtin.object_format == .c) and @hasDecl(root, "main")) {
                 if (@typeInfo(@TypeOf(root.main)).Fn.calling_convention != .C) {
@@ -399,6 +408,11 @@ fn posixCallMainAndExit() noreturn {
         // Initialize the TLS area.
         std.os.linux.tls.initStaticTLS(phdrs);
 
+        if (builtin.zig_backend == .stage2_llvm) {
+            root.main();
+            exit2(0);
+        }
+
         // The way Linux executables represent stack size is via the PT_GNU_STACK
         // program header. However the kernel does not recognize it; it always gives 8 MiB.
         // Here we look for the stack size in our program headers and use setrlimit
src/arch/aarch64/CodeGen.zig
@@ -585,7 +585,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .ret_addr        => try self.airRetAddr(inst),
             .frame_addr      => try self.airFrameAddress(inst),
             .fence           => try self.airFence(),
-            .call            => try self.airCall(inst),
             .cond_br         => try self.airCondBr(inst),
             .dbg_stmt        => try self.airDbgStmt(inst),
             .fptrunc         => try self.airFptrunc(inst),
@@ -634,6 +633,11 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .prefetch        => try self.airPrefetch(inst),
             .mul_add         => try self.airMulAdd(inst),
 
+            .call              => try self.airCall(inst, .auto),
+            .call_always_tail  => try self.airCall(inst, .always_tail),
+            .call_never_tail   => try self.airCall(inst, .never_tail),
+            .call_never_inline => try self.airCall(inst, .never_inline),
+
             .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
             .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
             .atomic_store_release   => try self.airAtomicStore(inst, .Release),
@@ -2325,7 +2329,8 @@ fn airFence(self: *Self) !void {
     //return self.finishAirBookkeeping();
 }
 
-fn airCall(self: *Self, inst: Air.Inst.Index) !void {
+fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.Modifier) !void {
+    if (modifier == .always_tail) return self.fail("TODO implement tail calls for aarch64", .{});
     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
     const callee = pl_op.operand;
     const extra = self.air.extraData(Air.Call, pl_op.payload);
src/arch/arm/CodeGen.zig
@@ -581,7 +581,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .ret_addr        => try self.airRetAddr(inst),
             .frame_addr      => try self.airFrameAddress(inst),
             .fence           => try self.airFence(),
-            .call            => try self.airCall(inst),
             .cond_br         => try self.airCondBr(inst),
             .dbg_stmt        => try self.airDbgStmt(inst),
             .fptrunc         => try self.airFptrunc(inst),
@@ -630,6 +629,11 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .prefetch        => try self.airPrefetch(inst),
             .mul_add         => try self.airMulAdd(inst),
 
+            .call              => try self.airCall(inst, .auto),
+            .call_always_tail  => try self.airCall(inst, .always_tail),
+            .call_never_tail   => try self.airCall(inst, .never_tail),
+            .call_never_inline => try self.airCall(inst, .never_inline),
+
             .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
             .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
             .atomic_store_release   => try self.airAtomicStore(inst, .Release),
@@ -2510,7 +2514,8 @@ fn airFence(self: *Self) !void {
     //return self.finishAirBookkeeping();
 }
 
-fn airCall(self: *Self, inst: Air.Inst.Index) !void {
+fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.Modifier) !void {
+    if (modifier == .always_tail) return self.fail("TODO implement tail calls for arm", .{});
     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
     const callee = pl_op.operand;
     const extra = self.air.extraData(Air.Call, pl_op.payload);
src/arch/riscv64/CodeGen.zig
@@ -553,7 +553,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .ret_addr        => try self.airRetAddr(inst),
             .frame_addr      => try self.airFrameAddress(inst),
             .fence           => try self.airFence(),
-            .call            => try self.airCall(inst),
             .cond_br         => try self.airCondBr(inst),
             .dbg_stmt        => try self.airDbgStmt(inst),
             .fptrunc         => try self.airFptrunc(inst),
@@ -602,6 +601,11 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .prefetch        => try self.airPrefetch(inst),
             .mul_add         => try self.airMulAdd(inst),
 
+            .call              => try self.airCall(inst, .auto),
+            .call_always_tail  => try self.airCall(inst, .always_tail),
+            .call_never_tail   => try self.airCall(inst, .never_tail),
+            .call_never_inline => try self.airCall(inst, .never_inline),
+
             .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
             .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
             .atomic_store_release   => try self.airAtomicStore(inst, .Release),
@@ -1458,7 +1462,8 @@ fn airFence(self: *Self) !void {
     //return self.finishAirBookkeeping();
 }
 
-fn airCall(self: *Self, inst: Air.Inst.Index) !void {
+fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.Modifier) !void {
+    if (modifier == .always_tail) return self.fail("TODO implement tail calls for riscv64", .{});
     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
     const fn_ty = self.air.typeOf(pl_op.operand);
     const callee = pl_op.operand;
@@ -2496,7 +2501,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues {
             result.stack_byte_count = next_stack_offset;
             result.stack_align = 16;
         },
-        else => return self.fail("TODO implement function parameters for {} on aarch64", .{cc}),
+        else => return self.fail("TODO implement function parameters for {} on riscv64", .{cc}),
     }
 
     if (ret_ty.zigTypeTag() == .NoReturn) {
src/arch/wasm/CodeGen.zig
@@ -1218,7 +1218,6 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
         .breakpoint => self.airBreakpoint(inst),
         .br => self.airBr(inst),
         .bool_to_int => self.airBoolToInt(inst),
-        .call => self.airCall(inst),
         .cond_br => self.airCondBr(inst),
         .dbg_stmt => WValue.none,
         .intcast => self.airIntcast(inst),
@@ -1227,6 +1226,11 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
         .float_to_int => self.airFloatToInt(inst),
         .get_union_tag => self.airGetUnionTag(inst),
 
+        .call => self.airCall(inst, .auto),
+        .call_always_tail => self.airCall(inst, .always_tail),
+        .call_never_tail => self.airCall(inst, .never_tail),
+        .call_never_inline => self.airCall(inst, .never_inline),
+
         .is_err => self.airIsErr(inst, .i32_ne),
         .is_non_err => self.airIsErr(inst, .i32_eq),
 
@@ -1375,7 +1379,7 @@ fn airRet(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
 fn airRetPtr(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
     const child_type = self.air.typeOfIndex(inst).childType();
 
-    if (!child_type.isFnOrHasRuntimeBits()) {
+    if (!child_type.isFnOrHasRuntimeBitsIgnoreComptime()) {
         return self.allocStack(Type.usize); // create pointer to void
     }
 
@@ -1401,7 +1405,8 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
     return .none;
 }
 
-fn airCall(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
+fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.Modifier) InnerError!WValue {
+    if (modifier == .always_tail) return self.fail("TODO implement tail calls for wasm", .{});
     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
     const extra = self.air.extraData(Air.Call, pl_op.payload);
     const args = self.air.extra[extra.end..][0..extra.data.args_len];
src/arch/x86_64/CodeGen.zig
@@ -670,7 +670,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .ret_addr        => try self.airRetAddr(inst),
             .frame_addr      => try self.airFrameAddress(inst),
             .fence           => try self.airFence(),
-            .call            => try self.airCall(inst),
             .cond_br         => try self.airCondBr(inst),
             .dbg_stmt        => try self.airDbgStmt(inst),
             .fptrunc         => try self.airFptrunc(inst),
@@ -719,6 +718,11 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .prefetch        => try self.airPrefetch(inst),
             .mul_add         => try self.airMulAdd(inst),
 
+            .call              => try self.airCall(inst, .auto),
+            .call_always_tail  => try self.airCall(inst, .always_tail),
+            .call_never_tail   => try self.airCall(inst, .never_tail),
+            .call_never_inline => try self.airCall(inst, .never_inline),
+
             .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
             .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
             .atomic_store_release   => try self.airAtomicStore(inst, .Release),
@@ -3263,7 +3267,8 @@ fn airFence(self: *Self) !void {
     //return self.finishAirBookkeeping();
 }
 
-fn airCall(self: *Self, inst: Air.Inst.Index) !void {
+fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.Modifier) !void {
+    if (modifier == .always_tail) return self.fail("TODO implement tail calls for x86_64", .{});
     const pl_op = self.air.instructions.items(.data)[inst].pl_op;
     const callee = pl_op.operand;
     const extra = self.air.extraData(Air.Call, pl_op.payload);
src/codegen/c.zig
@@ -1685,7 +1685,6 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
             .assembly         => try airAsm(f, inst),
             .block            => try airBlock(f, inst),
             .bitcast          => try airBitcast(f, inst),
-            .call             => try airCall(f, inst),
             .dbg_stmt         => try airDbgStmt(f, inst),
             .intcast          => try airIntCast(f, inst),
             .trunc            => try airTrunc(f, inst),
@@ -1721,6 +1720,11 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
             .union_init       => try airUnionInit(f, inst),
             .prefetch         => try airPrefetch(f, inst),
 
+            .call              => try airCall(f, inst, .auto),
+            .call_always_tail  => try airCall(f, inst, .always_tail),
+            .call_never_tail   => try airCall(f, inst, .never_tail),
+            .call_never_inline => try airCall(f, inst, .never_inline),
+
             .int_to_float,
             .float_to_int,
             .fptrunc,
@@ -1904,7 +1908,7 @@ fn airAlloc(f: *Function, inst: Air.Inst.Index) !CValue {
 
     const elem_type = inst_ty.elemType();
     const mutability: Mutability = if (inst_ty.isConstPtr()) .Const else .Mut;
-    if (!elem_type.isFnOrHasRuntimeBits()) {
+    if (!elem_type.isFnOrHasRuntimeBitsIgnoreComptime()) {
         return CValue.undefined_ptr;
     }
 
@@ -1979,7 +1983,7 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue {
 fn airRet(f: *Function, inst: Air.Inst.Index) !CValue {
     const un_op = f.air.instructions.items(.data)[inst].un_op;
     const writer = f.object.writer();
-    if (f.air.typeOf(un_op).isFnOrHasRuntimeBits()) {
+    if (f.air.typeOf(un_op).isFnOrHasRuntimeBitsIgnoreComptime()) {
         const operand = try f.resolveInst(un_op);
         try writer.writeAll("return ");
         try f.writeCValue(writer, operand);
@@ -1995,7 +1999,7 @@ fn airRetLoad(f: *Function, inst: Air.Inst.Index) !CValue {
     const writer = f.object.writer();
     const ptr_ty = f.air.typeOf(un_op);
     const ret_ty = ptr_ty.childType();
-    if (!ret_ty.isFnOrHasRuntimeBits()) {
+    if (!ret_ty.isFnOrHasRuntimeBitsIgnoreComptime()) {
         try writer.writeAll("return;\n");
     }
     const ptr = try f.resolveInst(un_op);
@@ -2561,7 +2565,18 @@ fn airSlice(f: *Function, inst: Air.Inst.Index) !CValue {
     return local;
 }
 
-fn airCall(f: *Function, inst: Air.Inst.Index) !CValue {
+fn airCall(
+    f: *Function,
+    inst: Air.Inst.Index,
+    modifier: std.builtin.CallOptions.Modifier,
+) !CValue {
+    switch (modifier) {
+        .auto => {},
+        .always_tail => return f.fail("TODO: C backend: call with always_tail attribute", .{}),
+        .never_tail => return f.fail("TODO: C backend: call with never_tail attribute", .{}),
+        .never_inline => return f.fail("TODO: C backend: call with never_inline attribute", .{}),
+        else => unreachable,
+    }
     const pl_op = f.air.instructions.items(.data)[inst].pl_op;
     const extra = f.air.extraData(Air.Call, pl_op.payload);
     const args = @bitCast([]const Air.Inst.Ref, f.air.extra[extra.end..][0..extra.data.args_len]);
src/codegen/llvm.zig
@@ -661,14 +661,19 @@ pub const Object = struct {
         // If the module does not already have the function, we ignore this function call
         // because we call `updateDeclExports` at the end of `updateFunc` and `updateDecl`.
         const llvm_global = self.decl_map.get(decl) orelse return;
-        const is_extern = decl.isExtern();
-        if (is_extern) {
+        if (decl.isExtern()) {
             llvm_global.setValueName(decl.name);
             llvm_global.setUnnamedAddr(.False);
             llvm_global.setLinkage(.External);
             if (decl.val.castTag(.variable)) |variable| {
-                if (variable.data.is_threadlocal) llvm_global.setThreadLocalMode(.GeneralDynamicTLSModel);
-                if (variable.data.is_weak_linkage) llvm_global.setLinkage(.ExternalWeak);
+                if (variable.data.is_threadlocal) {
+                    llvm_global.setThreadLocalMode(.GeneralDynamicTLSModel);
+                } else {
+                    llvm_global.setThreadLocalMode(.NotThreadLocal);
+                }
+                if (variable.data.is_weak_linkage) {
+                    llvm_global.setLinkage(.ExternalWeak);
+                }
             }
         } else if (exports.len != 0) {
             const exp_name = exports[0].options.name;
@@ -681,7 +686,9 @@ pub const Object = struct {
                 .LinkOnce => llvm_global.setLinkage(.LinkOnceODR),
             }
             if (decl.val.castTag(.variable)) |variable| {
-                if (variable.data.is_threadlocal) llvm_global.setThreadLocalMode(.GeneralDynamicTLSModel);
+                if (variable.data.is_threadlocal) {
+                    llvm_global.setThreadLocalMode(.GeneralDynamicTLSModel);
+                }
             }
             // If a Decl is exported more than one time (which is rare),
             // we add aliases for all but the first export.
@@ -709,6 +716,14 @@ pub const Object = struct {
             llvm_global.setValueName2(fqn.ptr, fqn.len);
             llvm_global.setLinkage(.Internal);
             llvm_global.setUnnamedAddr(.True);
+            if (decl.val.castTag(.variable)) |variable| {
+                const single_threaded = module.comp.bin_file.options.single_threaded;
+                if (variable.data.is_threadlocal and !single_threaded) {
+                    llvm_global.setThreadLocalMode(.GeneralDynamicTLSModel);
+                } else {
+                    llvm_global.setThreadLocalMode(.NotThreadLocal);
+                }
+            }
         }
     }
 
@@ -937,19 +952,6 @@ pub const DeclGen = struct {
         const llvm_global = dg.object.llvm_module.addGlobalInAddressSpace(llvm_type, fqn, llvm_addrspace);
         gop.value_ptr.* = llvm_global;
 
-        if (decl.isExtern()) {
-            llvm_global.setValueName(decl.name);
-            llvm_global.setUnnamedAddr(.False);
-            llvm_global.setLinkage(.External);
-            if (decl.val.castTag(.variable)) |variable| {
-                if (variable.data.is_threadlocal) llvm_global.setThreadLocalMode(.GeneralDynamicTLSModel);
-                if (variable.data.is_weak_linkage) llvm_global.setLinkage(.ExternalWeak);
-            }
-        } else {
-            llvm_global.setLinkage(.Internal);
-            llvm_global.setUnnamedAddr(.True);
-        }
-
         return llvm_global;
     }
 
@@ -1033,8 +1035,8 @@ pub const DeclGen = struct {
                 const elem_ty = ptr_info.pointee_type;
                 const lower_elem_ty = switch (elem_ty.zigTypeTag()) {
                     .Opaque, .Fn => true,
-                    .Array => elem_ty.childType().hasRuntimeBits(),
-                    else => elem_ty.hasRuntimeBits(),
+                    .Array => elem_ty.childType().hasRuntimeBitsIgnoreComptime(),
+                    else => elem_ty.hasRuntimeBitsIgnoreComptime(),
                 };
                 const llvm_elem_ty = if (lower_elem_ty)
                     try dg.llvmType(elem_ty)
@@ -3158,7 +3160,6 @@ pub const FuncGen = struct {
                 .breakpoint     => try self.airBreakpoint(inst),
                 .ret_addr       => try self.airRetAddr(inst),
                 .frame_addr     => try self.airFrameAddress(inst),
-                .call           => try self.airCall(inst),
                 .cond_br        => try self.airCondBr(inst),
                 .intcast        => try self.airIntCast(inst),
                 .trunc          => try self.airTrunc(inst),
@@ -3175,6 +3176,11 @@ pub const FuncGen = struct {
                 .slice_ptr      => try self.airSliceField(inst, 0),
                 .slice_len      => try self.airSliceField(inst, 1),
 
+                .call              => try self.airCall(inst, .Auto),
+                .call_always_tail  => try self.airCall(inst, .AlwaysTail),
+                .call_never_tail   => try self.airCall(inst, .NeverTail),
+                .call_never_inline => try self.airCall(inst, .NeverInline),
+
                 .ptr_slice_ptr_ptr => try self.airPtrSliceFieldPtr(inst, 0),
                 .ptr_slice_len_ptr => try self.airPtrSliceFieldPtr(inst, 1),
 
@@ -3253,7 +3259,7 @@ pub const FuncGen = struct {
         }
     }
 
-    fn airCall(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airCall(self: *FuncGen, inst: Air.Inst.Index, attr: llvm.CallAttr) !?*const llvm.Value {
         const pl_op = self.air.instructions.items(.data)[inst].pl_op;
         const extra = self.air.extraData(Air.Call, pl_op.payload);
         const args = @bitCast([]const Air.Inst.Ref, self.air.extra[extra.end..][0..extra.data.args_len]);
@@ -3298,7 +3304,7 @@ pub const FuncGen = struct {
             llvm_args.items.ptr,
             @intCast(c_uint, llvm_args.items.len),
             toLlvmCallConv(zig_fn_ty.fnCallingConvention(), target),
-            .Auto,
+            attr,
             "",
         );
 
@@ -4063,6 +4069,34 @@ pub const FuncGen = struct {
         }
         const asm_source = std.mem.sliceAsBytes(self.air.extra[extra_i..])[0..extra.data.source_len];
 
+        // hackety hacks until stage2 has proper inline asm in the frontend.
+        var rendered_template = std.ArrayList(u8).init(self.gpa);
+        defer rendered_template.deinit();
+
+        const State = enum { start, percent };
+
+        var state: State = .start;
+
+        for (asm_source) |byte| {
+            switch (state) {
+                .start => switch (byte) {
+                    '%' => state = .percent,
+                    else => try rendered_template.append(byte),
+                },
+                .percent => switch (byte) {
+                    '%' => {
+                        try rendered_template.append('%');
+                        state = .start;
+                    },
+                    else => {
+                        try rendered_template.append('%');
+                        try rendered_template.append(byte);
+                        state = .start;
+                    },
+                },
+            }
+        }
+
         const ret_ty = self.air.typeOfIndex(inst);
         const ret_llvm_ty = try self.dg.llvmType(ret_ty);
         const llvm_fn_ty = llvm.functionType(
@@ -4073,8 +4107,8 @@ pub const FuncGen = struct {
         );
         const asm_fn = llvm.getInlineAsm(
             llvm_fn_ty,
-            asm_source.ptr,
-            asm_source.len,
+            rendered_template.items.ptr,
+            rendered_template.items.len,
             llvm_constraints.items.ptr,
             llvm_constraints.items.len,
             llvm.Bool.fromBool(is_volatile),
@@ -5206,7 +5240,7 @@ pub const FuncGen = struct {
         if (self.liveness.isUnused(inst)) return null;
         const ptr_ty = self.air.typeOfIndex(inst);
         const pointee_type = ptr_ty.childType();
-        if (!pointee_type.isFnOrHasRuntimeBits()) return self.dg.lowerPtrToVoid(ptr_ty);
+        if (!pointee_type.isFnOrHasRuntimeBitsIgnoreComptime()) return self.dg.lowerPtrToVoid(ptr_ty);
 
         const pointee_llvm_ty = try self.dg.llvmType(pointee_type);
         const alloca_inst = self.buildAlloca(pointee_llvm_ty);
@@ -5220,7 +5254,7 @@ pub const FuncGen = struct {
         if (self.liveness.isUnused(inst)) return null;
         const ptr_ty = self.air.typeOfIndex(inst);
         const ret_ty = ptr_ty.childType();
-        if (!ret_ty.isFnOrHasRuntimeBits()) return self.dg.lowerPtrToVoid(ptr_ty);
+        if (!ret_ty.isFnOrHasRuntimeBitsIgnoreComptime()) return self.dg.lowerPtrToVoid(ptr_ty);
         if (self.ret_ptr) |ret_ptr| return ret_ptr;
         const ret_llvm_ty = try self.dg.llvmType(ret_ty);
         const target = self.dg.module.getTarget();
@@ -5457,7 +5491,7 @@ pub const FuncGen = struct {
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const ptr_ty = self.air.typeOf(bin_op.lhs);
         const operand_ty = ptr_ty.childType();
-        if (!operand_ty.isFnOrHasRuntimeBits()) return null;
+        if (!operand_ty.isFnOrHasRuntimeBitsIgnoreComptime()) return null;
         var ptr = try self.resolveInst(bin_op.lhs);
         var element = try self.resolveInst(bin_op.rhs);
         const opt_abi_ty = self.dg.getAtomicAbiType(operand_ty, false);
@@ -6329,7 +6363,7 @@ pub const FuncGen = struct {
 
     fn load(self: *FuncGen, ptr: *const llvm.Value, ptr_ty: Type) !?*const llvm.Value {
         const info = ptr_ty.ptrInfo().data;
-        if (!info.pointee_type.hasRuntimeBits()) return null;
+        if (!info.pointee_type.hasRuntimeBitsIgnoreComptime()) return null;
 
         const target = self.dg.module.getTarget();
         const ptr_alignment = ptr_ty.ptrAlignment(target);
@@ -6384,7 +6418,7 @@ pub const FuncGen = struct {
     ) void {
         const info = ptr_ty.ptrInfo().data;
         const elem_ty = info.pointee_type;
-        if (!elem_ty.isFnOrHasRuntimeBits()) {
+        if (!elem_ty.isFnOrHasRuntimeBitsIgnoreComptime()) {
             return;
         }
         const target = self.dg.module.getTarget();
src/Air.zig
@@ -226,6 +226,12 @@ pub const Inst = struct {
         /// Uses the `pl_op` field with the `Call` payload. operand is the callee.
         /// Triggers `resolveTypeLayout` on the return type of the callee.
         call,
+        /// Same as `call` except with the `always_tail` attribute.
+        call_always_tail,
+        /// Same as `call` except with the `never_tail` attribute.
+        call_never_tail,
+        /// Same as `call` except with the `never_inline` attribute.
+        call_never_inline,
         /// Count leading zeroes of an integer according to its representation in twos complement.
         /// Result type will always be an unsigned integer big enough to fit the answer.
         /// Uses the `ty_op` field.
@@ -969,7 +975,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
 
         .tag_name, .error_name => return Type.initTag(.const_slice_u8_sentinel_0),
 
-        .call => {
+        .call, .call_always_tail, .call_never_tail, .call_never_inline => {
             const callee_ty = air.typeOf(datas[inst].pl_op.operand);
             switch (callee_ty.zigTypeTag()) {
                 .Fn => return callee_ty.fnReturnType(),
src/Liveness.zig
@@ -399,7 +399,7 @@ fn analyzeInst(
             return trackOperands(a, new_set, inst, main_tomb, .{ prefetch.ptr, .none, .none });
         },
 
-        .call => {
+        .call, .call_always_tail, .call_never_tail, .call_never_inline => {
             const inst_data = inst_datas[inst].pl_op;
             const callee = inst_data.operand;
             const extra = a.air.extraData(Air.Call, inst_data.payload);
src/print_air.zig
@@ -227,12 +227,17 @@ const Writer = struct {
             .ptr_elem_ptr,
             => try w.writeTyPlBin(s, inst),
 
+            .call,
+            .call_always_tail,
+            .call_never_tail,
+            .call_never_inline,
+            => try w.writeCall(s, inst),
+
             .struct_field_ptr => try w.writeStructField(s, inst),
             .struct_field_val => try w.writeStructField(s, inst),
             .constant => try w.writeConstant(s, inst),
             .assembly => try w.writeAssembly(s, inst),
             .dbg_stmt => try w.writeDbgStmt(s, inst),
-            .call => try w.writeCall(s, inst),
             .aggregate_init => try w.writeAggregateInit(s, inst),
             .union_init => try w.writeUnionInit(s, inst),
             .br => try w.writeBr(s, inst),
src/print_zir.zig
@@ -1942,6 +1942,7 @@ const Writer = struct {
             break :blk init_inst;
         };
         try self.writeFlag(stream, ", is_extern", small.is_extern);
+        try self.writeFlag(stream, ", is_threadlocal", small.is_threadlocal);
         try self.writeOptionalInstRef(stream, ", align=", align_inst);
         try self.writeOptionalInstRef(stream, ", init=", init_inst);
         try stream.writeAll("))");
src/Sema.zig
@@ -4458,21 +4458,19 @@ fn analyzeCall(
         );
     }
 
-    switch (modifier) {
+    const call_tag: Air.Inst.Tag = switch (modifier) {
         .auto,
         .always_inline,
         .compile_time,
         .no_async,
-        => {},
-
-        .async_kw,
-        .never_tail,
-        .never_inline,
-        .always_tail,
-        => return sema.fail(block, call_src, "TODO implement call with modifier {}", .{
-            modifier,
-        }),
-    }
+        => Air.Inst.Tag.call,
+
+        .never_tail => Air.Inst.Tag.call_never_tail,
+        .never_inline => Air.Inst.Tag.call_never_inline,
+        .always_tail => Air.Inst.Tag.call_always_tail,
+
+        .async_kw => return sema.fail(block, call_src, "TODO implement async call", .{}),
+    };
 
     const gpa = sema.gpa;
 
@@ -4490,6 +4488,7 @@ fn analyzeCall(
             func_ty_info,
             ensure_result_used,
             uncasted_args,
+            call_tag,
         )) |some| {
             return some;
         } else |err| switch (err) {
@@ -4771,7 +4770,7 @@ fn analyzeCall(
         try sema.air_extra.ensureUnusedCapacity(gpa, @typeInfo(Air.Call).Struct.fields.len +
             args.len);
         const func_inst = try block.addInst(.{
-            .tag = .call,
+            .tag = call_tag,
             .data = .{ .pl_op = .{
                 .operand = func,
                 .payload = sema.addExtraAssumeCapacity(Air.Call{
@@ -4798,6 +4797,7 @@ fn instantiateGenericCall(
     func_ty_info: Type.Payload.Function.Data,
     ensure_result_used: bool,
     uncasted_args: []const Air.Inst.Ref,
+    call_tag: Air.Inst.Tag,
 ) CompileError!Air.Inst.Ref {
     const mod = sema.mod;
     const gpa = sema.gpa;
@@ -5107,7 +5107,7 @@ fn instantiateGenericCall(
     try sema.air_extra.ensureUnusedCapacity(sema.gpa, @typeInfo(Air.Call).Struct.fields.len +
         runtime_args_len);
     const func_inst = try block.addInst(.{
-        .tag = .call,
+        .tag = call_tag,
         .data = .{ .pl_op = .{
             .operand = callee_inst,
             .payload = sema.addExtraAssumeCapacity(Air.Call{
src/type.zig
@@ -2187,7 +2187,8 @@ pub const Type = extern union {
                 if (fn_info.is_generic) return false;
                 if (fn_info.is_var_args) return true;
                 switch (fn_info.cc) {
-                    // If there was a comptime calling convention, it should also return false here.
+                    // If there was a comptime calling convention,
+                    // it should also return false here.
                     .Inline => return false,
                     else => {},
                 }
@@ -2198,6 +2199,14 @@ pub const Type = extern union {
         }
     }
 
+    /// Same as `isFnOrHasRuntimeBits` but comptime-only types may return a false positive.
+    pub fn isFnOrHasRuntimeBitsIgnoreComptime(ty: Type) bool {
+        return switch (ty.zigTypeTag()) {
+            .Fn => true,
+            else => return ty.hasRuntimeBitsIgnoreComptime(),
+        };
+    }
+
     pub fn isNoReturn(self: Type) bool {
         const definitely_correct_result =
             self.tag_if_small_enough != .bound_fn and
test/behavior/bugs/7250.zig
@@ -15,6 +15,11 @@ threadlocal var g_uart0 = nrfx_uart_t{
 
 test "reference a global threadlocal variable" {
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch != .x86_64) return error.SkipZigTest; // TODO
 
     _ = nrfx_uart_rx(&g_uart0);
 }
test/behavior/basic.zig
@@ -720,6 +720,11 @@ test "string concatenation" {
 
 test "thread local variable" {
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch != .x86_64) return error.SkipZigTest; // TODO
 
     const S = struct {
         threadlocal var t: i32 = 1234;
@@ -746,11 +751,12 @@ fn maybe(x: bool) anyerror!?u32 {
 }
 
 test "pointer to thread local array" {
-    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch != .x86_64) return error.SkipZigTest; // TODO
 
     const s = "Hello world";
     std.mem.copy(u8, buffer[0..], s);
test/stage2/aarch64.zig
@@ -159,7 +159,7 @@ pub fn addCases(ctx: *TestContext) !void {
     {
         var case = ctx.exe("hello world with updates", macos_aarch64);
         case.addError("", &[_][]const u8{
-            ":99:9: error: struct 'tmp.tmp' has no member named 'main'",
+            ":108:9: error: struct 'tmp.tmp' has no member named 'main'",
         });
 
         // Incorrect return type
test/stage2/x86_64.zig
@@ -1925,7 +1925,7 @@ fn addLinuxTestCases(ctx: *TestContext) !void {
         var case = ctx.exe("hello world with updates", linux_x64);
 
         case.addError("", &[_][]const u8{
-            ":99:9: error: struct 'tmp.tmp' has no member named 'main'",
+            ":108:9: error: struct 'tmp.tmp' has no member named 'main'",
         });
 
         // Incorrect return type
@@ -2176,7 +2176,7 @@ fn addMacOsTestCases(ctx: *TestContext) !void {
     {
         var case = ctx.exe("darwin hello world with updates", macos_x64);
         case.addError("", &[_][]const u8{
-            ":99:9: error: struct 'tmp.tmp' has no member named 'main'",
+            ":108:9: error: struct 'tmp.tmp' has no member named 'main'",
         });
 
         // Incorrect return type