Commit 817fa3af86

Jacob Young <jacobly0@users.noreply.github.com>
2023-07-30 09:18:10
std: cleanup asm usage
After fixing some issues with inline assembly in the C backend, the std cleanups have the side effect of making these functions compatible with that backend, allowing the C backend to be used on Linux without linking libc.
1 parent 43b8304
Changed files (10)
lib/std/os/linux/arm-eabi.zig
@@ -104,7 +104,19 @@ const CloneFn = *const fn (arg: usize) callconv(.C) u8;
 pub extern fn clone(func: CloneFn, stack: usize, flags: u32, arg: usize, ptid: *i32, tls: usize, ctid: *i32) usize;
 
 pub fn restore() callconv(.Naked) void {
-    return asm volatile ("svc #0"
+    if (@import("builtin").zig_backend == .stage2_c) {
+        asm volatile (
+            \\ mov r7, %[number]
+            \\ svc #0
+            \\ bx lr
+            :
+            : [number] "i" (@intFromEnum(SYS.sigreturn)),
+            : "memory"
+        );
+        unreachable;
+    }
+
+    asm volatile ("svc #0"
         :
         : [number] "{r7}" (@intFromEnum(SYS.sigreturn)),
         : "memory"
@@ -112,7 +124,19 @@ pub fn restore() callconv(.Naked) void {
 }
 
 pub fn restore_rt() callconv(.Naked) void {
-    return asm volatile ("svc #0"
+    if (@import("builtin").zig_backend == .stage2_c) {
+        asm volatile (
+            \\ mov r7, %[number]
+            \\ svc #0
+            \\ bx lr
+            :
+            : [number] "i" (@intFromEnum(SYS.rt_sigreturn)),
+            : "memory"
+        );
+        unreachable;
+    }
+
+    asm volatile ("svc #0"
         :
         : [number] "{r7}" (@intFromEnum(SYS.rt_sigreturn)),
         : "memory"
lib/std/os/linux/arm64.zig
@@ -106,20 +106,24 @@ pub extern fn clone(func: CloneFn, stack: usize, flags: u32, arg: usize, ptid: *
 pub const restore = restore_rt;
 
 pub fn restore_rt() callconv(.Naked) void {
-    switch (@import("builtin").zig_backend) {
-        .stage2_c => return asm volatile (
+    if (@import("builtin").zig_backend == .stage2_c) {
+        asm volatile (
             \\ mov x8, %[number]
             \\ svc #0
+            \\ ret
             :
             : [number] "i" (@intFromEnum(SYS.rt_sigreturn)),
             : "memory", "cc"
-        ),
-        else => return asm volatile ("svc #0"
-            :
-            : [number] "{x8}" (@intFromEnum(SYS.rt_sigreturn)),
-            : "memory", "cc"
-        ),
+        );
+        unreachable;
     }
+
+    asm volatile (
+        \\ svc #0
+        :
+        : [number] "{x8}" (@intFromEnum(SYS.rt_sigreturn)),
+        : "memory", "cc"
+    );
 }
 
 pub const O = struct {
lib/std/os/linux/tls.zig
@@ -115,12 +115,14 @@ pub fn setThreadPointer(addr: usize) void {
                 .entry_number = tls_image.gdt_entry_number,
                 .base_addr = addr,
                 .limit = 0xfffff,
-                .seg_32bit = 1,
-                .contents = 0, // Data
-                .read_exec_only = 0,
-                .limit_in_pages = 1,
-                .seg_not_present = 0,
-                .useable = 1,
+                .flags = .{
+                    .seg_32bit = 1,
+                    .contents = 0, // Data
+                    .read_exec_only = 0,
+                    .limit_in_pages = 1,
+                    .seg_not_present = 0,
+                    .useable = 1,
+                },
             };
             const rc = std.os.linux.syscall1(.set_thread_area, @intFromPtr(&user_desc));
             assert(rc == 0);
lib/std/os/linux/x86.zig
@@ -124,45 +124,45 @@ const CloneFn = *const fn (arg: usize) callconv(.C) u8;
 pub extern fn clone(func: CloneFn, stack: usize, flags: u32, arg: usize, ptid: *i32, tls: usize, ctid: *i32) usize;
 
 pub fn restore() callconv(.Naked) void {
-    switch (@import("builtin").zig_backend) {
-        .stage2_c => asm volatile (
+    if (@import("builtin").zig_backend == .stage2_c) {
+        asm volatile (
             \\ movl %[number], %%eax
             \\ int $0x80
-            \\ ret
+            \\ retl
             :
             : [number] "i" (@intFromEnum(SYS.sigreturn)),
             : "memory"
-        ),
-        else => asm volatile (
-            \\ int $0x80
-            \\ ret
-            :
-            : [number] "{eax}" (@intFromEnum(SYS.sigreturn)),
-            : "memory"
-        ),
+        );
+        unreachable;
     }
-    unreachable;
+
+    asm volatile (
+        \\ int $0x80
+        :
+        : [number] "{eax}" (@intFromEnum(SYS.sigreturn)),
+        : "memory"
+    );
 }
 
 pub fn restore_rt() callconv(.Naked) void {
-    switch (@import("builtin").zig_backend) {
-        .stage2_c => asm volatile (
+    if (@import("builtin").zig_backend == .stage2_c) {
+        asm volatile (
             \\ movl %[number], %%eax
             \\ int $0x80
-            \\ ret
+            \\ retl
             :
             : [number] "i" (@intFromEnum(SYS.rt_sigreturn)),
             : "memory"
-        ),
-        else => asm volatile (
-            \\ int $0x80
-            \\ ret
-            :
-            : [number] "{eax}" (@intFromEnum(SYS.rt_sigreturn)),
-            : "memory"
-        ),
+        );
+        unreachable;
     }
-    unreachable;
+
+    asm volatile (
+        \\ int $0x80
+        :
+        : [number] "{eax}" (@intFromEnum(SYS.rt_sigreturn)),
+        : "memory"
+    );
 }
 
 pub const O = struct {
@@ -354,16 +354,19 @@ pub const ucontext_t = extern struct {
 
 pub const Elf_Symndx = u32;
 
-pub const user_desc = packed struct {
+pub const user_desc = extern struct {
     entry_number: u32,
     base_addr: u32,
     limit: u32,
-    seg_32bit: u1,
-    contents: u2,
-    read_exec_only: u1,
-    limit_in_pages: u1,
-    seg_not_present: u1,
-    useable: u1,
+    flags: packed struct(u32) {
+        seg_32bit: u1,
+        contents: u2,
+        read_exec_only: u1,
+        limit_in_pages: u1,
+        seg_not_present: u1,
+        useable: u1,
+        _: u25 = undefined,
+    },
 };
 
 /// socketcall() call numbers
@@ -400,63 +403,63 @@ noinline fn getContextReturnAddress() usize {
 
 pub fn getContextInternal() callconv(.Naked) void {
     asm volatile (
-        \\ movl $0, (%[flags_offset])(%%edx)
-        \\ movl $0, (%[link_offset])(%%edx)
-        \\ movl %%edi, (%[edi_offset])(%%edx)
-        \\ movl %%esi, (%[esi_offset])(%%edx)
-        \\ movl %%ebp, (%[ebp_offset])(%%edx)
-        \\ movl %%ebx, (%[ebx_offset])(%%edx)
-        \\ movl %%edx, (%[edx_offset])(%%edx)
-        \\ movl %%ecx, (%[ecx_offset])(%%edx)
-        \\ movl %%eax, (%[eax_offset])(%%edx)
+        \\ movl $0, %[flags_offset:c](%%edx)
+        \\ movl $0, %[link_offset:c](%%edx)
+        \\ movl %%edi, %[edi_offset:c](%%edx)
+        \\ movl %%esi, %[esi_offset:c](%%edx)
+        \\ movl %%ebp, %[ebp_offset:c](%%edx)
+        \\ movl %%ebx, %[ebx_offset:c](%%edx)
+        \\ movl %%edx, %[edx_offset:c](%%edx)
+        \\ movl %%ecx, %[ecx_offset:c](%%edx)
+        \\ movl %%eax, %[eax_offset:c](%%edx)
         \\ movl (%%esp), %%ecx
-        \\ movl %%ecx, (%[eip_offset])(%%edx)
+        \\ movl %%ecx, %[eip_offset:c](%%edx)
         \\ leal 4(%%esp), %%ecx
-        \\ movl %%ecx, (%[esp_offset])(%%edx)
+        \\ movl %%ecx, %[esp_offset:c](%%edx)
         \\ xorl %%ecx, %%ecx
         \\ movw %%fs, %%cx
-        \\ movl %%ecx, (%[fs_offset])(%%edx)
-        \\ leal (%[regspace_offset])(%%edx), %%ecx
-        \\ movl %%ecx, (%[fpregs_offset])(%%edx)
+        \\ movl %%ecx, %[fs_offset:c](%%edx)
+        \\ leal %[regspace_offset:c](%%edx), %%ecx
+        \\ movl %%ecx, %[fpregs_offset:c](%%edx)
         \\ fnstenv (%%ecx)
         \\ fldenv (%%ecx)
         \\ pushl %%ebx
         \\ pushl %%esi
         \\ xorl %%ebx, %%ebx
         \\ movl %[sigaltstack], %%eax
-        \\ leal (%[stack_offset])(%%edx), %%ecx
+        \\ leal %[stack_offset:c](%%edx), %%ecx
         \\ int $0x80
-        \\ cmpl $0, %%eax
-        \\ jne return
+        \\ testl %%eax, %%eax
+        \\ jnz 0f
         \\ movl %[sigprocmask], %%eax
         \\ xorl %%ecx, %%ecx
-        \\ leal (%[sigmask_offset])(%%edx), %%edx
+        \\ leal %[sigmask_offset:c](%%edx), %%edx
         \\ movl %[sigset_size], %%esi
         \\ int $0x80
-        \\ return:
+        \\0:
         \\ popl %%esi
         \\ popl %%ebx
         :
-        : [flags_offset] "p" (@offsetOf(ucontext_t, "flags")),
-          [link_offset] "p" (@offsetOf(ucontext_t, "link")),
-          [edi_offset] "p" (comptime gpRegisterOffset(REG.EDI)),
-          [esi_offset] "p" (comptime gpRegisterOffset(REG.ESI)),
-          [ebp_offset] "p" (comptime gpRegisterOffset(REG.EBP)),
-          [esp_offset] "p" (comptime gpRegisterOffset(REG.ESP)),
-          [ebx_offset] "p" (comptime gpRegisterOffset(REG.EBX)),
-          [edx_offset] "p" (comptime gpRegisterOffset(REG.EDX)),
-          [ecx_offset] "p" (comptime gpRegisterOffset(REG.ECX)),
-          [eax_offset] "p" (comptime gpRegisterOffset(REG.EAX)),
-          [eip_offset] "p" (comptime gpRegisterOffset(REG.EIP)),
-          [fs_offset] "p" (comptime gpRegisterOffset(REG.FS)),
-          [fpregs_offset] "p" (@offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "fpregs")),
-          [regspace_offset] "p" (@offsetOf(ucontext_t, "regspace")),
+        : [flags_offset] "i" (@offsetOf(ucontext_t, "flags")),
+          [link_offset] "i" (@offsetOf(ucontext_t, "link")),
+          [edi_offset] "i" (comptime gpRegisterOffset(REG.EDI)),
+          [esi_offset] "i" (comptime gpRegisterOffset(REG.ESI)),
+          [ebp_offset] "i" (comptime gpRegisterOffset(REG.EBP)),
+          [esp_offset] "i" (comptime gpRegisterOffset(REG.ESP)),
+          [ebx_offset] "i" (comptime gpRegisterOffset(REG.EBX)),
+          [edx_offset] "i" (comptime gpRegisterOffset(REG.EDX)),
+          [ecx_offset] "i" (comptime gpRegisterOffset(REG.ECX)),
+          [eax_offset] "i" (comptime gpRegisterOffset(REG.EAX)),
+          [eip_offset] "i" (comptime gpRegisterOffset(REG.EIP)),
+          [fs_offset] "i" (comptime gpRegisterOffset(REG.FS)),
+          [fpregs_offset] "i" (@offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "fpregs")),
+          [regspace_offset] "i" (@offsetOf(ucontext_t, "regspace")),
           [sigaltstack] "i" (@intFromEnum(linux.SYS.sigaltstack)),
-          [stack_offset] "p" (@offsetOf(ucontext_t, "stack")),
+          [stack_offset] "i" (@offsetOf(ucontext_t, "stack")),
           [sigprocmask] "i" (@intFromEnum(linux.SYS.rt_sigprocmask)),
-          [sigmask_offset] "p" (@offsetOf(ucontext_t, "sigmask")),
+          [sigmask_offset] "i" (@offsetOf(ucontext_t, "sigmask")),
           [sigset_size] "i" (linux.NSIG / 8),
-        : "memory", "eax", "ecx", "edx"
+        : "cc", "memory", "eax", "ecx", "edx"
     );
 }
 
@@ -464,11 +467,13 @@ pub inline fn getcontext(context: *ucontext_t) usize {
     // This method is used so that getContextInternal can control
     // its prologue in order to read ESP from a constant offset.
     // The unused &getContextInternal input is required so the function is included in the binary.
+    var clobber_edx: usize = undefined;
     return asm volatile (
-        \\ call os.linux.x86.getContextInternal
-        : [ret] "={eax}" (-> usize),
-        : [context] "{edx}" (context),
+        \\ calll %[getContextInternal:P]
+        : [_] "={eax}" (-> usize),
+          [_] "={edx}" (clobber_edx),
+        : [_] "{edx}" (context),
           [getContextInternal] "X" (&getContextInternal),
-        : "memory", "ecx"
+        : "cc", "memory", "ecx"
     );
 }
lib/std/os/linux/x86_64.zig
@@ -108,24 +108,24 @@ pub extern fn clone(func: CloneFn, stack: usize, flags: usize, arg: usize, ptid:
 pub const restore = restore_rt;
 
 pub fn restore_rt() callconv(.Naked) void {
-    switch (@import("builtin").zig_backend) {
-        .stage2_c => asm volatile (
+    if (@import("builtin").zig_backend == .stage2_c) {
+        asm volatile (
             \\ movl %[number], %%eax
             \\ syscall
             \\ retq
             :
             : [number] "i" (@intFromEnum(SYS.rt_sigreturn)),
             : "rcx", "r11", "memory"
-        ),
-        else => asm volatile (
-            \\ syscall
-            \\ retq
-            :
-            : [number] "{rax}" (@intFromEnum(SYS.rt_sigreturn)),
-            : "rcx", "r11", "memory"
-        ),
+        );
+        unreachable;
     }
-    unreachable;
+
+    asm volatile (
+        \\ syscall
+        :
+        : [number] "{rax}" (@intFromEnum(SYS.rt_sigreturn)),
+        : "rcx", "r11", "memory"
+    );
 }
 
 pub const mode_t = usize;
@@ -403,77 +403,77 @@ fn gpRegisterOffset(comptime reg_index: comptime_int) usize {
 fn getContextInternal() callconv(.Naked) void {
     // TODO: Read GS/FS registers?
     asm volatile (
-        \\ movq $0, (%[flags_offset])(%%rdi)
-        \\ movq $0, (%[link_offset])(%%rdi)
-        \\ movq %%r8, (%[r8_offset])(%%rdi)
-        \\ movq %%r9, (%[r9_offset])(%%rdi)
-        \\ movq %%r10, (%[r10_offset])(%%rdi)
-        \\ movq %%r11, (%[r11_offset])(%%rdi)
-        \\ movq %%r12, (%[r12_offset])(%%rdi)
-        \\ movq %%r13, (%[r13_offset])(%%rdi)
-        \\ movq %%r14, (%[r14_offset])(%%rdi)
-        \\ movq %%r15, (%[r15_offset])(%%rdi)
-        \\ movq %%rdi, (%[rdi_offset])(%%rdi)
-        \\ movq %%rsi, (%[rsi_offset])(%%rdi)
-        \\ movq %%rbp, (%[rbp_offset])(%%rdi)
-        \\ movq %%rbx, (%[rbx_offset])(%%rdi)
-        \\ movq %%rdx, (%[rdx_offset])(%%rdi)
-        \\ movq %%rax, (%[rax_offset])(%%rdi)
-        \\ movq %%rcx, (%[rcx_offset])(%%rdi)
+        \\ movq $0, %[flags_offset:c](%%rdi)
+        \\ movq $0, %[link_offset:c](%%rdi)
+        \\ movq %%r8, %[r8_offset:c](%%rdi)
+        \\ movq %%r9, %[r9_offset:c](%%rdi)
+        \\ movq %%r10, %[r10_offset:c](%%rdi)
+        \\ movq %%r11, %[r11_offset:c](%%rdi)
+        \\ movq %%r12, %[r12_offset:c](%%rdi)
+        \\ movq %%r13, %[r13_offset:c](%%rdi)
+        \\ movq %%r14, %[r14_offset:c](%%rdi)
+        \\ movq %%r15, %[r15_offset:c](%%rdi)
+        \\ movq %%rdi, %[rdi_offset:c](%%rdi)
+        \\ movq %%rsi, %[rsi_offset:c](%%rdi)
+        \\ movq %%rbp, %[rbp_offset:c](%%rdi)
+        \\ movq %%rbx, %[rbx_offset:c](%%rdi)
+        \\ movq %%rdx, %[rdx_offset:c](%%rdi)
+        \\ movq %%rax, %[rax_offset:c](%%rdi)
+        \\ movq %%rcx, %[rcx_offset:c](%%rdi)
         \\ movq (%%rsp), %%rcx
-        \\ movq %%rcx, (%[rip_offset])(%%rdi)
+        \\ movq %%rcx, %[rip_offset:c](%%rdi)
         \\ leaq 8(%%rsp), %%rcx
-        \\ movq %%rcx, (%[rsp_offset])(%%rdi)
+        \\ movq %%rcx, %[rsp_offset:c](%%rdi)
         \\ pushfq
-        \\ popq (%[efl_offset])(%%rdi)
-        \\ leaq (%[fpmem_offset])(%%rdi), %%rcx
-        \\ movq %%rcx, (%[fpstate_offset])(%%rdi)
+        \\ popq %[efl_offset:c](%%rdi)
+        \\ leaq %[fpmem_offset:c](%%rdi), %%rcx
+        \\ movq %%rcx, %[fpstate_offset:c](%%rdi)
         \\ fnstenv (%%rcx)
         \\ fldenv (%%rcx)
-        \\ stmxcsr (%[mxcsr_offset])(%%rdi)
-        \\ leaq (%[stack_offset])(%%rdi), %%rsi
+        \\ stmxcsr %[mxcsr_offset:c](%%rdi)
+        \\ leaq %[stack_offset:c](%%rdi), %%rsi
         \\ movq %%rdi, %%r8
-        \\ xorq %%rdi, %%rdi
+        \\ xorl %%edi, %%edi
         \\ movq %[sigaltstack], %%rax
         \\ syscall
-        \\ cmpq $0, %%rax
-        \\ jne return
+        \\ testq %%rax, %%rax
+        \\ jnz 0f
         \\ movq %[sigprocmask], %%rax
-        \\ xorq %%rsi, %%rsi
-        \\ leaq (%[sigmask_offset])(%%r8), %%rdx
-        \\ movq %[sigset_size], %%r10
+        \\ xorl %%esi, %%esi
+        \\ leaq %[sigmask_offset:c](%%r8), %%rdx
+        \\ movl %[sigset_size], %%r10d
         \\ syscall
-        \\ return:
+        \\0:
         :
-        : [flags_offset] "p" (@offsetOf(ucontext_t, "flags")),
-          [link_offset] "p" (@offsetOf(ucontext_t, "link")),
-          [r8_offset] "p" (comptime gpRegisterOffset(REG.R8)),
-          [r9_offset] "p" (comptime gpRegisterOffset(REG.R9)),
-          [r10_offset] "p" (comptime gpRegisterOffset(REG.R10)),
-          [r11_offset] "p" (comptime gpRegisterOffset(REG.R11)),
-          [r12_offset] "p" (comptime gpRegisterOffset(REG.R12)),
-          [r13_offset] "p" (comptime gpRegisterOffset(REG.R13)),
-          [r14_offset] "p" (comptime gpRegisterOffset(REG.R14)),
-          [r15_offset] "p" (comptime gpRegisterOffset(REG.R15)),
-          [rdi_offset] "p" (comptime gpRegisterOffset(REG.RDI)),
-          [rsi_offset] "p" (comptime gpRegisterOffset(REG.RSI)),
-          [rbp_offset] "p" (comptime gpRegisterOffset(REG.RBP)),
-          [rbx_offset] "p" (comptime gpRegisterOffset(REG.RBX)),
-          [rdx_offset] "p" (comptime gpRegisterOffset(REG.RDX)),
-          [rax_offset] "p" (comptime gpRegisterOffset(REG.RAX)),
-          [rcx_offset] "p" (comptime gpRegisterOffset(REG.RCX)),
-          [rsp_offset] "p" (comptime gpRegisterOffset(REG.RSP)),
-          [rip_offset] "p" (comptime gpRegisterOffset(REG.RIP)),
-          [efl_offset] "p" (comptime gpRegisterOffset(REG.EFL)),
-          [fpstate_offset] "p" (@offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "fpregs")),
-          [fpmem_offset] "p" (@offsetOf(ucontext_t, "fpregs_mem")),
-          [mxcsr_offset] "p" (@offsetOf(ucontext_t, "fpregs_mem") + @offsetOf(fpstate, "mxcsr")),
+        : [flags_offset] "i" (@offsetOf(ucontext_t, "flags")),
+          [link_offset] "i" (@offsetOf(ucontext_t, "link")),
+          [r8_offset] "i" (comptime gpRegisterOffset(REG.R8)),
+          [r9_offset] "i" (comptime gpRegisterOffset(REG.R9)),
+          [r10_offset] "i" (comptime gpRegisterOffset(REG.R10)),
+          [r11_offset] "i" (comptime gpRegisterOffset(REG.R11)),
+          [r12_offset] "i" (comptime gpRegisterOffset(REG.R12)),
+          [r13_offset] "i" (comptime gpRegisterOffset(REG.R13)),
+          [r14_offset] "i" (comptime gpRegisterOffset(REG.R14)),
+          [r15_offset] "i" (comptime gpRegisterOffset(REG.R15)),
+          [rdi_offset] "i" (comptime gpRegisterOffset(REG.RDI)),
+          [rsi_offset] "i" (comptime gpRegisterOffset(REG.RSI)),
+          [rbp_offset] "i" (comptime gpRegisterOffset(REG.RBP)),
+          [rbx_offset] "i" (comptime gpRegisterOffset(REG.RBX)),
+          [rdx_offset] "i" (comptime gpRegisterOffset(REG.RDX)),
+          [rax_offset] "i" (comptime gpRegisterOffset(REG.RAX)),
+          [rcx_offset] "i" (comptime gpRegisterOffset(REG.RCX)),
+          [rsp_offset] "i" (comptime gpRegisterOffset(REG.RSP)),
+          [rip_offset] "i" (comptime gpRegisterOffset(REG.RIP)),
+          [efl_offset] "i" (comptime gpRegisterOffset(REG.EFL)),
+          [fpstate_offset] "i" (@offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "fpregs")),
+          [fpmem_offset] "i" (@offsetOf(ucontext_t, "fpregs_mem")),
+          [mxcsr_offset] "i" (@offsetOf(ucontext_t, "fpregs_mem") + @offsetOf(fpstate, "mxcsr")),
           [sigaltstack] "i" (@intFromEnum(linux.SYS.sigaltstack)),
-          [stack_offset] "p" (@offsetOf(ucontext_t, "stack")),
+          [stack_offset] "i" (@offsetOf(ucontext_t, "stack")),
           [sigprocmask] "i" (@intFromEnum(linux.SYS.rt_sigprocmask)),
-          [sigmask_offset] "p" (@offsetOf(ucontext_t, "sigmask")),
+          [sigmask_offset] "i" (@offsetOf(ucontext_t, "sigmask")),
           [sigset_size] "i" (linux.NSIG / 8),
-        : "memory", "rcx", "rdx", "rdi", "rsi", "r8", "r10", "r11"
+        : "cc", "memory", "rax", "rcx", "rdx", "rdi", "rsi", "r8", "r10", "r11"
     );
 }
 
@@ -481,11 +481,13 @@ pub inline fn getcontext(context: *ucontext_t) usize {
     // This method is used so that getContextInternal can control
     // its prologue in order to read RSP from a constant offset
     // The unused &getContextInternal input is required so the function is included in the binary.
+    var clobber_rdi: usize = undefined;
     return asm volatile (
-        \\ call os.linux.x86_64.getContextInternal
-        : [ret] "={rax}" (-> usize),
-        : [context] "{rdi}" (context),
+        \\ callq %[getContextInternal:P]
+        : [_] "={rax}" (-> usize),
+          [_] "={rdi}" (clobber_rdi),
+        : [_] "{rdi}" (context),
           [getContextInternal] "X" (&getContextInternal),
-        : "memory", "rcx", "rdx", "rdi", "rsi", "r8", "r10", "r11"
+        : "cc", "memory", "rcx", "rdx", "rsi", "r8", "r10", "r11"
     );
 }
lib/std/start.zig
@@ -256,38 +256,38 @@ fn EfiMain(handle: uefi.Handle, system_table: *uefi.tables.SystemTable) callconv
 fn _start() callconv(.Naked) noreturn {
     switch (builtin.zig_backend) {
         .stage2_c => {
-            @export(argc_argv_ptr, .{ .name = "argc_argv_ptr" });
-            @export(posixCallMainAndExit, .{ .name = "_posixCallMainAndExit" });
-            switch (native_arch) {
-                .x86_64 => asm volatile (
+            asm volatile (switch (native_arch) {
+                    .x86_64 =>
                     \\ xorl %%ebp, %%ebp
-                    \\ movq %%rsp, argc_argv_ptr
+                    \\ movq %%rsp, %[argc_argv_ptr]
                     \\ andq $-16, %%rsp
-                    \\ call _posixCallMainAndExit
-                ),
-                .x86 => asm volatile (
+                    \\ callq %[posixCallMainAndExit:P]
+                    ,
+                    .x86 =>
                     \\ xorl %%ebp, %%ebp
-                    \\ movl %%esp, argc_argv_ptr
+                    \\ movl %%esp, %[argc_argv_ptr]
                     \\ andl $-16, %%esp
-                    \\ jmp _posixCallMainAndExit
-                ),
-                .aarch64, .aarch64_be => asm volatile (
+                    \\ calll %[posixCallMainAndExit:P]
+                    ,
+                    .aarch64, .aarch64_be =>
                     \\ mov fp, #0
                     \\ mov lr, #0
                     \\ mov x0, sp
-                    \\ adrp x1, argc_argv_ptr
-                    \\ str x0, [x1, :lo12:argc_argv_ptr]
-                    \\ b _posixCallMainAndExit
-                ),
-                .arm, .armeb, .thumb => asm volatile (
+                    \\ str x0, %[argc_argv_ptr]
+                    \\ b %[posixCallMainAndExit]
+                    ,
+                    .arm, .armeb, .thumb =>
                     \\ mov fp, #0
                     \\ mov lr, #0
-                    \\ str sp, argc_argv_ptr
+                    \\ str sp, %[argc_argv_ptr]
                     \\ and sp, #-16
-                    \\ b _posixCallMainAndExit
-                ),
-                else => @compileError("unsupported arch"),
-            }
+                    \\ b %[posixCallMainAndExit]
+                    ,
+                    else => @compileError("unsupported arch"),
+                }
+                : [argc_argv_ptr] "=m" (argc_argv_ptr),
+                : [posixCallMainAndExit] "X" (&posixCallMainAndExit),
+            );
             unreachable;
         },
         else => switch (native_arch) {
lib/std/Thread.zig
@@ -1275,12 +1275,14 @@ const LinuxThreadImpl = struct {
                 .entry_number = os.linux.tls.tls_image.gdt_entry_number,
                 .base_addr = tls_ptr,
                 .limit = 0xfffff,
-                .seg_32bit = 1,
-                .contents = 0, // Data
-                .read_exec_only = 0,
-                .limit_in_pages = 1,
-                .seg_not_present = 0,
-                .useable = 1,
+                .flags = .{
+                    .seg_32bit = 1,
+                    .contents = 0, // Data
+                    .read_exec_only = 0,
+                    .limit_in_pages = 1,
+                    .seg_not_present = 0,
+                    .useable = 1,
+                },
             };
         }
 
src/codegen/c.zig
@@ -30,7 +30,7 @@ pub const CValue = union(enum) {
     /// Address of a local.
     local_ref: LocalIndex,
     /// A constant instruction, to be rendered inline.
-    constant: Air.Inst.Ref,
+    constant: InternPool.Index,
     /// Index into the parameters
     arg: usize,
     /// The array field of a parameter
@@ -302,7 +302,7 @@ pub const Function = struct {
             try f.object.dg.renderValue(writer, ty, val, .StaticInitializer);
             try writer.writeAll(";\n ");
             break :result decl_c_value;
-        } else .{ .constant = ref };
+        } else .{ .constant = val.toIntern() };
 
         gop.value_ptr.* = result;
         return result;
@@ -352,57 +352,63 @@ pub const Function = struct {
 
     fn writeCValue(f: *Function, w: anytype, c_value: CValue, location: ValueRenderLocation) !void {
         switch (c_value) {
-            .constant => |inst| {
-                const mod = f.object.dg.module;
-                const ty = f.typeOf(inst);
-                const val = (try f.air.value(inst, mod)).?;
-                return f.object.dg.renderValue(w, ty, val, location);
-            },
-            .undef => |ty| return f.object.dg.renderValue(w, ty, Value.undef, location),
-            else => return f.object.dg.writeCValue(w, c_value),
+            .constant => |val| try f.object.dg.renderValue(
+                w,
+                f.object.dg.module.intern_pool.typeOf(val).toType(),
+                val.toValue(),
+                location,
+            ),
+            .undef => |ty| try f.object.dg.renderValue(w, ty, Value.undef, location),
+            else => try f.object.dg.writeCValue(w, c_value),
         }
     }
 
     fn writeCValueDeref(f: *Function, w: anytype, c_value: CValue) !void {
         switch (c_value) {
-            .constant => |inst| {
-                const mod = f.object.dg.module;
-                const ty = f.typeOf(inst);
-                const val = (try f.air.value(inst, mod)).?;
+            .constant => |val| {
                 try w.writeAll("(*");
-                try f.object.dg.renderValue(w, ty, val, .Other);
-                return w.writeByte(')');
+                try f.object.dg.renderValue(
+                    w,
+                    f.object.dg.module.intern_pool.typeOf(val).toType(),
+                    val.toValue(),
+                    .Other,
+                );
+                try w.writeByte(')');
             },
-            else => return f.object.dg.writeCValueDeref(w, c_value),
+            else => try f.object.dg.writeCValueDeref(w, c_value),
         }
     }
 
     fn writeCValueMember(f: *Function, w: anytype, c_value: CValue, member: CValue) !void {
         switch (c_value) {
-            .constant => |inst| {
-                const mod = f.object.dg.module;
-                const ty = f.typeOf(inst);
-                const val = (try f.air.value(inst, mod)).?;
-                try f.object.dg.renderValue(w, ty, val, .Other);
+            .constant => |val| {
+                try f.object.dg.renderValue(
+                    w,
+                    f.object.dg.module.intern_pool.typeOf(val).toType(),
+                    val.toValue(),
+                    .Other,
+                );
                 try w.writeByte('.');
-                return f.writeCValue(w, member, .Other);
+                try f.writeCValue(w, member, .Other);
             },
-            else => return f.object.dg.writeCValueMember(w, c_value, member),
+            else => try f.object.dg.writeCValueMember(w, c_value, member),
         }
     }
 
     fn writeCValueDerefMember(f: *Function, w: anytype, c_value: CValue, member: CValue) !void {
         switch (c_value) {
-            .constant => |inst| {
-                const mod = f.object.dg.module;
-                const ty = f.typeOf(inst);
-                const val = (try f.air.value(inst, mod)).?;
+            .constant => |val| {
                 try w.writeByte('(');
-                try f.object.dg.renderValue(w, ty, val, .Other);
+                try f.object.dg.renderValue(
+                    w,
+                    f.object.dg.module.intern_pool.typeOf(val).toType(),
+                    val.toValue(),
+                    .Other,
+                );
                 try w.writeAll(")->");
-                return f.writeCValue(w, member, .Other);
+                try f.writeCValue(w, member, .Other);
             },
-            else => return f.object.dg.writeCValueDerefMember(w, c_value, member),
+            else => try f.object.dg.writeCValueDerefMember(w, c_value, member),
         }
     }
 
@@ -4763,11 +4769,20 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue {
     return .none;
 }
 
-fn asmInputNeedsLocal(constraint: []const u8, value: CValue) bool {
+fn asmInputNeedsLocal(f: *Function, constraint: []const u8, value: CValue) bool {
     return switch (constraint[0]) {
         '{' => true,
         'i', 'r' => false,
-        else => value == .constant,
+        else => switch (value) {
+            .constant => |val| switch (f.object.dg.module.intern_pool.indexToKey(val)) {
+                .ptr => |ptr| switch (ptr.addr) {
+                    .decl => false,
+                    else => true,
+                },
+                else => true,
+            },
+            else => false,
+        },
     };
 }
 
@@ -4848,7 +4863,7 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {
 
             const is_reg = constraint[0] == '{';
             const input_val = try f.resolveInst(input);
-            if (asmInputNeedsLocal(constraint, input_val)) {
+            if (asmInputNeedsLocal(f, constraint, input_val)) {
                 const input_ty = f.typeOf(input);
                 if (is_reg) try writer.writeAll("register ");
                 const alignment = 0;
@@ -4969,7 +4984,7 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {
             const is_reg = constraint[0] == '{';
             const input_val = try f.resolveInst(input);
             try writer.print("{s}(", .{fmtStringLiteral(if (is_reg) "r" else constraint, null)});
-            try f.writeCValue(writer, if (asmInputNeedsLocal(constraint, input_val)) local: {
+            try f.writeCValue(writer, if (asmInputNeedsLocal(f, constraint, input_val)) local: {
                 const input_local = .{ .local = locals_index };
                 locals_index += 1;
                 break :local input_local;
test/behavior/fn.zig
@@ -151,6 +151,7 @@ fn fnWithUnreachable() noreturn {
 
 test "extern struct with stdcallcc fn pointer" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_c and builtin.cpu.arch == .x86) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
 
     const S = extern struct {
test/tests.zig
@@ -1053,6 +1053,7 @@ pub fn addModuleTests(b: *std.Build, options: ModuleTestOptions) *Step {
                     // TODO stop violating these pedantic errors. spotted on linux
                     "-Wno-address-of-packed-member",
                     "-Wno-gnu-folding-constant",
+                    "-Wno-incompatible-function-pointer-types",
                     "-Wno-incompatible-pointer-types",
                     "-Wno-overlength-strings",
                     // TODO stop violating these pedantic errors. spotted on darwin