Commit c0df707066

mlugg <mlugg@mlugg.co.uk>
2025-06-03 17:25:16
wasm: get self-hosted compiling, and supporting `separate_thread`
My original goal here was just to get the self-hosted Wasm backend compiling again after the pipeline change, but it turned out that from there it was pretty simple to entirely eliminate the shared state between `codegen.wasm` and `link.Wasm`. As such, this commit not only fixes the backend, but makes it the second backend (after CBE) to support the new 1:N:1 threading model.
1 parent 5ab307c
lib/std/multi_array_list.zig
@@ -135,6 +135,22 @@ pub fn MultiArrayList(comptime T: type) type {
                 self.* = undefined;
             }
 
+            /// Returns a `Slice` representing a range of elements in `s`, analogous to `arr[off..][0..len]`.
+            /// It is illegal to call `deinit` or `toMultiArrayList` on the returned `Slice`.
+            /// Asserts that `off + len <= s.len`.
+            pub fn subslice(s: Slice, off: usize, len: usize) Slice {
+                assert(off + len <= s.len);
+                var ptrs: [fields.len][*]u8 = undefined;
+                inline for (s.ptrs, &ptrs, fields) |in, *out, field| {
+                    out.* = in + (off * @sizeOf(field.type));
+                }
+                return .{
+                    .ptrs = ptrs,
+                    .len = len,
+                    .capacity = len,
+                };
+            }
+
             /// This function is used in the debugger pretty formatters in tools/ to fetch the
             /// child field order and entry type to facilitate fancy debug printing for this type.
             fn dbHelper(self: *Slice, child: *Elem, field: *Field, entry: *Entry) void {
src/arch/wasm/CodeGen.zig
@@ -3,7 +3,6 @@ const builtin = @import("builtin");
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
 const testing = std.testing;
-const leb = std.leb;
 const mem = std.mem;
 const log = std.log.scoped(.codegen);
 
@@ -18,12 +17,10 @@ const Compilation = @import("../../Compilation.zig");
 const link = @import("../../link.zig");
 const Air = @import("../../Air.zig");
 const Mir = @import("Mir.zig");
-const Emit = @import("Emit.zig");
 const abi = @import("abi.zig");
 const Alignment = InternPool.Alignment;
 const errUnionPayloadOffset = codegen.errUnionPayloadOffset;
 const errUnionErrorOffset = codegen.errUnionErrorOffset;
-const Wasm = link.File.Wasm;
 
 const target_util = @import("../../target.zig");
 const libcFloatPrefix = target_util.libcFloatPrefix;
@@ -78,17 +75,24 @@ simd_immediates: std.ArrayListUnmanaged([16]u8) = .empty,
 /// The Target we're emitting (used to call intInfo)
 target: *const std.Target,
 ptr_size: enum { wasm32, wasm64 },
-wasm: *link.File.Wasm,
 pt: Zcu.PerThread,
 /// List of MIR Instructions
-mir_instructions: *std.MultiArrayList(Mir.Inst),
+mir_instructions: std.MultiArrayList(Mir.Inst),
 /// Contains extra data for MIR
-mir_extra: *std.ArrayListUnmanaged(u32),
-start_mir_extra_off: u32,
-start_locals_off: u32,
+mir_extra: std.ArrayListUnmanaged(u32),
 /// List of all locals' types generated throughout this declaration
 /// used to emit locals count at start of 'code' section.
-locals: *std.ArrayListUnmanaged(std.wasm.Valtype),
+mir_locals: std.ArrayListUnmanaged(std.wasm.Valtype),
+/// Set of all UAVs referenced by this function. Key is the UAV value, value is the alignment.
+/// `.none` means naturally aligned. An explicit alignment is never less than the natural alignment.
+mir_uavs: std.AutoArrayHashMapUnmanaged(InternPool.Index, Alignment),
+/// Set of all functions whose address this function has taken and which therefore might be called
+/// via a `call_indirect` instruction.
+mir_indirect_function_set: std.AutoArrayHashMapUnmanaged(InternPool.Nav.Index, void),
+/// Set of all function types used by this function. These must be interned by the linker.
+mir_func_tys: std.AutoArrayHashMapUnmanaged(InternPool.Index, void),
+/// The number of `error_name_table_ref` instructions emitted.
+error_name_table_ref_count: u32,
 /// When a function is executing, we store the the current stack pointer's value within this local.
 /// This value is then used to restore the stack pointer to the original value at the return of the function.
 initial_stack_value: WValue = .none,
@@ -219,7 +223,7 @@ const WValue = union(enum) {
         if (local_value < reserved + 2) return; // reserved locals may never be re-used. Also accounts for 2 stack locals.
 
         const index = local_value - reserved;
-        const valtype = gen.locals.items[gen.start_locals_off + index];
+        const valtype = gen.mir_locals.items[index];
         switch (valtype) {
             .i32 => gen.free_locals_i32.append(gen.gpa, local_value) catch return, // It's ok to fail any of those, a new local can be allocated instead
             .i64 => gen.free_locals_i64.append(gen.gpa, local_value) catch return,
@@ -716,6 +720,12 @@ pub fn deinit(cg: *CodeGen) void {
     cg.free_locals_f32.deinit(gpa);
     cg.free_locals_f64.deinit(gpa);
     cg.free_locals_v128.deinit(gpa);
+    cg.mir_instructions.deinit(gpa);
+    cg.mir_extra.deinit(gpa);
+    cg.mir_locals.deinit(gpa);
+    cg.mir_uavs.deinit(gpa);
+    cg.mir_indirect_function_set.deinit(gpa);
+    cg.mir_func_tys.deinit(gpa);
     cg.* = undefined;
 }
 
@@ -876,7 +886,7 @@ fn addTag(cg: *CodeGen, tag: Mir.Inst.Tag) error{OutOfMemory}!void {
 }
 
 fn addExtended(cg: *CodeGen, opcode: std.wasm.MiscOpcode) error{OutOfMemory}!void {
-    const extra_index = cg.extraLen();
+    const extra_index: u32 = @intCast(cg.mir_extra.items.len);
     try cg.mir_extra.append(cg.gpa, @intFromEnum(opcode));
     try cg.addInst(.{ .tag = .misc_prefix, .data = .{ .payload = extra_index } });
 }
@@ -889,10 +899,6 @@ fn addLocal(cg: *CodeGen, tag: Mir.Inst.Tag, local: u32) error{OutOfMemory}!void
     try cg.addInst(.{ .tag = tag, .data = .{ .local = local } });
 }
 
-fn addFuncTy(cg: *CodeGen, tag: Mir.Inst.Tag, i: Wasm.FunctionType.Index) error{OutOfMemory}!void {
-    try cg.addInst(.{ .tag = tag, .data = .{ .func_ty = i } });
-}
-
 /// Accepts an unsigned 32bit integer rather than a signed integer to
 /// prevent us from having to bitcast multiple times as most values
 /// within codegen are represented as unsigned rather than signed.
@@ -911,7 +917,7 @@ fn addImm64(cg: *CodeGen, imm: u64) error{OutOfMemory}!void {
 /// Accepts the index into the list of 128bit-immediates
 fn addImm128(cg: *CodeGen, index: u32) error{OutOfMemory}!void {
     const simd_values = cg.simd_immediates.items[index];
-    const extra_index = cg.extraLen();
+    const extra_index: u32 = @intCast(cg.mir_extra.items.len);
     // tag + 128bit value
     try cg.mir_extra.ensureUnusedCapacity(cg.gpa, 5);
     cg.mir_extra.appendAssumeCapacity(@intFromEnum(std.wasm.SimdOpcode.v128_const));
@@ -956,15 +962,13 @@ fn addExtra(cg: *CodeGen, extra: anytype) error{OutOfMemory}!u32 {
 /// Returns the index into `mir_extra`
 fn addExtraAssumeCapacity(cg: *CodeGen, extra: anytype) error{OutOfMemory}!u32 {
     const fields = std.meta.fields(@TypeOf(extra));
-    const result = cg.extraLen();
+    const result: u32 = @intCast(cg.mir_extra.items.len);
     inline for (fields) |field| {
         cg.mir_extra.appendAssumeCapacity(switch (field.type) {
             u32 => @field(extra, field.name),
             i32 => @bitCast(@field(extra, field.name)),
             InternPool.Index,
             InternPool.Nav.Index,
-            Wasm.UavsObjIndex,
-            Wasm.UavsExeIndex,
             => @intFromEnum(@field(extra, field.name)),
             else => |field_type| @compileError("Unsupported field type " ++ @typeName(field_type)),
         });
@@ -1034,18 +1038,12 @@ fn emitWValue(cg: *CodeGen, value: WValue) InnerError!void {
         .float32 => |val| try cg.addInst(.{ .tag = .f32_const, .data = .{ .float32 = val } }),
         .float64 => |val| try cg.addFloat64(val),
         .nav_ref => |nav_ref| {
-            const wasm = cg.wasm;
-            const comp = wasm.base.comp;
-            const zcu = comp.zcu.?;
+            const zcu = cg.pt.zcu;
             const ip = &zcu.intern_pool;
             if (ip.getNav(nav_ref.nav_index).isFn(ip)) {
                 assert(nav_ref.offset == 0);
-                const gop = try wasm.zcu_indirect_function_set.getOrPut(comp.gpa, nav_ref.nav_index);
-                if (!gop.found_existing) gop.value_ptr.* = {};
-                try cg.addInst(.{
-                    .tag = .func_ref,
-                    .data = .{ .indirect_function_table_index = @enumFromInt(gop.index) },
-                });
+                try cg.mir_indirect_function_set.put(cg.gpa, nav_ref.nav_index, {});
+                try cg.addInst(.{ .tag = .func_ref, .data = .{ .nav_index = nav_ref.nav_index } });
             } else if (nav_ref.offset == 0) {
                 try cg.addInst(.{ .tag = .nav_ref, .data = .{ .nav_index = nav_ref.nav_index } });
             } else {
@@ -1061,41 +1059,37 @@ fn emitWValue(cg: *CodeGen, value: WValue) InnerError!void {
             }
         },
         .uav_ref => |uav| {
-            const wasm = cg.wasm;
-            const comp = wasm.base.comp;
-            const is_obj = comp.config.output_mode == .Obj;
-            const zcu = comp.zcu.?;
+            const zcu = cg.pt.zcu;
             const ip = &zcu.intern_pool;
-            if (ip.isFunctionType(ip.typeOf(uav.ip_index))) {
-                assert(uav.offset == 0);
-                const owner_nav = ip.toFunc(uav.ip_index).owner_nav;
-                const gop = try wasm.zcu_indirect_function_set.getOrPut(comp.gpa, owner_nav);
-                if (!gop.found_existing) gop.value_ptr.* = {};
-                try cg.addInst(.{
-                    .tag = .func_ref,
-                    .data = .{ .indirect_function_table_index = @enumFromInt(gop.index) },
-                });
-            } else if (uav.offset == 0) {
+            assert(!ip.isFunctionType(ip.typeOf(uav.ip_index)));
+            const gop = try cg.mir_uavs.getOrPut(cg.gpa, uav.ip_index);
+            const this_align: Alignment = a: {
+                if (uav.orig_ptr_ty == .none) break :a .none;
+                const ptr_type = ip.indexToKey(uav.orig_ptr_ty).ptr_type;
+                const this_align = ptr_type.flags.alignment;
+                if (this_align == .none) break :a .none;
+                const abi_align = Type.fromInterned(ptr_type.child).abiAlignment(zcu);
+                if (this_align.compare(.lte, abi_align)) break :a .none;
+                break :a this_align;
+            };
+            if (!gop.found_existing or
+                gop.value_ptr.* == .none or
+                (this_align != .none and this_align.compare(.gt, gop.value_ptr.*)))
+            {
+                gop.value_ptr.* = this_align;
+            }
+            if (uav.offset == 0) {
                 try cg.addInst(.{
                     .tag = .uav_ref,
-                    .data = if (is_obj) .{
-                        .uav_obj = try wasm.refUavObj(uav.ip_index, uav.orig_ptr_ty),
-                    } else .{
-                        .uav_exe = try wasm.refUavExe(uav.ip_index, uav.orig_ptr_ty),
-                    },
+                    .data = .{ .ip_index = uav.ip_index },
                 });
             } else {
                 try cg.addInst(.{
                     .tag = .uav_ref_off,
-                    .data = .{
-                        .payload = if (is_obj) try cg.addExtra(Mir.UavRefOffObj{
-                            .uav_obj = try wasm.refUavObj(uav.ip_index, uav.orig_ptr_ty),
-                            .offset = uav.offset,
-                        }) else try cg.addExtra(Mir.UavRefOffExe{
-                            .uav_exe = try wasm.refUavExe(uav.ip_index, uav.orig_ptr_ty),
-                            .offset = uav.offset,
-                        }),
-                    },
+                    .data = .{ .payload = try cg.addExtra(@as(Mir.UavRefOff, .{
+                        .value = uav.ip_index,
+                        .offset = uav.offset,
+                    })) },
                 });
             }
         },
@@ -1157,106 +1151,12 @@ fn allocLocal(cg: *CodeGen, ty: Type) InnerError!WValue {
 /// to use a zero-initialized local.
 fn ensureAllocLocal(cg: *CodeGen, ty: Type) InnerError!WValue {
     const zcu = cg.pt.zcu;
-    try cg.locals.append(cg.gpa, typeToValtype(ty, zcu, cg.target));
+    try cg.mir_locals.append(cg.gpa, typeToValtype(ty, zcu, cg.target));
     const initial_index = cg.local_index;
     cg.local_index += 1;
     return .{ .local = .{ .value = initial_index, .references = 1 } };
 }
 
-pub const Function = extern struct {
-    /// Index into `Wasm.mir_instructions`.
-    mir_off: u32,
-    /// This is unused except for as a safety slice bound and could be removed.
-    mir_len: u32,
-    /// Index into `Wasm.mir_extra`.
-    mir_extra_off: u32,
-    /// This is unused except for as a safety slice bound and could be removed.
-    mir_extra_len: u32,
-    locals_off: u32,
-    locals_len: u32,
-    prologue: Prologue,
-
-    pub const Prologue = extern struct {
-        flags: Flags,
-        sp_local: u32,
-        stack_size: u32,
-        bottom_stack_local: u32,
-
-        pub const Flags = packed struct(u32) {
-            stack_alignment: Alignment,
-            padding: u26 = 0,
-        };
-
-        pub const none: Prologue = .{
-            .sp_local = 0,
-            .flags = .{ .stack_alignment = .none },
-            .stack_size = 0,
-            .bottom_stack_local = 0,
-        };
-
-        pub fn isNone(p: *const Prologue) bool {
-            return p.flags.stack_alignment != .none;
-        }
-    };
-
-    pub fn lower(f: *Function, wasm: *Wasm, code: *std.ArrayListUnmanaged(u8)) Allocator.Error!void {
-        const gpa = wasm.base.comp.gpa;
-
-        // Write the locals in the prologue of the function body.
-        const locals = wasm.all_zcu_locals.items[f.locals_off..][0..f.locals_len];
-        try code.ensureUnusedCapacity(gpa, 5 + locals.len * 6 + 38);
-
-        std.leb.writeUleb128(code.fixedWriter(), @as(u32, @intCast(locals.len))) catch unreachable;
-        for (locals) |local| {
-            std.leb.writeUleb128(code.fixedWriter(), @as(u32, 1)) catch unreachable;
-            code.appendAssumeCapacity(@intFromEnum(local));
-        }
-
-        // Stack management section of function prologue.
-        const stack_alignment = f.prologue.flags.stack_alignment;
-        if (stack_alignment.toByteUnits()) |align_bytes| {
-            const sp_global: Wasm.GlobalIndex = .stack_pointer;
-            // load stack pointer
-            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.global_get));
-            std.leb.writeULEB128(code.fixedWriter(), @intFromEnum(sp_global)) catch unreachable;
-            // store stack pointer so we can restore it when we return from the function
-            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.local_tee));
-            leb.writeUleb128(code.fixedWriter(), f.prologue.sp_local) catch unreachable;
-            // get the total stack size
-            const aligned_stack: i32 = @intCast(stack_alignment.forward(f.prologue.stack_size));
-            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_const));
-            leb.writeIleb128(code.fixedWriter(), aligned_stack) catch unreachable;
-            // subtract it from the current stack pointer
-            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_sub));
-            // Get negative stack alignment
-            const neg_stack_align = @as(i32, @intCast(align_bytes)) * -1;
-            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_const));
-            leb.writeIleb128(code.fixedWriter(), neg_stack_align) catch unreachable;
-            // Bitwise-and the value to get the new stack pointer to ensure the
-            // pointers are aligned with the abi alignment.
-            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_and));
-            // The bottom will be used to calculate all stack pointer offsets.
-            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.local_tee));
-            leb.writeUleb128(code.fixedWriter(), f.prologue.bottom_stack_local) catch unreachable;
-            // Store the current stack pointer value into the global stack pointer so other function calls will
-            // start from this value instead and not overwrite the current stack.
-            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.global_set));
-            std.leb.writeULEB128(code.fixedWriter(), @intFromEnum(sp_global)) catch unreachable;
-        }
-
-        var emit: Emit = .{
-            .mir = .{
-                .instruction_tags = wasm.mir_instructions.items(.tag)[f.mir_off..][0..f.mir_len],
-                .instruction_datas = wasm.mir_instructions.items(.data)[f.mir_off..][0..f.mir_len],
-                .extra = wasm.mir_extra.items[f.mir_extra_off..][0..f.mir_extra_len],
-            },
-            .wasm = wasm,
-            .code = code,
-        };
-        try emit.lowerToCode();
-    }
-};
-
 pub const Error = error{
     OutOfMemory,
     /// Compiler was asked to operate on a number larger than supported.
@@ -1265,13 +1165,16 @@ pub const Error = error{
     CodegenFail,
 };
 
-pub fn function(
-    wasm: *Wasm,
+pub fn generate(
+    bin_file: *link.File,
     pt: Zcu.PerThread,
+    src_loc: Zcu.LazySrcLoc,
     func_index: InternPool.Index,
-    air: Air,
-    liveness: Air.Liveness,
-) Error!Function {
+    air: *const Air,
+    liveness: *const Air.Liveness,
+) Error!Mir {
+    _ = src_loc;
+    _ = bin_file;
     const zcu = pt.zcu;
     const gpa = zcu.gpa;
     const cg = zcu.funcInfo(func_index);
@@ -1279,10 +1182,8 @@ pub fn function(
     const target = &file_scope.mod.?.resolved_target.result;
     const fn_ty = zcu.navValue(cg.owner_nav).typeOf(zcu);
     const fn_info = zcu.typeToFunc(fn_ty).?;
-    const ip = &zcu.intern_pool;
-    const fn_ty_index = try wasm.internFunctionType(fn_info.cc, fn_info.param_types.get(ip), .fromInterned(fn_info.return_type), target);
-    const returns = fn_ty_index.ptr(wasm).returns.slice(wasm);
-    const any_returns = returns.len != 0;
+    const ret_ty: Type = .fromInterned(fn_info.return_type);
+    const any_returns = !firstParamSRet(fn_info.cc, ret_ty, zcu, target) and ret_ty.hasRuntimeBitsIgnoreComptime(zcu);
 
     var cc_result = try resolveCallingConventionValues(zcu, fn_ty, target);
     defer cc_result.deinit(gpa);
@@ -1290,8 +1191,8 @@ pub fn function(
     var code_gen: CodeGen = .{
         .gpa = gpa,
         .pt = pt,
-        .air = air,
-        .liveness = liveness,
+        .air = air.*,
+        .liveness = liveness.*,
         .owner_nav = cg.owner_nav,
         .target = target,
         .ptr_size = switch (target.cpu.arch) {
@@ -1299,31 +1200,33 @@ pub fn function(
             .wasm64 => .wasm64,
             else => unreachable,
         },
-        .wasm = wasm,
         .func_index = func_index,
         .args = cc_result.args,
         .return_value = cc_result.return_value,
         .local_index = cc_result.local_index,
-        .mir_instructions = &wasm.mir_instructions,
-        .mir_extra = &wasm.mir_extra,
-        .locals = &wasm.all_zcu_locals,
-        .start_mir_extra_off = @intCast(wasm.mir_extra.items.len),
-        .start_locals_off = @intCast(wasm.all_zcu_locals.items.len),
+        .mir_instructions = .empty,
+        .mir_extra = .empty,
+        .mir_locals = .empty,
+        .mir_uavs = .empty,
+        .mir_indirect_function_set = .empty,
+        .mir_func_tys = .empty,
+        .error_name_table_ref_count = 0,
     };
     defer code_gen.deinit();
 
-    return functionInner(&code_gen, any_returns) catch |err| switch (err) {
-        error.CodegenFail => return error.CodegenFail,
+    try code_gen.mir_func_tys.putNoClobber(gpa, fn_ty.toIntern(), {});
+
+    return generateInner(&code_gen, any_returns) catch |err| switch (err) {
+        error.CodegenFail,
+        error.OutOfMemory,
+        error.Overflow,
+        => |e| return e,
         else => |e| return code_gen.fail("failed to generate function: {s}", .{@errorName(e)}),
     };
 }
 
-fn functionInner(cg: *CodeGen, any_returns: bool) InnerError!Function {
-    const wasm = cg.wasm;
+fn generateInner(cg: *CodeGen, any_returns: bool) InnerError!Mir {
     const zcu = cg.pt.zcu;
-
-    const start_mir_off: u32 = @intCast(wasm.mir_instructions.len);
-
     try cg.branches.append(cg.gpa, .{});
     // clean up outer branch
     defer {
@@ -1347,20 +1250,25 @@ fn functionInner(cg: *CodeGen, any_returns: bool) InnerError!Function {
     try cg.addTag(.end);
     try cg.addTag(.dbg_epilogue_begin);
 
-    return .{
-        .mir_off = start_mir_off,
-        .mir_len = @intCast(wasm.mir_instructions.len - start_mir_off),
-        .mir_extra_off = cg.start_mir_extra_off,
-        .mir_extra_len = cg.extraLen(),
-        .locals_off = cg.start_locals_off,
-        .locals_len = @intCast(wasm.all_zcu_locals.items.len - cg.start_locals_off),
+    var mir: Mir = .{
+        .instructions = cg.mir_instructions.toOwnedSlice(),
+        .extra = &.{}, // fallible so assigned after errdefer
+        .locals = &.{}, // fallible so assigned after errdefer
         .prologue = if (cg.initial_stack_value == .none) .none else .{
             .sp_local = cg.initial_stack_value.local.value,
             .flags = .{ .stack_alignment = cg.stack_alignment },
             .stack_size = cg.stack_size,
             .bottom_stack_local = cg.bottom_stack_value.local.value,
         },
+        .uavs = cg.mir_uavs.move(),
+        .indirect_function_set = cg.mir_indirect_function_set.move(),
+        .func_tys = cg.mir_func_tys.move(),
+        .error_name_table_ref_count = cg.error_name_table_ref_count,
     };
+    errdefer mir.deinit(cg.gpa);
+    mir.extra = try cg.mir_extra.toOwnedSlice(cg.gpa);
+    mir.locals = try cg.mir_locals.toOwnedSlice(cg.gpa);
+    return mir;
 }
 
 const CallWValues = struct {
@@ -2220,7 +2128,6 @@ fn airRetLoad(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 }
 
 fn airCall(cg: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) InnerError!void {
-    const wasm = cg.wasm;
     if (modifier == .always_tail) return cg.fail("TODO implement tail calls for wasm", .{});
     const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
     const extra = cg.air.extraData(Air.Call, pl_op.payload);
@@ -2277,8 +2184,11 @@ fn airCall(cg: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModifie
         const operand = try cg.resolveInst(pl_op.operand);
         try cg.emitWValue(operand);
 
-        const fn_type_index = try wasm.internFunctionType(fn_info.cc, fn_info.param_types.get(ip), .fromInterned(fn_info.return_type), cg.target);
-        try cg.addFuncTy(.call_indirect, fn_type_index);
+        try cg.mir_func_tys.put(cg.gpa, fn_ty.toIntern(), {});
+        try cg.addInst(.{
+            .tag = .call_indirect,
+            .data = .{ .ip_index = fn_ty.toIntern() },
+        });
     }
 
     const result_value = result_value: {
@@ -2449,7 +2359,7 @@ fn store(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerErr
                 try cg.emitWValue(lhs);
                 try cg.lowerToStack(rhs);
                 // TODO: Add helper functions for simd opcodes
-                const extra_index = cg.extraLen();
+                const extra_index: u32 = @intCast(cg.mir_extra.items.len);
                 // stores as := opcode, offset, alignment (opcode::memarg)
                 try cg.mir_extra.appendSlice(cg.gpa, &[_]u32{
                     @intFromEnum(std.wasm.SimdOpcode.v128_store),
@@ -2574,7 +2484,7 @@ fn load(cg: *CodeGen, operand: WValue, ty: Type, offset: u32) InnerError!WValue
 
     if (ty.zigTypeTag(zcu) == .vector) {
         // TODO: Add helper functions for simd opcodes
-        const extra_index = cg.extraLen();
+        const extra_index: u32 = @intCast(cg.mir_extra.items.len);
         // stores as := opcode, offset, alignment (opcode::memarg)
         try cg.mir_extra.appendSlice(cg.gpa, &[_]u32{
             @intFromEnum(std.wasm.SimdOpcode.v128_load),
@@ -4971,7 +4881,7 @@ fn airArrayElemVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 
                 try cg.emitWValue(array);
 
-                const extra_index = cg.extraLen();
+                const extra_index: u32 = @intCast(cg.mir_extra.items.len);
                 try cg.mir_extra.appendSlice(cg.gpa, &operands);
                 try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } });
 
@@ -5123,7 +5033,7 @@ fn airSplat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
                     else => break :blk, // Cannot make use of simd-instructions
                 };
                 try cg.emitWValue(operand);
-                const extra_index: u32 = cg.extraLen();
+                const extra_index: u32 = @intCast(cg.mir_extra.items.len);
                 // stores as := opcode, offset, alignment (opcode::memarg)
                 try cg.mir_extra.appendSlice(cg.gpa, &[_]u32{
                     opcode,
@@ -5142,7 +5052,7 @@ fn airSplat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
                     else => break :blk, // Cannot make use of simd-instructions
                 };
                 try cg.emitWValue(operand);
-                const extra_index = cg.extraLen();
+                const extra_index: u32 = @intCast(cg.mir_extra.items.len);
                 try cg.mir_extra.append(cg.gpa, opcode);
                 try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } });
                 return cg.finishAir(inst, .stack, &.{ty_op.operand});
@@ -5246,7 +5156,7 @@ fn airShuffleTwo(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
         }
         try cg.emitWValue(operand_a);
         try cg.emitWValue(operand_b);
-        const extra_index = cg.extraLen();
+        const extra_index: u32 = @intCast(cg.mir_extra.items.len);
         try cg.mir_extra.appendSlice(cg.gpa, &.{
             @intFromEnum(std.wasm.SimdOpcode.i8x16_shuffle),
             @bitCast(lane_map[0..4].*),
@@ -6016,9 +5926,8 @@ fn airErrorName(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     const name_ty = Type.slice_const_u8_sentinel_0;
     const abi_size = name_ty.abiSize(pt.zcu);
 
-    cg.wasm.error_name_table_ref_count += 1;
-
     // Lowers to a i32.const or i64.const with the error table memory address.
+    cg.error_name_table_ref_count += 1;
     try cg.addTag(.error_name_table_ref);
     try cg.emitWValue(operand);
     switch (cg.ptr_size) {
@@ -6046,7 +5955,7 @@ fn airPtrSliceFieldPtr(cg: *CodeGen, inst: Air.Inst.Index, offset: u32) InnerErr
 
 /// NOTE: Allocates place for result on virtual stack, when integer size > 64 bits
 fn intZeroValue(cg: *CodeGen, ty: Type) InnerError!WValue {
-    const zcu = cg.wasm.base.comp.zcu.?;
+    const zcu = cg.pt.zcu;
     const int_info = ty.intInfo(zcu);
     const wasm_bits = toWasmBits(int_info.bits) orelse {
         return cg.fail("TODO: Implement intZeroValue for integer bitsize: {d}", .{int_info.bits});
@@ -7673,7 +7582,3 @@ fn floatCmpIntrinsic(op: std.math.CompareOperator, bits: u16) Mir.Intrinsic {
         },
     };
 }
-
-fn extraLen(cg: *const CodeGen) u32 {
-    return @intCast(cg.mir_extra.items.len - cg.start_mir_extra_off);
-}
src/arch/wasm/Emit.zig
@@ -31,8 +31,8 @@ pub fn lowerToCode(emit: *Emit) Error!void {
     const target = &comp.root_mod.resolved_target.result;
     const is_wasm32 = target.cpu.arch == .wasm32;
 
-    const tags = mir.instruction_tags;
-    const datas = mir.instruction_datas;
+    const tags = mir.instructions.items(.tag);
+    const datas = mir.instructions.items(.data);
     var inst: u32 = 0;
 
     loop: switch (tags[inst]) {
@@ -50,18 +50,19 @@ pub fn lowerToCode(emit: *Emit) Error!void {
         },
         .uav_ref => {
             if (is_obj) {
-                try uavRefOffObj(wasm, code, .{ .uav_obj = datas[inst].uav_obj, .offset = 0 }, is_wasm32);
+                try uavRefObj(wasm, code, datas[inst].ip_index, 0, is_wasm32);
             } else {
-                try uavRefOffExe(wasm, code, .{ .uav_exe = datas[inst].uav_exe, .offset = 0 }, is_wasm32);
+                try uavRefExe(wasm, code, datas[inst].ip_index, 0, is_wasm32);
             }
             inst += 1;
             continue :loop tags[inst];
         },
         .uav_ref_off => {
+            const extra = mir.extraData(Mir.UavRefOff, datas[inst].payload).data;
             if (is_obj) {
-                try uavRefOffObj(wasm, code, mir.extraData(Mir.UavRefOffObj, datas[inst].payload).data, is_wasm32);
+                try uavRefObj(wasm, code, extra.value, extra.offset, is_wasm32);
             } else {
-                try uavRefOffExe(wasm, code, mir.extraData(Mir.UavRefOffExe, datas[inst].payload).data, is_wasm32);
+                try uavRefExe(wasm, code, extra.value, extra.offset, is_wasm32);
             }
             inst += 1;
             continue :loop tags[inst];
@@ -77,11 +78,14 @@ pub fn lowerToCode(emit: *Emit) Error!void {
             continue :loop tags[inst];
         },
         .func_ref => {
+            const indirect_func_idx: Wasm.ZcuIndirectFunctionSetIndex = @enumFromInt(
+                wasm.zcu_indirect_function_set.getIndex(datas[inst].nav_index).?,
+            );
             code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_const));
             if (is_obj) {
                 @panic("TODO");
             } else {
-                leb.writeUleb128(code.fixedWriter(), 1 + @intFromEnum(datas[inst].indirect_function_table_index)) catch unreachable;
+                leb.writeUleb128(code.fixedWriter(), 1 + @intFromEnum(indirect_func_idx)) catch unreachable;
             }
             inst += 1;
             continue :loop tags[inst];
@@ -101,6 +105,7 @@ pub fn lowerToCode(emit: *Emit) Error!void {
             continue :loop tags[inst];
         },
         .error_name_table_ref => {
+            wasm.error_name_table_ref_count += 1;
             try code.ensureUnusedCapacity(gpa, 11);
             const opcode: std.wasm.Opcode = if (is_wasm32) .i32_const else .i64_const;
             code.appendAssumeCapacity(@intFromEnum(opcode));
@@ -176,7 +181,13 @@ pub fn lowerToCode(emit: *Emit) Error!void {
 
         .call_indirect => {
             try code.ensureUnusedCapacity(gpa, 11);
-            const func_ty_index = datas[inst].func_ty;
+            const fn_info = comp.zcu.?.typeToFunc(.fromInterned(datas[inst].ip_index)).?;
+            const func_ty_index = wasm.getExistingFunctionType(
+                fn_info.cc,
+                fn_info.param_types.get(&comp.zcu.?.intern_pool),
+                .fromInterned(fn_info.return_type),
+                target,
+            ).?;
             code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.call_indirect));
             if (is_obj) {
                 try wasm.out_relocs.append(gpa, .{
@@ -912,7 +923,7 @@ fn encodeMemArg(code: *std.ArrayListUnmanaged(u8), mem_arg: Mir.MemArg) void {
     leb.writeUleb128(code.fixedWriter(), mem_arg.offset) catch unreachable;
 }
 
-fn uavRefOffObj(wasm: *Wasm, code: *std.ArrayListUnmanaged(u8), data: Mir.UavRefOffObj, is_wasm32: bool) !void {
+fn uavRefObj(wasm: *Wasm, code: *std.ArrayListUnmanaged(u8), value: InternPool.Index, offset: i32, is_wasm32: bool) !void {
     const comp = wasm.base.comp;
     const gpa = comp.gpa;
     const opcode: std.wasm.Opcode = if (is_wasm32) .i32_const else .i64_const;
@@ -922,14 +933,14 @@ fn uavRefOffObj(wasm: *Wasm, code: *std.ArrayListUnmanaged(u8), data: Mir.UavRef
 
     try wasm.out_relocs.append(gpa, .{
         .offset = @intCast(code.items.len),
-        .pointee = .{ .symbol_index = try wasm.uavSymbolIndex(data.uav_obj.key(wasm).*) },
+        .pointee = .{ .symbol_index = try wasm.uavSymbolIndex(value) },
         .tag = if (is_wasm32) .memory_addr_leb else .memory_addr_leb64,
-        .addend = data.offset,
+        .addend = offset,
     });
     code.appendNTimesAssumeCapacity(0, if (is_wasm32) 5 else 10);
 }
 
-fn uavRefOffExe(wasm: *Wasm, code: *std.ArrayListUnmanaged(u8), data: Mir.UavRefOffExe, is_wasm32: bool) !void {
+fn uavRefExe(wasm: *Wasm, code: *std.ArrayListUnmanaged(u8), value: InternPool.Index, offset: i32, is_wasm32: bool) !void {
     const comp = wasm.base.comp;
     const gpa = comp.gpa;
     const opcode: std.wasm.Opcode = if (is_wasm32) .i32_const else .i64_const;
@@ -937,8 +948,8 @@ fn uavRefOffExe(wasm: *Wasm, code: *std.ArrayListUnmanaged(u8), data: Mir.UavRef
     try code.ensureUnusedCapacity(gpa, 11);
     code.appendAssumeCapacity(@intFromEnum(opcode));
 
-    const addr = wasm.uavAddr(data.uav_exe);
-    leb.writeUleb128(code.fixedWriter(), @as(u32, @intCast(@as(i64, addr) + data.offset))) catch unreachable;
+    const addr = wasm.uavAddr(value);
+    leb.writeUleb128(code.fixedWriter(), @as(u32, @intCast(@as(i64, addr) + offset))) catch unreachable;
 }
 
 fn navRefOff(wasm: *Wasm, code: *std.ArrayListUnmanaged(u8), data: Mir.NavRefOff, is_wasm32: bool) !void {
src/arch/wasm/Mir.zig
@@ -9,16 +9,53 @@
 const Mir = @This();
 const InternPool = @import("../../InternPool.zig");
 const Wasm = @import("../../link/Wasm.zig");
+const Emit = @import("Emit.zig");
+const Alignment = InternPool.Alignment;
 
 const builtin = @import("builtin");
 const std = @import("std");
 const assert = std.debug.assert;
+const leb = std.leb;
 
-instruction_tags: []const Inst.Tag,
-instruction_datas: []const Inst.Data,
+instructions: std.MultiArrayList(Inst).Slice,
 /// A slice of indexes where the meaning of the data is determined by the
 /// `Inst.Tag` value.
 extra: []const u32,
+locals: []const std.wasm.Valtype,
+prologue: Prologue,
+
+/// Not directly used by `Emit`, but the linker needs this to merge it with a global set.
+/// Value is the explicit alignment if greater than natural alignment, `.none` otherwise.
+uavs: std.AutoArrayHashMapUnmanaged(InternPool.Index, Alignment),
+/// Not directly used by `Emit`, but the linker needs this to merge it with a global set.
+indirect_function_set: std.AutoArrayHashMapUnmanaged(InternPool.Nav.Index, void),
+/// Not directly used by `Emit`, but the linker needs this to ensure these types are interned.
+func_tys: std.AutoArrayHashMapUnmanaged(InternPool.Index, void),
+/// Not directly used by `Emit`, but the linker needs this to add it to its own refcount.
+error_name_table_ref_count: u32,
+
+pub const Prologue = extern struct {
+    flags: Flags,
+    sp_local: u32,
+    stack_size: u32,
+    bottom_stack_local: u32,
+
+    pub const Flags = packed struct(u32) {
+        stack_alignment: Alignment,
+        padding: u26 = 0,
+    };
+
+    pub const none: Prologue = .{
+        .sp_local = 0,
+        .flags = .{ .stack_alignment = .none },
+        .stack_size = 0,
+        .bottom_stack_local = 0,
+    };
+
+    pub fn isNone(p: *const Prologue) bool {
+        return p.flags.stack_alignment == .none; // `none` is identified by its `.none` stack alignment
+    }
+};
 
 pub const Inst = struct {
     /// The opcode that represents this instruction
@@ -80,7 +117,7 @@ pub const Inst = struct {
         /// Lowers to an i32_const which is the index of the function in the
         /// table section.
         ///
-        /// Uses `indirect_function_table_index`.
+        /// Uses `nav_index`.
         func_ref,
         /// Inserts debug information about the current line and column
         /// of the source code
@@ -123,7 +160,7 @@ pub const Inst = struct {
         /// Calls a function pointer by its function signature
         /// and index into the function table.
         ///
-        /// Uses `func_ty`
+        /// Uses `ip_index`; the `InternPool.Index` is the function type.
         call_indirect,
         /// Calls a function by its index.
         ///
@@ -611,11 +648,7 @@ pub const Inst = struct {
 
         ip_index: InternPool.Index,
         nav_index: InternPool.Nav.Index,
-        func_ty: Wasm.FunctionType.Index,
         intrinsic: Intrinsic,
-        uav_obj: Wasm.UavsObjIndex,
-        uav_exe: Wasm.UavsExeIndex,
-        indirect_function_table_index: Wasm.ZcuIndirectFunctionSetIndex,
 
         comptime {
             switch (builtin.mode) {
@@ -626,10 +659,66 @@ pub const Inst = struct {
     };
 };
 
-pub fn deinit(self: *Mir, gpa: std.mem.Allocator) void {
-    self.instructions.deinit(gpa);
-    gpa.free(self.extra);
-    self.* = undefined;
+pub fn deinit(mir: *Mir, gpa: std.mem.Allocator) void {
+    mir.instructions.deinit(gpa);
+    gpa.free(mir.extra);
+    gpa.free(mir.locals);
+    mir.uavs.deinit(gpa);
+    mir.indirect_function_set.deinit(gpa);
+    mir.func_tys.deinit(gpa);
+    mir.* = undefined;
+}
+
+pub fn lower(mir: *const Mir, wasm: *Wasm, code: *std.ArrayListUnmanaged(u8)) std.mem.Allocator.Error!void {
+    const gpa = wasm.base.comp.gpa;
+
+    // Write the locals in the prologue of the function body.
+    try code.ensureUnusedCapacity(gpa, 5 + mir.locals.len * 6 + 38);
+
+    leb.writeUleb128(code.fixedWriter(), @as(u32, @intCast(mir.locals.len))) catch unreachable;
+    for (mir.locals) |local| {
+        leb.writeUleb128(code.fixedWriter(), @as(u32, 1)) catch unreachable;
+        code.appendAssumeCapacity(@intFromEnum(local));
+    }
+
+    // Stack management section of function prologue.
+    const stack_alignment = mir.prologue.flags.stack_alignment;
+    if (stack_alignment.toByteUnits()) |align_bytes| {
+        const sp_global: Wasm.GlobalIndex = .stack_pointer;
+        // load stack pointer
+        code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.global_get));
+        leb.writeUleb128(code.fixedWriter(), @intFromEnum(sp_global)) catch unreachable;
+        // store stack pointer so we can restore it when we return from the function
+        code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.local_tee));
+        leb.writeUleb128(code.fixedWriter(), mir.prologue.sp_local) catch unreachable;
+        // get the total stack size
+        const aligned_stack: i32 = @intCast(stack_alignment.forward(mir.prologue.stack_size));
+        code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_const));
+        leb.writeIleb128(code.fixedWriter(), aligned_stack) catch unreachable;
+        // subtract it from the current stack pointer
+        code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_sub));
+        // Get negative stack alignment
+        const neg_stack_align = @as(i32, @intCast(align_bytes)) * -1;
+        code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_const));
+        leb.writeIleb128(code.fixedWriter(), neg_stack_align) catch unreachable;
+        // Bitwise-and the value to get the new stack pointer to ensure the
+        // pointers are aligned with the abi alignment.
+        code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_and));
+        // The bottom will be used to calculate all stack pointer offsets.
+        code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.local_tee));
+        leb.writeUleb128(code.fixedWriter(), mir.prologue.bottom_stack_local) catch unreachable;
+        // Store the current stack pointer value into the global stack pointer so other function calls will
+        // start from this value instead and not overwrite the current stack.
+        code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.global_set));
+        leb.writeUleb128(code.fixedWriter(), @intFromEnum(sp_global)) catch unreachable;
+    }
+
+    var emit: Emit = .{
+        .mir = mir.*,
+        .wasm = wasm,
+        .code = code,
+    };
+    try emit.lowerToCode();
 }
 
 pub fn extraData(self: *const Mir, comptime T: type, index: usize) struct { data: T, end: usize } {
@@ -643,6 +732,7 @@ pub fn extraData(self: *const Mir, comptime T: type, index: usize) struct { data
             Wasm.UavsObjIndex,
             Wasm.UavsExeIndex,
             InternPool.Nav.Index,
+            InternPool.Index,
             => @enumFromInt(self.extra[i]),
             else => |field_type| @compileError("Unsupported field type " ++ @typeName(field_type)),
         };
@@ -695,13 +785,8 @@ pub const MemArg = struct {
     alignment: u32,
 };
 
-pub const UavRefOffObj = struct {
-    uav_obj: Wasm.UavsObjIndex,
-    offset: i32,
-};
-
-pub const UavRefOffExe = struct {
-    uav_exe: Wasm.UavsExeIndex,
+pub const UavRefOff = struct {
+    value: InternPool.Index,
     offset: i32,
 };
 
src/link/Wasm/Flush.zig
@@ -9,6 +9,7 @@ const Alignment = Wasm.Alignment;
 const String = Wasm.String;
 const Relocation = Wasm.Relocation;
 const InternPool = @import("../../InternPool.zig");
+const Mir = @import("../../arch/wasm/Mir.zig");
 
 const build_options = @import("build_options");
 
@@ -868,7 +869,21 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void {
                     .enum_type => {
                         try emitTagNameFunction(wasm, binary_bytes, f.data_segments.get(.__zig_tag_name_table).?, i.value(wasm).tag_name.table_index, ip_index);
                     },
-                    else => try i.value(wasm).function.lower(wasm, binary_bytes),
+                    else => {
+                        const func = i.value(wasm).function;
+                        const mir: Mir = .{
+                            .instructions = wasm.mir_instructions.slice().subslice(func.instructions_off, func.instructions_len),
+                            .extra = wasm.mir_extra.items[func.extra_off..][0..func.extra_len],
+                            .locals = wasm.mir_locals.items[func.locals_off..][0..func.locals_len],
+                            .prologue = func.prologue,
+                            // These fields are unused by `lower`.
+                            .uavs = undefined,
+                            .indirect_function_set = undefined,
+                            .func_tys = undefined,
+                            .error_name_table_ref_count = undefined,
+                        };
+                        try mir.lower(wasm, binary_bytes);
+                    },
                 }
             },
         };
src/link/Wasm.zig
@@ -282,7 +282,7 @@ mir_instructions: std.MultiArrayList(Mir.Inst) = .{},
 /// Corresponds to `mir_instructions`.
 mir_extra: std.ArrayListUnmanaged(u32) = .empty,
 /// All local types for all Zcu functions.
-all_zcu_locals: std.ArrayListUnmanaged(std.wasm.Valtype) = .empty,
+mir_locals: std.ArrayListUnmanaged(std.wasm.Valtype) = .empty,
 
 params_scratch: std.ArrayListUnmanaged(std.wasm.Valtype) = .empty,
 returns_scratch: std.ArrayListUnmanaged(std.wasm.Valtype) = .empty,
@@ -866,9 +866,24 @@ const ZcuDataStarts = struct {
 };
 
 pub const ZcuFunc = union {
-    function: CodeGen.Function,
+    function: Function,
     tag_name: TagName,
 
+    pub const Function = extern struct {
+        /// Index into `Wasm.mir_instructions`.
+        instructions_off: u32,
+        /// Unused except as a safety slice bound; could be removed.
+        instructions_len: u32,
+        /// Index into `Wasm.mir_extra`.
+        extra_off: u32,
+        /// Unused except as a safety slice bound; could be removed.
+        extra_len: u32,
+        /// Index into `Wasm.mir_locals`.
+        locals_off: u32,
+        locals_len: u32, // length of this function's range within `Wasm.mir_locals`
+        prologue: Mir.Prologue,
+    };
+
     pub const TagName = extern struct {
         symbol_name: String,
         type_index: FunctionType.Index,
@@ -3107,7 +3122,7 @@ pub fn deinit(wasm: *Wasm) void {
 
     wasm.mir_instructions.deinit(gpa);
     wasm.mir_extra.deinit(gpa);
-    wasm.all_zcu_locals.deinit(gpa);
+    wasm.mir_locals.deinit(gpa);
 
     if (wasm.dwarf) |*dwarf| dwarf.deinit();
 
@@ -3167,33 +3182,96 @@ pub fn deinit(wasm: *Wasm) void {
     wasm.missing_exports.deinit(gpa);
 }
 
-pub fn updateFunc(wasm: *Wasm, pt: Zcu.PerThread, func_index: InternPool.Index, air: Air, liveness: Air.Liveness) !void {
+pub fn updateFunc(
+    wasm: *Wasm,
+    pt: Zcu.PerThread,
+    func_index: InternPool.Index,
+    any_mir: *const codegen.AnyMir,
+    maybe_undef_air: *const Air,
+) !void {
     if (build_options.skip_non_native and builtin.object_format != .wasm) {
         @panic("Attempted to compile for object format that was disabled by build configuration");
     }
 
     dev.check(.wasm_backend);
+    _ = maybe_undef_air; // we (correctly) do not need this
 
+    // This linker implementation only works with codegen backend `.stage2_wasm`.
+    const mir = &any_mir.wasm;
     const zcu = pt.zcu;
     const gpa = zcu.gpa;
-    try wasm.functions.ensureUnusedCapacity(gpa, 1);
-    try wasm.zcu_funcs.ensureUnusedCapacity(gpa, 1);
-
     const ip = &zcu.intern_pool;
+    const is_obj = zcu.comp.config.output_mode == .Obj;
+    const target = &zcu.comp.root_mod.resolved_target.result;
     const owner_nav = zcu.funcInfo(func_index).owner_nav;
     log.debug("updateFunc {}", .{ip.getNav(owner_nav).fqn.fmt(ip)});
 
+    // For Wasm, we do not lower the MIR to code just yet. That lowering happens during `flush`,
+    // after garbage collection, which can affect function and global indexes, which affects the
+    // LEB integer encoding, which affects the output binary size.
+
+    // However, we do move the MIR into a more efficient in-memory representation, where the arrays
+    // for all functions are packed together rather than keeping them each in their own `Mir`.
+    const mir_instructions_off: u32 = @intCast(wasm.mir_instructions.len);
+    const mir_extra_off: u32 = @intCast(wasm.mir_extra.items.len);
+    const mir_locals_off: u32 = @intCast(wasm.mir_locals.items.len);
+    {
+        // Copying MultiArrayList data is a little non-trivial. Resize, then memcpy both slices.
+        const old_len = wasm.mir_instructions.len;
+        try wasm.mir_instructions.resize(gpa, old_len + mir.instructions.len);
+        const dest_slice = wasm.mir_instructions.slice().subslice(old_len, mir.instructions.len);
+        const src_slice = mir.instructions;
+        @memcpy(dest_slice.items(.tag), src_slice.items(.tag));
+        @memcpy(dest_slice.items(.data), src_slice.items(.data));
+    }
+    try wasm.mir_extra.appendSlice(gpa, mir.extra);
+    try wasm.mir_locals.appendSlice(gpa, mir.locals);
+
+    // We also need to populate some global state from `mir`.
+    try wasm.zcu_indirect_function_set.ensureUnusedCapacity(gpa, mir.indirect_function_set.count());
+    for (mir.indirect_function_set.keys()) |nav| wasm.zcu_indirect_function_set.putAssumeCapacity(nav, {});
+    for (mir.func_tys.keys()) |func_ty| {
+        const fn_info = zcu.typeToFunc(.fromInterned(func_ty)).?;
+        _ = try wasm.internFunctionType(fn_info.cc, fn_info.param_types.get(ip), .fromInterned(fn_info.return_type), target);
+    }
+    wasm.error_name_table_ref_count += mir.error_name_table_ref_count;
+    // We need to populate UAV data. In theory, we can lower the UAV values while we fill `mir.uavs`.
+    // However, lowering the data might cause *more* UAVs to be created, and mixing them up would be
+    // a headache. So instead, just write `undefined` placeholder code and use the `ZcuDataStarts`.
     const zds: ZcuDataStarts = .init(wasm);
+    for (mir.uavs.keys(), mir.uavs.values()) |uav_val, uav_align| {
+        if (uav_align != .none) {
+            const gop = try wasm.overaligned_uavs.getOrPut(gpa, uav_val);
+            gop.value_ptr.* = if (gop.found_existing) gop.value_ptr.maxStrict(uav_align) else uav_align;
+        }
+        if (is_obj) {
+            const gop = try wasm.uavs_obj.getOrPut(gpa, uav_val);
+            if (!gop.found_existing) gop.value_ptr.* = undefined; // `zds` handles lowering
+        } else {
+            const gop = try wasm.uavs_exe.getOrPut(gpa, uav_val);
+            if (!gop.found_existing) gop.value_ptr.* = .{
+                .code = undefined, // `zds` handles lowering
+                .count = 0,
+            };
+            gop.value_ptr.count += 1;
+        }
+    }
+    try zds.finish(wasm, pt); // actually generates the UAVs
+
+    try wasm.functions.ensureUnusedCapacity(gpa, 1);
+    try wasm.zcu_funcs.ensureUnusedCapacity(gpa, 1);
 
     // This converts AIR to MIR but does not yet lower to wasm code.
-    // That lowering happens during `flush`, after garbage collection, which
-    // can affect function and global indexes, which affects the LEB integer
-    // encoding, which affects the output binary size.
-    const function = try CodeGen.function(wasm, pt, func_index, air, liveness);
-    wasm.zcu_funcs.putAssumeCapacity(func_index, .{ .function = function });
+    wasm.zcu_funcs.putAssumeCapacity(func_index, .{ .function = .{
+        .instructions_off = mir_instructions_off,
+        .instructions_len = @intCast(mir.instructions.len),
+        .extra_off = mir_extra_off,
+        .extra_len = @intCast(mir.extra.len),
+        .locals_off = mir_locals_off,
+        .locals_len = @intCast(mir.locals.len),
+        .prologue = mir.prologue,
+    } });
     wasm.functions.putAssumeCapacity(.pack(wasm, .{ .zcu_func = @enumFromInt(wasm.zcu_funcs.entries.len - 1) }), {});
-
-    try zds.finish(wasm, pt);
 }
 
 // Generate code for the "Nav", storing it in memory to be later written to
@@ -3988,58 +4066,54 @@ pub fn symbolNameIndex(wasm: *Wasm, name: String) Allocator.Error!SymbolTableInd
     return @enumFromInt(gop.index);
 }
 
-pub fn refUavObj(wasm: *Wasm, ip_index: InternPool.Index, orig_ptr_ty: InternPool.Index) !UavsObjIndex {
-    const comp = wasm.base.comp;
-    const zcu = comp.zcu.?;
-    const ip = &zcu.intern_pool;
-    const gpa = comp.gpa;
-    assert(comp.config.output_mode == .Obj);
-
-    if (orig_ptr_ty != .none) {
-        const abi_alignment = Zcu.Type.fromInterned(ip.typeOf(ip_index)).abiAlignment(zcu);
-        const explicit_alignment = ip.indexToKey(orig_ptr_ty).ptr_type.flags.alignment;
-        if (explicit_alignment.compare(.gt, abi_alignment)) {
-            const gop = try wasm.overaligned_uavs.getOrPut(gpa, ip_index);
-            gop.value_ptr.* = if (gop.found_existing) gop.value_ptr.maxStrict(explicit_alignment) else explicit_alignment;
-        }
-    }
-
-    const gop = try wasm.uavs_obj.getOrPut(gpa, ip_index);
-    if (!gop.found_existing) gop.value_ptr.* = .{
-        // Lowering the value is delayed to avoid recursion.
-        .code = undefined,
-        .relocs = undefined,
-    };
-    return @enumFromInt(gop.index);
-}
-
-pub fn refUavExe(wasm: *Wasm, ip_index: InternPool.Index, orig_ptr_ty: InternPool.Index) !UavsExeIndex {
+pub fn addUavReloc(
+    wasm: *Wasm,
+    reloc_offset: usize,
+    uav_val: InternPool.Index,
+    orig_ptr_ty: InternPool.Index,
+    addend: u32,
+) !void {
     const comp = wasm.base.comp;
     const zcu = comp.zcu.?;
     const ip = &zcu.intern_pool;
     const gpa = comp.gpa;
-    assert(comp.config.output_mode != .Obj);
 
-    if (orig_ptr_ty != .none) {
-        const abi_alignment = Zcu.Type.fromInterned(ip.typeOf(ip_index)).abiAlignment(zcu);
-        const explicit_alignment = ip.indexToKey(orig_ptr_ty).ptr_type.flags.alignment;
-        if (explicit_alignment.compare(.gt, abi_alignment)) {
-            const gop = try wasm.overaligned_uavs.getOrPut(gpa, ip_index);
-            gop.value_ptr.* = if (gop.found_existing) gop.value_ptr.maxStrict(explicit_alignment) else explicit_alignment;
-        }
-    }
-
-    const gop = try wasm.uavs_exe.getOrPut(gpa, ip_index);
-    if (gop.found_existing) {
-        gop.value_ptr.count += 1;
+    @"align": {
+        const ptr_type = ip.indexToKey(orig_ptr_ty).ptr_type;
+        const this_align = ptr_type.flags.alignment;
+        if (this_align == .none) break :@"align";
+        const abi_align = Zcu.Type.fromInterned(ptr_type.child).abiAlignment(zcu);
+        if (this_align.compare(.lte, abi_align)) break :@"align";
+        const gop = try wasm.overaligned_uavs.getOrPut(gpa, uav_val);
+        gop.value_ptr.* = if (gop.found_existing) gop.value_ptr.maxStrict(this_align) else this_align;
+    }
+
+    if (comp.config.output_mode == .Obj) {
+        const gop = try wasm.uavs_obj.getOrPut(gpa, uav_val);
+        if (!gop.found_existing) gop.value_ptr.* = undefined; // to avoid recursion, `ZcuDataStarts` will lower the value later
+        try wasm.out_relocs.append(gpa, .{
+            .offset = @intCast(reloc_offset),
+            .pointee = .{ .symbol_index = try wasm.uavSymbolIndex(uav_val) },
+            .tag = switch (wasm.pointerSize()) {
+                32 => .memory_addr_i32,
+                64 => .memory_addr_i64,
+                else => unreachable,
+            },
+            .addend = @intCast(addend),
+        });
     } else {
-        gop.value_ptr.* = .{
-            // Lowering the value is delayed to avoid recursion.
-            .code = undefined,
-            .count = 1,
+        const gop = try wasm.uavs_exe.getOrPut(gpa, uav_val);
+        if (!gop.found_existing) gop.value_ptr.* = .{
+            .code = undefined, // to avoid recursion, `ZcuDataStarts` will lower the value later
+            .count = 0,
         };
+        gop.value_ptr.count += 1;
+        try wasm.uav_fixups.append(gpa, .{
+            .uavs_exe_index = @enumFromInt(gop.index),
+            .offset = @intCast(reloc_offset),
+            .addend = addend,
+        });
     }
-    return @enumFromInt(gop.index);
 }
 
 pub fn refNavObj(wasm: *Wasm, nav_index: InternPool.Nav.Index) !NavsObjIndex {
@@ -4073,10 +4147,11 @@ pub fn refNavExe(wasm: *Wasm, nav_index: InternPool.Nav.Index) !NavsExeIndex {
 }
 
 /// Asserts it is called after `Flush.data_segments` is fully populated and sorted.
-pub fn uavAddr(wasm: *Wasm, uav_index: UavsExeIndex) u32 {
+pub fn uavAddr(wasm: *Wasm, ip_index: InternPool.Index) u32 {
     assert(wasm.flush_buffer.memory_layout_finished);
     const comp = wasm.base.comp;
     assert(comp.config.output_mode != .Obj);
+    const uav_index: UavsExeIndex = @enumFromInt(wasm.uavs_exe.getIndex(ip_index).?);
     const ds_id: DataSegmentId = .pack(wasm, .{ .uav_exe = uav_index });
     return wasm.flush_buffer.data_segments.get(ds_id).?;
 }
src/codegen.zig
@@ -123,6 +123,7 @@ pub const AnyMir = union {
             .stage2_riscv64,
             .stage2_sparc64,
             .stage2_x86_64,
+            .stage2_wasm,
             .stage2_c,
             => |backend_ct| @field(mir, tag(backend_ct)).deinit(gpa),
         }
@@ -153,6 +154,7 @@ pub fn generateFunction(
         .stage2_riscv64,
         .stage2_sparc64,
         .stage2_x86_64,
+        .stage2_wasm,
         .stage2_c,
         => |backend| {
             dev.check(devFeatureForBackend(backend));
@@ -784,7 +786,6 @@ fn lowerUavRef(
     const comp = lf.comp;
     const target = &comp.root_mod.resolved_target.result;
     const ptr_width_bytes = @divExact(target.ptrBitWidth(), 8);
-    const is_obj = comp.config.output_mode == .Obj;
     const uav_val = uav.val;
     const uav_ty = Type.fromInterned(ip.typeOf(uav_val));
     const is_fn_body = uav_ty.zigTypeTag(zcu) == .@"fn";
@@ -804,21 +805,7 @@ fn lowerUavRef(
             dev.check(link.File.Tag.wasm.devFeature());
             const wasm = lf.cast(.wasm).?;
             assert(reloc_parent == .none);
-            if (is_obj) {
-                try wasm.out_relocs.append(gpa, .{
-                    .offset = @intCast(code.items.len),
-                    .pointee = .{ .symbol_index = try wasm.uavSymbolIndex(uav.val) },
-                    .tag = if (ptr_width_bytes == 4) .memory_addr_i32 else .memory_addr_i64,
-                    .addend = @intCast(offset),
-                });
-            } else {
-                try wasm.uav_fixups.ensureUnusedCapacity(gpa, 1);
-                wasm.uav_fixups.appendAssumeCapacity(.{
-                    .uavs_exe_index = try wasm.refUavExe(uav.val, uav.orig_ty),
-                    .offset = @intCast(code.items.len),
-                    .addend = @intCast(offset),
-                });
-            }
+            try wasm.addUavReloc(code.items.len, uav.val, uav.orig_ty, @intCast(offset));
             code.appendNTimesAssumeCapacity(0, ptr_width_bytes);
             return;
         },
src/Compilation.zig
@@ -3500,7 +3500,7 @@ pub fn saveState(comp: *Compilation) !void {
             // TODO handle the union safety field
             //addBuf(&bufs, mem.sliceAsBytes(wasm.mir_instructions.items(.data)));
             addBuf(&bufs, mem.sliceAsBytes(wasm.mir_extra.items));
-            addBuf(&bufs, mem.sliceAsBytes(wasm.all_zcu_locals.items));
+            addBuf(&bufs, mem.sliceAsBytes(wasm.mir_locals.items));
             addBuf(&bufs, mem.sliceAsBytes(wasm.tag_name_bytes.items));
             addBuf(&bufs, mem.sliceAsBytes(wasm.tag_name_offs.items));
 
src/link.zig
@@ -759,7 +759,6 @@ pub const File = struct {
         switch (base.tag) {
             .lld => unreachable,
             inline else => |tag| {
-                if (tag == .wasm) @panic("MLUGG TODO");
                 if (tag == .spirv) @panic("MLUGG TODO");
                 dev.check(tag.devFeature());
                 return @as(*tag.Type(), @fieldParentPtr("base", base)).updateFunc(pt, func_index, mir, maybe_undef_air);
@@ -1450,12 +1449,12 @@ pub fn doZcuTask(comp: *Compilation, tid: usize, task: ZcuTask) void {
             const nav = zcu.funcInfo(func.func).owner_nav;
             const pt: Zcu.PerThread = .activate(zcu, @enumFromInt(tid));
             defer pt.deactivate();
-            assert(zcu.llvm_object == null); // LLVM codegen doesn't produce MIR
             switch (func.mir.status.load(.monotonic)) {
                 .pending => unreachable,
                 .ready => {},
                 .failed => return,
             }
+            assert(zcu.llvm_object == null); // LLVM codegen doesn't produce MIR
             const mir = &func.mir.value;
             if (comp.bin_file) |lf| {
                 lf.updateFunc(pt, func.func, mir, func.air) catch |err| switch (err) {
src/target.zig
@@ -851,7 +851,7 @@ pub inline fn backendSupportsFeature(backend: std.builtin.CompilerBackend, compt
         .separate_thread => switch (backend) {
             .stage2_llvm => false,
             // MLUGG TODO
-            .stage2_c => true,
+            .stage2_c, .stage2_wasm => true,
             else => false,
         },
     };