Commit e521879e47

Andrew Kelley <andrew@ziglang.org>
2024-12-06 08:46:46
rewrite wasm/Emit.zig
mainly, rework how relocations work. This is the point at which symbol indexes are known - not before. And don't emit unnecessary relocations! They're only needed when emitting an object file. Changes the wasm linker to keep MIR around long-lived so that fixups can be reapplied after linker garbage collection. Use a labeled switch while we're at it.
1 parent b9355ed
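
The "labeled switch" mentioned above is Zig 0.14's `continue :label operand` form, which re-dispatches on a new operand without an enclosing `while (true)`. A minimal sketch of the pattern (illustration only, not code from this commit):

```zig
const Tag = enum { add, halt };

// Dispatch loop in the labeled-switch style: `continue :loop tags[i]`
// re-enters the switch with a new operand, replacing `while (true) switch`.
fn run(tags: []const Tag) u32 {
    var acc: u32 = 0;
    var i: usize = 0;
    loop: switch (tags[i]) {
        .add => {
            acc += 1;
            i += 1;
            continue :loop tags[i];
        },
        .halt => {},
    }
    return acc;
}
```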
src/arch/wasm/CodeGen.zig
@@ -7,6 +7,7 @@ const leb = std.leb;
 const mem = std.mem;
 const log = std.log.scoped(.codegen);
 
+const CodeGen = @This();
 const codegen = @import("../../codegen.zig");
 const Zcu = @import("../../Zcu.zig");
 const InternPool = @import("../../InternPool.zig");
@@ -24,6 +25,98 @@ const abi = @import("abi.zig");
 const Alignment = InternPool.Alignment;
 const errUnionPayloadOffset = codegen.errUnionPayloadOffset;
 const errUnionErrorOffset = codegen.errUnionErrorOffset;
+const Wasm = link.File.Wasm;
+
+/// Reference to the function declaration the code
+/// section belongs to
+owner_nav: InternPool.Nav.Index,
+/// Current block depth. Used to calculate the relative difference between a break
+/// and block
+block_depth: u32 = 0,
+air: Air,
+liveness: Liveness,
+gpa: mem.Allocator,
+func_index: InternPool.Index,
+/// Contains a list of current branches.
+/// When we return from a branch, the branch will be popped from this list,
+/// which means a branch can only contain references from within itself,
+/// or from a branch higher (lower index) in the tree.
+branches: std.ArrayListUnmanaged(Branch) = .empty,
+/// Table to save `WValue`'s generated by an `Air.Inst`
+// values: ValueTable,
+/// Mapping from Air.Inst.Index to block ids
+blocks: std.AutoArrayHashMapUnmanaged(Air.Inst.Index, struct {
+    label: u32,
+    value: WValue,
+}) = .{},
+/// Maps `loop` instructions to their label. `br` to here repeats the loop.
+loops: std.AutoHashMapUnmanaged(Air.Inst.Index, u32) = .empty,
+/// The index that the next generated local will have.
+/// NOTE: arguments share the index space with locals; therefore the first local
+/// will have the index that comes after the last argument's index.
+local_index: u32,
+/// The index of the current argument.
+/// Used to track which argument is being referenced in `airArg`.
+arg_index: u32 = 0,
+/// List of simd128 immediates. Each value is stored as an array of bytes.
+/// This list is only populated for 128-bit SIMD values when the relevant
+/// target features are enabled.
+simd_immediates: std.ArrayListUnmanaged([16]u8) = .empty,
+/// The Target we're emitting (used to call intInfo)
+target: *const std.Target,
+wasm: *link.File.Wasm,
+pt: Zcu.PerThread,
+/// List of MIR Instructions
+mir_instructions: *std.MultiArrayList(Mir.Inst),
+/// Contains extra data for MIR
+mir_extra: *std.ArrayListUnmanaged(u32),
+/// List of all locals' types generated throughout this declaration
+/// used to emit locals count at start of 'code' section.
+locals: *std.ArrayListUnmanaged(u8),
+/// When a function is executing, we store the current stack pointer's value within this local.
+/// This value is then used to restore the stack pointer to the original value at the return of the function.
+initial_stack_value: WValue = .none,
+/// The current stack pointer with the stack size subtracted. From this value, we will calculate
+/// all offsets of the stack values.
+bottom_stack_value: WValue = .none,
+/// Arguments of this function declaration
+/// This will be set after `resolveCallingConventionValues`
+args: []WValue,
+/// This will only be `.none` if the function returns void, or returns an immediate.
+/// When it returns a pointer to the stack, the `.local` tag will be active and must be populated
+/// before this function returns its execution to the caller.
+return_value: WValue,
+/// The size of the stack this function occupies. In the function prologue
+/// we will move the stack pointer by this number, forward aligned with the `stack_alignment`.
+stack_size: u32 = 0,
+/// The stack alignment, which is 16 bytes by default. This is specified by the
+/// tool-conventions: https://github.com/WebAssembly/tool-conventions/blob/main/BasicCABI.md
+/// and also what the llvm backend will emit.
+/// However, local variables or the usage of `incoming_stack_alignment` in a `CallingConvention` can override this default.
+stack_alignment: Alignment = .@"16",
+
+// For each individual Wasm valtype we store a separate free list which
+// allows us to re-use locals that are no longer used, e.g. a temporary local.
+/// A list of indexes which represents a local of valtype `i32`.
+/// It is illegal to store a non-i32 valtype in this list.
+free_locals_i32: std.ArrayListUnmanaged(u32) = .empty,
+/// A list of indexes which represents a local of valtype `i64`.
+/// It is illegal to store a non-i64 valtype in this list.
+free_locals_i64: std.ArrayListUnmanaged(u32) = .empty,
+/// A list of indexes which represents a local of valtype `f32`.
+/// It is illegal to store a non-f32 valtype in this list.
+free_locals_f32: std.ArrayListUnmanaged(u32) = .empty,
+/// A list of indexes which represents a local of valtype `f64`.
+/// It is illegal to store a non-f64 valtype in this list.
+free_locals_f64: std.ArrayListUnmanaged(u32) = .empty,
+/// A list of indexes which represents a local of valtype `v128`.
+/// It is illegal to store a non-v128 valtype in this list.
+free_locals_v128: std.ArrayListUnmanaged(u32) = .empty,
+
+/// When in debug mode, this tracks that no `finishAir` call was missed.
+/// Forgetting to call `finishAir` will cause the result to not be
+/// stored in our `values` map and therefore cause bugs.
+air_bookkeeping: @TypeOf(bookkeeping_init) = bookkeeping_init,
 
 /// Wasm Value, created when generating an instruction
 const WValue = union(enum) {
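
The field reshuffle above pairs with the commit's ownership change: `mir_instructions`, `mir_extra`, and `locals` are now pointers into linker-owned, long-lived storage, and each function records only offsets into them so the linker can re-lower MIR after garbage collection. A minimal sketch of that pattern (names invented, not the actual `Wasm` API):

```zig
const std = @import("std");

// Stand-in for a linker-owned, long-lived buffer; the real MIR is a
// MultiArrayList of tagged instructions rather than raw u32s.
const Linker = struct {
    mir: std.ArrayListUnmanaged(u32) = .empty,
};

const FuncSlice = struct { off: u32, len: u32 };

fn emitFunc(l: *Linker, gpa: std.mem.Allocator, insts: []const u32) !FuncSlice {
    const off: u32 = @intCast(l.mir.items.len);
    try l.mir.appendSlice(gpa, insts);
    // Only offset/length survive codegen; the linker can revisit
    // l.mir.items[off..][0..len] any number of times later.
    return .{ .off = off, .len = @intCast(l.mir.items.len - off) };
}
```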
@@ -601,104 +694,6 @@ test "Wasm - buildOpcode" {
 /// Hashmap to store generated `WValue` for each `Air.Inst.Ref`
 pub const ValueTable = std.AutoArrayHashMapUnmanaged(Air.Inst.Ref, WValue);
 
-const CodeGen = @This();
-
-/// Reference to the function declaration the code
-/// section belongs to
-owner_nav: InternPool.Nav.Index,
-src_loc: Zcu.LazySrcLoc,
-/// Current block depth. Used to calculate the relative difference between a break
-/// and block
-block_depth: u32 = 0,
-air: Air,
-liveness: Liveness,
-gpa: mem.Allocator,
-debug_output: link.File.DebugInfoOutput,
-func_index: InternPool.Index,
-/// Contains a list of current branches.
-/// When we return from a branch, the branch will be popped from this list,
-/// which means branches can only contain references from within its own branch,
-/// or a branch higher (lower index) in the tree.
-branches: std.ArrayListUnmanaged(Branch) = .empty,
-/// Table to save `WValue`'s generated by an `Air.Inst`
-// values: ValueTable,
-/// Mapping from Air.Inst.Index to block ids
-blocks: std.AutoArrayHashMapUnmanaged(Air.Inst.Index, struct {
-    label: u32,
-    value: WValue,
-}) = .{},
-/// Maps `loop` instructions to their label. `br` to here repeats the loop.
-loops: std.AutoHashMapUnmanaged(Air.Inst.Index, u32) = .empty,
-/// `bytes` contains the wasm bytecode belonging to the 'code' section.
-code: *std.ArrayListUnmanaged(u8),
-/// The index the next local generated will have
-/// NOTE: arguments share the index with locals therefore the first variable
-/// will have the index that comes after the last argument's index
-local_index: u32 = 0,
-/// The index of the current argument.
-/// Used to track which argument is being referenced in `airArg`.
-arg_index: u32 = 0,
-/// List of all locals' types generated throughout this declaration
-/// used to emit locals count at start of 'code' section.
-locals: std.ArrayListUnmanaged(u8),
-/// List of simd128 immediates. Each value is stored as an array of bytes.
-/// This list will only be populated for 128bit-simd values when the target features
-/// are enabled also.
-simd_immediates: std.ArrayListUnmanaged([16]u8) = .empty,
-/// The Target we're emitting (used to call intInfo)
-target: *const std.Target,
-/// Represents the wasm binary file that is being linked.
-bin_file: *link.File.Wasm,
-pt: Zcu.PerThread,
-/// List of MIR Instructions
-mir_instructions: std.MultiArrayList(Mir.Inst) = .{},
-/// Contains extra data for MIR
-mir_extra: std.ArrayListUnmanaged(u32) = .empty,
-/// When a function is executing, we store the the current stack pointer's value within this local.
-/// This value is then used to restore the stack pointer to the original value at the return of the function.
-initial_stack_value: WValue = .none,
-/// The current stack pointer subtracted with the stack size. From this value, we will calculate
-/// all offsets of the stack values.
-bottom_stack_value: WValue = .none,
-/// Arguments of this function declaration
-/// This will be set after `resolveCallingConventionValues`
-args: []WValue = &.{},
-/// This will only be `.none` if the function returns void, or returns an immediate.
-/// When it returns a pointer to the stack, the `.local` tag will be active and must be populated
-/// before this function returns its execution to the caller.
-return_value: WValue = .none,
-/// The size of the stack this function occupies. In the function prologue
-/// we will move the stack pointer by this number, forward aligned with the `stack_alignment`.
-stack_size: u32 = 0,
-/// The stack alignment, which is 16 bytes by default. This is specified by the
-/// tool-conventions: https://github.com/WebAssembly/tool-conventions/blob/main/BasicCABI.md
-/// and also what the llvm backend will emit.
-/// However, local variables or the usage of `incoming_stack_alignment` in a `CallingConvention` can overwrite this default.
-stack_alignment: Alignment = .@"16",
-
-// For each individual Wasm valtype we store a seperate free list which
-// allows us to re-use locals that are no longer used. e.g. a temporary local.
-/// A list of indexes which represents a local of valtype `i32`.
-/// It is illegal to store a non-i32 valtype in this list.
-free_locals_i32: std.ArrayListUnmanaged(u32) = .empty,
-/// A list of indexes which represents a local of valtype `i64`.
-/// It is illegal to store a non-i64 valtype in this list.
-free_locals_i64: std.ArrayListUnmanaged(u32) = .empty,
-/// A list of indexes which represents a local of valtype `f32`.
-/// It is illegal to store a non-f32 valtype in this list.
-free_locals_f32: std.ArrayListUnmanaged(u32) = .empty,
-/// A list of indexes which represents a local of valtype `f64`.
-/// It is illegal to store a non-f64 valtype in this list.
-free_locals_f64: std.ArrayListUnmanaged(u32) = .empty,
-/// A list of indexes which represents a local of valtype `v127`.
-/// It is illegal to store a non-v128 valtype in this list.
-free_locals_v128: std.ArrayListUnmanaged(u32) = .empty,
-
-/// When in debug mode, this tracks if no `finishAir` was missed.
-/// Forgetting to call `finishAir` will cause the result to not be
-/// stored in our `values` map and therefore cause bugs.
-air_bookkeeping: @TypeOf(bookkeeping_init) = bookkeeping_init,
-
 const bookkeeping_init = if (std.debug.runtime_safety) @as(usize, 0) else {};
 
 const InnerError = error{
@@ -719,8 +714,6 @@ pub fn deinit(func: *CodeGen) void {
     func.loops.deinit(func.gpa);
     func.locals.deinit(func.gpa);
     func.simd_immediates.deinit(func.gpa);
-    func.mir_instructions.deinit(func.gpa);
-    func.mir_extra.deinit(func.gpa);
     func.free_locals_i32.deinit(func.gpa);
     func.free_locals_i64.deinit(func.gpa);
     func.free_locals_f32.deinit(func.gpa);
@@ -729,9 +722,10 @@ pub fn deinit(func: *CodeGen) void {
     func.* = undefined;
 }
 
-fn fail(func: *CodeGen, comptime fmt: []const u8, args: anytype) error{ OutOfMemory, CodegenFail } {
-    const msg = try Zcu.ErrorMsg.create(func.gpa, func.src_loc, fmt, args);
-    return func.pt.zcu.codegenFailMsg(func.owner_nav, msg);
+fn fail(cg: *CodeGen, comptime fmt: []const u8, args: anytype) error{ OutOfMemory, CodegenFail } {
+    const zcu = cg.pt.zcu;
+    const func = zcu.funcInfo(cg.func_index);
+    return zcu.codegenFail(func.owner_nav, fmt, args);
 }
 
 /// Resolves the `WValue` for the given instruction `inst`
@@ -767,7 +761,7 @@ fn resolveInst(func: *CodeGen, ref: Air.Inst.Ref) InnerError!WValue {
     //
     // In the other cases, we will simply lower the constant to a value that fits
     // into a single local (such as a pointer, integer, bool, etc).
-    const result: WValue = if (isByRef(ty, pt, func.target.*))
+    const result: WValue = if (isByRef(ty, pt, func.target))
         .{ .memory = val.toIntern() }
     else
         try func.lowerConstant(val, ty);
@@ -885,8 +879,12 @@ fn addLabel(func: *CodeGen, tag: Mir.Inst.Tag, label: u32) error{OutOfMemory}!vo
     try func.addInst(.{ .tag = tag, .data = .{ .label = label } });
 }
 
-fn addCallTagName(func: *CodeGen, ip_index: InternPool.Index) error{OutOfMemory}!void {
-    try func.addInst(.{ .tag = .call_tag_name, .data = .{ .ip_index = ip_index } });
+fn addIpIndex(func: *CodeGen, tag: Mir.Inst.Tag, i: InternPool.Index) Allocator.Error!void {
+    try func.addInst(.{ .tag = tag, .data = .{ .ip_index = i } });
+}
+
+fn addNav(func: *CodeGen, tag: Mir.Inst.Tag, i: InternPool.Nav.Index) Allocator.Error!void {
+    try func.addInst(.{ .tag = tag, .data = .{ .nav_index = i } });
 }
 
 /// Accepts an unsigned 32bit integer rather than a signed integer to
@@ -900,7 +898,7 @@ fn addImm32(func: *CodeGen, imm: u32) error{OutOfMemory}!void {
 /// prevent us from having to bitcast multiple times as most values
 /// within codegen are represented as unsigned rather than signed.
 fn addImm64(func: *CodeGen, imm: u64) error{OutOfMemory}!void {
-    const extra_index = try func.addExtra(Mir.Imm64.fromU64(imm));
+    const extra_index = try func.addExtra(Mir.Imm64.init(imm));
     try func.addInst(.{ .tag = .i64_const, .data = .{ .payload = extra_index } });
 }
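
`Mir.Imm64.fromU64` becomes `Mir.Imm64.init` here; either way, a 64-bit immediate has to be split across the 32-bit `mir_extra` array. A sketch of that packing (the real `Mir.Imm64` layout may differ):

```zig
// Hypothetical packing of a u64 into two u32 extra words.
const Imm64 = struct {
    msb: u32,
    lsb: u32,

    fn init(v: u64) Imm64 {
        return .{ .msb = @truncate(v >> 32), .lsb = @truncate(v) };
    }

    fn toU64(i: Imm64) u64 {
        return (@as(u64, i.msb) << 32) | i.lsb;
    }
};
```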
 
@@ -916,7 +914,7 @@ fn addImm128(func: *CodeGen, index: u32) error{OutOfMemory}!void {
 }
 
 fn addFloat64(func: *CodeGen, float: f64) error{OutOfMemory}!void {
-    const extra_index = try func.addExtra(Mir.Float64.fromFloat64(float));
+    const extra_index = try func.addExtra(Mir.Float64.init(float));
     try func.addInst(.{ .tag = .f64_const, .data = .{ .payload = extra_index } });
 }
 
@@ -956,6 +954,8 @@ fn addExtraAssumeCapacity(func: *CodeGen, extra: anytype) error{OutOfMemory}!u32
     inline for (fields) |field| {
         func.mir_extra.appendAssumeCapacity(switch (field.type) {
             u32 => @field(extra, field.name),
+            i32 => @bitCast(@field(extra, field.name)),
+            InternPool.Index => @intFromEnum(@field(extra, field.name)),
             else => |field_type| @compileError("Unsupported field type " ++ @typeName(field_type)),
         });
     }
@@ -963,11 +963,11 @@ fn addExtraAssumeCapacity(func: *CodeGen, extra: anytype) error{OutOfMemory}!u32
 }
 
 /// Using a given `Type`, returns the corresponding valtype for .auto callconv
-fn typeToValtype(ty: Type, pt: Zcu.PerThread, target: std.Target) std.wasm.Valtype {
+fn typeToValtype(ty: Type, pt: Zcu.PerThread, target: *const std.Target) std.wasm.Valtype {
     const zcu = pt.zcu;
     const ip = &zcu.intern_pool;
     return switch (ty.zigTypeTag(zcu)) {
-        .float => switch (ty.floatBits(target)) {
+        .float => switch (ty.floatBits(target.*)) {
             16 => .i32, // stored/loaded as u16
             32 => .f32,
             64 => .f64,
@@ -1003,14 +1003,14 @@ fn typeToValtype(ty: Type, pt: Zcu.PerThread, target: std.Target) std.wasm.Valty
 }
 
 /// Using a given `Type`, returns the byte representation of its wasm value type
-fn genValtype(ty: Type, pt: Zcu.PerThread, target: std.Target) u8 {
+fn genValtype(ty: Type, pt: Zcu.PerThread, target: *const std.Target) u8 {
     return @intFromEnum(typeToValtype(ty, pt, target));
 }
 
 /// Using a given `Type`, returns the corresponding wasm value type
 /// Differently from `genValtype` this also allows `void` to create a block
 /// with no return type
-fn genBlockType(ty: Type, pt: Zcu.PerThread, target: std.Target) u8 {
+fn genBlockType(ty: Type, pt: Zcu.PerThread, target: *const std.Target) u8 {
     return switch (ty.ip_index) {
         .void_type, .noreturn_type => std.wasm.block_empty,
         else => genValtype(ty, pt, target),
@@ -1028,15 +1028,17 @@ fn emitWValue(func: *CodeGen, value: WValue) InnerError!void {
         .imm128 => |val| try func.addImm128(val),
         .float32 => |val| try func.addInst(.{ .tag = .f32_const, .data = .{ .float32 = val } }),
         .float64 => |val| try func.addFloat64(val),
-        .memory => |ptr| {
-            const extra_index = try func.addExtra(Mir.Memory{ .pointer = ptr, .offset = 0 });
-            try func.addInst(.{ .tag = .memory_address, .data = .{ .payload = extra_index } });
-        },
-        .memory_offset => |mem_off| {
-            const extra_index = try func.addExtra(Mir.Memory{ .pointer = mem_off.pointer, .offset = mem_off.offset });
-            try func.addInst(.{ .tag = .memory_address, .data = .{ .payload = extra_index } });
-        },
-        .function_index => |index| try func.addLabel(.function_index, index), // write function index and generate relocation
+        .memory => |ptr| try func.addInst(.{ .tag = .uav_ref, .data = .{ .ip_index = ptr } }),
+        .memory_offset => |mo| try func.addInst(.{
+            .tag = .uav_ref_off,
+            .data = .{
+                .payload = try func.addExtra(Mir.UavRefOff{
+                    .ip_index = mo.pointer,
+                    .offset = @intCast(mo.offset), // TODO should not be an assert
+                }),
+            },
+        }),
+        .function_index => |index| try func.addIpIndex(.function_index, index),
         .stack_offset => try func.addLabel(.local_get, func.bottom_stack_value.local.value), // caller must ensure to address the offset
     }
 }
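
The `.memory` and `.memory_offset` cases now carry InternPool indices (`uav_ref`, `uav_ref_off`) instead of eagerly materializing relocations; per the commit message, final symbol indexes are known at Emit time, so relocations only need to be recorded when producing an object file. A sketch of that policy (all names hypothetical):

```zig
const std = @import("std");

fn emitFuncRef(
    code: *std.ArrayListUnmanaged(u8),
    gpa: std.mem.Allocator,
    final_index: u32, // resolved at emit time in the new scheme
    is_object_file: bool, // relocations are only useful for objects
    relocs: *std.ArrayListUnmanaged(u32), // stand-in relocation list
) !void {
    if (is_object_file) try relocs.append(gpa, @intCast(code.items.len));
    try std.leb.writeUleb128(code.writer(gpa), final_index);
}
```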
@@ -1075,7 +1077,7 @@ fn getResolvedInst(func: *CodeGen, ref: Air.Inst.Ref) *WValue {
 /// Returns a corresponding `Wvalue` with `local` as active tag
 fn allocLocal(func: *CodeGen, ty: Type) InnerError!WValue {
     const pt = func.pt;
-    const valtype = typeToValtype(ty, pt, func.target.*);
+    const valtype = typeToValtype(ty, pt, func.target);
     const index_or_null = switch (valtype) {
         .i32 => func.free_locals_i32.popOrNull(),
         .i64 => func.free_locals_i64.popOrNull(),
@@ -1095,7 +1097,7 @@ fn allocLocal(func: *CodeGen, ty: Type) InnerError!WValue {
 /// to use a zero-initialized local.
 fn ensureAllocLocal(func: *CodeGen, ty: Type) InnerError!WValue {
     const pt = func.pt;
-    try func.locals.append(func.gpa, genValtype(ty, pt, func.target.*));
+    try func.locals.append(func.gpa, genValtype(ty, pt, func.target));
     const initial_index = func.local_index;
     func.local_index += 1;
     return .{ .local = .{ .value = initial_index, .references = 1 } };
@@ -1107,7 +1109,7 @@ fn genFunctype(
     params: []const InternPool.Index,
     return_type: Type,
     pt: Zcu.PerThread,
-    target: std.Target,
+    target: *const std.Target,
 ) !link.File.Wasm.FunctionType.Index {
     const zcu = pt.zcu;
     const gpa = zcu.gpa;
@@ -1162,150 +1164,206 @@ fn genFunctype(
     });
 }
 
-pub fn generate(
-    bin_file: *link.File,
+pub const Function = extern struct {
+    /// Index into `Wasm.mir_instructions`.
+    mir_off: u32,
+    /// This is used only as a safety bound when slicing and could be removed.
+    mir_len: u32,
+    /// Index into `Wasm.mir_extra`.
+    mir_extra_off: u32,
+    /// This is used only as a safety bound when slicing and could be removed.
+    mir_extra_len: u32,
+    locals_off: u32,
+    locals_len: u32,
+    prologue: Prologue,
+
+    pub const Prologue = extern struct {
+        flags: Flags,
+        sp_local: u32,
+        stack_size: u32,
+        bottom_stack_local: u32,
+
+        pub const Flags = packed struct(u32) {
+            stack_alignment: Alignment,
+            padding: u26 = 0,
+        };
+
+        pub const none: Prologue = .{
+            .sp_local = 0,
+            .flags = .{ .stack_alignment = .none },
+            .stack_size = 0,
+            .bottom_stack_local = 0,
+        };
+
+        pub fn isNone(p: *const Prologue) bool {
+            return p.flags.stack_alignment == .none;
+        }
+    };
+
+    pub fn lower(f: *Function, wasm: *const Wasm, code: *std.ArrayListUnmanaged(u8)) Allocator.Error!void {
+        const gpa = wasm.base.comp.gpa;
+
+        // Write the locals in the prologue of the function body.
+        const locals = wasm.all_zcu_locals[f.locals_off..][0..f.locals_len];
+        try code.ensureUnusedCapacity(gpa, 5 + locals.len * 6 + 38);
+
+        leb.writeUleb128(code.writer(gpa), @as(u32, @intCast(locals.len))) catch unreachable;
+        for (locals) |local| {
+            leb.writeUleb128(code.writer(gpa), @as(u32, 1)) catch unreachable;
+            code.appendAssumeCapacity(local);
+        }
+
+        // Stack management section of function prologue.
+        const stack_alignment = f.prologue.flags.stack_alignment;
+        if (stack_alignment.toByteUnits()) |align_bytes| {
+            const sp_global = try wasm.stackPointerGlobalIndex();
+            // load stack pointer
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.global_get));
+            leb.writeUleb128(code.writer(gpa), @intFromEnum(sp_global)) catch unreachable;
+            // store stack pointer so we can restore it when we return from the function
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.local_tee));
+            leb.writeUleb128(code.writer(gpa), f.prologue.sp_local) catch unreachable;
+            // get the total stack size
+            const aligned_stack: i32 = @intCast(stack_alignment.forward(f.prologue.stack_size));
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_const));
+            leb.writeIleb128(code.writer(gpa), aligned_stack) catch unreachable;
+            // subtract it from the current stack pointer
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_sub));
+            // Get negative stack alignment
+            const neg_stack_align = @as(i32, @intCast(align_bytes)) * -1;
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_const));
+            leb.writeIleb128(code.writer(gpa), neg_stack_align) catch unreachable;
+            // Bitwise-and the value to get the new stack pointer to ensure the
+            // pointers are aligned with the abi alignment.
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_and));
+            // The bottom will be used to calculate all stack pointer offsets.
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.local_tee));
+            leb.writeUleb128(code.writer(gpa), f.prologue.bottom_stack_local) catch unreachable;
+            // Store the current stack pointer value into the global stack pointer so other function calls will
+            // start from this value instead and not overwrite the current stack.
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.global_set));
+            leb.writeUleb128(code.writer(gpa), @intFromEnum(sp_global)) catch unreachable;
+        }
+
+        var emit: Emit = .{
+            .mir = .{
+                .instruction_tags = wasm.mir_instructions.items(.tag)[f.mir_off..][0..f.mir_len],
+                .instruction_datas = wasm.mir_instructions.items(.data)[f.mir_off..][0..f.mir_len],
+                .extra = wasm.mir_extra[f.mir_extra_off..][0..f.mir_extra_len],
+            },
+            .wasm = wasm,
+            .code = code,
+        };
+        try emit.lowerToCode();
+    }
+};
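
The prologue emitted by `lower` uses the standard round-down trick: subtracting the frame size and masking with the negated (power-of-two) alignment yields an aligned bottom-of-stack address. A small arithmetic check (not compiler code):

```zig
const std = @import("std");

test "prologue stack alignment arithmetic" {
    const sp: i32 = 1000;
    const stack_size: i32 = 30;
    const alignment: i32 = 16;
    // (sp - stack_size) & -alignment rounds down to a 16-byte boundary.
    const bottom = (sp - stack_size) & -alignment;
    try std.testing.expectEqual(@as(i32, 960), bottom);
}
```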
+
+pub const Error = error{
+    OutOfMemory,
+    /// Compiler was asked to operate on a number larger than supported.
+    Overflow,
+    /// Indicates the error is already stored in Zcu `failed_codegen`.
+    CodegenFail,
+};
+
+pub fn function(
+    wasm: *Wasm,
     pt: Zcu.PerThread,
-    src_loc: Zcu.LazySrcLoc,
     func_index: InternPool.Index,
     air: Air,
     liveness: Liveness,
-    code: *std.ArrayListUnmanaged(u8),
-    debug_output: link.File.DebugInfoOutput,
-) codegen.CodeGenError!void {
+) Error!Function {
     const zcu = pt.zcu;
     const gpa = zcu.gpa;
     const func = zcu.funcInfo(func_index);
     const file_scope = zcu.navFileScope(func.owner_nav);
     const target = &file_scope.mod.resolved_target.result;
+    const fn_ty = zcu.navValue(func.owner_nav).typeOf(zcu);
+    const fn_info = zcu.typeToFunc(fn_ty).?;
+    const ip = &zcu.intern_pool;
+    const fn_ty_index = try genFunctype(wasm, fn_info.cc, fn_info.param_types.get(ip), Type.fromInterned(fn_info.return_type), pt, target);
+    const returns = fn_ty_index.ptr(wasm).returns.slice(wasm);
+    const any_returns = returns.len != 0;
+
+    var cc_result = try resolveCallingConventionValues(pt, fn_ty, target);
+    defer cc_result.deinit(gpa);
+
     var code_gen: CodeGen = .{
         .gpa = gpa,
         .pt = pt,
         .air = air,
         .liveness = liveness,
-        .code = code,
         .owner_nav = func.owner_nav,
-        .src_loc = src_loc,
-        .locals = .{},
         .target = target,
-        .bin_file = bin_file.cast(.wasm).?,
-        .debug_output = debug_output,
+        .wasm = wasm,
         .func_index = func_index,
+        .args = cc_result.args,
+        .return_value = cc_result.return_value,
+        .local_index = cc_result.local_index,
+        .mir_instructions = &wasm.mir_instructions,
+        .mir_extra = &wasm.mir_extra,
+        .locals = &wasm.all_zcu_locals,
     };
     defer code_gen.deinit();
 
-    genFunc(&code_gen) catch |err| switch (err) {
+    return functionInner(&code_gen, any_returns) catch |err| switch (err) {
         error.CodegenFail => return error.CodegenFail,
         else => |e| return code_gen.fail("failed to generate function: {s}", .{@errorName(e)}),
     };
 }
 
-fn genFunc(func: *CodeGen) InnerError!void {
-    const wasm = func.bin_file;
-    const pt = func.pt;
+fn functionInner(cg: *CodeGen, any_returns: bool) InnerError!Function {
+    const wasm = cg.wasm;
+    const pt = cg.pt;
     const zcu = pt.zcu;
-    const ip = &zcu.intern_pool;
-    const fn_ty = zcu.navValue(func.owner_nav).typeOf(zcu);
-    const fn_info = zcu.typeToFunc(fn_ty).?;
-    const fn_ty_index = try genFunctype(wasm, fn_info.cc, fn_info.param_types.get(ip), Type.fromInterned(fn_info.return_type), pt, func.target.*);
 
-    var cc_result = try func.resolveCallingConventionValues(fn_ty);
-    defer cc_result.deinit(func.gpa);
+    const start_mir_off: u32 = @intCast(wasm.mir_instructions.len);
+    const start_mir_extra_off: u32 = @intCast(wasm.mir_extra.items.len);
+    const start_locals_off: u32 = @intCast(wasm.all_zcu_locals.items.len);
 
-    func.args = cc_result.args;
-    func.return_value = cc_result.return_value;
-
-    try func.addTag(.dbg_prologue_end);
-
-    try func.branches.append(func.gpa, .{});
+    try cg.branches.append(cg.gpa, .{});
     // clean up outer branch
     defer {
-        var outer_branch = func.branches.pop();
-        outer_branch.deinit(func.gpa);
-        assert(func.branches.items.len == 0); // missing branch merge
+        var outer_branch = cg.branches.pop();
+        outer_branch.deinit(cg.gpa);
+        assert(cg.branches.items.len == 0); // missing branch merge
     }
     // Generate MIR for function body
-    try func.genBody(func.air.getMainBody());
+    try cg.genBody(cg.air.getMainBody());
 
     // In case we have a return value, but the last instruction is a noreturn (such as a while loop)
     // we emit an unreachable instruction to tell the stack validator that part will never be reached.
-    const returns = fn_ty_index.ptr(wasm).returns.slice(wasm);
-    if (returns.len != 0 and func.air.instructions.len > 0) {
-        const inst: Air.Inst.Index = @enumFromInt(func.air.instructions.len - 1);
-        const last_inst_ty = func.typeOfIndex(inst);
+    if (any_returns and cg.air.instructions.len > 0) {
+        const inst: Air.Inst.Index = @enumFromInt(cg.air.instructions.len - 1);
+        const last_inst_ty = cg.typeOfIndex(inst);
         if (!last_inst_ty.hasRuntimeBitsIgnoreComptime(zcu) or last_inst_ty.isNoReturn(zcu)) {
-            try func.addTag(.@"unreachable");
+            try cg.addTag(.@"unreachable");
         }
     }
     // End of function body
-    try func.addTag(.end);
-
-    try func.addTag(.dbg_epilogue_begin);
-
-    // check if we have to initialize and allocate anything into the stack frame.
-    // If so, create enough stack space and insert the instructions at the front of the list.
-    if (func.initial_stack_value != .none) {
-        var prologue = std.ArrayList(Mir.Inst).init(func.gpa);
-        defer prologue.deinit();
-
-        const sp = @intFromEnum(wasm.zig_object.?.stack_pointer_sym);
-        // load stack pointer
-        try prologue.append(.{ .tag = .global_get, .data = .{ .label = sp } });
-        // store stack pointer so we can restore it when we return from the function
-        try prologue.append(.{ .tag = .local_tee, .data = .{ .label = func.initial_stack_value.local.value } });
-        // get the total stack size
-        const aligned_stack = func.stack_alignment.forward(func.stack_size);
-        try prologue.append(.{ .tag = .i32_const, .data = .{ .imm32 = @intCast(aligned_stack) } });
-        // subtract it from the current stack pointer
-        try prologue.append(.{ .tag = .i32_sub, .data = .{ .tag = {} } });
-        // Get negative stack alignment
-        try prologue.append(.{ .tag = .i32_const, .data = .{ .imm32 = @as(i32, @intCast(func.stack_alignment.toByteUnits().?)) * -1 } });
-        // Bitwise-and the value to get the new stack pointer to ensure the pointers are aligned with the abi alignment
-        try prologue.append(.{ .tag = .i32_and, .data = .{ .tag = {} } });
-        // store the current stack pointer as the bottom, which will be used to calculate all stack pointer offsets
-        try prologue.append(.{ .tag = .local_tee, .data = .{ .label = func.bottom_stack_value.local.value } });
-        // Store the current stack pointer value into the global stack pointer so other function calls will
-        // start from this value instead and not overwrite the current stack.
-        try prologue.append(.{ .tag = .global_set, .data = .{ .label = sp } });
-
-        // reserve space and insert all prologue instructions at the front of the instruction list
-        // We insert them in reserve order as there is no insertSlice in multiArrayList.
-        try func.mir_instructions.ensureUnusedCapacity(func.gpa, prologue.items.len);
-        for (prologue.items, 0..) |_, index| {
-            const inst = prologue.items[prologue.items.len - 1 - index];
-            func.mir_instructions.insertAssumeCapacity(0, inst);
-        }
-    }
-
-    var mir: Mir = .{
-        .instructions = func.mir_instructions.toOwnedSlice(),
-        .extra = try func.mir_extra.toOwnedSlice(func.gpa),
-    };
-    defer mir.deinit(func.gpa);
-
-    var emit: Emit = .{
-        .mir = mir,
-        .bin_file = wasm,
-        .code = func.code,
-        .locals = func.locals.items,
-        .owner_nav = func.owner_nav,
-        .dbg_output = func.debug_output,
-        .prev_di_line = 0,
-        .prev_di_column = 0,
-        .prev_di_offset = 0,
-    };
-
-    emit.emitMir() catch |err| switch (err) {
-        error.EmitFail => {
-            func.err_msg = emit.error_msg.?;
-            return error.CodegenFail;
+    try cg.addTag(.end);
+    try cg.addTag(.dbg_epilogue_begin);
+
+    return .{
+        .mir_off = start_mir_off,
+        .mir_len = @intCast(wasm.mir_instructions.len - start_mir_off),
+        .mir_extra_off = start_mir_extra_off,
+        .mir_extra_len = @intCast(wasm.mir_extra.items.len - start_mir_extra_off),
+        .locals_off = start_locals_off,
+        .locals_len = @intCast(wasm.all_zcu_locals.items.len - start_locals_off),
+        .prologue = if (cg.initial_stack_value == .none) .none else .{
+            .sp_local = cg.initial_stack_value.local.value,
+            .flags = .{ .stack_alignment = cg.stack_alignment },
+            .stack_size = cg.stack_size,
+            .bottom_stack_local = cg.bottom_stack_value.local.value,
         },
-        else => |e| return e,
     };
 }
 
 const CallWValues = struct {
     args: []WValue,
     return_value: WValue,
+    local_index: u32,
 
     fn deinit(values: *CallWValues, gpa: Allocator) void {
         gpa.free(values.args);
@@ -1313,28 +1371,34 @@ const CallWValues = struct {
     }
 };
 
-fn resolveCallingConventionValues(func: *CodeGen, fn_ty: Type) InnerError!CallWValues {
-    const pt = func.pt;
+fn resolveCallingConventionValues(
+    pt: Zcu.PerThread,
+    fn_ty: Type,
+    target: *const std.Target,
+) Allocator.Error!CallWValues {
     const zcu = pt.zcu;
+    const gpa = zcu.gpa;
     const ip = &zcu.intern_pool;
     const fn_info = zcu.typeToFunc(fn_ty).?;
     const cc = fn_info.cc;
+
     var result: CallWValues = .{
         .args = &.{},
         .return_value = .none,
+        .local_index = 0,
     };
     if (cc == .naked) return result;
 
-    var args = std.ArrayList(WValue).init(func.gpa);
+    var args = std.ArrayList(WValue).init(gpa);
     defer args.deinit();
 
     // Check if we store the result as a pointer to the stack rather than
     // by value
-    if (firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), pt, func.target.*)) {
+    if (firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), pt, target)) {
         // the sret arg will be passed as first argument, therefore we
         // set the `return_value` before allocating locals for regular args.
-        result.return_value = .{ .local = .{ .value = func.local_index, .references = 1 } };
-        func.local_index += 1;
+        result.return_value = .{ .local = .{ .value = result.local_index, .references = 1 } };
+        result.local_index += 1;
     }
 
     switch (cc) {
@@ -1344,8 +1408,8 @@ fn resolveCallingConventionValues(func: *CodeGen, fn_ty: Type) InnerError!CallWV
                     continue;
                 }
 
-                try args.append(.{ .local = .{ .value = func.local_index, .references = 1 } });
-                func.local_index += 1;
+                try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } });
+                result.local_index += 1;
             }
         },
         .wasm_watc => {
@@ -1353,18 +1417,23 @@ fn resolveCallingConventionValues(func: *CodeGen, fn_ty: Type) InnerError!CallWV
                 const ty_classes = abi.classifyType(Type.fromInterned(ty), zcu);
                 for (ty_classes) |class| {
                     if (class == .none) continue;
-                    try args.append(.{ .local = .{ .value = func.local_index, .references = 1 } });
-                    func.local_index += 1;
+                    try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } });
+                    result.local_index += 1;
                 }
             }
         },
-        else => return func.fail("calling convention '{s}' not supported for Wasm", .{@tagName(cc)}),
+        else => unreachable, // Frontend is responsible for emitting an error earlier.
     }
     result.args = try args.toOwnedSlice();
     return result;
 }
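
`resolveCallingConventionValues` reserves local 0 for the sret pointer whenever `firstParamSRet` (changed just below) says the return value is passed by reference. Conceptually, the lowering looks like this (illustration only, not compiler code):

```zig
const Big = struct { a: u64, b: u64, c: u64 };

// When firstParamSRet() is true, `fn make() Big` behaves as if it were
// declared with a hidden result pointer as its first parameter:
fn makeLowered(ret: *Big) void {
    ret.* = .{ .a = 1, .b = 2, .c = 3 };
}
```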
 
-fn firstParamSRet(cc: std.builtin.CallingConvention, return_type: Type, pt: Zcu.PerThread, target: std.Target) bool {
+fn firstParamSRet(
+    cc: std.builtin.CallingConvention,
+    return_type: Type,
+    pt: Zcu.PerThread,
+    target: *const std.Target,
+) bool {
     switch (cc) {
         .@"inline" => unreachable,
         .auto => return isByRef(return_type, pt, target),
@@ -1466,8 +1535,7 @@ fn restoreStackPointer(func: *CodeGen) !void {
     // Get the original stack pointer's value
     try func.emitWValue(func.initial_stack_value);
 
-    // save its value in the global stack pointer
-    try func.addLabel(.global_set, @intFromEnum(func.bin_file.zig_object.?.stack_pointer_sym));
+    try func.addTag(.global_set_sp);
 }
 
 /// From a given type, will create space on the virtual stack to store the value of such type.
@@ -1675,7 +1743,7 @@ fn arch(func: *const CodeGen) std.Target.Cpu.Arch {
 
 /// For a given `Type`, will return true when the type will be passed
 /// by reference, rather than by value
-fn isByRef(ty: Type, pt: Zcu.PerThread, target: std.Target) bool {
+fn isByRef(ty: Type, pt: Zcu.PerThread, target: *const std.Target) bool {
     const zcu = pt.zcu;
     const ip = &zcu.intern_pool;
     switch (ty.zigTypeTag(zcu)) {
@@ -1716,7 +1784,7 @@ fn isByRef(ty: Type, pt: Zcu.PerThread, target: std.Target) bool {
         .vector => return determineSimdStoreStrategy(ty, zcu, target) == .unrolled,
         .int => return ty.intInfo(zcu).bits > 64,
         .@"enum" => return ty.intInfo(zcu).bits > 64,
-        .float => return ty.floatBits(target) > 64,
+        .float => return ty.floatBits(target.*) > 64,
         .error_union => {
             const pl_ty = ty.errorUnionPayload(zcu);
             if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
@@ -1747,7 +1815,7 @@ const SimdStoreStrategy = enum {
 /// This means when a given type is 128 bits and either the simd128 or relaxed-simd
 /// features are enabled, the function will return `.direct`. This allows storing
 /// it using a single instruction, rather than an unrolled version.
-fn determineSimdStoreStrategy(ty: Type, zcu: *Zcu, target: std.Target) SimdStoreStrategy {
+fn determineSimdStoreStrategy(ty: Type, zcu: *Zcu, target: *const std.Target) SimdStoreStrategy {
     assert(ty.zigTypeTag(zcu) == .vector);
     if (ty.bitSize(zcu) != 128) return .unrolled;
     const hasFeature = std.Target.wasm.featureSetHas;
@@ -2076,7 +2144,7 @@ fn airRet(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
                     .op = .load,
                     .width = @as(u8, @intCast(scalar_type.abiSize(zcu) * 8)),
                     .signedness = if (scalar_type.isSignedInt(zcu)) .signed else .unsigned,
-                    .valtype1 = typeToValtype(scalar_type, pt, func.target.*),
+                    .valtype1 = typeToValtype(scalar_type, pt, func.target),
                 });
                 try func.addMemArg(Mir.Inst.Tag.fromOpcode(opcode), .{
                     .offset = operand.offset(),
@@ -2109,7 +2177,7 @@ fn airRetPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
         }
 
         const fn_info = zcu.typeToFunc(zcu.navValue(func.owner_nav).typeOf(zcu)).?;
-        if (firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), pt, func.target.*)) {
+        if (firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), pt, func.target)) {
             break :result func.return_value;
         }
 
@@ -2131,7 +2199,7 @@ fn airRetLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
         if (ret_ty.isError(zcu)) {
             try func.addImm32(0);
         }
-    } else if (!firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), pt, func.target.*)) {
+    } else if (!firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), pt, func.target)) {
         // leave on the stack
         _ = try func.load(operand, ret_ty, 0);
     }
@@ -2142,7 +2210,7 @@ fn airRetLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 }
 
 fn airCall(func: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) InnerError!void {
-    const wasm = func.bin_file;
+    const wasm = func.wasm;
     if (modifier == .always_tail) return func.fail("TODO implement tail calls for wasm", .{});
     const pl_op = func.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
     const extra = func.air.extraData(Air.Call, pl_op.payload);
@@ -2159,7 +2227,7 @@ fn airCall(func: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModif
     };
     const ret_ty = fn_ty.fnReturnType(zcu);
     const fn_info = zcu.typeToFunc(fn_ty).?;
-    const first_param_sret = firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), pt, func.target.*);
+    const first_param_sret = firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), pt, func.target);
 
     const callee: ?InternPool.Nav.Index = blk: {
         const func_val = (try func.air.value(pl_op.operand, pt)) orelse break :blk null;
@@ -2199,7 +2267,7 @@ fn airCall(func: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModif
         const operand = try func.resolveInst(pl_op.operand);
         try func.emitWValue(operand);
 
-        const fn_type_index = try genFunctype(wasm, fn_info.cc, fn_info.param_types.get(ip), Type.fromInterned(fn_info.return_type), pt, func.target.*);
+        const fn_type_index = try genFunctype(wasm, fn_info.cc, fn_info.param_types.get(ip), Type.fromInterned(fn_info.return_type), pt, func.target);
         try func.addLabel(.call_indirect, @intFromEnum(fn_type_index));
     }
 
@@ -2260,7 +2328,7 @@ fn airStore(func: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void
         // load the value, and then shift+or the rhs into the result location.
         const int_elem_ty = try pt.intType(.unsigned, ptr_info.packed_offset.host_size * 8);
 
-        if (isByRef(int_elem_ty, pt, func.target.*)) {
+        if (isByRef(int_elem_ty, pt, func.target)) {
             return func.fail("TODO: airStore for pointers to bitfields with backing type larger than 64bits", .{});
         }
 
@@ -2326,11 +2394,11 @@ fn store(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerE
             const len = @as(u32, @intCast(abi_size));
             return func.memcpy(lhs, rhs, .{ .imm32 = len });
         },
-        .@"struct", .array, .@"union" => if (isByRef(ty, pt, func.target.*)) {
+        .@"struct", .array, .@"union" => if (isByRef(ty, pt, func.target)) {
             const len = @as(u32, @intCast(abi_size));
             return func.memcpy(lhs, rhs, .{ .imm32 = len });
         },
-        .vector => switch (determineSimdStoreStrategy(ty, zcu, func.target.*)) {
+        .vector => switch (determineSimdStoreStrategy(ty, zcu, func.target)) {
             .unrolled => {
                 const len: u32 = @intCast(abi_size);
                 return func.memcpy(lhs, rhs, .{ .imm32 = len });
@@ -2388,7 +2456,7 @@ fn store(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerE
     // into lhs, so we calculate that and emit that instead
     try func.lowerToStack(rhs);
 
-    const valtype = typeToValtype(ty, pt, func.target.*);
+    const valtype = typeToValtype(ty, pt, func.target);
     const opcode = buildOpcode(.{
         .valtype1 = valtype,
         .width = @as(u8, @intCast(abi_size * 8)),
@@ -2417,7 +2485,7 @@ fn airLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     if (!ty.hasRuntimeBitsIgnoreComptime(zcu)) return func.finishAir(inst, .none, &.{ty_op.operand});
 
     const result = result: {
-        if (isByRef(ty, pt, func.target.*)) {
+        if (isByRef(ty, pt, func.target)) {
             const new_local = try func.allocStack(ty);
             try func.store(new_local, operand, ty, 0);
             break :result new_local;
@@ -2467,7 +2535,7 @@ fn load(func: *CodeGen, operand: WValue, ty: Type, offset: u32) InnerError!WValu
 
     const abi_size: u8 = @intCast(ty.abiSize(zcu));
     const opcode = buildOpcode(.{
-        .valtype1 = typeToValtype(ty, pt, func.target.*),
+        .valtype1 = typeToValtype(ty, pt, func.target),
         .width = abi_size * 8,
         .op = .load,
         .signedness = if (ty.isSignedInt(zcu)) .signed else .unsigned,
@@ -2517,19 +2585,6 @@ fn airArg(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
         func.arg_index += 1;
     }
 
-    switch (func.debug_output) {
-        .dwarf => |dwarf| {
-            const name = func.air.instructions.items(.data)[@intFromEnum(inst)].arg.name;
-            if (name != .none) try dwarf.genLocalDebugInfo(
-                .local_arg,
-                name.toSlice(func.air),
-                arg_ty,
-                .{ .wasm_ext = .{ .local = arg.local.value } },
-            );
-        },
-        else => {},
-    }
-
     return func.finishAir(inst, arg, &.{});
 }
 
@@ -2577,7 +2632,7 @@ fn binOp(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!
         return func.floatOp(float_op, ty, &.{ lhs, rhs });
     }
 
-    if (isByRef(ty, pt, func.target.*)) {
+    if (isByRef(ty, pt, func.target)) {
         if (ty.zigTypeTag(zcu) == .int) {
             return func.binOpBigInt(lhs, rhs, ty, op);
         } else {
@@ -2590,7 +2645,7 @@ fn binOp(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!
 
     const opcode: std.wasm.Opcode = buildOpcode(.{
         .op = op,
-        .valtype1 = typeToValtype(ty, pt, func.target.*),
+        .valtype1 = typeToValtype(ty, pt, func.target),
         .signedness = if (ty.isSignedInt(zcu)) .signed else .unsigned,
     });
     try func.emitWValue(lhs);
@@ -2854,7 +2909,7 @@ fn floatOp(func: *CodeGen, float_op: FloatOp, ty: Type, args: []const WValue) In
             for (args) |operand| {
                 try func.emitWValue(operand);
             }
-            const opcode = buildOpcode(.{ .op = op, .valtype1 = typeToValtype(ty, pt, func.target.*) });
+            const opcode = buildOpcode(.{ .op = op, .valtype1 = typeToValtype(ty, pt, func.target) });
             try func.addTag(Mir.Inst.Tag.fromOpcode(opcode));
             return .stack;
         }
@@ -3141,8 +3196,8 @@ fn lowerNavRef(func: *CodeGen, nav_index: InternPool.Nav.Index, offset: u32) Inn
         return .{ .imm32 = 0xaaaaaaaa };
     }
 
-    const atom_index = try func.bin_file.getOrCreateAtomForNav(pt, nav_index);
-    const atom = func.bin_file.getAtom(atom_index);
+    const atom_index = try func.wasm.getOrCreateAtomForNav(pt, nav_index);
+    const atom = func.wasm.getAtom(atom_index);
 
     const target_sym_index = @intFromEnum(atom.sym_index);
     if (ip.isFunctionType(nav_ty)) {
@@ -3156,7 +3211,7 @@ fn lowerNavRef(func: *CodeGen, nav_index: InternPool.Nav.Index, offset: u32) Inn
 fn lowerConstant(func: *CodeGen, val: Value, ty: Type) InnerError!WValue {
     const pt = func.pt;
     const zcu = pt.zcu;
-    assert(!isByRef(ty, pt, func.target.*));
+    assert(!isByRef(ty, pt, func.target));
     const ip = &zcu.intern_pool;
     if (val.isUndefDeep(zcu)) return func.emitUndefined(ty);
 
@@ -3267,7 +3322,7 @@ fn lowerConstant(func: *CodeGen, val: Value, ty: Type) InnerError!WValue {
         .aggregate => switch (ip.indexToKey(ty.ip_index)) {
             .array_type => return func.fail("Wasm TODO: LowerConstant for {}", .{ty.fmt(pt)}),
             .vector_type => {
-                assert(determineSimdStoreStrategy(ty, zcu, func.target.*) == .direct);
+                assert(determineSimdStoreStrategy(ty, zcu, func.target) == .direct);
                 var buf: [16]u8 = undefined;
                 val.writeToMemory(pt, &buf) catch unreachable;
                 return func.storeSimdImmd(buf);
@@ -3398,11 +3453,11 @@ fn airBlock(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 
 fn lowerBlock(func: *CodeGen, inst: Air.Inst.Index, block_ty: Type, body: []const Air.Inst.Index) InnerError!void {
     const pt = func.pt;
-    const wasm_block_ty = genBlockType(block_ty, pt, func.target.*);
+    const wasm_block_ty = genBlockType(block_ty, pt, func.target);
 
     // if wasm_block_ty is non-empty, we create a register to store the temporary value
     const block_result: WValue = if (wasm_block_ty != std.wasm.block_empty) blk: {
-        const ty: Type = if (isByRef(block_ty, pt, func.target.*)) Type.u32 else block_ty;
+        const ty: Type = if (isByRef(block_ty, pt, func.target)) Type.u32 else block_ty;
         break :blk try func.ensureAllocLocal(ty); // make sure it's a clean local as it may never get overwritten
     } else .none;
 
@@ -3527,7 +3582,7 @@ fn cmp(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: std.math.CompareO
         }
     } else if (ty.isAnyFloat()) {
         return func.cmpFloat(ty, lhs, rhs, op);
-    } else if (isByRef(ty, pt, func.target.*)) {
+    } else if (isByRef(ty, pt, func.target)) {
         return func.cmpBigInt(lhs, rhs, ty, op);
     }
 
@@ -3545,7 +3600,7 @@ fn cmp(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: std.math.CompareO
     try func.lowerToStack(rhs);
 
     const opcode: std.wasm.Opcode = buildOpcode(.{
-        .valtype1 = typeToValtype(ty, pt, func.target.*),
+        .valtype1 = typeToValtype(ty, pt, func.target),
         .op = switch (op) {
             .lt => .lt,
             .lte => .le,
@@ -3612,7 +3667,7 @@ fn airCmpVector(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 fn airCmpLtErrorsLen(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     const un_op = func.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
     const operand = try func.resolveInst(un_op);
-    const sym_index = try func.bin_file.getGlobalSymbol("__zig_errors_len", null);
+    const sym_index = try func.wasm.getGlobalSymbol("__zig_errors_len", null);
     const errors_len: WValue = .{ .memory = @intFromEnum(sym_index) };
 
     try func.emitWValue(operand);
@@ -3758,7 +3813,7 @@ fn airBitcast(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
             break :result try func.bitcast(wanted_ty, given_ty, operand);
         }
 
-        if (isByRef(given_ty, pt, func.target.*) and !isByRef(wanted_ty, pt, func.target.*)) {
+        if (isByRef(given_ty, pt, func.target) and !isByRef(wanted_ty, pt, func.target)) {
             const loaded_memory = try func.load(operand, wanted_ty, 0);
             if (needs_wrapping) {
                 break :result try func.wrapOperand(loaded_memory, wanted_ty);
@@ -3766,7 +3821,7 @@ fn airBitcast(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
                 break :result loaded_memory;
             }
         }
-        if (!isByRef(given_ty, pt, func.target.*) and isByRef(wanted_ty, pt, func.target.*)) {
+        if (!isByRef(given_ty, pt, func.target) and isByRef(wanted_ty, pt, func.target)) {
             const stack_memory = try func.allocStack(wanted_ty);
             try func.store(stack_memory, operand, given_ty, 0);
             if (needs_wrapping) {
@@ -3796,8 +3851,8 @@ fn bitcast(func: *CodeGen, wanted_ty: Type, given_ty: Type, operand: WValue) Inn
 
     const opcode = buildOpcode(.{
         .op = .reinterpret,
-        .valtype1 = typeToValtype(wanted_ty, pt, func.target.*),
-        .valtype2 = typeToValtype(given_ty, pt, func.target.*),
+        .valtype1 = typeToValtype(wanted_ty, pt, func.target),
+        .valtype2 = typeToValtype(given_ty, pt, func.target),
     });
     try func.emitWValue(operand);
     try func.addTag(Mir.Inst.Tag.fromOpcode(opcode));
@@ -3919,8 +3974,8 @@ fn airStructFieldVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
                 break :result try func.trunc(shifted_value, field_ty, backing_ty);
             },
             .@"union" => result: {
-                if (isByRef(struct_ty, pt, func.target.*)) {
-                    if (!isByRef(field_ty, pt, func.target.*)) {
+                if (isByRef(struct_ty, pt, func.target)) {
+                    if (!isByRef(field_ty, pt, func.target)) {
                         break :result try func.load(operand, field_ty, 0);
                     } else {
                         const new_stack_val = try func.allocStack(field_ty);
@@ -3946,7 +4001,7 @@ fn airStructFieldVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
             const offset = std.math.cast(u32, struct_ty.structFieldOffset(field_index, zcu)) orelse {
                 return func.fail("Field type '{}' too big to fit into stack frame", .{field_ty.fmt(pt)});
             };
-            if (isByRef(field_ty, pt, func.target.*)) {
+            if (isByRef(field_ty, pt, func.target)) {
                 switch (operand) {
                     .stack_offset => |stack_offset| {
                         break :result .{ .stack_offset = .{ .value = stack_offset.value + offset, .references = 1 } };
@@ -4209,7 +4264,7 @@ fn airUnwrapErrUnionPayload(func: *CodeGen, inst: Air.Inst.Index, op_is_ptr: boo
         }
 
         const pl_offset = @as(u32, @intCast(errUnionPayloadOffset(payload_ty, zcu)));
-        if (op_is_ptr or isByRef(payload_ty, pt, func.target.*)) {
+        if (op_is_ptr or isByRef(payload_ty, pt, func.target)) {
             break :result try func.buildPointerOffset(operand, pl_offset, .new);
         }
 
@@ -4436,7 +4491,7 @@ fn airOptionalPayload(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
         const operand = try func.resolveInst(ty_op.operand);
         if (opt_ty.optionalReprIsPayload(zcu)) break :result func.reuseOperand(ty_op.operand, operand);
 
-        if (isByRef(payload_ty, pt, func.target.*)) {
+        if (isByRef(payload_ty, pt, func.target)) {
             break :result try func.buildPointerOffset(operand, 0, .new);
         }
 
@@ -4570,7 +4625,7 @@ fn airSliceElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     try func.addTag(.i32_mul);
     try func.addTag(.i32_add);
 
-    const elem_result = if (isByRef(elem_ty, pt, func.target.*))
+    const elem_result = if (isByRef(elem_ty, pt, func.target))
         .stack
     else
         try func.load(.stack, elem_ty, 0);
@@ -4729,7 +4784,7 @@ fn airPtrElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     try func.addTag(.i32_mul);
     try func.addTag(.i32_add);
 
-    const elem_result = if (isByRef(elem_ty, pt, func.target.*))
+    const elem_result = if (isByRef(elem_ty, pt, func.target))
         .stack
     else
         try func.load(.stack, elem_ty, 0);
@@ -4780,7 +4835,7 @@ fn airPtrBinOp(func: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void {
         else => ptr_ty.childType(zcu),
     };
 
-    const valtype = typeToValtype(Type.usize, pt, func.target.*);
+    const valtype = typeToValtype(Type.usize, pt, func.target);
     const mul_opcode = buildOpcode(.{ .valtype1 = valtype, .op = .mul });
     const bin_opcode = buildOpcode(.{ .valtype1 = valtype, .op = op });
 
@@ -4927,7 +4982,7 @@ fn airArrayElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     const elem_ty = array_ty.childType(zcu);
     const elem_size = elem_ty.abiSize(zcu);
 
-    if (isByRef(array_ty, pt, func.target.*)) {
+    if (isByRef(array_ty, pt, func.target)) {
         try func.lowerToStack(array);
         try func.emitWValue(index);
         try func.addImm32(@intCast(elem_size));
@@ -4970,7 +5025,7 @@ fn airArrayElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
         }
     }
 
-    const elem_result = if (isByRef(elem_ty, pt, func.target.*))
+    const elem_result = if (isByRef(elem_ty, pt, func.target))
         .stack
     else
         try func.load(.stack, elem_ty, 0);
@@ -5014,8 +5069,8 @@ fn airIntFromFloat(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     try func.emitWValue(operand);
     const op = buildOpcode(.{
         .op = .trunc,
-        .valtype1 = typeToValtype(dest_ty, pt, func.target.*),
-        .valtype2 = typeToValtype(op_ty, pt, func.target.*),
+        .valtype1 = typeToValtype(dest_ty, pt, func.target),
+        .valtype2 = typeToValtype(op_ty, pt, func.target),
         .signedness = dest_info.signedness,
     });
     try func.addTag(Mir.Inst.Tag.fromOpcode(op));
@@ -5059,8 +5114,8 @@ fn airFloatFromInt(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     try func.emitWValue(operand);
     const op = buildOpcode(.{
         .op = .convert,
-        .valtype1 = typeToValtype(dest_ty, pt, func.target.*),
-        .valtype2 = typeToValtype(op_ty, pt, func.target.*),
+        .valtype1 = typeToValtype(dest_ty, pt, func.target),
+        .valtype2 = typeToValtype(op_ty, pt, func.target),
         .signedness = op_info.signedness,
     });
     try func.addTag(Mir.Inst.Tag.fromOpcode(op));
@@ -5076,7 +5131,7 @@ fn airSplat(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     const ty = func.typeOfIndex(inst);
     const elem_ty = ty.childType(zcu);
 
-    if (determineSimdStoreStrategy(ty, zcu, func.target.*) == .direct) blk: {
+    if (determineSimdStoreStrategy(ty, zcu, func.target) == .direct) blk: {
         switch (operand) {
             // when the operand lives in the linear memory section, we can directly
             // load and splat the value at once. Meaning we do not first have to load
@@ -5160,7 +5215,7 @@ fn airShuffle(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     const elem_size = child_ty.abiSize(zcu);
 
     // TODO: One of them could be by ref; handle in loop
-    if (isByRef(func.typeOf(extra.a), pt, func.target.*) or isByRef(inst_ty, pt, func.target.*)) {
+    if (isByRef(func.typeOf(extra.a), pt, func.target) or isByRef(inst_ty, pt, func.target)) {
         const result = try func.allocStack(inst_ty);
 
         for (0..mask_len) |index| {
@@ -5236,7 +5291,7 @@ fn airAggregateInit(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
                 // When the element type is by reference, we must copy the entire
                 // value. It is therefore safer to move the offset pointer and store
                 // each value individually, instead of using store offsets.
-                if (isByRef(elem_ty, pt, func.target.*)) {
+                if (isByRef(elem_ty, pt, func.target)) {
                     // copy stack pointer into a temporary local, which is
                     // moved for each element to store each value in the right position.
                     const offset = try func.buildPointerOffset(result, 0, .new);
@@ -5266,7 +5321,7 @@ fn airAggregateInit(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
             },
             .@"struct" => switch (result_ty.containerLayout(zcu)) {
                 .@"packed" => {
-                    if (isByRef(result_ty, pt, func.target.*)) {
+                    if (isByRef(result_ty, pt, func.target)) {
                         return func.fail("TODO: airAggregateInit for packed structs larger than 64 bits", .{});
                     }
                     const packed_struct = zcu.typeToPackedStruct(result_ty).?;
@@ -5369,15 +5424,15 @@ fn airUnionInit(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
             if (layout.tag_size == 0) {
                 break :result .none;
             }
-            assert(!isByRef(union_ty, pt, func.target.*));
+            assert(!isByRef(union_ty, pt, func.target));
             break :result tag_int;
         }
 
-        if (isByRef(union_ty, pt, func.target.*)) {
+        if (isByRef(union_ty, pt, func.target)) {
             const result_ptr = try func.allocStack(union_ty);
             const payload = try func.resolveInst(extra.init);
             if (layout.tag_align.compare(.gte, layout.payload_align)) {
-                if (isByRef(field_ty, pt, func.target.*)) {
+                if (isByRef(field_ty, pt, func.target)) {
                     const payload_ptr = try func.buildPointerOffset(result_ptr, layout.tag_size, .new);
                     try func.store(payload_ptr, payload, field_ty, 0);
                 } else {
@@ -5458,7 +5513,7 @@ fn cmpOptionals(func: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op:
 
     _ = try func.load(lhs, payload_ty, 0);
     _ = try func.load(rhs, payload_ty, 0);
-    const opcode = buildOpcode(.{ .op = .ne, .valtype1 = typeToValtype(payload_ty, pt, func.target.*) });
+    const opcode = buildOpcode(.{ .op = .ne, .valtype1 = typeToValtype(payload_ty, pt, func.target) });
     try func.addTag(Mir.Inst.Tag.fromOpcode(opcode));
     try func.addLabel(.br_if, 0);
 
@@ -5910,7 +5965,7 @@ fn airErrorName(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     // As the names are global and the slice elements are constant, we do not have
     // to make a copy of the ptr+value but can point towards them directly.
     const pt = func.pt;
-    const error_table_symbol = try func.bin_file.getErrorTableSymbol(pt);
+    const error_table_symbol = try func.wasm.getErrorTableSymbol(pt);
     const name_ty = Type.slice_const_u8_sentinel_0;
     const abi_size = name_ty.abiSize(pt.zcu);
 
@@ -5943,7 +5998,7 @@ fn airPtrSliceFieldPtr(func: *CodeGen, inst: Air.Inst.Index, offset: u32) InnerE
 
 /// NOTE: Allocates space for the result on the virtual stack when the integer size > 64 bits
 fn intZeroValue(func: *CodeGen, ty: Type) InnerError!WValue {
-    const zcu = func.bin_file.base.comp.zcu.?;
+    const zcu = func.wasm.base.comp.zcu.?;
     const int_info = ty.intInfo(zcu);
     const wasm_bits = toWasmBits(int_info.bits) orelse {
         return func.fail("TODO: Implement intZeroValue for integer bitsize: {d}", .{int_info.bits});
@@ -6379,8 +6434,6 @@ fn airCtz(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 }
 
 fn airDbgStmt(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
-    if (func.debug_output != .dwarf) return func.finishAir(inst, .none, &.{});
-
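+    // Line info is now recorded unconditionally in MIR.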
     const dbg_stmt = func.air.instructions.items(.data)[@intFromEnum(inst)].dbg_stmt;
     try func.addInst(.{ .tag = .dbg_line, .data = .{
         .payload = try func.addExtra(Mir.DbgLineColumn{
@@ -6405,26 +6458,7 @@ fn airDbgVar(
     is_ptr: bool,
 ) InnerError!void {
     _ = is_ptr;
-    if (func.debug_output != .dwarf) return func.finishAir(inst, .none, &.{});
-
-    const pl_op = func.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
-    const ty = func.typeOf(pl_op.operand);
-    const operand = try func.resolveInst(pl_op.operand);
-
-    log.debug("airDbgVar: %{d}: {}, {}", .{ inst, ty.fmtDebug(), operand });
-
-    const name: Air.NullTerminatedString = @enumFromInt(pl_op.payload);
-    log.debug(" var name = ({s})", .{name.toSlice(func.air)});
-
-    const loc: link.File.Dwarf.Loc = switch (operand) {
-        .local => |local| .{ .wasm_ext = .{ .local = local.value } },
-        else => blk: {
-            log.debug("TODO generate debug info for {}", .{operand});
-            break :blk .empty;
-        },
-    };
-    try func.debug_output.dwarf.genLocalDebugInfo(local_tag, name.toSlice(func.air), ty, loc);
-
+    _ = local_tag;
     return func.finishAir(inst, .none, &.{});
 }
 
@@ -6500,7 +6534,7 @@ fn lowerTry(
     }
 
     const pl_offset: u32 = @intCast(errUnionPayloadOffset(pl_ty, zcu));
-    if (isByRef(pl_ty, pt, func.target.*)) {
+    if (isByRef(pl_ty, pt, func.target)) {
         return buildPointerOffset(func, err_union, pl_offset, .new);
     }
     const payload = try func.load(err_union, pl_ty, pl_offset);
@@ -7074,15 +7108,15 @@ fn callIntrinsic(
     args: []const WValue,
 ) InnerError!WValue {
     assert(param_types.len == args.len);
-    const wasm = func.bin_file;
+    const wasm = func.wasm;
     const pt = func.pt;
     const zcu = pt.zcu;
-    const func_type_index = try genFunctype(wasm, .{ .wasm_watc = .{} }, param_types, return_type, pt, func.target.*);
+    const func_type_index = try genFunctype(wasm, .{ .wasm_watc = .{} }, param_types, return_type, pt, func.target);
     const func_index = wasm.getOutputFunction(try wasm.internString(name), func_type_index);
 
     // Intrinsic calls always use the C ABI.
 
-    const want_sret_param = firstParamSRet(.{ .wasm_watc = .{} }, return_type, pt, func.target.*);
+    const want_sret_param = firstParamSRet(.{ .wasm_watc = .{} }, return_type, pt, func.target);
     // if we want return as first param, we allocate a pointer to stack,
     // and emit it as our first argument
     const sret = if (want_sret_param) blk: {
@@ -7121,7 +7155,7 @@ fn airTagName(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     const result_ptr = try func.allocStack(func.typeOfIndex(inst));
     try func.lowerToStack(result_ptr);
     try func.emitWValue(operand);
-    try func.addCallTagName(enum_ty.toIntern());
+    try func.addIpIndex(.call_tag_name, enum_ty.toIntern());
 
     return func.finishAir(inst, result_ptr, &.{un_op});
 }
@@ -7265,7 +7299,7 @@ fn airCmpxchg(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
         break :val ptr_val;
     };
 
-    const result = if (isByRef(result_ty, pt, func.target.*)) val: {
+    const result = if (isByRef(result_ty, pt, func.target)) val: {
         try func.emitWValue(cmp_result);
         try func.addImm32(~@as(u32, 0));
         try func.addTag(.i32_xor);
src/arch/wasm/Emit.zig
@@ -1,9 +1,9 @@
-//! Contains all logic to lower wasm MIR into its binary
-//! or textual representation.
-
 const Emit = @This();
+
 const std = @import("std");
-const leb128 = std.leb;
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const leb = std.leb;
 
 const Mir = @import("Mir.zig");
 const link = @import("../../link.zig");
@@ -11,660 +11,611 @@ const Zcu = @import("../../Zcu.zig");
 const InternPool = @import("../../InternPool.zig");
 const codegen = @import("../../codegen.zig");
 
-/// Contains our list of instructions
 mir: Mir,
-/// Reference to the Wasm module linker
-bin_file: *link.File.Wasm,
-/// Possible error message. When set, the value is allocated and
-/// must be freed manually.
-error_msg: ?*Zcu.ErrorMsg = null,
-/// The binary representation that will be emit by this module.
-code: *std.ArrayList(u8),
-/// List of allocated locals.
-locals: []const u8,
-/// The declaration that code is being generated for.
-owner_nav: InternPool.Nav.Index,
-
-// Debug information
-/// Holds the debug information for this emission
-dbg_output: link.File.DebugInfoOutput,
-/// Previous debug info line
-prev_di_line: u32,
-/// Previous debug info column
-prev_di_column: u32,
-/// Previous offset relative to code section
-prev_di_offset: u32,
-
-const InnerError = error{
+wasm: *link.File.Wasm,
+/// The binary representation that will be emitted by this module.
+code: *std.ArrayListUnmanaged(u8),
+
+pub const Error = error{
     OutOfMemory,
-    EmitFail,
 };
 
-pub fn emitMir(emit: *Emit) InnerError!void {
-    const mir_tags = emit.mir.instructions.items(.tag);
-    // write the locals in the prologue of the function body
-    // before we emit the function body when lowering MIR
-    try emit.emitLocals();
-
-    for (mir_tags, 0..) |tag, index| {
-        const inst = @as(u32, @intCast(index));
-        switch (tag) {
-            // block instructions
-            .block => try emit.emitBlock(tag, inst),
-            .loop => try emit.emitBlock(tag, inst),
-
-            .dbg_line => try emit.emitDbgLine(inst),
-            .dbg_epilogue_begin => try emit.emitDbgEpilogueBegin(),
-            .dbg_prologue_end => try emit.emitDbgPrologueEnd(),
-
-            // branch instructions
-            .br_if => try emit.emitLabel(tag, inst),
-            .br_table => try emit.emitBrTable(inst),
-            .br => try emit.emitLabel(tag, inst),
-
-            // relocatables
-            .call => try emit.emitCall(inst),
-            .call_indirect => try emit.emitCallIndirect(inst),
-            .global_get => try emit.emitGlobal(tag, inst),
-            .global_set => try emit.emitGlobal(tag, inst),
-            .function_index => try emit.emitFunctionIndex(inst),
-            .memory_address => try emit.emitMemAddress(inst),
-
-            // immediates
-            .f32_const => try emit.emitFloat32(inst),
-            .f64_const => try emit.emitFloat64(inst),
-            .i32_const => try emit.emitImm32(inst),
-            .i64_const => try emit.emitImm64(inst),
-
-            // memory instructions
-            .i32_load => try emit.emitMemArg(tag, inst),
-            .i64_load => try emit.emitMemArg(tag, inst),
-            .f32_load => try emit.emitMemArg(tag, inst),
-            .f64_load => try emit.emitMemArg(tag, inst),
-            .i32_load8_s => try emit.emitMemArg(tag, inst),
-            .i32_load8_u => try emit.emitMemArg(tag, inst),
-            .i32_load16_s => try emit.emitMemArg(tag, inst),
-            .i32_load16_u => try emit.emitMemArg(tag, inst),
-            .i64_load8_s => try emit.emitMemArg(tag, inst),
-            .i64_load8_u => try emit.emitMemArg(tag, inst),
-            .i64_load16_s => try emit.emitMemArg(tag, inst),
-            .i64_load16_u => try emit.emitMemArg(tag, inst),
-            .i64_load32_s => try emit.emitMemArg(tag, inst),
-            .i64_load32_u => try emit.emitMemArg(tag, inst),
-            .i32_store => try emit.emitMemArg(tag, inst),
-            .i64_store => try emit.emitMemArg(tag, inst),
-            .f32_store => try emit.emitMemArg(tag, inst),
-            .f64_store => try emit.emitMemArg(tag, inst),
-            .i32_store8 => try emit.emitMemArg(tag, inst),
-            .i32_store16 => try emit.emitMemArg(tag, inst),
-            .i64_store8 => try emit.emitMemArg(tag, inst),
-            .i64_store16 => try emit.emitMemArg(tag, inst),
-            .i64_store32 => try emit.emitMemArg(tag, inst),
-
-            // Instructions with an index that do not require relocations
-            .local_get => try emit.emitLabel(tag, inst),
-            .local_set => try emit.emitLabel(tag, inst),
-            .local_tee => try emit.emitLabel(tag, inst),
-            .memory_grow => try emit.emitLabel(tag, inst),
-            .memory_size => try emit.emitLabel(tag, inst),
-
-            // no-ops
-            .end => try emit.emitTag(tag),
-            .@"return" => try emit.emitTag(tag),
-            .@"unreachable" => try emit.emitTag(tag),
-
-            .select => try emit.emitTag(tag),
-
-            // arithmetic
-            .i32_eqz => try emit.emitTag(tag),
-            .i32_eq => try emit.emitTag(tag),
-            .i32_ne => try emit.emitTag(tag),
-            .i32_lt_s => try emit.emitTag(tag),
-            .i32_lt_u => try emit.emitTag(tag),
-            .i32_gt_s => try emit.emitTag(tag),
-            .i32_gt_u => try emit.emitTag(tag),
-            .i32_le_s => try emit.emitTag(tag),
-            .i32_le_u => try emit.emitTag(tag),
-            .i32_ge_s => try emit.emitTag(tag),
-            .i32_ge_u => try emit.emitTag(tag),
-            .i64_eqz => try emit.emitTag(tag),
-            .i64_eq => try emit.emitTag(tag),
-            .i64_ne => try emit.emitTag(tag),
-            .i64_lt_s => try emit.emitTag(tag),
-            .i64_lt_u => try emit.emitTag(tag),
-            .i64_gt_s => try emit.emitTag(tag),
-            .i64_gt_u => try emit.emitTag(tag),
-            .i64_le_s => try emit.emitTag(tag),
-            .i64_le_u => try emit.emitTag(tag),
-            .i64_ge_s => try emit.emitTag(tag),
-            .i64_ge_u => try emit.emitTag(tag),
-            .f32_eq => try emit.emitTag(tag),
-            .f32_ne => try emit.emitTag(tag),
-            .f32_lt => try emit.emitTag(tag),
-            .f32_gt => try emit.emitTag(tag),
-            .f32_le => try emit.emitTag(tag),
-            .f32_ge => try emit.emitTag(tag),
-            .f64_eq => try emit.emitTag(tag),
-            .f64_ne => try emit.emitTag(tag),
-            .f64_lt => try emit.emitTag(tag),
-            .f64_gt => try emit.emitTag(tag),
-            .f64_le => try emit.emitTag(tag),
-            .f64_ge => try emit.emitTag(tag),
-            .i32_add => try emit.emitTag(tag),
-            .i32_sub => try emit.emitTag(tag),
-            .i32_mul => try emit.emitTag(tag),
-            .i32_div_s => try emit.emitTag(tag),
-            .i32_div_u => try emit.emitTag(tag),
-            .i32_and => try emit.emitTag(tag),
-            .i32_or => try emit.emitTag(tag),
-            .i32_xor => try emit.emitTag(tag),
-            .i32_shl => try emit.emitTag(tag),
-            .i32_shr_s => try emit.emitTag(tag),
-            .i32_shr_u => try emit.emitTag(tag),
-            .i64_add => try emit.emitTag(tag),
-            .i64_sub => try emit.emitTag(tag),
-            .i64_mul => try emit.emitTag(tag),
-            .i64_div_s => try emit.emitTag(tag),
-            .i64_div_u => try emit.emitTag(tag),
-            .i64_and => try emit.emitTag(tag),
-            .i64_or => try emit.emitTag(tag),
-            .i64_xor => try emit.emitTag(tag),
-            .i64_shl => try emit.emitTag(tag),
-            .i64_shr_s => try emit.emitTag(tag),
-            .i64_shr_u => try emit.emitTag(tag),
-            .f32_abs => try emit.emitTag(tag),
-            .f32_neg => try emit.emitTag(tag),
-            .f32_ceil => try emit.emitTag(tag),
-            .f32_floor => try emit.emitTag(tag),
-            .f32_trunc => try emit.emitTag(tag),
-            .f32_nearest => try emit.emitTag(tag),
-            .f32_sqrt => try emit.emitTag(tag),
-            .f32_add => try emit.emitTag(tag),
-            .f32_sub => try emit.emitTag(tag),
-            .f32_mul => try emit.emitTag(tag),
-            .f32_div => try emit.emitTag(tag),
-            .f32_min => try emit.emitTag(tag),
-            .f32_max => try emit.emitTag(tag),
-            .f32_copysign => try emit.emitTag(tag),
-            .f64_abs => try emit.emitTag(tag),
-            .f64_neg => try emit.emitTag(tag),
-            .f64_ceil => try emit.emitTag(tag),
-            .f64_floor => try emit.emitTag(tag),
-            .f64_trunc => try emit.emitTag(tag),
-            .f64_nearest => try emit.emitTag(tag),
-            .f64_sqrt => try emit.emitTag(tag),
-            .f64_add => try emit.emitTag(tag),
-            .f64_sub => try emit.emitTag(tag),
-            .f64_mul => try emit.emitTag(tag),
-            .f64_div => try emit.emitTag(tag),
-            .f64_min => try emit.emitTag(tag),
-            .f64_max => try emit.emitTag(tag),
-            .f64_copysign => try emit.emitTag(tag),
-            .i32_wrap_i64 => try emit.emitTag(tag),
-            .i64_extend_i32_s => try emit.emitTag(tag),
-            .i64_extend_i32_u => try emit.emitTag(tag),
-            .i32_extend8_s => try emit.emitTag(tag),
-            .i32_extend16_s => try emit.emitTag(tag),
-            .i64_extend8_s => try emit.emitTag(tag),
-            .i64_extend16_s => try emit.emitTag(tag),
-            .i64_extend32_s => try emit.emitTag(tag),
-            .f32_demote_f64 => try emit.emitTag(tag),
-            .f64_promote_f32 => try emit.emitTag(tag),
-            .i32_reinterpret_f32 => try emit.emitTag(tag),
-            .i64_reinterpret_f64 => try emit.emitTag(tag),
-            .f32_reinterpret_i32 => try emit.emitTag(tag),
-            .f64_reinterpret_i64 => try emit.emitTag(tag),
-            .i32_trunc_f32_s => try emit.emitTag(tag),
-            .i32_trunc_f32_u => try emit.emitTag(tag),
-            .i32_trunc_f64_s => try emit.emitTag(tag),
-            .i32_trunc_f64_u => try emit.emitTag(tag),
-            .i64_trunc_f32_s => try emit.emitTag(tag),
-            .i64_trunc_f32_u => try emit.emitTag(tag),
-            .i64_trunc_f64_s => try emit.emitTag(tag),
-            .i64_trunc_f64_u => try emit.emitTag(tag),
-            .f32_convert_i32_s => try emit.emitTag(tag),
-            .f32_convert_i32_u => try emit.emitTag(tag),
-            .f32_convert_i64_s => try emit.emitTag(tag),
-            .f32_convert_i64_u => try emit.emitTag(tag),
-            .f64_convert_i32_s => try emit.emitTag(tag),
-            .f64_convert_i32_u => try emit.emitTag(tag),
-            .f64_convert_i64_s => try emit.emitTag(tag),
-            .f64_convert_i64_u => try emit.emitTag(tag),
-            .i32_rem_s => try emit.emitTag(tag),
-            .i32_rem_u => try emit.emitTag(tag),
-            .i64_rem_s => try emit.emitTag(tag),
-            .i64_rem_u => try emit.emitTag(tag),
-            .i32_popcnt => try emit.emitTag(tag),
-            .i64_popcnt => try emit.emitTag(tag),
-            .i32_clz => try emit.emitTag(tag),
-            .i32_ctz => try emit.emitTag(tag),
-            .i64_clz => try emit.emitTag(tag),
-            .i64_ctz => try emit.emitTag(tag),
-
-            .misc_prefix => try emit.emitExtended(inst),
-            .simd_prefix => try emit.emitSimd(inst),
-            .atomics_prefix => try emit.emitAtomic(inst),
-        }
-    }
-}
-
-fn offset(self: Emit) u32 {
-    return @as(u32, @intCast(self.code.items.len));
-}
-
-fn fail(emit: *Emit, comptime format: []const u8, args: anytype) InnerError {
-    @branchHint(.cold);
-    std.debug.assert(emit.error_msg == null);
-    const wasm = emit.bin_file;
+pub fn lowerToCode(emit: *Emit) Error!void {
+    const mir = &emit.mir;
+    const code = emit.code;
+    const wasm = emit.wasm;
     const comp = wasm.base.comp;
-    const zcu = comp.zcu.?;
     const gpa = comp.gpa;
-    emit.error_msg = try Zcu.ErrorMsg.create(gpa, zcu.navSrcLoc(emit.owner_nav), format, args);
-    return error.EmitFail;
-}
-
-fn emitLocals(emit: *Emit) !void {
-    const writer = emit.code.writer();
-    try leb128.writeUleb128(writer, @as(u32, @intCast(emit.locals.len)));
-    // emit the actual locals amount
-    for (emit.locals) |local| {
-        try leb128.writeUleb128(writer, @as(u32, 1));
-        try writer.writeByte(local);
-    }
-}
+    const is_obj = comp.config.output_mode == .Obj;
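+    // Relocations are only needed when emitting a relocatable object file;
+    // for a final binary, concrete indexes and addresses are written directly.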
 
-fn emitTag(emit: *Emit, tag: Mir.Inst.Tag) !void {
-    try emit.code.append(@intFromEnum(tag));
-}
+    const tags = mir.instructions.items(.tag);
+    const datas = mir.instructions.items(.data);
+    var inst: u32 = 0;
 
-fn emitBlock(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) !void {
-    const block_type = emit.mir.instructions.items(.data)[inst].block_type;
-    try emit.code.append(@intFromEnum(tag));
-    try emit.code.append(block_type);
-}
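+    // Labeled switch dispatch: each arm advances `inst` and re-enters via
+    // `continue :loop tags[inst]`; lowering terminates at `.dbg_epilogue_begin`.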
+    loop: switch (tags[inst]) {
+        .block, .loop => {
+            const block_type = datas[inst].block_type;
+            try code.ensureUnusedCapacity(gpa, 2);
+            code.appendAssumeCapacity(@intFromEnum(tags[inst]));
+            code.appendAssumeCapacity(block_type);
 
-fn emitBrTable(emit: *Emit, inst: Mir.Inst.Index) !void {
-    const extra_index = emit.mir.instructions.items(.data)[inst].payload;
-    const extra = emit.mir.extraData(Mir.JumpTable, extra_index);
-    const labels = emit.mir.extra[extra.end..][0..extra.data.length];
-    const writer = emit.code.writer();
+            inst += 1;
+            continue :loop tags[inst];
+        },
 
-    try emit.code.append(@intFromEnum(std.wasm.Opcode.br_table));
-    try leb128.writeUleb128(writer, extra.data.length - 1); // Default label is not part of length/depth
-    for (labels) |label| {
-        try leb128.writeUleb128(writer, label);
-    }
-}
+        .uav_ref => {
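+            // Identical lowering to `uav_ref_off`, with an offset of zero.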
+            try uavRefOff(wasm, code, .{ .ip_index = datas[inst].ip_index, .offset = 0 });
 
-fn emitLabel(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) !void {
-    const label = emit.mir.instructions.items(.data)[inst].label;
-    try emit.code.append(@intFromEnum(tag));
-    try leb128.writeUleb128(emit.code.writer(), label);
-}
+            inst += 1;
+            continue :loop tags[inst];
+        },
+        .uav_ref_off => {
+            try uavRefOff(wasm, code, mir.extraData(Mir.UavRefOff, datas[inst].payload).data);
 
-fn emitGlobal(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) !void {
-    const wasm = emit.bin_file;
-    const comp = wasm.base.comp;
-    const gpa = comp.gpa;
-    const label = emit.mir.instructions.items(.data)[inst].label;
-    try emit.code.append(@intFromEnum(tag));
-    var buf: [5]u8 = undefined;
-    leb128.writeUnsignedFixed(5, &buf, label);
-    const global_offset = emit.offset();
-    try emit.code.appendSlice(&buf);
-
-    const zo = wasm.zig_object.?;
-    try zo.relocs.append(gpa, .{
-        .nav_index = emit.nav_index,
-        .index = label,
-        .offset = global_offset,
-        .tag = .GLOBAL_INDEX_LEB,
-    });
-}
+            inst += 1;
+            continue :loop tags[inst];
+        },
 
-fn emitImm32(emit: *Emit, inst: Mir.Inst.Index) !void {
-    const value: i32 = emit.mir.instructions.items(.data)[inst].imm32;
-    try emit.code.append(@intFromEnum(std.wasm.Opcode.i32_const));
-    try leb128.writeIleb128(emit.code.writer(), value);
-}
+        .dbg_line => {
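+            // Line information produces no code bytes here.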
+            inst += 1;
+            continue :loop tags[inst];
+        },
+        .dbg_epilogue_begin => {
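+            // Marks the end of the function body; nothing more to lower.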
+            return;
+        },
 
-fn emitImm64(emit: *Emit, inst: Mir.Inst.Index) !void {
-    const extra_index = emit.mir.instructions.items(.data)[inst].payload;
-    const value = emit.mir.extraData(Mir.Imm64, extra_index);
-    try emit.code.append(@intFromEnum(std.wasm.Opcode.i64_const));
-    try leb128.writeIleb128(emit.code.writer(), @as(i64, @bitCast(value.data.toU64())));
-}
+        .br_if, .br, .memory_grow, .memory_size => {
+            try code.ensureUnusedCapacity(gpa, 11);
+            code.appendAssumeCapacity(@intFromEnum(tags[inst]));
+            leb.writeUleb128(code.fixedWriter(), datas[inst].label) catch unreachable;
 
-fn emitFloat32(emit: *Emit, inst: Mir.Inst.Index) !void {
-    const value: f32 = emit.mir.instructions.items(.data)[inst].float32;
-    try emit.code.append(@intFromEnum(std.wasm.Opcode.f32_const));
-    try emit.code.writer().writeInt(u32, @bitCast(value), .little);
-}
+            inst += 1;
+            continue :loop tags[inst];
+        },
 
-fn emitFloat64(emit: *Emit, inst: Mir.Inst.Index) !void {
-    const extra_index = emit.mir.instructions.items(.data)[inst].payload;
-    const value = emit.mir.extraData(Mir.Float64, extra_index);
-    try emit.code.append(@intFromEnum(std.wasm.Opcode.f64_const));
-    try emit.code.writer().writeInt(u64, value.data.toU64(), .little);
-}
+        .local_get, .local_set, .local_tee => {
+            try code.ensureUnusedCapacity(gpa, 11);
+            code.appendAssumeCapacity(@intFromEnum(tags[inst]));
+            leb.writeUleb128(code.fixedWriter(), datas[inst].local) catch unreachable;
 
-fn emitMemArg(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) !void {
-    const extra_index = emit.mir.instructions.items(.data)[inst].payload;
-    const mem_arg = emit.mir.extraData(Mir.MemArg, extra_index).data;
-    try emit.code.append(@intFromEnum(tag));
-    try encodeMemArg(mem_arg, emit.code.writer());
-}
+            inst += 1;
+            continue :loop tags[inst];
+        },
 
-fn encodeMemArg(mem_arg: Mir.MemArg, writer: anytype) !void {
-    // wasm encodes alignment as power of 2, rather than natural alignment
-    const encoded_alignment = @ctz(mem_arg.alignment);
-    try leb128.writeUleb128(writer, encoded_alignment);
-    try leb128.writeUleb128(writer, mem_arg.offset);
-}
+        .br_table => {
+            const extra_index = datas[inst].payload;
+            const extra = mir.extraData(Mir.JumpTable, extra_index);
+            const labels = mir.extra[extra.end..][0..extra.data.length];
+            try code.ensureUnusedCapacity(gpa, 11 + 10 * labels.len);
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.br_table));
+            // -1 because default label is not part of length/depth.
+            leb.writeUleb128(code.fixedWriter(), extra.data.length - 1) catch unreachable;
+            for (labels) |label| leb.writeUleb128(code.fixedWriter(), label) catch unreachable;
+
+            inst += 1;
+            continue :loop tags[inst];
+        },
 
-fn emitCall(emit: *Emit, inst: Mir.Inst.Index) !void {
-    const wasm = emit.bin_file;
-    const comp = wasm.base.comp;
-    const gpa = comp.gpa;
-    const label = emit.mir.instructions.items(.data)[inst].label;
-    try emit.code.append(@intFromEnum(std.wasm.Opcode.call));
-    const call_offset = emit.offset();
-    var buf: [5]u8 = undefined;
-    leb128.writeUnsignedFixed(5, &buf, label);
-    try emit.code.appendSlice(&buf);
-
-    const zo = wasm.zig_object.?;
-    try zo.relocs.append(gpa, .{
-        .offset = call_offset,
-        .index = label,
-        .tag = .FUNCTION_INDEX_LEB,
-    });
-}
+        .call_nav => {
+            try code.ensureUnusedCapacity(gpa, 6);
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.call));
+            if (is_obj) {
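+                // The callee's final index is unknown until link time; record a
+                // relocation and leave a fixed-width 5-byte placeholder.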
+                try wasm.out_relocs.append(gpa, .{
+                    .offset = @intCast(code.items.len),
+                    .index = try wasm.navSymbolIndex(datas[inst].nav_index),
+                    .tag = .FUNCTION_INDEX_LEB,
+                    .addend = 0,
+                });
+                code.appendNTimesAssumeCapacity(0, 5);
+            } else {
+                const func_index = try wasm.navFunctionIndex(datas[inst].nav_index);
+                leb.writeUleb128(code.fixedWriter(), @intFromEnum(func_index)) catch unreachable;
+            }
+
+            inst += 1;
+            continue :loop tags[inst];
+        },
 
-fn emitCallIndirect(emit: *Emit, inst: Mir.Inst.Index) !void {
-    const wasm = emit.bin_file;
-    const type_index = emit.mir.instructions.items(.data)[inst].label;
-    try emit.code.append(@intFromEnum(std.wasm.Opcode.call_indirect));
-    // NOTE: If we remove unused function types in the future for incremental
-    // linking, we must also emit a relocation for this `type_index`
-    const call_offset = emit.offset();
-    var buf: [5]u8 = undefined;
-    leb128.writeUnsignedFixed(5, &buf, type_index);
-    try emit.code.appendSlice(&buf);
-
-    const zo = wasm.zig_object.?;
-    try zo.relocs.append(wasm.base.comp.gpa, .{
-        .offset = call_offset,
-        .index = type_index,
-        .tag = .TYPE_INDEX_LEB,
-    });
-
-    try leb128.writeUleb128(emit.code.writer(), @as(u32, 0)); // TODO: Emit relocation for table index
-}
+        .call_indirect => {
+            try code.ensureUnusedCapacity(gpa, 11);
+            const func_ty_index = datas[inst].func_ty;
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.call_indirect));
+            if (is_obj) {
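+                // Type indexes are relocated in object files so that function
+                // types can be deduplicated across objects at link time.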
+                try wasm.out_relocs.append(gpa, .{
+                    .offset = @intCast(code.items.len),
+                    .index = func_ty_index,
+                    .tag = .TYPE_INDEX_LEB,
+                    .addend = 0,
+                });
+                code.appendNTimesAssumeCapacity(0, 5);
+            } else {
+                leb.writeUleb128(code.fixedWriter(), @intFromEnum(func_ty_index)) catch unreachable;
+            }
+            leb.writeUleb128(code.fixedWriter(), @as(u32, 0)) catch unreachable; // table index
+
+            inst += 1;
+            continue :loop tags[inst];
+        },
 
-fn emitFunctionIndex(emit: *Emit, inst: Mir.Inst.Index) !void {
-    const wasm = emit.bin_file;
-    const comp = wasm.base.comp;
-    const gpa = comp.gpa;
-    const symbol_index = emit.mir.instructions.items(.data)[inst].label;
-    try emit.code.append(@intFromEnum(std.wasm.Opcode.i32_const));
-    const index_offset = emit.offset();
-    var buf: [5]u8 = undefined;
-    leb128.writeUnsignedFixed(5, &buf, symbol_index);
-    try emit.code.appendSlice(&buf);
-
-    const zo = wasm.zig_object.?;
-    try zo.relocs.append(gpa, .{
-        .offset = index_offset,
-        .index = symbol_index,
-        .tag = .TABLE_INDEX_SLEB,
-    });
-}
+        .global_set => {
+            try code.ensureUnusedCapacity(gpa, 6);
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.global_set));
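+            // The only global this backend assigns is the stack pointer.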
+            if (is_obj) {
+                try wasm.out_relocs.append(gpa, .{
+                    .offset = @intCast(code.items.len),
+                    .index = try wasm.stackPointerSymbolIndex(),
+                    .tag = .GLOBAL_INDEX_LEB,
+                    .addend = 0,
+                });
+                code.appendNTimesAssumeCapacity(0, 5);
+            } else {
+                const sp_global = try wasm.stackPointerGlobalIndex();
+                leb.writeUleb128(code.fixedWriter(), @intFromEnum(sp_global)) catch unreachable;
+            }
+
+            inst += 1;
+            continue :loop tags[inst];
+        },
 
-fn emitMemAddress(emit: *Emit, inst: Mir.Inst.Index) !void {
-    const wasm = emit.bin_file;
-    const extra_index = emit.mir.instructions.items(.data)[inst].payload;
-    const mem = emit.mir.extraData(Mir.Memory, extra_index).data;
-    const mem_offset = emit.offset() + 1;
-    const comp = wasm.base.comp;
-    const gpa = comp.gpa;
-    const target = comp.root_mod.resolved_target.result;
-    const is_wasm32 = target.cpu.arch == .wasm32;
-    if (is_wasm32) {
-        try emit.code.append(@intFromEnum(std.wasm.Opcode.i32_const));
-        var buf: [5]u8 = undefined;
-        leb128.writeUnsignedFixed(5, &buf, mem.pointer);
-        try emit.code.appendSlice(&buf);
-    } else {
-        try emit.code.append(@intFromEnum(std.wasm.Opcode.i64_const));
-        var buf: [10]u8 = undefined;
-        leb128.writeUnsignedFixed(10, &buf, mem.pointer);
-        try emit.code.appendSlice(&buf);
-    }
+        .function_index => {
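+            // A function "pointer" is the function's index into the indirect
+            // function table, pushed as an i32 constant.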
+            try code.ensureUnusedCapacity(gpa, 6);
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_const));
+            if (is_obj) {
+                try wasm.out_relocs.append(gpa, .{
+                    .offset = @intCast(code.items.len),
+                    .index = try wasm.functionSymbolIndex(datas[inst].ip_index),
+                    .tag = .TABLE_INDEX_SLEB,
+                    .addend = 0,
+                });
+                code.appendNTimesAssumeCapacity(0, 5);
+            } else {
+                const func_index = try wasm.functionIndex(datas[inst].ip_index);
+                leb.writeUleb128(code.fixedWriter(), @intFromEnum(func_index)) catch unreachable;
+            }
+
+            inst += 1;
+            continue :loop tags[inst];
+        },
 
-    const zo = wasm.zig_object.?;
-    try zo.relocs.append(gpa, .{
-        .offset = mem_offset,
-        .index = mem.pointer,
-        .tag = if (is_wasm32) .MEMORY_ADDR_LEB else .MEMORY_ADDR_LEB64,
-        .addend = @as(i32, @intCast(mem.offset)),
-    });
-}
+        .f32_const => {
+            try code.ensureUnusedCapacity(gpa, 5);
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.f32_const));
+            std.mem.writeInt(u32, code.addManyAsArrayAssumeCapacity(4), @bitCast(datas[inst].float32), .little);
 
-fn emitExtended(emit: *Emit, inst: Mir.Inst.Index) !void {
-    const extra_index = emit.mir.instructions.items(.data)[inst].payload;
-    const opcode = emit.mir.extra[extra_index];
-    const writer = emit.code.writer();
-    try emit.code.append(@intFromEnum(std.wasm.Opcode.misc_prefix));
-    try leb128.writeUleb128(writer, opcode);
-    switch (@as(std.wasm.MiscOpcode, @enumFromInt(opcode))) {
-        // bulk-memory opcodes
-        .data_drop => {
-            const segment = emit.mir.extra[extra_index + 1];
-            try leb128.writeUleb128(writer, segment);
+            inst += 1;
+            continue :loop tags[inst];
         },
-        .memory_init => {
-            const segment = emit.mir.extra[extra_index + 1];
-            try leb128.writeUleb128(writer, segment);
-            try leb128.writeUleb128(writer, @as(u32, 0)); // memory index
+
+        .f64_const => {
+            try code.ensureUnusedCapacity(gpa, 9);
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.f64_const));
+            const float64 = mir.extraData(Mir.Float64, datas[inst].payload).data;
+            std.mem.writeInt(u64, code.addManyAsArrayAssumeCapacity(8), float64.toInt(), .little);
+
+            inst += 1;
+            continue :loop tags[inst];
         },
-        .memory_fill => {
-            try leb128.writeUleb128(writer, @as(u32, 0)); // memory index
+        .i32_const => {
+            try code.ensureUnusedCapacity(gpa, 6);
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i32_const));
+            leb.writeIleb128(code.fixedWriter(), datas[inst].imm32) catch unreachable;
+
+            inst += 1;
+            continue :loop tags[inst];
         },
-        .memory_copy => {
-            try leb128.writeUleb128(writer, @as(u32, 0)); // dst memory index
-            try leb128.writeUleb128(writer, @as(u32, 0)); // src memory index
+        .i64_const => {
+            try code.ensureUnusedCapacity(gpa, 11);
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.i64_const));
+            const int64: i64 = @bitCast(mir.extraData(Mir.Imm64, datas[inst].payload).data.toInt());
+            leb.writeIleb128(code.fixedWriter(), int64) catch unreachable;
+
+            inst += 1;
+            continue :loop tags[inst];
         },
 
-        // nontrapping-float-to-int-conversion opcodes
-        .i32_trunc_sat_f32_s,
-        .i32_trunc_sat_f32_u,
-        .i32_trunc_sat_f64_s,
-        .i32_trunc_sat_f64_u,
-        .i64_trunc_sat_f32_s,
-        .i64_trunc_sat_f32_u,
-        .i64_trunc_sat_f64_s,
-        .i64_trunc_sat_f64_u,
-        => {}, // opcode already written
-        else => |tag| return emit.fail("TODO: Implement extension instruction: {s}\n", .{@tagName(tag)}),
-    }
-}
-
-fn emitSimd(emit: *Emit, inst: Mir.Inst.Index) !void {
-    const extra_index = emit.mir.instructions.items(.data)[inst].payload;
-    const opcode = emit.mir.extra[extra_index];
-    const writer = emit.code.writer();
-    try emit.code.append(@intFromEnum(std.wasm.Opcode.simd_prefix));
-    try leb128.writeUleb128(writer, opcode);
-    switch (@as(std.wasm.SimdOpcode, @enumFromInt(opcode))) {
-        .v128_store,
-        .v128_load,
-        .v128_load8_splat,
-        .v128_load16_splat,
-        .v128_load32_splat,
-        .v128_load64_splat,
-        => {
-            const mem_arg = emit.mir.extraData(Mir.MemArg, extra_index + 1).data;
-            try encodeMemArg(mem_arg, writer);
-        },
-        .v128_const,
-        .i8x16_shuffle,
+        .i32_load,
+        .i64_load,
+        .f32_load,
+        .f64_load,
+        .i32_load8_s,
+        .i32_load8_u,
+        .i32_load16_s,
+        .i32_load16_u,
+        .i64_load8_s,
+        .i64_load8_u,
+        .i64_load16_s,
+        .i64_load16_u,
+        .i64_load32_s,
+        .i64_load32_u,
+        .i32_store,
+        .i64_store,
+        .f32_store,
+        .f64_store,
+        .i32_store8,
+        .i32_store16,
+        .i64_store8,
+        .i64_store16,
+        .i64_store32,
         => {
-            const simd_value = emit.mir.extra[extra_index + 1 ..][0..4];
-            try writer.writeAll(std.mem.asBytes(simd_value));
+            try code.ensureUnusedCapacity(gpa, 1 + 20);
+            code.appendAssumeCapacity(@intFromEnum(tags[inst]));
+            encodeMemArg(code, mir.extraData(Mir.MemArg, datas[inst].payload).data);
+            inst += 1;
+            continue :loop tags[inst];
         },
-        .i8x16_extract_lane_s,
-        .i8x16_extract_lane_u,
-        .i8x16_replace_lane,
-        .i16x8_extract_lane_s,
-        .i16x8_extract_lane_u,
-        .i16x8_replace_lane,
-        .i32x4_extract_lane,
-        .i32x4_replace_lane,
-        .i64x2_extract_lane,
-        .i64x2_replace_lane,
-        .f32x4_extract_lane,
-        .f32x4_replace_lane,
-        .f64x2_extract_lane,
-        .f64x2_replace_lane,
+
+        .end,
+        .@"return",
+        .@"unreachable",
+        .select,
+        .i32_eqz,
+        .i32_eq,
+        .i32_ne,
+        .i32_lt_s,
+        .i32_lt_u,
+        .i32_gt_s,
+        .i32_gt_u,
+        .i32_le_s,
+        .i32_le_u,
+        .i32_ge_s,
+        .i32_ge_u,
+        .i64_eqz,
+        .i64_eq,
+        .i64_ne,
+        .i64_lt_s,
+        .i64_lt_u,
+        .i64_gt_s,
+        .i64_gt_u,
+        .i64_le_s,
+        .i64_le_u,
+        .i64_ge_s,
+        .i64_ge_u,
+        .f32_eq,
+        .f32_ne,
+        .f32_lt,
+        .f32_gt,
+        .f32_le,
+        .f32_ge,
+        .f64_eq,
+        .f64_ne,
+        .f64_lt,
+        .f64_gt,
+        .f64_le,
+        .f64_ge,
+        .i32_add,
+        .i32_sub,
+        .i32_mul,
+        .i32_div_s,
+        .i32_div_u,
+        .i32_and,
+        .i32_or,
+        .i32_xor,
+        .i32_shl,
+        .i32_shr_s,
+        .i32_shr_u,
+        .i64_add,
+        .i64_sub,
+        .i64_mul,
+        .i64_div_s,
+        .i64_div_u,
+        .i64_and,
+        .i64_or,
+        .i64_xor,
+        .i64_shl,
+        .i64_shr_s,
+        .i64_shr_u,
+        .f32_abs,
+        .f32_neg,
+        .f32_ceil,
+        .f32_floor,
+        .f32_trunc,
+        .f32_nearest,
+        .f32_sqrt,
+        .f32_add,
+        .f32_sub,
+        .f32_mul,
+        .f32_div,
+        .f32_min,
+        .f32_max,
+        .f32_copysign,
+        .f64_abs,
+        .f64_neg,
+        .f64_ceil,
+        .f64_floor,
+        .f64_trunc,
+        .f64_nearest,
+        .f64_sqrt,
+        .f64_add,
+        .f64_sub,
+        .f64_mul,
+        .f64_div,
+        .f64_min,
+        .f64_max,
+        .f64_copysign,
+        .i32_wrap_i64,
+        .i64_extend_i32_s,
+        .i64_extend_i32_u,
+        .i32_extend8_s,
+        .i32_extend16_s,
+        .i64_extend8_s,
+        .i64_extend16_s,
+        .i64_extend32_s,
+        .f32_demote_f64,
+        .f64_promote_f32,
+        .i32_reinterpret_f32,
+        .i64_reinterpret_f64,
+        .f32_reinterpret_i32,
+        .f64_reinterpret_i64,
+        .i32_trunc_f32_s,
+        .i32_trunc_f32_u,
+        .i32_trunc_f64_s,
+        .i32_trunc_f64_u,
+        .i64_trunc_f32_s,
+        .i64_trunc_f32_u,
+        .i64_trunc_f64_s,
+        .i64_trunc_f64_u,
+        .f32_convert_i32_s,
+        .f32_convert_i32_u,
+        .f32_convert_i64_s,
+        .f32_convert_i64_u,
+        .f64_convert_i32_s,
+        .f64_convert_i32_u,
+        .f64_convert_i64_s,
+        .f64_convert_i64_u,
+        .i32_rem_s,
+        .i32_rem_u,
+        .i64_rem_s,
+        .i64_rem_u,
+        .i32_popcnt,
+        .i64_popcnt,
+        .i32_clz,
+        .i32_ctz,
+        .i64_clz,
+        .i64_ctz,
         => {
-            try writer.writeByte(@as(u8, @intCast(emit.mir.extra[extra_index + 1])));
+            try code.append(gpa, @intFromEnum(tags[inst]));
+            inst += 1;
+            continue :loop tags[inst];
         },
-        .i8x16_splat,
-        .i16x8_splat,
-        .i32x4_splat,
-        .i64x2_splat,
-        .f32x4_splat,
-        .f64x2_splat,
-        => {}, // opcode already written
-        else => |tag| return emit.fail("TODO: Implement simd instruction: {s}", .{@tagName(tag)}),
-    }
-}
 
-fn emitAtomic(emit: *Emit, inst: Mir.Inst.Index) !void {
-    const extra_index = emit.mir.instructions.items(.data)[inst].payload;
-    const opcode = emit.mir.extra[extra_index];
-    const writer = emit.code.writer();
-    try emit.code.append(@intFromEnum(std.wasm.Opcode.atomics_prefix));
-    try leb128.writeUleb128(writer, opcode);
-    switch (@as(std.wasm.AtomicsOpcode, @enumFromInt(opcode))) {
-        .i32_atomic_load,
-        .i64_atomic_load,
-        .i32_atomic_load8_u,
-        .i32_atomic_load16_u,
-        .i64_atomic_load8_u,
-        .i64_atomic_load16_u,
-        .i64_atomic_load32_u,
-        .i32_atomic_store,
-        .i64_atomic_store,
-        .i32_atomic_store8,
-        .i32_atomic_store16,
-        .i64_atomic_store8,
-        .i64_atomic_store16,
-        .i64_atomic_store32,
-        .i32_atomic_rmw_add,
-        .i64_atomic_rmw_add,
-        .i32_atomic_rmw8_add_u,
-        .i32_atomic_rmw16_add_u,
-        .i64_atomic_rmw8_add_u,
-        .i64_atomic_rmw16_add_u,
-        .i64_atomic_rmw32_add_u,
-        .i32_atomic_rmw_sub,
-        .i64_atomic_rmw_sub,
-        .i32_atomic_rmw8_sub_u,
-        .i32_atomic_rmw16_sub_u,
-        .i64_atomic_rmw8_sub_u,
-        .i64_atomic_rmw16_sub_u,
-        .i64_atomic_rmw32_sub_u,
-        .i32_atomic_rmw_and,
-        .i64_atomic_rmw_and,
-        .i32_atomic_rmw8_and_u,
-        .i32_atomic_rmw16_and_u,
-        .i64_atomic_rmw8_and_u,
-        .i64_atomic_rmw16_and_u,
-        .i64_atomic_rmw32_and_u,
-        .i32_atomic_rmw_or,
-        .i64_atomic_rmw_or,
-        .i32_atomic_rmw8_or_u,
-        .i32_atomic_rmw16_or_u,
-        .i64_atomic_rmw8_or_u,
-        .i64_atomic_rmw16_or_u,
-        .i64_atomic_rmw32_or_u,
-        .i32_atomic_rmw_xor,
-        .i64_atomic_rmw_xor,
-        .i32_atomic_rmw8_xor_u,
-        .i32_atomic_rmw16_xor_u,
-        .i64_atomic_rmw8_xor_u,
-        .i64_atomic_rmw16_xor_u,
-        .i64_atomic_rmw32_xor_u,
-        .i32_atomic_rmw_xchg,
-        .i64_atomic_rmw_xchg,
-        .i32_atomic_rmw8_xchg_u,
-        .i32_atomic_rmw16_xchg_u,
-        .i64_atomic_rmw8_xchg_u,
-        .i64_atomic_rmw16_xchg_u,
-        .i64_atomic_rmw32_xchg_u,
-
-        .i32_atomic_rmw_cmpxchg,
-        .i64_atomic_rmw_cmpxchg,
-        .i32_atomic_rmw8_cmpxchg_u,
-        .i32_atomic_rmw16_cmpxchg_u,
-        .i64_atomic_rmw8_cmpxchg_u,
-        .i64_atomic_rmw16_cmpxchg_u,
-        .i64_atomic_rmw32_cmpxchg_u,
-        => {
-            const mem_arg = emit.mir.extraData(Mir.MemArg, extra_index + 1).data;
-            try encodeMemArg(mem_arg, writer);
+        .misc_prefix => {
+            try code.ensureUnusedCapacity(gpa, 6 + 6);
+            const extra_index = datas[inst].payload;
+            const opcode = mir.extra[extra_index];
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.misc_prefix));
+            leb.writeUleb128(code.fixedWriter(), opcode) catch unreachable;
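+            // Prefixed instructions are encoded as the prefix byte followed by
+            // a ULEB128 sub-opcode and any operands.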
+            switch (@as(std.wasm.MiscOpcode, @enumFromInt(opcode))) {
+                // bulk-memory opcodes
+                .data_drop => {
+                    const segment = mir.extra[extra_index + 1];
+                    leb.writeUleb128(code.fixedWriter(), segment) catch unreachable;
+
+                    inst += 1;
+                    continue :loop tags[inst];
+                },
+                .memory_init => {
+                    const segment = mir.extra[extra_index + 1];
+                    leb.writeUleb128(code.fixedWriter(), segment) catch unreachable;
+                    leb.writeUleb128(code.fixedWriter(), @as(u32, 0)) catch unreachable; // memory index
+
+                    inst += 1;
+                    continue :loop tags[inst];
+                },
+                .memory_fill => {
+                    leb.writeUleb128(code.fixedWriter(), @as(u32, 0)) catch unreachable; // memory index
+
+                    inst += 1;
+                    continue :loop tags[inst];
+                },
+                .memory_copy => {
+                    leb.writeUleb128(code.fixedWriter(), @as(u32, 0)) catch unreachable; // dst memory index
+                    leb.writeUleb128(code.fixedWriter(), @as(u32, 0)) catch unreachable; // src memory index
+
+                    inst += 1;
+                    continue :loop tags[inst];
+                },
+
+                // nontrapping-float-to-int-conversion opcodes
+                .i32_trunc_sat_f32_s,
+                .i32_trunc_sat_f32_u,
+                .i32_trunc_sat_f64_s,
+                .i32_trunc_sat_f64_u,
+                .i64_trunc_sat_f32_s,
+                .i64_trunc_sat_f32_u,
+                .i64_trunc_sat_f64_s,
+                .i64_trunc_sat_f64_u,
+                => {
+                    inst += 1;
+                    continue :loop tags[inst];
+                },
+
+                else => unreachable,
+            }
+            unreachable;
+        },
+        .simd_prefix => {
+            try code.ensureUnusedCapacity(gpa, 6 + 20);
+            const extra_index = datas[inst].payload;
+            const opcode = mir.extra[extra_index];
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.simd_prefix));
+            leb.writeUleb128(code.fixedWriter(), opcode) catch unreachable;
+            switch (@as(std.wasm.SimdOpcode, @enumFromInt(opcode))) {
+                .v128_store,
+                .v128_load,
+                .v128_load8_splat,
+                .v128_load16_splat,
+                .v128_load32_splat,
+                .v128_load64_splat,
+                => {
+                    encodeMemArg(code, mir.extraData(Mir.MemArg, extra_index + 1).data);
+                    inst += 1;
+                    continue :loop tags[inst];
+                },
+                .v128_const, .i8x16_shuffle => {
+                    code.appendSliceAssumeCapacity(std.mem.asBytes(mir.extra[extra_index + 1 ..][0..4]));
+                    inst += 1;
+                    continue :loop tags[inst];
+                },
+                .i8x16_extract_lane_s,
+                .i8x16_extract_lane_u,
+                .i8x16_replace_lane,
+                .i16x8_extract_lane_s,
+                .i16x8_extract_lane_u,
+                .i16x8_replace_lane,
+                .i32x4_extract_lane,
+                .i32x4_replace_lane,
+                .i64x2_extract_lane,
+                .i64x2_replace_lane,
+                .f32x4_extract_lane,
+                .f32x4_replace_lane,
+                .f64x2_extract_lane,
+                .f64x2_replace_lane,
+                => {
+                    code.appendAssumeCapacity(@intCast(mir.extra[extra_index + 1]));
+                    inst += 1;
+                    continue :loop tags[inst];
+                },
+                .i8x16_splat,
+                .i16x8_splat,
+                .i32x4_splat,
+                .i64x2_splat,
+                .f32x4_splat,
+                .f64x2_splat,
+                => {
+                    inst += 1;
+                    continue :loop tags[inst];
+                },
+                else => unreachable,
+            }
+            unreachable;
         },
-        .atomic_fence => {
-            // TODO: When multi-memory proposal is accepted and implemented in the compiler,
-            // change this to (user-)specified index, rather than hardcode it to memory index 0.
-            const memory_index: u32 = 0;
-            try leb128.writeUleb128(writer, memory_index);
+        .atomics_prefix => {
+            try code.ensureUnusedCapacity(gpa, 6 + 20);
+
+            const extra_index = datas[inst].payload;
+            const opcode = mir.extra[extra_index];
+            code.appendAssumeCapacity(@intFromEnum(std.wasm.Opcode.atomics_prefix));
+            leb.writeUleb128(code.fixedWriter(), opcode) catch unreachable;
+            switch (@as(std.wasm.AtomicsOpcode, @enumFromInt(opcode))) {
+                .i32_atomic_load,
+                .i64_atomic_load,
+                .i32_atomic_load8_u,
+                .i32_atomic_load16_u,
+                .i64_atomic_load8_u,
+                .i64_atomic_load16_u,
+                .i64_atomic_load32_u,
+                .i32_atomic_store,
+                .i64_atomic_store,
+                .i32_atomic_store8,
+                .i32_atomic_store16,
+                .i64_atomic_store8,
+                .i64_atomic_store16,
+                .i64_atomic_store32,
+                .i32_atomic_rmw_add,
+                .i64_atomic_rmw_add,
+                .i32_atomic_rmw8_add_u,
+                .i32_atomic_rmw16_add_u,
+                .i64_atomic_rmw8_add_u,
+                .i64_atomic_rmw16_add_u,
+                .i64_atomic_rmw32_add_u,
+                .i32_atomic_rmw_sub,
+                .i64_atomic_rmw_sub,
+                .i32_atomic_rmw8_sub_u,
+                .i32_atomic_rmw16_sub_u,
+                .i64_atomic_rmw8_sub_u,
+                .i64_atomic_rmw16_sub_u,
+                .i64_atomic_rmw32_sub_u,
+                .i32_atomic_rmw_and,
+                .i64_atomic_rmw_and,
+                .i32_atomic_rmw8_and_u,
+                .i32_atomic_rmw16_and_u,
+                .i64_atomic_rmw8_and_u,
+                .i64_atomic_rmw16_and_u,
+                .i64_atomic_rmw32_and_u,
+                .i32_atomic_rmw_or,
+                .i64_atomic_rmw_or,
+                .i32_atomic_rmw8_or_u,
+                .i32_atomic_rmw16_or_u,
+                .i64_atomic_rmw8_or_u,
+                .i64_atomic_rmw16_or_u,
+                .i64_atomic_rmw32_or_u,
+                .i32_atomic_rmw_xor,
+                .i64_atomic_rmw_xor,
+                .i32_atomic_rmw8_xor_u,
+                .i32_atomic_rmw16_xor_u,
+                .i64_atomic_rmw8_xor_u,
+                .i64_atomic_rmw16_xor_u,
+                .i64_atomic_rmw32_xor_u,
+                .i32_atomic_rmw_xchg,
+                .i64_atomic_rmw_xchg,
+                .i32_atomic_rmw8_xchg_u,
+                .i32_atomic_rmw16_xchg_u,
+                .i64_atomic_rmw8_xchg_u,
+                .i64_atomic_rmw16_xchg_u,
+                .i64_atomic_rmw32_xchg_u,
+
+                .i32_atomic_rmw_cmpxchg,
+                .i64_atomic_rmw_cmpxchg,
+                .i32_atomic_rmw8_cmpxchg_u,
+                .i32_atomic_rmw16_cmpxchg_u,
+                .i64_atomic_rmw8_cmpxchg_u,
+                .i64_atomic_rmw16_cmpxchg_u,
+                .i64_atomic_rmw32_cmpxchg_u,
+                => {
+                    const mem_arg = mir.extraData(Mir.MemArg, extra_index + 1).data;
+                    encodeMemArg(code, mem_arg);
+                    inst += 1;
+                    continue :loop tags[inst];
+                },
+                .atomic_fence => {
+                    // Hard-codes memory index 0, since the multi-memory
+                    // proposal has not yet been accepted and implemented.
+                    const memory_index: u32 = 0;
+                    leb.writeUleb128(code.fixedWriter(), memory_index) catch unreachable;
+                    inst += 1;
+                    continue :loop tags[inst];
+                },
+            }
+            unreachable;
         },
-        else => |tag| return emit.fail("TODO: Implement atomic instruction: {s}", .{@tagName(tag)}),
     }
+    unreachable;
 }
 
-fn emitMemFill(emit: *Emit) !void {
-    try emit.code.append(0xFC);
-    try emit.code.append(0x0B);
-    // When multi-memory proposal reaches phase 4, we
-    // can emit a different memory index here.
-    // For now we will always emit index 0.
-    try leb128.writeUleb128(emit.code.writer(), @as(u32, 0));
-}
-
-fn emitDbgLine(emit: *Emit, inst: Mir.Inst.Index) !void {
-    const extra_index = emit.mir.instructions.items(.data)[inst].payload;
-    const dbg_line = emit.mir.extraData(Mir.DbgLineColumn, extra_index).data;
-    try emit.dbgAdvancePCAndLine(dbg_line.line, dbg_line.column);
-}
-
-fn dbgAdvancePCAndLine(emit: *Emit, line: u32, column: u32) !void {
-    if (emit.dbg_output != .dwarf) return;
-
-    const delta_line = @as(i32, @intCast(line)) - @as(i32, @intCast(emit.prev_di_line));
-    const delta_pc = emit.offset() - emit.prev_di_offset;
-    // TODO: This must emit a relocation to calculate the offset relative
-    // to the code section start.
-    try emit.dbg_output.dwarf.advancePCAndLine(delta_line, delta_pc);
-
-    emit.prev_di_line = line;
-    emit.prev_di_column = column;
-    emit.prev_di_offset = emit.offset();
-}
-
-fn emitDbgPrologueEnd(emit: *Emit) !void {
-    if (emit.dbg_output != .dwarf) return;
-
-    try emit.dbg_output.dwarf.setPrologueEnd();
-    try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column);
+/// Asserts at least 20 bytes of unused capacity in `code`.
+fn encodeMemArg(code: *std.ArrayListUnmanaged(u8), mem_arg: Mir.MemArg) void {
+    assert(code.unusedCapacitySlice().len >= 20);
+    // Wasm encodes alignment as power of 2, rather than natural alignment.
+    const encoded_alignment = @ctz(mem_arg.alignment);
+    leb.writeUleb128(code.fixedWriter(), encoded_alignment) catch unreachable;
+    leb.writeUleb128(code.fixedWriter(), mem_arg.offset) catch unreachable;
 }
 
-fn emitDbgEpilogueBegin(emit: *Emit) !void {
-    if (emit.dbg_output != .dwarf) return;
+fn uavRefOff(wasm: *link.File.Wasm, code: *std.ArrayListUnmanaged(u8), data: Mir.UavRefOff) !void {
+    const comp = wasm.base.comp;
+    const gpa = comp.gpa;
+    const target = comp.root_mod.resolved_target.result;
+    const is_wasm32 = target.cpu.arch == .wasm32;
+    const is_obj = comp.config.output_mode == .Obj;
+    const opcode: std.wasm.Opcode = if (is_wasm32) .i32_const else .i64_const;
+
+    try code.ensureUnusedCapacity(gpa, 11);
+    code.appendAssumeCapacity(@intFromEnum(opcode));
+
+    // If outputting an object file, this needs to be a relocation, since global
+    // constant data may be mixed with other object files in the final link.
+    if (is_obj) {
+        try wasm.out_relocs.append(gpa, .{
+            .offset = @intCast(code.items.len),
+            .index = try wasm.uavSymbolIndex(data.ip_index),
+            .tag = if (is_wasm32) .MEMORY_ADDR_LEB else .MEMORY_ADDR_LEB64,
+            .addend = data.offset,
+        });
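+        // Fixed-width placeholder sized to the pointer width; the relocation
+        // overwrites these bytes at link time.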
+        code.appendNTimesAssumeCapacity(0, if (is_wasm32) 5 else 10);
+        return;
+    }
 
-    try emit.dbg_output.dwarf.setEpilogueBegin();
-    try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column);
+    // When linking into the final binary, no relocation mechanism is necessary.
+    const addr: i64 = try wasm.uavAddr(data.ip_index);
+    // Wasm const immediates are encoded as signed LEB128.
+    leb.writeIleb128(code.fixedWriter(), addr + data.offset) catch unreachable;
 }
src/arch/wasm/Mir.zig
@@ -10,10 +10,12 @@ const Mir = @This();
 const InternPool = @import("../../InternPool.zig");
 const Wasm = @import("../../link/Wasm.zig");
 
+const builtin = @import("builtin");
 const std = @import("std");
+const assert = std.debug.assert;
 
-/// A struct of array that represents each individual wasm
+/// A struct-of-arrays list of every MIR instruction in the function.
 instructions: std.MultiArrayList(Inst).Slice,
 /// A slice of indexes where the meaning of the data is determined by the
 /// `Inst.Tag` value.
 extra: []const u32,
@@ -28,13 +30,7 @@ pub const Inst = struct {
     /// The position of a given MIR instruction within the instruction list.
     pub const Index = u32;
 
-    /// Contains all possible wasm opcodes the Zig compiler may emit
-    /// Rather than re-using std.wasm.Opcode, we only declare the opcodes
-    /// we need, and also use this possibility to document how to access
-    /// their payload.
-    ///
-    /// Note: Uses its actual opcode value representation to easily convert
-    /// to and from its binary representation.
+    /// Some tags match wasm opcode values to facilitate trivial lowering.
     pub const Tag = enum(u8) {
         /// Uses `nop`
         @"unreachable" = 0x00,
@@ -46,19 +42,27 @@ pub const Inst = struct {
         ///
         /// Type of the loop is given in data `block_type`
         loop = 0x03,
+        /// Lowers to an i32_const (wasm32) or i64_const (wasm64) which is the
+        /// memory address of an unnamed constant. When emitting an object
+        /// file, this adds a relocation.
+        ///
+        /// Data is `ip_index`.
+        uav_ref,
+        /// Lowers to an i32_const (wasm32) or i64_const (wasm64) which is the
+        /// memory address of an unnamed constant, offset by an integer value.
+        /// When emitting an object file, this adds a relocation.
+        ///
+        /// Data is `payload` pointing to a `UavRefOff`.
+        uav_ref_off,
         /// Inserts debug information about the current line and column
         /// of the source code
         ///
         /// Uses `payload` of which the payload type is `DbgLineColumn`
         dbg_line = 0x06,
-        /// Emits epilogue begin debug information
+        /// Emits epilogue begin debug information. Marks the end of the function.
         ///
         /// Uses `nop`
         dbg_epilogue_begin = 0x07,
-        /// Emits prologue end debug information
-        ///
-        /// Uses `nop`
-        dbg_prologue_end = 0x08,
         /// Represents the end of a function body or an initialization expression
         ///
         /// Payload is `nop`
@@ -80,13 +84,13 @@ pub const Inst = struct {
         ///
         /// Uses `nop`
         @"return" = 0x0F,
+        /// Calls a function using `nav_index`.
+        call_nav,
         /// Calls a function pointer by its function signature
         /// and index into the function table.
         ///
-        /// Uses `label`
+        /// Uses `func_ty`
         call_indirect = 0x11,
-        /// Calls a function using `nav_index`.
-        call_nav,
         /// Calls a function using `func_index`.
         call_func,
         /// Calls a function by its index.
@@ -94,9 +98,11 @@ pub const Inst = struct {
         /// The function is the auto-generated tag name function for the type
         /// provided in `ip_index`.
         call_tag_name,
-        /// Contains a symbol to a function pointer
-        /// uses `label`
+        /// Lowers to an i32_const containing the index of a function.
+        /// When emitting an object file, this adds a relocation.
+        /// Uses `ip_index`.
         function_index,
+
         /// Pops three values from the stack and pushes
         /// the first or second value dependent on the third value.
         /// Uses `tag`
@@ -115,15 +121,11 @@ pub const Inst = struct {
         ///
         /// Uses `label`
         local_tee = 0x22,
-        /// Loads a (mutable) global at given index onto the stack
+        /// Pops a value from the stack and sets the stack pointer global.
+        /// The value must be the same type as the stack pointer global.
         ///
-        /// Uses `label`
-        global_get = 0x23,
-        /// Pops a value from the stack and sets the global at given index.
-        /// Note: Both types must be equal and global must be marked mutable.
-        ///
-        /// Uses `label`.
-        global_set = 0x24,
+        /// Uses `tag` (no additional data).
+        global_set_sp,
         /// Loads a 32-bit integer from memory (data section) onto the stack
         /// Pops the value from the stack which represents the offset into memory.
         ///
@@ -259,19 +261,19 @@ pub const Inst = struct {
         /// Loads a 32-bit signed immediate value onto the stack
         ///
         /// Uses `imm32`
-        i32_const = 0x41,
+        i32_const,
         /// Loads a 64-bit signed immediate value onto the stack
         ///
         /// Uses `payload` of type `Imm64`
-        i64_const = 0x42,
+        i64_const,
         /// Loads a 32-bit float value onto the stack.
         ///
         /// Uses `float32`
-        f32_const = 0x43,
+        f32_const,
         /// Loads a 64-bit float value onto the stack.
         ///
         /// Uses `payload` of type `Float64`
-        f64_const = 0x44,
+        f64_const,
         /// Uses `tag`
         i32_eqz = 0x45,
         /// Uses `tag`
@@ -525,25 +527,19 @@ pub const Inst = struct {
         ///
         /// The `data` field depends on the extension instruction and
         /// may contain additional data.
-        misc_prefix = 0xFC,
+        misc_prefix,
         /// The instruction consists of a simd opcode.
         /// The actual simd-opcode is found at payload's index.
         ///
         /// The `data` field depends on the simd instruction and
         /// may contain additional data.
-        simd_prefix = 0xFD,
+        simd_prefix,
         /// The instruction consists of an atomics opcode.
         /// The actual atomics-opcode is found at payload's index.
         ///
         /// The `data` field depends on the atomics instruction and
         /// may contain additional data.
         atomics_prefix = 0xFE,
-        /// Contains a symbol to a memory address
-        /// Uses `label`
-        ///
-        /// Note: This uses `0xFF` as value as it is unused and not reserved
-        /// by the wasm specification, making it safe to use.
-        memory_address = 0xFF,
 
         /// From a given wasm opcode, returns a MIR tag.
         pub fn fromOpcode(opcode: std.wasm.Opcode) Tag {
@@ -563,30 +559,38 @@ pub const Inst = struct {
         /// Uses no additional data
         tag: void,
         /// Contains the result type of a block
-        ///
-        /// Used by `block` and `loop`
         block_type: u8,
-        /// Contains an u32 index into a wasm section entry, such as a local.
-        /// Note: This is not an index to another instruction.
-        ///
-        /// Used by e.g. `local_get`, `local_set`, etc.
+        /// Label: Each structured control instruction introduces an implicit label.
+        /// Labels are targets for branch instructions that reference them with
+        /// label indices. Unlike with other index spaces, indexing of labels
+        /// is relative by nesting depth, that is, label 0 refers to the
+        /// innermost structured control instruction enclosing the referring
+        /// branch instruction, while increasing indices refer to those farther
+        /// out. Consequently, labels can only be referenced from within the
+        /// associated structured control instruction.
         label: u32,
+        /// Local: The index space for locals is only accessible inside a function and
+        /// includes the parameters of that function, which precede the local
+        /// variables.
+        local: u32,
         /// A 32-bit immediate value.
-        ///
-        /// Used by `i32_const`
         imm32: i32,
         /// A 32-bit float value
-        ///
-        /// Used by `f32_float`
         float32: f32,
         /// Index into `extra`. Meaning of what can be found there is context-dependent.
-        ///
-        /// Used by e.g. `br_table`
         payload: u32,
 
         ip_index: InternPool.Index,
         nav_index: InternPool.Nav.Index,
         func_index: Wasm.FunctionIndex,
+        func_ty: Wasm.FunctionType.Index,
+
+        comptime {
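+            // Bare unions carry a hidden safety tag in Debug and ReleaseSafe,
+            // so the 4-byte size guarantee can only be asserted in release modes.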
+            switch (builtin.mode) {
+                .Debug, .ReleaseSafe => {},
+                .ReleaseFast, .ReleaseSmall => assert(@sizeOf(Data) == 4),
+            }
+        }
     };
 };
 
@@ -616,28 +620,19 @@ pub const JumpTable = struct {
     length: u32,
 };
 
-/// Stores an unsigned 64bit integer
-/// into a 32bit most significant bits field
-/// and a 32bit least significant bits field.
-///
-/// This uses an unsigned integer rather than a signed integer
-/// as we can easily store those into `extra`
 pub const Imm64 = struct {
     msb: u32,
     lsb: u32,
 
-    pub fn fromU64(imm: u64) Imm64 {
+    pub fn init(full: u64) Imm64 {
         return .{
-            .msb = @as(u32, @truncate(imm >> 32)),
-            .lsb = @as(u32, @truncate(imm)),
+            .msb = @truncate(full >> 32),
+            .lsb = @truncate(full),
         };
     }
 
-    pub fn toU64(self: Imm64) u64 {
-        var result: u64 = 0;
-        result |= @as(u64, self.msb) << 32;
-        result |= @as(u64, self.lsb);
-        return result;
+    pub fn toInt(i: Imm64) u64 {
+        return (@as(u64, i.msb) << 32) | @as(u64, i.lsb);
     }
 };
 
@@ -645,23 +640,16 @@ pub const Float64 = struct {
     msb: u32,
     lsb: u32,
 
-    pub fn fromFloat64(float: f64) Float64 {
-        const tmp = @as(u64, @bitCast(float));
+    pub fn init(f: f64) Float64 {
+        const int: u64 = @bitCast(f);
         return .{
-            .msb = @as(u32, @truncate(tmp >> 32)),
-            .lsb = @as(u32, @truncate(tmp)),
+            .msb = @truncate(int >> 32),
+            .lsb = @truncate(int),
         };
     }
 
-    pub fn toF64(self: Float64) f64 {
-        @as(f64, @bitCast(self.toU64()));
-    }
-
-    pub fn toU64(self: Float64) u64 {
-        var result: u64 = 0;
-        result |= @as(u64, self.msb) << 32;
-        result |= @as(u64, self.lsb);
-        return result;
+    pub fn toInt(f: Float64) u64 {
+        return (@as(u64, f.msb) << 32) | @as(u64, f.lsb);
     }
 };
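
A round-trip check for the two split-word types above (editorial sketch;
assumes this file is importable as `Mir.zig`):

    const std = @import("std");
    const Mir = @import("Mir.zig");

    test "Imm64 and Float64 round-trip through msb/lsb halves" {
        const x: u64 = 0x1234_5678_9abc_def0;
        try std.testing.expectEqual(x, Mir.Imm64.init(x).toInt());

        const f: f64 = 3.14159;
        try std.testing.expectEqual(@as(u64, @bitCast(f)), Mir.Float64.init(f).toInt());
    }
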
 
@@ -670,11 +658,9 @@ pub const MemArg = struct {
     alignment: u32,
 };
 
-/// Represents a memory address, which holds both the pointer
-/// or the parent pointer and the offset to it.
-pub const Memory = struct {
-    pointer: u32,
-    offset: u32,
+pub const UavRefOff = struct {
+    ip_index: InternPool.Index,
+    offset: i32,
 };
 
 /// Maps a source line with wasm bytecode
src/arch/x86_64/CodeGen.zig
@@ -1040,7 +1040,7 @@ pub fn generateLazy(
     emit.emitMir() catch |err| switch (err) {
         error.LowerFail, error.EmitFail => return function.failMsg(emit.lower.err_msg.?),
         error.InvalidInstruction => return function.fail("failed to find a viable x86 instruction (Zig compiler bug)", .{}),
-        error.CannotEncode => return function.fail("failed to find encode x86 instruction (Zig compiler bug)", .{}),
+        error.CannotEncode => return function.fail("failed to encode x86 instruction (Zig compiler bug)", .{}),
         else => |e| return function.fail("failed to emit MIR: {s}", .{@errorName(e)}),
     };
 }
src/codegen/llvm.zig
@@ -1838,11 +1838,11 @@ pub const Object = struct {
         o: *Object,
         zcu: *Zcu,
         exported_value: InternPool.Index,
-        export_indices: []const u32,
+        export_indices: []const Zcu.Export.Index,
     ) link.File.UpdateExportsError!void {
         const gpa = zcu.gpa;
         const ip = &zcu.intern_pool;
-        const main_exp_name = try o.builder.strtabString(zcu.all_exports.items[export_indices[0]].opts.name.toSlice(ip));
+        const main_exp_name = try o.builder.strtabString(export_indices[0].ptr(zcu).opts.name.toSlice(ip));
         const global_index = i: {
             const gop = try o.uav_map.getOrPut(gpa, exported_value);
             if (gop.found_existing) {
@@ -1873,11 +1873,11 @@ pub const Object = struct {
         o: *Object,
         zcu: *Zcu,
         global_index: Builder.Global.Index,
-        export_indices: []const u32,
+        export_indices: []const Zcu.Export.Index,
     ) link.File.UpdateExportsError!void {
         const comp = zcu.comp;
         const ip = &zcu.intern_pool;
-        const first_export = zcu.all_exports.items[export_indices[0]];
+        const first_export = export_indices[0].ptr(zcu);
 
         // We will rename this global to have a name matching `first_export`.
         // Successive exports become aliases.
@@ -1934,7 +1934,7 @@ pub const Object = struct {
         // Until then we iterate over existing aliases and make them point
         // to the correct decl, or otherwise add a new alias. Old aliases are leaked.
         for (export_indices[1..]) |export_idx| {
-            const exp = zcu.all_exports.items[export_idx];
+            const exp = export_idx.ptr(zcu);
             const exp_name = try o.builder.strtabString(exp.opts.name.toSlice(ip));
             if (o.builder.getGlobal(exp_name)) |global| {
                 switch (global.ptrConst(&o.builder).kind) {
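
The recurring change in this and the following linker backends replaces raw
`u32` export indices with the typed `Zcu.Export.Index`, whose `ptr(zcu)`
accessor centralizes the `all_exports.items[...]` lookup. A self-contained
miniature of the pattern (names here are illustrative, not the real
definitions):

    const std = @import("std");

    const Export = struct { name: []const u8 };

    const ZcuModel = struct {
        all_exports: std.ArrayListUnmanaged(Export) = .empty,

        /// Typed index: enum(u32) prevents mixing export indices with
        /// other u32 index spaces at compile time.
        const ExportIndex = enum(u32) {
            _,
            fn ptr(i: ExportIndex, zcu: *ZcuModel) *Export {
                return &zcu.all_exports.items[@intFromEnum(i)];
            }
        };
    };

    test "typed index resolves through ptr" {
        var zcu: ZcuModel = .{};
        defer zcu.all_exports.deinit(std.testing.allocator);
        try zcu.all_exports.append(std.testing.allocator, .{ .name = "main" });
        const idx: ZcuModel.ExportIndex = @enumFromInt(0);
        try std.testing.expectEqualStrings("main", idx.ptr(&zcu).name);
    }
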
src/link/Elf/ZigObject.zig
@@ -1758,7 +1758,7 @@ pub fn updateExports(
             break :blk self.navs.getPtr(nav).?;
         },
         .uav => |uav| self.uavs.getPtr(uav) orelse blk: {
-            const first_exp = zcu.all_exports.items[export_indices[0]];
+            const first_exp = export_indices[0].ptr(zcu);
             const res = try self.lowerUav(elf_file, pt, uav, .none, first_exp.src);
             switch (res) {
                 .mcv => {},
@@ -1779,7 +1779,7 @@ pub fn updateExports(
     const esym_shndx = self.symtab.items(.shndx)[esym_index];
 
     for (export_indices) |export_idx| {
-        const exp = zcu.all_exports.items[export_idx];
+        const exp = export_idx.ptr(zcu);
         if (exp.opts.section.unwrap()) |section_name| {
             if (!section_name.eqlSlice(".text", &zcu.intern_pool)) {
                 try zcu.failed_exports.ensureUnusedCapacity(zcu.gpa, 1);
src/link/MachO/ZigObject.zig
@@ -1259,7 +1259,7 @@ pub fn updateExports(
             break :blk self.navs.getPtr(nav).?;
         },
         .uav => |uav| self.uavs.getPtr(uav) orelse blk: {
-            const first_exp = zcu.all_exports.items[export_indices[0]];
+            const first_exp = export_indices[0].ptr(zcu);
             const res = try self.lowerUav(macho_file, pt, uav, .none, first_exp.src);
             switch (res) {
                 .mcv => {},
@@ -1279,7 +1279,7 @@ pub fn updateExports(
     const nlist = self.symtab.items(.nlist)[nlist_idx];
 
     for (export_indices) |export_idx| {
-        const exp = zcu.all_exports.items[export_idx];
+        const exp = export_idx.ptr(zcu);
         if (exp.opts.section.unwrap()) |section_name| {
             if (!section_name.eqlSlice("__text", &zcu.intern_pool)) {
                 try zcu.failed_exports.ensureUnusedCapacity(zcu.gpa, 1);
src/link/C.zig
@@ -469,7 +469,7 @@ pub fn flushModule(self: *C, arena: Allocator, tid: Zcu.PerThread.Id, prog_node:
         defer export_names.deinit(gpa);
         try export_names.ensureTotalCapacity(gpa, @intCast(zcu.single_exports.count()));
         for (zcu.single_exports.values()) |export_index| {
-            export_names.putAssumeCapacity(zcu.all_exports.items[export_index].opts.name, {});
+            export_names.putAssumeCapacity(export_index.ptr(zcu).opts.name, {});
         }
         for (zcu.multi_exports.values()) |info| {
             try export_names.ensureUnusedCapacity(gpa, info.len);
src/link/Coff.zig
@@ -1478,7 +1478,7 @@ pub fn updateExports(
     coff: *Coff,
     pt: Zcu.PerThread,
     exported: Zcu.Exported,
-    export_indices: []const u32,
+    export_indices: []const Zcu.Export.Index,
 ) link.File.UpdateExportsError!void {
     if (build_options.skip_non_native and builtin.object_format != .coff) {
         @panic("Attempted to compile for object format that was disabled by build configuration");
@@ -1493,7 +1493,7 @@ pub fn updateExports(
         // Even in the case of LLVM, we need to notice certain exported symbols in order to
         // detect the default subsystem.
         for (export_indices) |export_idx| {
-            const exp = zcu.all_exports.items[export_idx];
+            const exp = export_idx.ptr(zcu);
             const exported_nav_index = switch (exp.exported) {
                 .nav => |nav| nav,
                 .uav => continue,
@@ -1536,7 +1536,7 @@ pub fn updateExports(
             break :blk coff.navs.getPtr(nav).?;
         },
         .uav => |uav| coff.uavs.getPtr(uav) orelse blk: {
-            const first_exp = zcu.all_exports.items[export_indices[0]];
+            const first_exp = export_indices[0].ptr(zcu);
             const res = try coff.lowerUav(pt, uav, .none, first_exp.src);
             switch (res) {
                 .mcv => {},
@@ -1555,7 +1555,7 @@ pub fn updateExports(
     const atom = coff.getAtom(atom_index);
 
     for (export_indices) |export_idx| {
-        const exp = zcu.all_exports.items[export_idx];
+        const exp = export_idx.ptr(zcu);
         log.debug("adding new export '{}'", .{exp.opts.name.fmt(&zcu.intern_pool)});
 
         if (exp.opts.section.toSlice(&zcu.intern_pool)) |section_name| {
src/link/Plan9.zig
@@ -345,6 +345,7 @@ fn putFn(self: *Plan9, nav_index: InternPool.Nav.Index, out: FnNavOutput) !void
         try a.writer().writeInt(u16, 1, .big);
 
         // getting the full file path
+        // TODO don't call getcwd here, that is inappropriate
         var buf: [std.fs.max_path_bytes]u8 = undefined;
         const full_path = try std.fs.path.join(arena, &.{
             file.mod.root.root_dir.path orelse try std.posix.getcwd(&buf),
@@ -415,7 +416,7 @@ pub fn updateFunc(
     };
     defer dbg_info_output.dbg_line.deinit();
 
-    const res = try codegen.generateFunction(
+    try codegen.generateFunction(
         &self.base,
         pt,
         zcu.navSrcLoc(func.owner_nav),
@@ -425,10 +426,7 @@ pub fn updateFunc(
         &code_buffer,
         .{ .plan9 = &dbg_info_output },
     );
-    const code = switch (res) {
-        .ok => try code_buffer.toOwnedSlice(),
-        .fail => |em| return zcu.failed_codegen.put(gpa, func.owner_nav, em),
-    };
+    const code = try code_buffer.toOwnedSlice();
     self.getAtomPtr(atom_idx).code = .{
         .code_ptr = null,
         .other = .{ .nav_index = func.owner_nav },
@@ -439,7 +437,9 @@ pub fn updateFunc(
         .start_line = dbg_info_output.start_line.?,
         .end_line = dbg_info_output.end_line,
     };
-    try self.putFn(func.owner_nav, out);
+    // The awkward error handling here is due to putFn calling `std.posix.getcwd`, which it should not do.
+    self.putFn(func.owner_nav, out) catch |err|
+        return zcu.codegenFail(func.owner_nav, "failed to put fn: {s}", .{@errorName(err)});
     return self.updateFinish(pt, func.owner_nav);
 }
 
@@ -915,25 +915,25 @@ pub fn flushModule(
 }
 fn addNavExports(
     self: *Plan9,
-    mod: *Zcu,
+    zcu: *Zcu,
     nav_index: InternPool.Nav.Index,
-    export_indices: []const u32,
+    export_indices: []const Zcu.Export.Index,
 ) !void {
     const gpa = self.base.comp.gpa;
     const metadata = self.navs.getPtr(nav_index).?;
     const atom = self.getAtom(metadata.index);
 
     for (export_indices) |export_idx| {
-        const exp = mod.all_exports.items[export_idx];
-        const exp_name = exp.opts.name.toSlice(&mod.intern_pool);
+        const exp = export_idx.ptr(zcu);
+        const exp_name = exp.opts.name.toSlice(&zcu.intern_pool);
         // plan9 does not support custom sections
         if (exp.opts.section.unwrap()) |section_name| {
-            if (!section_name.eqlSlice(".text", &mod.intern_pool) and
-                !section_name.eqlSlice(".data", &mod.intern_pool))
+            if (!section_name.eqlSlice(".text", &zcu.intern_pool) and
+                !section_name.eqlSlice(".data", &zcu.intern_pool))
             {
-                try mod.failed_exports.put(mod.gpa, export_idx, try Zcu.ErrorMsg.create(
+                try zcu.failed_exports.put(zcu.gpa, export_idx, try Zcu.ErrorMsg.create(
                     gpa,
-                    mod.navSrcLoc(nav_index),
+                    zcu.navSrcLoc(nav_index),
                     "plan9 does not support extra sections",
                     .{},
                 ));
@@ -1252,7 +1252,7 @@ pub fn writeSyms(self: *Plan9, buf: *std.ArrayList(u8)) !void {
             try self.writeSym(writer, sym);
             if (self.nav_exports.get(nav_index)) |export_indices| {
                 for (export_indices) |export_idx| {
-                    const exp = zcu.all_exports.items[export_idx];
+                    const exp = export_idx.ptr(zcu);
                     if (nav_metadata.getExport(self, exp.opts.name.toSlice(ip))) |exp_i| {
                         try self.writeSym(writer, self.syms.items[exp_i]);
                     }
@@ -1291,7 +1291,7 @@ pub fn writeSyms(self: *Plan9, buf: *std.ArrayList(u8)) !void {
                 try self.writeSym(writer, sym);
                 if (self.nav_exports.get(nav_index)) |export_indices| {
                     for (export_indices) |export_idx| {
-                        const exp = zcu.all_exports.items[export_idx];
+                        const exp = export_idx.ptr(zcu);
                         if (nav_metadata.getExport(self, exp.opts.name.toSlice(ip))) |exp_i| {
                             const s = self.syms.items[exp_i];
                             if (mem.eql(u8, s.name, "_start"))
src/link/Wasm.zig
@@ -30,6 +30,7 @@ const log = std.log.scoped(.link);
 const mem = std.mem;
 
 const Air = @import("../Air.zig");
+const Mir = @import("../arch/wasm/Mir.zig");
 const CodeGen = @import("../arch/wasm/CodeGen.zig");
 const Compilation = @import("../Compilation.zig");
 const Dwarf = @import("Dwarf.zig");
@@ -151,6 +152,7 @@ dump_argv_list: std.ArrayListUnmanaged([]const u8),
 preloaded_strings: PreloadedStrings,
 
 navs: std.AutoArrayHashMapUnmanaged(InternPool.Nav.Index, Nav) = .empty,
+zcu_funcs: std.AutoArrayHashMapUnmanaged(InternPool.Index, ZcuFunc) = .empty,
 nav_exports: std.AutoArrayHashMapUnmanaged(NavExport, Zcu.Export.Index) = .empty,
 uav_exports: std.AutoArrayHashMapUnmanaged(UavExport, Zcu.Export.Index) = .empty,
 imports: std.AutoArrayHashMapUnmanaged(InternPool.Nav.Index, void) = .empty,
@@ -203,6 +205,13 @@ table_imports: std.AutoArrayHashMapUnmanaged(String, ObjectTableImportIndex) = .
 
 any_exports_updated: bool = true,
 
+/// All MIR instructions for all Zcu functions.
+mir_instructions: std.MultiArrayList(Mir.Inst) = .{},
+/// Corresponds to `mir_instructions`.
+mir_extra: std.ArrayListUnmanaged(u32) = .empty,
+/// All local types for all Zcu functions.
+all_zcu_locals: std.ArrayListUnmanaged(u8) = .empty,
+
 /// Index into `objects`.
 pub const ObjectIndex = enum(u32) {
     _,
@@ -439,6 +448,24 @@ pub const Nav = extern struct {
     };
 };
 
+pub const ZcuFunc = extern struct {
+    function: CodeGen.Function,
+
+    /// Index into `zcu_funcs`.
+    /// Note that swapRemove is sometimes performed on `zcu_funcs`.
+    pub const Index = enum(u32) {
+        _,
+
+        pub fn key(i: @This(), wasm: *const Wasm) *InternPool.Index {
+            return &wasm.zcu_funcs.keys()[@intFromEnum(i)];
+        }
+
+        pub fn value(i: @This(), wasm: *const Wasm) *ZcuFunc {
+            return &wasm.zcu_funcs.values()[@intFromEnum(i)];
+        }
+    };
+};
+
 pub const NavExport = extern struct {
     name: String,
     nav_index: InternPool.Nav.Index,
@@ -932,7 +959,7 @@ pub const ValtypeList = enum(u32) {
     }
 
     pub fn slice(index: ValtypeList, wasm: *const Wasm) []const std.wasm.Valtype {
-        return @bitCast(String.slice(@enumFromInt(@intFromEnum(index)), wasm));
+        return @ptrCast(String.slice(@enumFromInt(@intFromEnum(index)), wasm));
     }
 };
 
@@ -1430,12 +1457,17 @@ pub fn deinit(wasm: *Wasm) void {
     if (wasm.llvm_object) |llvm_object| llvm_object.deinit();
 
     wasm.navs.deinit(gpa);
+    wasm.zcu_funcs.deinit(gpa);
     wasm.nav_exports.deinit(gpa);
     wasm.uav_exports.deinit(gpa);
     wasm.imports.deinit(gpa);
 
     wasm.flush_buffer.deinit(gpa);
 
+    wasm.mir_instructions.deinit(gpa);
+    wasm.mir_extra.deinit(gpa);
+    wasm.all_zcu_locals.deinit(gpa);
+
     if (wasm.dwarf) |*dwarf| dwarf.deinit();
 
     wasm.object_function_imports.deinit(gpa);
@@ -1474,49 +1506,11 @@ pub fn updateFunc(wasm: *Wasm, pt: Zcu.PerThread, func_index: InternPool.Index,
     }
     if (wasm.llvm_object) |llvm_object| return llvm_object.updateFunc(pt, func_index, air, liveness);
 
-    const zcu = pt.zcu;
-    const gpa = zcu.gpa;
-    const func = pt.zcu.funcInfo(func_index);
-    const nav_index = func.owner_nav;
-
-    const code_start: u32 = @intCast(wasm.string_bytes.items.len);
-    const relocs_start: u32 = @intCast(wasm.relocations.len);
-    wasm.string_bytes_lock.lock();
-
     dev.check(.wasm_backend);
-    try CodeGen.generate(
-        &wasm.base,
-        pt,
-        zcu.navSrcLoc(nav_index),
-        func_index,
-        air,
-        liveness,
-        &wasm.string_bytes,
-        .none,
-    );
-
-    const code_len: u32 = @intCast(wasm.string_bytes.items.len - code_start);
-    const relocs_len: u32 = @intCast(wasm.relocations.len - relocs_start);
-    wasm.string_bytes_lock.unlock();
-
-    const code: Nav.Code = .{
-        .off = code_start,
-        .len = code_len,
-    };
 
-    const gop = try wasm.navs.getOrPut(gpa, nav_index);
-    if (gop.found_existing) {
-        @panic("TODO reuse these resources");
-    } else {
-        _ = wasm.imports.swapRemove(nav_index);
-    }
-    gop.value_ptr.* = .{
-        .code = code,
-        .relocs = .{
-            .off = relocs_start,
-            .len = relocs_len,
-        },
-    };
+    try wasm.zcu_funcs.put(pt.zcu.gpa, func_index, .{
+        .function = try CodeGen.function(wasm, pt, func_index, air, liveness),
+    });
 }
 
 // Generate code for the "Nav", storing it in memory to be later written to
@@ -1642,8 +1636,8 @@ pub fn updateExports(
         const exp = export_idx.ptr(zcu);
         const name = try wasm.internString(exp.opts.name.toSlice(ip));
         switch (exported) {
-            .nav => |nav_index| wasm.nav_exports.put(gpa, .{ .nav_index = nav_index, .name = name }, export_idx),
-            .uav => |uav_index| wasm.uav_exports.put(gpa, .{ .uav_index = uav_index, .name = name }, export_idx),
+            .nav => |nav_index| try wasm.nav_exports.put(gpa, .{ .nav_index = nav_index, .name = name }, export_idx),
+            .uav => |uav_index| try wasm.uav_exports.put(gpa, .{ .uav_index = uav_index, .name = name }, export_idx),
         }
     }
     wasm.any_exports_updated = true;
@@ -1713,9 +1707,9 @@ pub fn prelink(wasm: *Wasm, prog_node: std.Progress.Node) link.File.FlushError!v
                     continue;
                 }
             }
-            try wasm.missing_exports.put(exp_name_interned, {});
+            try missing_exports.put(gpa, exp_name_interned, {});
         }
-        wasm.missing_exports_init = try gpa.dupe(String, wasm.missing_exports.keys());
+        wasm.missing_exports_init = try gpa.dupe(String, missing_exports.keys());
     }
 
     if (wasm.entry_name.unwrap()) |entry_name| {
@@ -2347,14 +2341,6 @@ fn linkWithLLD(wasm: *Wasm, arena: Allocator, tid: Zcu.PerThread.Id, prog_node:
     }
 }
 
-/// Returns the symbol index of the error name table.
-///
-/// When the symbol does not yet exist, it will create a new one instead.
-pub fn getErrorTableSymbol(wasm: *Wasm, pt: Zcu.PerThread) !u32 {
-    const sym_index = try wasm.zig_object.?.getErrorTableSymbol(wasm, pt);
-    return @intFromEnum(sym_index);
-}
-
 fn defaultEntrySymbolName(
     preloaded_strings: *const PreloadedStrings,
     wasi_exec_model: std.builtin.WasiExecModel,