master
   1const std = @import("std");
   2const builtin = @import("builtin");
   3const Allocator = std.mem.Allocator;
   4const assert = std.debug.assert;
   5const testing = std.testing;
   6const mem = std.mem;
   7const log = std.log.scoped(.codegen);
   8
   9const CodeGen = @This();
  10const codegen = @import("../../codegen.zig");
  11const Zcu = @import("../../Zcu.zig");
  12const InternPool = @import("../../InternPool.zig");
  13const Decl = Zcu.Decl;
  14const Type = @import("../../Type.zig");
  15const Value = @import("../../Value.zig");
  16const Compilation = @import("../../Compilation.zig");
  17const link = @import("../../link.zig");
  18const Air = @import("../../Air.zig");
  19const Mir = @import("Mir.zig");
  20const abi = @import("../../codegen/wasm/abi.zig");
  21const Alignment = InternPool.Alignment;
  22const errUnionPayloadOffset = codegen.errUnionPayloadOffset;
  23const errUnionErrorOffset = codegen.errUnionErrorOffset;
  24
  25const target_util = @import("../../target.zig");
  26const libcFloatPrefix = target_util.libcFloatPrefix;
  27const libcFloatSuffix = target_util.libcFloatSuffix;
  28const compilerRtFloatAbbrev = target_util.compilerRtFloatAbbrev;
  29const compilerRtIntAbbrev = target_util.compilerRtIntAbbrev;
  30
  31pub fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
  32    return comptime &.initMany(&.{
  33        .expand_intcast_safe,
  34        .expand_int_from_float_safe,
  35        .expand_int_from_float_optimized_safe,
  36        .expand_add_safe,
  37        .expand_sub_safe,
  38        .expand_mul_safe,
  39    });
  40}
  41
  42/// Reference to the function declaration the code
  43/// section belongs to
  44owner_nav: InternPool.Nav.Index,
  45/// Current block depth. Used to calculate the relative difference between a break
  46/// and block
  47block_depth: u32 = 0,
  48air: Air,
  49liveness: Air.Liveness,
  50gpa: mem.Allocator,
  51func_index: InternPool.Index,
  52/// Contains a list of current branches.
  53/// When we return from a branch, the branch will be popped from this list,
  54/// which means branches can only contain references from within its own branch,
  55/// or a branch higher (lower index) in the tree.
  56branches: std.ArrayList(Branch) = .empty,
  57/// Table to save `WValue`'s generated by an `Air.Inst`
  58// values: ValueTable,
  59/// Mapping from Air.Inst.Index to block ids
  60blocks: std.AutoArrayHashMapUnmanaged(Air.Inst.Index, struct {
  61    label: u32,
  62    value: WValue,
  63}) = .{},
  64/// Maps `loop` instructions to their label. `br` to here repeats the loop.
  65loops: std.AutoHashMapUnmanaged(Air.Inst.Index, u32) = .empty,
  66/// The index the next local generated will have
  67/// NOTE: arguments share the index with locals therefore the first variable
  68/// will have the index that comes after the last argument's index
  69local_index: u32,
  70/// The index of the current argument.
  71/// Used to track which argument is being referenced in `airArg`.
  72arg_index: u32 = 0,
  73/// List of simd128 immediates. Each value is stored as an array of bytes.
  74/// This list will only be populated for 128bit-simd values when the target features
  75/// are enabled also.
  76simd_immediates: std.ArrayList([16]u8) = .empty,
  77/// The Target we're emitting (used to call intInfo)
  78target: *const std.Target,
  79ptr_size: enum { wasm32, wasm64 },
  80pt: Zcu.PerThread,
  81/// List of MIR Instructions
  82mir_instructions: std.MultiArrayList(Mir.Inst),
  83/// Contains extra data for MIR
  84mir_extra: std.ArrayList(u32),
  85/// List of all locals' types generated throughout this declaration
  86/// used to emit locals count at start of 'code' section.
  87mir_locals: std.ArrayList(std.wasm.Valtype),
  88/// Set of all UAVs referenced by this function. Key is the UAV value, value is the alignment.
  89/// `.none` means naturally aligned. An explicit alignment is never less than the natural alignment.
  90mir_uavs: std.AutoArrayHashMapUnmanaged(InternPool.Index, Alignment),
  91/// Set of all functions whose address this function has taken and which therefore might be called
  92/// via a `call_indirect` function.
  93mir_indirect_function_set: std.AutoArrayHashMapUnmanaged(InternPool.Nav.Index, void),
  94/// Set of all function types used by this function. These must be interned by the linker.
  95mir_func_tys: std.AutoArrayHashMapUnmanaged(InternPool.Index, void),
  96/// The number of `error_name_table_ref` instructions emitted.
  97error_name_table_ref_count: u32,
  98/// When a function is executing, we store the the current stack pointer's value within this local.
  99/// This value is then used to restore the stack pointer to the original value at the return of the function.
 100initial_stack_value: WValue = .none,
 101/// The current stack pointer subtracted with the stack size. From this value, we will calculate
 102/// all offsets of the stack values.
 103bottom_stack_value: WValue = .none,
 104/// Arguments of this function declaration
 105/// This will be set after `resolveCallingConventionValues`
 106args: []WValue,
 107/// This will only be `.none` if the function returns void, or returns an immediate.
 108/// When it returns a pointer to the stack, the `.local` tag will be active and must be populated
 109/// before this function returns its execution to the caller.
 110return_value: WValue,
 111/// The size of the stack this function occupies. In the function prologue
 112/// we will move the stack pointer by this number, forward aligned with the `stack_alignment`.
 113stack_size: u32 = 0,
 114/// The stack alignment, which is 16 bytes by default. This is specified by the
 115/// tool-conventions: https://github.com/WebAssembly/tool-conventions/blob/main/BasicCABI.md
 116/// and also what the llvm backend will emit.
 117/// However, local variables or the usage of `incoming_stack_alignment` in a `CallingConvention` can overwrite this default.
 118stack_alignment: Alignment = .@"16",
 119
 120// For each individual Wasm valtype we store a seperate free list which
 121// allows us to re-use locals that are no longer used. e.g. a temporary local.
 122/// A list of indexes which represents a local of valtype `i32`.
 123/// It is illegal to store a non-i32 valtype in this list.
 124free_locals_i32: std.ArrayList(u32) = .empty,
 125/// A list of indexes which represents a local of valtype `i64`.
 126/// It is illegal to store a non-i64 valtype in this list.
 127free_locals_i64: std.ArrayList(u32) = .empty,
 128/// A list of indexes which represents a local of valtype `f32`.
 129/// It is illegal to store a non-f32 valtype in this list.
 130free_locals_f32: std.ArrayList(u32) = .empty,
 131/// A list of indexes which represents a local of valtype `f64`.
 132/// It is illegal to store a non-f64 valtype in this list.
 133free_locals_f64: std.ArrayList(u32) = .empty,
 134/// A list of indexes which represents a local of valtype `v127`.
 135/// It is illegal to store a non-v128 valtype in this list.
 136free_locals_v128: std.ArrayList(u32) = .empty,
 137
 138/// When in debug mode, this tracks if no `finishAir` was missed.
 139/// Forgetting to call `finishAir` will cause the result to not be
 140/// stored in our `values` map and therefore cause bugs.
 141air_bookkeeping: @TypeOf(bookkeeping_init) = bookkeeping_init,
 142
 143/// Wasm Value, created when generating an instruction
 144const WValue = union(enum) {
 145    /// `WValue` which has been freed and may no longer hold
 146    /// any references.
 147    dead: void,
 148    /// May be referenced but is unused
 149    none: void,
 150    /// The value lives on top of the stack
 151    stack: void,
 152    /// Index of the local
 153    local: struct {
 154        /// Contains the index to the local
 155        value: u32,
 156        /// The amount of instructions referencing this `WValue`
 157        references: u32,
 158    },
 159    /// An immediate 32bit value
 160    imm32: u32,
 161    /// An immediate 64bit value
 162    imm64: u64,
 163    /// Index into the list of simd128 immediates. This `WValue` is
 164    /// only possible in very rare cases, therefore it would be
 165    /// a waste of memory to store the value in a 128 bit integer.
 166    imm128: u32,
 167    /// A constant 32bit float value
 168    float32: f32,
 169    /// A constant 64bit float value
 170    float64: f64,
 171    nav_ref: struct {
 172        nav_index: InternPool.Nav.Index,
 173        offset: i32 = 0,
 174    },
 175    uav_ref: struct {
 176        ip_index: InternPool.Index,
 177        offset: i32 = 0,
 178        orig_ptr_ty: InternPool.Index = .none,
 179    },
 180    /// Offset from the bottom of the virtual stack, with the offset
 181    /// pointing to where the value lives.
 182    stack_offset: struct {
 183        /// Contains the actual value of the offset
 184        value: u32,
 185        /// The amount of instructions referencing this `WValue`
 186        references: u32,
 187    },
 188
 189    /// Returns the offset from the bottom of the stack. This is useful when
 190    /// we use the load or store instruction to ensure we retrieve the value
 191    /// from the correct position, rather than the value that lives at the
 192    /// bottom of the stack. For instances where `WValue` is not `stack_value`
 193    /// this will return 0, which allows us to simply call this function for all
 194    /// loads and stores without requiring checks everywhere.
 195    fn offset(value: WValue) u32 {
 196        switch (value) {
 197            .stack_offset => |stack_offset| return stack_offset.value,
 198            .dead => unreachable,
 199            else => return 0,
 200        }
 201    }
 202
 203    /// Promotes a `WValue` to a local when given value is on top of the stack.
 204    /// When encountering a `local` or `stack_offset` this is essentially a no-op.
 205    /// All other tags are illegal.
 206    fn toLocal(value: WValue, gen: *CodeGen, ty: Type) InnerError!WValue {
 207        switch (value) {
 208            .stack => {
 209                const new_local = try gen.allocLocal(ty);
 210                try gen.addLocal(.local_set, new_local.local.value);
 211                return new_local;
 212            },
 213            .local, .stack_offset => return value,
 214            else => unreachable,
 215        }
 216    }
 217
 218    /// Marks a local as no longer being referenced and essentially allows
 219    /// us to re-use it somewhere else within the function.
 220    /// The valtype of the local is deducted by using the index of the given `WValue`.
 221    fn free(value: *WValue, gen: *CodeGen) void {
 222        if (value.* != .local) return;
 223        const local_value = value.local.value;
 224        const reserved = gen.args.len + @intFromBool(gen.return_value != .none);
 225        if (local_value < reserved + 2) return; // reserved locals may never be re-used. Also accounts for 2 stack locals.
 226
 227        const index = local_value - reserved;
 228        const valtype = gen.mir_locals.items[index];
 229        switch (valtype) {
 230            .i32 => gen.free_locals_i32.append(gen.gpa, local_value) catch return, // It's ok to fail any of those, a new local can be allocated instead
 231            .i64 => gen.free_locals_i64.append(gen.gpa, local_value) catch return,
 232            .f32 => gen.free_locals_f32.append(gen.gpa, local_value) catch return,
 233            .f64 => gen.free_locals_f64.append(gen.gpa, local_value) catch return,
 234            .v128 => gen.free_locals_v128.append(gen.gpa, local_value) catch return,
 235        }
 236        log.debug("freed local ({d}) of type {}", .{ local_value, valtype });
 237        value.* = .dead;
 238    }
 239};
 240
 241const Op = enum {
 242    @"unreachable",
 243    nop,
 244    block,
 245    loop,
 246    @"if",
 247    @"else",
 248    end,
 249    br,
 250    br_if,
 251    br_table,
 252    @"return",
 253    call,
 254    drop,
 255    select,
 256    global_get,
 257    global_set,
 258    load,
 259    store,
 260    memory_size,
 261    memory_grow,
 262    @"const",
 263    eqz,
 264    eq,
 265    ne,
 266    lt,
 267    gt,
 268    le,
 269    ge,
 270    clz,
 271    ctz,
 272    popcnt,
 273    add,
 274    sub,
 275    mul,
 276    div,
 277    rem,
 278    @"and",
 279    @"or",
 280    xor,
 281    shl,
 282    shr,
 283    rotl,
 284    rotr,
 285    abs,
 286    neg,
 287    ceil,
 288    floor,
 289    trunc,
 290    nearest,
 291    sqrt,
 292    min,
 293    max,
 294    copysign,
 295    wrap,
 296    convert,
 297    demote,
 298    promote,
 299    reinterpret,
 300    extend,
 301};
 302
 303const OpcodeBuildArguments = struct {
 304    /// First valtype in the opcode (usually represents the type of the output)
 305    valtype1: ?std.wasm.Valtype = null,
 306    /// The operation (e.g. call, unreachable, div, min, sqrt, etc.)
 307    op: Op,
 308    /// Width of the operation (e.g. 8 for i32_load8_s, 16 for i64_extend16_i32_s)
 309    width: ?u8 = null,
 310    /// Second valtype in the opcode name (usually represents the type of the input)
 311    valtype2: ?std.wasm.Valtype = null,
 312    /// Signedness of the op
 313    signedness: ?std.builtin.Signedness = null,
 314};
 315
 316/// TODO: deprecated, should be split up per tag.
 317fn buildOpcode(args: OpcodeBuildArguments) std.wasm.Opcode {
 318    switch (args.op) {
 319        .@"unreachable" => unreachable,
 320        .nop => unreachable,
 321        .block => unreachable,
 322        .loop => unreachable,
 323        .@"if" => unreachable,
 324        .@"else" => unreachable,
 325        .end => unreachable,
 326        .br => unreachable,
 327        .br_if => unreachable,
 328        .br_table => unreachable,
 329        .@"return" => unreachable,
 330        .call => unreachable,
 331        .drop => unreachable,
 332        .select => unreachable,
 333        .global_get => unreachable,
 334        .global_set => unreachable,
 335
 336        .load => if (args.width) |width| switch (width) {
 337            8 => switch (args.valtype1.?) {
 338                .i32 => if (args.signedness.? == .signed) return .i32_load8_s else return .i32_load8_u,
 339                .i64 => if (args.signedness.? == .signed) return .i64_load8_s else return .i64_load8_u,
 340                .f32, .f64, .v128 => unreachable,
 341            },
 342            16 => switch (args.valtype1.?) {
 343                .i32 => if (args.signedness.? == .signed) return .i32_load16_s else return .i32_load16_u,
 344                .i64 => if (args.signedness.? == .signed) return .i64_load16_s else return .i64_load16_u,
 345                .f32, .f64, .v128 => unreachable,
 346            },
 347            32 => switch (args.valtype1.?) {
 348                .i64 => if (args.signedness.? == .signed) return .i64_load32_s else return .i64_load32_u,
 349                .i32 => return .i32_load,
 350                .f32 => return .f32_load,
 351                .f64, .v128 => unreachable,
 352            },
 353            64 => switch (args.valtype1.?) {
 354                .i64 => return .i64_load,
 355                .f64 => return .f64_load,
 356                else => unreachable,
 357            },
 358            else => unreachable,
 359        } else switch (args.valtype1.?) {
 360            .i32 => return .i32_load,
 361            .i64 => return .i64_load,
 362            .f32 => return .f32_load,
 363            .f64 => return .f64_load,
 364            .v128 => unreachable, // handled independently
 365        },
 366        .store => if (args.width) |width| {
 367            switch (width) {
 368                8 => switch (args.valtype1.?) {
 369                    .i32 => return .i32_store8,
 370                    .i64 => return .i64_store8,
 371                    .f32, .f64, .v128 => unreachable,
 372                },
 373                16 => switch (args.valtype1.?) {
 374                    .i32 => return .i32_store16,
 375                    .i64 => return .i64_store16,
 376                    .f32, .f64, .v128 => unreachable,
 377                },
 378                32 => switch (args.valtype1.?) {
 379                    .i64 => return .i64_store32,
 380                    .i32 => return .i32_store,
 381                    .f32 => return .f32_store,
 382                    .f64, .v128 => unreachable,
 383                },
 384                64 => switch (args.valtype1.?) {
 385                    .i64 => return .i64_store,
 386                    .f64 => return .f64_store,
 387                    else => unreachable,
 388                },
 389                else => unreachable,
 390            }
 391        } else {
 392            switch (args.valtype1.?) {
 393                .i32 => return .i32_store,
 394                .i64 => return .i64_store,
 395                .f32 => return .f32_store,
 396                .f64 => return .f64_store,
 397                .v128 => unreachable, // handled independently
 398            }
 399        },
 400
 401        .memory_size => return .memory_size,
 402        .memory_grow => return .memory_grow,
 403
 404        .@"const" => switch (args.valtype1.?) {
 405            .i32 => return .i32_const,
 406            .i64 => return .i64_const,
 407            .f32 => return .f32_const,
 408            .f64 => return .f64_const,
 409            .v128 => unreachable, // handled independently
 410        },
 411
 412        .eqz => switch (args.valtype1.?) {
 413            .i32 => return .i32_eqz,
 414            .i64 => return .i64_eqz,
 415            .f32, .f64, .v128 => unreachable,
 416        },
 417        .eq => switch (args.valtype1.?) {
 418            .i32 => return .i32_eq,
 419            .i64 => return .i64_eq,
 420            .f32 => return .f32_eq,
 421            .f64 => return .f64_eq,
 422            .v128 => unreachable, // handled independently
 423        },
 424        .ne => switch (args.valtype1.?) {
 425            .i32 => return .i32_ne,
 426            .i64 => return .i64_ne,
 427            .f32 => return .f32_ne,
 428            .f64 => return .f64_ne,
 429            .v128 => unreachable, // handled independently
 430        },
 431
 432        .lt => switch (args.valtype1.?) {
 433            .i32 => if (args.signedness.? == .signed) return .i32_lt_s else return .i32_lt_u,
 434            .i64 => if (args.signedness.? == .signed) return .i64_lt_s else return .i64_lt_u,
 435            .f32 => return .f32_lt,
 436            .f64 => return .f64_lt,
 437            .v128 => unreachable, // handled independently
 438        },
 439        .gt => switch (args.valtype1.?) {
 440            .i32 => if (args.signedness.? == .signed) return .i32_gt_s else return .i32_gt_u,
 441            .i64 => if (args.signedness.? == .signed) return .i64_gt_s else return .i64_gt_u,
 442            .f32 => return .f32_gt,
 443            .f64 => return .f64_gt,
 444            .v128 => unreachable, // handled independently
 445        },
 446        .le => switch (args.valtype1.?) {
 447            .i32 => if (args.signedness.? == .signed) return .i32_le_s else return .i32_le_u,
 448            .i64 => if (args.signedness.? == .signed) return .i64_le_s else return .i64_le_u,
 449            .f32 => return .f32_le,
 450            .f64 => return .f64_le,
 451            .v128 => unreachable, // handled independently
 452        },
 453        .ge => switch (args.valtype1.?) {
 454            .i32 => if (args.signedness.? == .signed) return .i32_ge_s else return .i32_ge_u,
 455            .i64 => if (args.signedness.? == .signed) return .i64_ge_s else return .i64_ge_u,
 456            .f32 => return .f32_ge,
 457            .f64 => return .f64_ge,
 458            .v128 => unreachable, // handled independently
 459        },
 460
 461        .clz => switch (args.valtype1.?) {
 462            .i32 => return .i32_clz,
 463            .i64 => return .i64_clz,
 464            .f32, .f64 => unreachable,
 465            .v128 => unreachable, // handled independently
 466        },
 467        .ctz => switch (args.valtype1.?) {
 468            .i32 => return .i32_ctz,
 469            .i64 => return .i64_ctz,
 470            .f32, .f64 => unreachable,
 471            .v128 => unreachable, // handled independently
 472        },
 473        .popcnt => switch (args.valtype1.?) {
 474            .i32 => return .i32_popcnt,
 475            .i64 => return .i64_popcnt,
 476            .f32, .f64 => unreachable,
 477            .v128 => unreachable, // handled independently
 478        },
 479
 480        .add => switch (args.valtype1.?) {
 481            .i32 => return .i32_add,
 482            .i64 => return .i64_add,
 483            .f32 => return .f32_add,
 484            .f64 => return .f64_add,
 485            .v128 => unreachable, // handled independently
 486        },
 487        .sub => switch (args.valtype1.?) {
 488            .i32 => return .i32_sub,
 489            .i64 => return .i64_sub,
 490            .f32 => return .f32_sub,
 491            .f64 => return .f64_sub,
 492            .v128 => unreachable, // handled independently
 493        },
 494        .mul => switch (args.valtype1.?) {
 495            .i32 => return .i32_mul,
 496            .i64 => return .i64_mul,
 497            .f32 => return .f32_mul,
 498            .f64 => return .f64_mul,
 499            .v128 => unreachable, // handled independently
 500        },
 501
 502        .div => switch (args.valtype1.?) {
 503            .i32 => if (args.signedness.? == .signed) return .i32_div_s else return .i32_div_u,
 504            .i64 => if (args.signedness.? == .signed) return .i64_div_s else return .i64_div_u,
 505            .f32 => return .f32_div,
 506            .f64 => return .f64_div,
 507            .v128 => unreachable, // handled independently
 508        },
 509        .rem => switch (args.valtype1.?) {
 510            .i32 => if (args.signedness.? == .signed) return .i32_rem_s else return .i32_rem_u,
 511            .i64 => if (args.signedness.? == .signed) return .i64_rem_s else return .i64_rem_u,
 512            .f32, .f64 => unreachable,
 513            .v128 => unreachable, // handled independently
 514        },
 515
 516        .@"and" => switch (args.valtype1.?) {
 517            .i32 => return .i32_and,
 518            .i64 => return .i64_and,
 519            .f32, .f64 => unreachable,
 520            .v128 => unreachable, // handled independently
 521        },
 522        .@"or" => switch (args.valtype1.?) {
 523            .i32 => return .i32_or,
 524            .i64 => return .i64_or,
 525            .f32, .f64 => unreachable,
 526            .v128 => unreachable, // handled independently
 527        },
 528        .xor => switch (args.valtype1.?) {
 529            .i32 => return .i32_xor,
 530            .i64 => return .i64_xor,
 531            .f32, .f64 => unreachable,
 532            .v128 => unreachable, // handled independently
 533        },
 534
 535        .shl => switch (args.valtype1.?) {
 536            .i32 => return .i32_shl,
 537            .i64 => return .i64_shl,
 538            .f32, .f64 => unreachable,
 539            .v128 => unreachable, // handled independently
 540        },
 541        .shr => switch (args.valtype1.?) {
 542            .i32 => if (args.signedness.? == .signed) return .i32_shr_s else return .i32_shr_u,
 543            .i64 => if (args.signedness.? == .signed) return .i64_shr_s else return .i64_shr_u,
 544            .f32, .f64 => unreachable,
 545            .v128 => unreachable, // handled independently
 546        },
 547        .rotl => switch (args.valtype1.?) {
 548            .i32 => return .i32_rotl,
 549            .i64 => return .i64_rotl,
 550            .f32, .f64 => unreachable,
 551            .v128 => unreachable, // handled independently
 552        },
 553        .rotr => switch (args.valtype1.?) {
 554            .i32 => return .i32_rotr,
 555            .i64 => return .i64_rotr,
 556            .f32, .f64 => unreachable,
 557            .v128 => unreachable, // handled independently
 558        },
 559
 560        .abs => switch (args.valtype1.?) {
 561            .i32, .i64 => unreachable,
 562            .f32 => return .f32_abs,
 563            .f64 => return .f64_abs,
 564            .v128 => unreachable, // handled independently
 565        },
 566        .neg => switch (args.valtype1.?) {
 567            .i32, .i64 => unreachable,
 568            .f32 => return .f32_neg,
 569            .f64 => return .f64_neg,
 570            .v128 => unreachable, // handled independently
 571        },
 572        .ceil => switch (args.valtype1.?) {
 573            .i64 => unreachable,
 574            .i32 => return .f32_ceil, // when valtype is f16, we store it in i32.
 575            .f32 => return .f32_ceil,
 576            .f64 => return .f64_ceil,
 577            .v128 => unreachable, // handled independently
 578        },
 579        .floor => switch (args.valtype1.?) {
 580            .i64 => unreachable,
 581            .i32 => return .f32_floor, // when valtype is f16, we store it in i32.
 582            .f32 => return .f32_floor,
 583            .f64 => return .f64_floor,
 584            .v128 => unreachable, // handled independently
 585        },
 586        .trunc => switch (args.valtype1.?) {
 587            .i32 => if (args.valtype2) |valty| switch (valty) {
 588                .i32 => unreachable,
 589                .i64 => unreachable,
 590                .f32 => if (args.signedness.? == .signed) return .i32_trunc_f32_s else return .i32_trunc_f32_u,
 591                .f64 => if (args.signedness.? == .signed) return .i32_trunc_f64_s else return .i32_trunc_f64_u,
 592                .v128 => unreachable, // handled independently
 593            } else return .f32_trunc, // when no valtype2, it's an f16 instead which is stored in an i32.
 594            .i64 => switch (args.valtype2.?) {
 595                .i32 => unreachable,
 596                .i64 => unreachable,
 597                .f32 => if (args.signedness.? == .signed) return .i64_trunc_f32_s else return .i64_trunc_f32_u,
 598                .f64 => if (args.signedness.? == .signed) return .i64_trunc_f64_s else return .i64_trunc_f64_u,
 599                .v128 => unreachable, // handled independently
 600            },
 601            .f32 => return .f32_trunc,
 602            .f64 => return .f64_trunc,
 603            .v128 => unreachable, // handled independently
 604        },
 605        .nearest => switch (args.valtype1.?) {
 606            .i32, .i64 => unreachable,
 607            .f32 => return .f32_nearest,
 608            .f64 => return .f64_nearest,
 609            .v128 => unreachable, // handled independently
 610        },
 611        .sqrt => switch (args.valtype1.?) {
 612            .i32, .i64 => unreachable,
 613            .f32 => return .f32_sqrt,
 614            .f64 => return .f64_sqrt,
 615            .v128 => unreachable, // handled independently
 616        },
 617        .min => switch (args.valtype1.?) {
 618            .i32, .i64 => unreachable,
 619            .f32 => return .f32_min,
 620            .f64 => return .f64_min,
 621            .v128 => unreachable, // handled independently
 622        },
 623        .max => switch (args.valtype1.?) {
 624            .i32, .i64 => unreachable,
 625            .f32 => return .f32_max,
 626            .f64 => return .f64_max,
 627            .v128 => unreachable, // handled independently
 628        },
 629        .copysign => switch (args.valtype1.?) {
 630            .i32, .i64 => unreachable,
 631            .f32 => return .f32_copysign,
 632            .f64 => return .f64_copysign,
 633            .v128 => unreachable, // handled independently
 634        },
 635
 636        .wrap => switch (args.valtype1.?) {
 637            .i32 => switch (args.valtype2.?) {
 638                .i32 => unreachable,
 639                .i64 => return .i32_wrap_i64,
 640                .f32, .f64 => unreachable,
 641                .v128 => unreachable, // handled independently
 642            },
 643            .i64, .f32, .f64 => unreachable,
 644            .v128 => unreachable, // handled independently
 645        },
 646        .convert => switch (args.valtype1.?) {
 647            .i32, .i64 => unreachable,
 648            .f32 => switch (args.valtype2.?) {
 649                .i32 => if (args.signedness.? == .signed) return .f32_convert_i32_s else return .f32_convert_i32_u,
 650                .i64 => if (args.signedness.? == .signed) return .f32_convert_i64_s else return .f32_convert_i64_u,
 651                .f32, .f64 => unreachable,
 652                .v128 => unreachable, // handled independently
 653            },
 654            .f64 => switch (args.valtype2.?) {
 655                .i32 => if (args.signedness.? == .signed) return .f64_convert_i32_s else return .f64_convert_i32_u,
 656                .i64 => if (args.signedness.? == .signed) return .f64_convert_i64_s else return .f64_convert_i64_u,
 657                .f32, .f64 => unreachable,
 658                .v128 => unreachable, // handled independently
 659            },
 660            .v128 => unreachable, // handled independently
 661        },
 662        .demote => if (args.valtype1.? == .f32 and args.valtype2.? == .f64) return .f32_demote_f64 else unreachable,
 663        .promote => if (args.valtype1.? == .f64 and args.valtype2.? == .f32) return .f64_promote_f32 else unreachable,
 664        .reinterpret => switch (args.valtype1.?) {
 665            .i32 => if (args.valtype2.? == .f32) return .i32_reinterpret_f32 else unreachable,
 666            .i64 => if (args.valtype2.? == .f64) return .i64_reinterpret_f64 else unreachable,
 667            .f32 => if (args.valtype2.? == .i32) return .f32_reinterpret_i32 else unreachable,
 668            .f64 => if (args.valtype2.? == .i64) return .f64_reinterpret_i64 else unreachable,
 669            .v128 => unreachable, // handled independently
 670        },
 671        .extend => switch (args.valtype1.?) {
 672            .i32 => switch (args.width.?) {
 673                8 => if (args.signedness.? == .signed) return .i32_extend8_s else unreachable,
 674                16 => if (args.signedness.? == .signed) return .i32_extend16_s else unreachable,
 675                else => unreachable,
 676            },
 677            .i64 => switch (args.width.?) {
 678                8 => if (args.signedness.? == .signed) return .i64_extend8_s else unreachable,
 679                16 => if (args.signedness.? == .signed) return .i64_extend16_s else unreachable,
 680                32 => if (args.signedness.? == .signed) return .i64_extend32_s else unreachable,
 681                else => unreachable,
 682            },
 683            .f32, .f64 => unreachable,
 684            .v128 => unreachable, // handled independently
 685        },
 686    }
 687}
 688
 689test "Wasm - buildOpcode" {
 690    // Make sure buildOpcode is referenced, and test some examples
 691    const i32_const = buildOpcode(.{ .op = .@"const", .valtype1 = .i32 });
 692    const i64_extend32_s = buildOpcode(.{ .op = .extend, .valtype1 = .i64, .width = 32, .signedness = .signed });
 693    const f64_reinterpret_i64 = buildOpcode(.{ .op = .reinterpret, .valtype1 = .f64, .valtype2 = .i64 });
 694
 695    try testing.expectEqual(@as(std.wasm.Opcode, .i32_const), i32_const);
 696    try testing.expectEqual(@as(std.wasm.Opcode, .i64_extend32_s), i64_extend32_s);
 697    try testing.expectEqual(@as(std.wasm.Opcode, .f64_reinterpret_i64), f64_reinterpret_i64);
 698}
 699
 700/// Hashmap to store generated `WValue` for each `Air.Inst.Ref`
 701pub const ValueTable = std.AutoArrayHashMapUnmanaged(Air.Inst.Ref, WValue);
 702
 703const bookkeeping_init = if (std.debug.runtime_safety) @as(usize, 0) else {};
 704
 705const InnerError = error{
 706    OutOfMemory,
 707    /// An error occurred when trying to lower AIR to MIR.
 708    CodegenFail,
 709    /// Compiler implementation could not handle a large integer.
 710    Overflow,
 711} || link.File.UpdateDebugInfoError;
 712
 713pub fn deinit(cg: *CodeGen) void {
 714    const gpa = cg.gpa;
 715    for (cg.branches.items) |*branch| branch.deinit(gpa);
 716    cg.branches.deinit(gpa);
 717    cg.blocks.deinit(gpa);
 718    cg.loops.deinit(gpa);
 719    cg.simd_immediates.deinit(gpa);
 720    cg.free_locals_i32.deinit(gpa);
 721    cg.free_locals_i64.deinit(gpa);
 722    cg.free_locals_f32.deinit(gpa);
 723    cg.free_locals_f64.deinit(gpa);
 724    cg.free_locals_v128.deinit(gpa);
 725    cg.mir_instructions.deinit(gpa);
 726    cg.mir_extra.deinit(gpa);
 727    cg.mir_locals.deinit(gpa);
 728    cg.mir_uavs.deinit(gpa);
 729    cg.mir_indirect_function_set.deinit(gpa);
 730    cg.mir_func_tys.deinit(gpa);
 731    cg.* = undefined;
 732}
 733
 734fn fail(cg: *CodeGen, comptime fmt: []const u8, args: anytype) error{ OutOfMemory, CodegenFail } {
 735    const zcu = cg.pt.zcu;
 736    const func = zcu.funcInfo(cg.func_index);
 737    return zcu.codegenFail(func.owner_nav, fmt, args);
 738}
 739
 740/// Resolves the `WValue` for the given instruction `inst`
 741/// When the given instruction has a `Value`, it returns a constant instead
 742fn resolveInst(cg: *CodeGen, ref: Air.Inst.Ref) InnerError!WValue {
 743    var branch_index = cg.branches.items.len;
 744    while (branch_index > 0) : (branch_index -= 1) {
 745        const branch = cg.branches.items[branch_index - 1];
 746        if (branch.values.get(ref)) |value| {
 747            return value;
 748        }
 749    }
 750
 751    // when we did not find an existing instruction, it
 752    // means we must generate it from a constant.
 753    // We always store constants in the most outer branch as they must never
 754    // be removed. The most outer branch is always at index 0.
 755    const gop = try cg.branches.items[0].values.getOrPut(cg.gpa, ref);
 756    assert(!gop.found_existing);
 757
 758    const pt = cg.pt;
 759    const zcu = pt.zcu;
 760    const val = (try cg.air.value(ref, pt)).?;
 761    const ty = cg.typeOf(ref);
 762    if (!ty.hasRuntimeBitsIgnoreComptime(zcu) and !ty.isInt(zcu) and !ty.isError(zcu)) {
 763        gop.value_ptr.* = .none;
 764        return .none;
 765    }
 766
 767    // When we need to pass the value by reference (such as a struct), we will
 768    // leverage `generateSymbol` to lower the constant to bytes and emit it
 769    // to the 'rodata' section. We then return the index into the section as `WValue`.
 770    //
 771    // In the other cases, we will simply lower the constant to a value that fits
 772    // into a single local (such as a pointer, integer, bool, etc).
 773    const result: WValue = if (isByRef(ty, zcu, cg.target))
 774        .{ .uav_ref = .{ .ip_index = val.toIntern() } }
 775    else
 776        try cg.lowerConstant(val, ty);
 777
 778    gop.value_ptr.* = result;
 779    return result;
 780}
 781
 782fn resolveValue(cg: *CodeGen, val: Value) InnerError!WValue {
 783    const zcu = cg.pt.zcu;
 784    const ty = val.typeOf(zcu);
 785
 786    return if (isByRef(ty, zcu, cg.target))
 787        .{ .uav_ref = .{ .ip_index = val.toIntern() } }
 788    else
 789        try cg.lowerConstant(val, ty);
 790}
 791
 792/// NOTE: if result == .stack, it will be stored in .local
 793fn finishAir(cg: *CodeGen, inst: Air.Inst.Index, result: WValue, operands: []const Air.Inst.Ref) InnerError!void {
 794    assert(operands.len <= Air.Liveness.bpi - 1);
 795    var tomb_bits = cg.liveness.getTombBits(inst);
 796    for (operands) |operand| {
 797        const dies = @as(u1, @truncate(tomb_bits)) != 0;
 798        tomb_bits >>= 1;
 799        if (!dies) continue;
 800        processDeath(cg, operand);
 801    }
 802
 803    // results of `none` can never be referenced.
 804    if (result != .none) {
 805        const trackable_result = if (result != .stack)
 806            result
 807        else
 808            try result.toLocal(cg, cg.typeOfIndex(inst));
 809        const branch = cg.currentBranch();
 810        branch.values.putAssumeCapacityNoClobber(inst.toRef(), trackable_result);
 811    }
 812
 813    if (std.debug.runtime_safety) {
 814        cg.air_bookkeeping += 1;
 815    }
 816}
 817
 818const Branch = struct {
 819    values: ValueTable = .{},
 820
 821    fn deinit(branch: *Branch, gpa: Allocator) void {
 822        branch.values.deinit(gpa);
 823        branch.* = undefined;
 824    }
 825};
 826
 827inline fn currentBranch(cg: *CodeGen) *Branch {
 828    return &cg.branches.items[cg.branches.items.len - 1];
 829}
 830
 831const BigTomb = struct {
 832    gen: *CodeGen,
 833    inst: Air.Inst.Index,
 834    lbt: Air.Liveness.BigTomb,
 835
 836    fn feed(bt: *BigTomb, op_ref: Air.Inst.Ref) void {
 837        const dies = bt.lbt.feed();
 838        if (!dies) return;
 839        // This will be a nop for interned constants.
 840        processDeath(bt.gen, op_ref);
 841    }
 842
 843    fn finishAir(bt: *BigTomb, result: WValue) void {
 844        assert(result != .stack);
 845        if (result != .none) {
 846            bt.gen.currentBranch().values.putAssumeCapacityNoClobber(bt.inst.toRef(), result);
 847        }
 848
 849        if (std.debug.runtime_safety) {
 850            bt.gen.air_bookkeeping += 1;
 851        }
 852    }
 853};
 854
 855fn iterateBigTomb(cg: *CodeGen, inst: Air.Inst.Index, operand_count: usize) !BigTomb {
 856    try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, operand_count + 1);
 857    return BigTomb{
 858        .gen = cg,
 859        .inst = inst,
 860        .lbt = cg.liveness.iterateBigTomb(inst),
 861    };
 862}
 863
 864fn processDeath(cg: *CodeGen, ref: Air.Inst.Ref) void {
 865    if (ref.toIndex() == null) return;
 866    // Branches are currently only allowed to free locals allocated
 867    // within their own branch.
 868    // TODO: Upon branch consolidation free any locals if needed.
 869    const value = cg.currentBranch().values.getPtr(ref) orelse return;
 870    if (value.* != .local) return;
 871    const reserved_indexes = cg.args.len + @intFromBool(cg.return_value != .none);
 872    if (value.local.value < reserved_indexes) {
 873        return; // function arguments can never be re-used
 874    }
 875    log.debug("Decreasing reference for ref: %{d}, using local '{d}'", .{ @intFromEnum(ref.toIndex().?), value.local.value });
 876    value.local.references -= 1; // if this panics, a call to `reuseOperand` was forgotten by the developer
 877    if (value.local.references == 0) {
 878        value.free(cg);
 879    }
 880}
 881
 882fn addInst(cg: *CodeGen, inst: Mir.Inst) error{OutOfMemory}!void {
 883    try cg.mir_instructions.append(cg.gpa, inst);
 884}
 885
 886fn addTag(cg: *CodeGen, tag: Mir.Inst.Tag) error{OutOfMemory}!void {
 887    try cg.addInst(.{ .tag = tag, .data = .{ .tag = {} } });
 888}
 889
 890fn addExtended(cg: *CodeGen, opcode: std.wasm.MiscOpcode) error{OutOfMemory}!void {
 891    const extra_index: u32 = @intCast(cg.mir_extra.items.len);
 892    try cg.mir_extra.append(cg.gpa, @intFromEnum(opcode));
 893    try cg.addInst(.{ .tag = .misc_prefix, .data = .{ .payload = extra_index } });
 894}
 895
 896fn addLabel(cg: *CodeGen, tag: Mir.Inst.Tag, label: u32) error{OutOfMemory}!void {
 897    try cg.addInst(.{ .tag = tag, .data = .{ .label = label } });
 898}
 899
 900fn addLocal(cg: *CodeGen, tag: Mir.Inst.Tag, local: u32) error{OutOfMemory}!void {
 901    try cg.addInst(.{ .tag = tag, .data = .{ .local = local } });
 902}
 903
 904/// Accepts an unsigned 32bit integer rather than a signed integer to
 905/// prevent us from having to bitcast multiple times as most values
 906/// within codegen are represented as unsigned rather than signed.
 907fn addImm32(cg: *CodeGen, imm: u32) error{OutOfMemory}!void {
 908    try cg.addInst(.{ .tag = .i32_const, .data = .{ .imm32 = @bitCast(imm) } });
 909}
 910
 911/// Accepts an unsigned 64bit integer rather than a signed integer to
 912/// prevent us from having to bitcast multiple times as most values
 913/// within codegen are represented as unsigned rather than signed.
 914fn addImm64(cg: *CodeGen, imm: u64) error{OutOfMemory}!void {
 915    const extra_index = try cg.addExtra(Mir.Imm64.init(imm));
 916    try cg.addInst(.{ .tag = .i64_const, .data = .{ .payload = extra_index } });
 917}
 918
 919/// Accepts the index into the list of 128bit-immediates
 920fn addImm128(cg: *CodeGen, index: u32) error{OutOfMemory}!void {
 921    const simd_values = cg.simd_immediates.items[index];
 922    const extra_index: u32 = @intCast(cg.mir_extra.items.len);
 923    // tag + 128bit value
 924    try cg.mir_extra.ensureUnusedCapacity(cg.gpa, 5);
 925    cg.mir_extra.appendAssumeCapacity(@intFromEnum(std.wasm.SimdOpcode.v128_const));
 926    cg.mir_extra.appendSliceAssumeCapacity(@alignCast(mem.bytesAsSlice(u32, &simd_values)));
 927    try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } });
 928}
 929
 930fn addFloat64(cg: *CodeGen, float: f64) error{OutOfMemory}!void {
 931    const extra_index = try cg.addExtra(Mir.Float64.init(float));
 932    try cg.addInst(.{ .tag = .f64_const, .data = .{ .payload = extra_index } });
 933}
 934
 935/// Inserts an instruction to load/store from/to wasm's linear memory dependent on the given `tag`.
 936fn addMemArg(cg: *CodeGen, tag: Mir.Inst.Tag, mem_arg: Mir.MemArg) error{OutOfMemory}!void {
 937    const extra_index = try cg.addExtra(mem_arg);
 938    try cg.addInst(.{ .tag = tag, .data = .{ .payload = extra_index } });
 939}
 940
 941/// Inserts an instruction from the 'atomics' feature which accesses wasm's linear memory dependent on the
 942/// given `tag`.
 943fn addAtomicMemArg(cg: *CodeGen, tag: std.wasm.AtomicsOpcode, mem_arg: Mir.MemArg) error{OutOfMemory}!void {
 944    const extra_index = try cg.addExtra(@as(struct { val: u32 }, .{ .val = @intFromEnum(tag) }));
 945    _ = try cg.addExtra(mem_arg);
 946    try cg.addInst(.{ .tag = .atomics_prefix, .data = .{ .payload = extra_index } });
 947}
 948
 949/// Helper function to emit atomic mir opcodes.
 950fn addAtomicTag(cg: *CodeGen, tag: std.wasm.AtomicsOpcode) error{OutOfMemory}!void {
 951    const extra_index = try cg.addExtra(@as(struct { val: u32 }, .{ .val = @intFromEnum(tag) }));
 952    try cg.addInst(.{ .tag = .atomics_prefix, .data = .{ .payload = extra_index } });
 953}
 954
 955/// Appends entries to `mir_extra` based on the type of `extra`.
 956/// Returns the index into `mir_extra`
 957fn addExtra(cg: *CodeGen, extra: anytype) error{OutOfMemory}!u32 {
 958    const fields = std.meta.fields(@TypeOf(extra));
 959    try cg.mir_extra.ensureUnusedCapacity(cg.gpa, fields.len);
 960    return cg.addExtraAssumeCapacity(extra);
 961}
 962
 963/// Appends entries to `mir_extra` based on the type of `extra`.
 964/// Returns the index into `mir_extra`
 965fn addExtraAssumeCapacity(cg: *CodeGen, extra: anytype) error{OutOfMemory}!u32 {
 966    const fields = std.meta.fields(@TypeOf(extra));
 967    const result: u32 = @intCast(cg.mir_extra.items.len);
 968    inline for (fields) |field| {
 969        cg.mir_extra.appendAssumeCapacity(switch (field.type) {
 970            u32 => @field(extra, field.name),
 971            i32 => @bitCast(@field(extra, field.name)),
 972            InternPool.Index,
 973            InternPool.Nav.Index,
 974            => @intFromEnum(@field(extra, field.name)),
 975            else => |field_type| @compileError("Unsupported field type " ++ @typeName(field_type)),
 976        });
 977    }
 978    return result;
 979}
 980
 981/// For `std.builtin.CallingConvention.auto`.
 982pub fn typeToValtype(ty: Type, zcu: *const Zcu, target: *const std.Target) std.wasm.Valtype {
 983    const ip = &zcu.intern_pool;
 984    return switch (ty.zigTypeTag(zcu)) {
 985        .float => switch (ty.floatBits(target)) {
 986            16 => .i32, // stored/loaded as u16
 987            32 => .f32,
 988            64 => .f64,
 989            80, 128 => .i32,
 990            else => unreachable,
 991        },
 992        .int, .@"enum" => switch (ty.intInfo(zcu).bits) {
 993            0...32 => .i32,
 994            33...64 => .i64,
 995            else => .i32,
 996        },
 997        .@"struct" => blk: {
 998            if (zcu.typeToPackedStruct(ty)) |packed_struct| {
 999                const backing_int_ty = Type.fromInterned(packed_struct.backingIntTypeUnordered(ip));
1000                break :blk typeToValtype(backing_int_ty, zcu, target);
1001            } else {
1002                break :blk .i32;
1003            }
1004        },
1005        .vector => switch (CodeGen.determineSimdStoreStrategy(ty, zcu, target)) {
1006            .direct => .v128,
1007            .unrolled => .i32,
1008        },
1009        .@"union" => switch (ty.containerLayout(zcu)) {
1010            .@"packed" => switch (ty.bitSize(zcu)) {
1011                0...32 => .i32,
1012                33...64 => .i64,
1013                else => .i32,
1014            },
1015            else => .i32,
1016        },
1017        else => .i32, // all represented as reference/immediate
1018    };
1019}
1020
1021/// Using a given `Type`, returns the corresponding wasm value type
1022/// Differently from `typeToValtype` this also allows `void` to create a block
1023/// with no return type
1024fn genBlockType(ty: Type, zcu: *const Zcu, target: *const std.Target) std.wasm.BlockType {
1025    return switch (ty.ip_index) {
1026        .void_type, .noreturn_type => .empty,
1027        else => .fromValtype(typeToValtype(ty, zcu, target)),
1028    };
1029}
1030
1031/// Writes the bytecode depending on the given `WValue` in `val`
1032fn emitWValue(cg: *CodeGen, value: WValue) InnerError!void {
1033    switch (value) {
1034        .dead => unreachable, // reference to free'd `WValue` (missing reuseOperand?)
1035        .none, .stack => {}, // no-op
1036        .local => |idx| try cg.addLocal(.local_get, idx.value),
1037        .imm32 => |val| try cg.addImm32(val),
1038        .imm64 => |val| try cg.addImm64(val),
1039        .imm128 => |val| try cg.addImm128(val),
1040        .float32 => |val| try cg.addInst(.{ .tag = .f32_const, .data = .{ .float32 = val } }),
1041        .float64 => |val| try cg.addFloat64(val),
1042        .nav_ref => |nav_ref| {
1043            const zcu = cg.pt.zcu;
1044            const ip = &zcu.intern_pool;
1045            if (ip.getNav(nav_ref.nav_index).isFn(ip)) {
1046                assert(nav_ref.offset == 0);
1047                try cg.mir_indirect_function_set.put(cg.gpa, nav_ref.nav_index, {});
1048                try cg.addInst(.{ .tag = .func_ref, .data = .{ .nav_index = nav_ref.nav_index } });
1049            } else if (nav_ref.offset == 0) {
1050                try cg.addInst(.{ .tag = .nav_ref, .data = .{ .nav_index = nav_ref.nav_index } });
1051            } else {
1052                try cg.addInst(.{
1053                    .tag = .nav_ref_off,
1054                    .data = .{
1055                        .payload = try cg.addExtra(Mir.NavRefOff{
1056                            .nav_index = nav_ref.nav_index,
1057                            .offset = nav_ref.offset,
1058                        }),
1059                    },
1060                });
1061            }
1062        },
1063        .uav_ref => |uav| {
1064            const zcu = cg.pt.zcu;
1065            const ip = &zcu.intern_pool;
1066            assert(!ip.isFunctionType(ip.typeOf(uav.ip_index)));
1067            const gop = try cg.mir_uavs.getOrPut(cg.gpa, uav.ip_index);
1068            const this_align: Alignment = a: {
1069                if (uav.orig_ptr_ty == .none) break :a .none;
1070                const ptr_type = ip.indexToKey(uav.orig_ptr_ty).ptr_type;
1071                const this_align = ptr_type.flags.alignment;
1072                if (this_align == .none) break :a .none;
1073                const abi_align = Type.fromInterned(ptr_type.child).abiAlignment(zcu);
1074                if (this_align.compare(.lte, abi_align)) break :a .none;
1075                break :a this_align;
1076            };
1077            if (!gop.found_existing or
1078                gop.value_ptr.* == .none or
1079                (this_align != .none and this_align.compare(.gt, gop.value_ptr.*)))
1080            {
1081                gop.value_ptr.* = this_align;
1082            }
1083            if (uav.offset == 0) {
1084                try cg.addInst(.{
1085                    .tag = .uav_ref,
1086                    .data = .{ .ip_index = uav.ip_index },
1087                });
1088            } else {
1089                try cg.addInst(.{
1090                    .tag = .uav_ref_off,
1091                    .data = .{ .payload = try cg.addExtra(@as(Mir.UavRefOff, .{
1092                        .value = uav.ip_index,
1093                        .offset = uav.offset,
1094                    })) },
1095                });
1096            }
1097        },
1098        .stack_offset => try cg.addLocal(.local_get, cg.bottom_stack_value.local.value), // caller must ensure to address the offset
1099    }
1100}
1101
1102/// If given a local or stack-offset, increases the reference count by 1.
1103/// The old `WValue` found at instruction `ref` is then replaced by the
1104/// modified `WValue` and returned. When given a non-local or non-stack-offset,
1105/// returns the given `operand` itfunc instead.
1106fn reuseOperand(cg: *CodeGen, ref: Air.Inst.Ref, operand: WValue) WValue {
1107    if (operand != .local and operand != .stack_offset) return operand;
1108    var new_value = operand;
1109    switch (new_value) {
1110        .local => |*local| local.references += 1,
1111        .stack_offset => |*stack_offset| stack_offset.references += 1,
1112        else => unreachable,
1113    }
1114    const old_value = cg.getResolvedInst(ref);
1115    old_value.* = new_value;
1116    return new_value;
1117}
1118
1119/// From a reference, returns its resolved `WValue`.
1120/// It's illegal to provide a `Air.Inst.Ref` that hasn't been resolved yet.
1121fn getResolvedInst(cg: *CodeGen, ref: Air.Inst.Ref) *WValue {
1122    var index = cg.branches.items.len;
1123    while (index > 0) : (index -= 1) {
1124        const branch = cg.branches.items[index - 1];
1125        if (branch.values.getPtr(ref)) |value| {
1126            return value;
1127        }
1128    }
1129    unreachable; // developer-error: This can only be called on resolved instructions. Use `resolveInst` instead.
1130}
1131
1132/// Creates one locals for a given `Type`.
1133/// Returns a corresponding `Wvalue` with `local` as active tag
1134fn allocLocal(cg: *CodeGen, ty: Type) InnerError!WValue {
1135    const zcu = cg.pt.zcu;
1136    const valtype = typeToValtype(ty, zcu, cg.target);
1137    const index_or_null = switch (valtype) {
1138        .i32 => cg.free_locals_i32.pop(),
1139        .i64 => cg.free_locals_i64.pop(),
1140        .f32 => cg.free_locals_f32.pop(),
1141        .f64 => cg.free_locals_f64.pop(),
1142        .v128 => cg.free_locals_v128.pop(),
1143    };
1144    if (index_or_null) |index| {
1145        log.debug("reusing local ({d}) of type {}", .{ index, valtype });
1146        return .{ .local = .{ .value = index, .references = 1 } };
1147    }
1148    log.debug("new local of type {}", .{valtype});
1149    return cg.ensureAllocLocal(ty);
1150}
1151
1152/// Ensures a new local will be created. This is useful when it's useful
1153/// to use a zero-initialized local.
1154fn ensureAllocLocal(cg: *CodeGen, ty: Type) InnerError!WValue {
1155    const zcu = cg.pt.zcu;
1156    try cg.mir_locals.append(cg.gpa, typeToValtype(ty, zcu, cg.target));
1157    const initial_index = cg.local_index;
1158    cg.local_index += 1;
1159    return .{ .local = .{ .value = initial_index, .references = 1 } };
1160}
1161
1162pub const Error = error{
1163    OutOfMemory,
1164    /// Compiler was asked to operate on a number larger than supported.
1165    Overflow,
1166    /// Indicates the error is already stored in Zcu `failed_codegen`.
1167    CodegenFail,
1168};
1169
1170pub fn generate(
1171    bin_file: *link.File,
1172    pt: Zcu.PerThread,
1173    src_loc: Zcu.LazySrcLoc,
1174    func_index: InternPool.Index,
1175    air: *const Air,
1176    liveness: *const ?Air.Liveness,
1177) Error!Mir {
1178    _ = src_loc;
1179    _ = bin_file;
1180    const zcu = pt.zcu;
1181    const gpa = zcu.gpa;
1182    const cg = zcu.funcInfo(func_index);
1183    const file_scope = zcu.navFileScope(cg.owner_nav);
1184    const target = &file_scope.mod.?.resolved_target.result;
1185    const fn_ty = zcu.navValue(cg.owner_nav).typeOf(zcu);
1186    const fn_info = zcu.typeToFunc(fn_ty).?;
1187    const ret_ty: Type = .fromInterned(fn_info.return_type);
1188    const any_returns = !firstParamSRet(fn_info.cc, ret_ty, zcu, target) and ret_ty.hasRuntimeBitsIgnoreComptime(zcu);
1189
1190    var cc_result = try resolveCallingConventionValues(zcu, fn_ty, target);
1191    defer cc_result.deinit(gpa);
1192
1193    var code_gen: CodeGen = .{
1194        .gpa = gpa,
1195        .pt = pt,
1196        .air = air.*,
1197        .liveness = liveness.*.?,
1198        .owner_nav = cg.owner_nav,
1199        .target = target,
1200        .ptr_size = switch (target.cpu.arch) {
1201            .wasm32 => .wasm32,
1202            .wasm64 => .wasm64,
1203            else => unreachable,
1204        },
1205        .func_index = func_index,
1206        .args = cc_result.args,
1207        .return_value = cc_result.return_value,
1208        .local_index = cc_result.local_index,
1209        .mir_instructions = .empty,
1210        .mir_extra = .empty,
1211        .mir_locals = .empty,
1212        .mir_uavs = .empty,
1213        .mir_indirect_function_set = .empty,
1214        .mir_func_tys = .empty,
1215        .error_name_table_ref_count = 0,
1216    };
1217    defer code_gen.deinit();
1218
1219    try code_gen.mir_func_tys.putNoClobber(gpa, fn_ty.toIntern(), {});
1220
1221    return generateInner(&code_gen, any_returns) catch |err| switch (err) {
1222        error.CodegenFail,
1223        error.OutOfMemory,
1224        error.Overflow,
1225        => |e| return e,
1226        else => |e| return code_gen.fail("failed to generate function: {s}", .{@errorName(e)}),
1227    };
1228}
1229
1230fn generateInner(cg: *CodeGen, any_returns: bool) InnerError!Mir {
1231    const zcu = cg.pt.zcu;
1232    try cg.branches.append(cg.gpa, .{});
1233    // clean up outer branch
1234    defer {
1235        var outer_branch = cg.branches.pop().?;
1236        outer_branch.deinit(cg.gpa);
1237        assert(cg.branches.items.len == 0); // missing branch merge
1238    }
1239    // Generate MIR for function body
1240    try cg.genBody(cg.air.getMainBody());
1241
1242    // In case we have a return value, but the last instruction is a noreturn (such as a while loop)
1243    // we emit an unreachable instruction to tell the stack validator that part will never be reached.
1244    if (any_returns and cg.air.instructions.len > 0) {
1245        const inst: Air.Inst.Index = @enumFromInt(cg.air.instructions.len - 1);
1246        const last_inst_ty = cg.typeOfIndex(inst);
1247        if (!last_inst_ty.hasRuntimeBitsIgnoreComptime(zcu) or last_inst_ty.isNoReturn(zcu)) {
1248            try cg.addTag(.@"unreachable");
1249        }
1250    }
1251    // End of function body
1252    try cg.addTag(.end);
1253    try cg.addTag(.dbg_epilogue_begin);
1254
1255    var mir: Mir = .{
1256        .instructions = cg.mir_instructions.toOwnedSlice(),
1257        .extra = &.{}, // fallible so assigned after errdefer
1258        .locals = &.{}, // fallible so assigned after errdefer
1259        .prologue = if (cg.initial_stack_value == .none) .none else .{
1260            .sp_local = cg.initial_stack_value.local.value,
1261            .flags = .{ .stack_alignment = cg.stack_alignment },
1262            .stack_size = cg.stack_size,
1263            .bottom_stack_local = cg.bottom_stack_value.local.value,
1264        },
1265        .uavs = cg.mir_uavs.move(),
1266        .indirect_function_set = cg.mir_indirect_function_set.move(),
1267        .func_tys = cg.mir_func_tys.move(),
1268        .error_name_table_ref_count = cg.error_name_table_ref_count,
1269    };
1270    errdefer mir.deinit(cg.gpa);
1271    mir.extra = try cg.mir_extra.toOwnedSlice(cg.gpa);
1272    mir.locals = try cg.mir_locals.toOwnedSlice(cg.gpa);
1273    return mir;
1274}
1275
1276const CallWValues = struct {
1277    args: []WValue,
1278    return_value: WValue,
1279    local_index: u32,
1280
1281    fn deinit(values: *CallWValues, gpa: Allocator) void {
1282        gpa.free(values.args);
1283        values.* = undefined;
1284    }
1285};
1286
1287fn resolveCallingConventionValues(
1288    zcu: *const Zcu,
1289    fn_ty: Type,
1290    target: *const std.Target,
1291) Allocator.Error!CallWValues {
1292    const gpa = zcu.gpa;
1293    const ip = &zcu.intern_pool;
1294    const fn_info = zcu.typeToFunc(fn_ty).?;
1295    const cc = fn_info.cc;
1296
1297    var result: CallWValues = .{
1298        .args = &.{},
1299        .return_value = .none,
1300        .local_index = 0,
1301    };
1302    if (cc == .naked) return result;
1303
1304    var args = std.array_list.Managed(WValue).init(gpa);
1305    defer args.deinit();
1306
1307    // Check if we store the result as a pointer to the stack rather than
1308    // by value
1309    if (firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), zcu, target)) {
1310        // the sret arg will be passed as first argument, therefore we
1311        // set the `return_value` before allocating locals for regular args.
1312        result.return_value = .{ .local = .{ .value = result.local_index, .references = 1 } };
1313        result.local_index += 1;
1314    }
1315
1316    switch (cc) {
1317        .auto => {
1318            for (fn_info.param_types.get(ip)) |ty| {
1319                if (!Type.fromInterned(ty).hasRuntimeBitsIgnoreComptime(zcu)) {
1320                    continue;
1321                }
1322
1323                try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } });
1324                result.local_index += 1;
1325            }
1326        },
1327        .wasm_mvp => {
1328            for (fn_info.param_types.get(ip)) |ty| {
1329                if (!Type.fromInterned(ty).hasRuntimeBitsIgnoreComptime(zcu)) {
1330                    continue;
1331                }
1332                switch (abi.classifyType(.fromInterned(ty), zcu)) {
1333                    .direct => |scalar_ty| if (!abi.lowerAsDoubleI64(scalar_ty, zcu)) {
1334                        try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } });
1335                        result.local_index += 1;
1336                    } else {
1337                        try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } });
1338                        try args.append(.{ .local = .{ .value = result.local_index + 1, .references = 1 } });
1339                        result.local_index += 2;
1340                    },
1341                    .indirect => {
1342                        try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } });
1343                        result.local_index += 1;
1344                    },
1345                }
1346            }
1347        },
1348        else => unreachable, // Frontend is responsible for emitting an error earlier.
1349    }
1350    result.args = try args.toOwnedSlice();
1351    return result;
1352}
1353
1354pub fn firstParamSRet(
1355    cc: std.builtin.CallingConvention,
1356    return_type: Type,
1357    zcu: *const Zcu,
1358    target: *const std.Target,
1359) bool {
1360    if (!return_type.hasRuntimeBitsIgnoreComptime(zcu)) return false;
1361    switch (cc) {
1362        .@"inline" => unreachable,
1363        .auto => return isByRef(return_type, zcu, target),
1364        .wasm_mvp => switch (abi.classifyType(return_type, zcu)) {
1365            .direct => |scalar_ty| return abi.lowerAsDoubleI64(scalar_ty, zcu),
1366            .indirect => return true,
1367        },
1368        else => return false,
1369    }
1370}
1371
1372/// Lowers a Zig type and its value based on a given calling convention to ensure
1373/// it matches the ABI.
1374fn lowerArg(cg: *CodeGen, cc: std.builtin.CallingConvention, ty: Type, value: WValue) !void {
1375    if (cc != .wasm_mvp) {
1376        return cg.lowerToStack(value);
1377    }
1378
1379    const zcu = cg.pt.zcu;
1380
1381    switch (abi.classifyType(ty, zcu)) {
1382        .direct => |scalar_type| if (!abi.lowerAsDoubleI64(scalar_type, zcu)) {
1383            if (!isByRef(ty, zcu, cg.target)) {
1384                return cg.lowerToStack(value);
1385            } else {
1386                switch (value) {
1387                    .nav_ref, .stack_offset => _ = try cg.load(value, scalar_type, 0),
1388                    .dead => unreachable,
1389                    else => try cg.emitWValue(value),
1390                }
1391            }
1392        } else {
1393            assert(ty.abiSize(zcu) == 16);
1394            // in this case we have an integer or float that must be lowered as 2 i64's.
1395            try cg.emitWValue(value);
1396            try cg.addMemArg(.i64_load, .{ .offset = value.offset(), .alignment = 8 });
1397            try cg.emitWValue(value);
1398            try cg.addMemArg(.i64_load, .{ .offset = value.offset() + 8, .alignment = 8 });
1399        },
1400        .indirect => return cg.lowerToStack(value),
1401    }
1402}
1403
1404/// Lowers a `WValue` to the stack. This means when the `value` results in
1405/// `.stack_offset` we calculate the pointer of this offset and use that.
1406/// The value is left on the stack, and not stored in any temporary.
1407fn lowerToStack(cg: *CodeGen, value: WValue) !void {
1408    switch (value) {
1409        .stack_offset => |offset| {
1410            try cg.emitWValue(value);
1411            if (offset.value > 0) {
1412                switch (cg.ptr_size) {
1413                    .wasm32 => {
1414                        try cg.addImm32(offset.value);
1415                        try cg.addTag(.i32_add);
1416                    },
1417                    .wasm64 => {
1418                        try cg.addImm64(offset.value);
1419                        try cg.addTag(.i64_add);
1420                    },
1421                }
1422            }
1423        },
1424        else => try cg.emitWValue(value),
1425    }
1426}
1427
1428/// Creates a local for the initial stack value
1429/// Asserts `initial_stack_value` is `.none`
1430fn initializeStack(cg: *CodeGen) !void {
1431    assert(cg.initial_stack_value == .none);
1432    // Reserve a local to store the current stack pointer
1433    // We can later use this local to set the stack pointer back to the value
1434    // we have stored here.
1435    cg.initial_stack_value = try cg.ensureAllocLocal(Type.usize);
1436    // Also reserve a local to store the bottom stack value
1437    cg.bottom_stack_value = try cg.ensureAllocLocal(Type.usize);
1438}
1439
1440/// Reads the stack pointer from `Context.initial_stack_value` and writes it
1441/// to the global stack pointer variable
1442fn restoreStackPointer(cg: *CodeGen) !void {
1443    // only restore the pointer if it was initialized
1444    if (cg.initial_stack_value == .none) return;
1445    // Get the original stack pointer's value
1446    try cg.emitWValue(cg.initial_stack_value);
1447
1448    try cg.addTag(.global_set_sp);
1449}
1450
1451/// From a given type, will create space on the virtual stack to store the value of such type.
1452/// This returns a `WValue` with its active tag set to `local`, containing the index to the local
1453/// that points to the position on the virtual stack. This function should be used instead of
1454/// moveStack unless a local was already created to store the pointer.
1455///
1456/// Asserts Type has codegenbits
1457fn allocStack(cg: *CodeGen, ty: Type) !WValue {
1458    const pt = cg.pt;
1459    const zcu = pt.zcu;
1460    assert(ty.hasRuntimeBitsIgnoreComptime(zcu));
1461    if (cg.initial_stack_value == .none) {
1462        try cg.initializeStack();
1463    }
1464
1465    const abi_size = std.math.cast(u32, ty.abiSize(zcu)) orelse {
1466        return cg.fail("Type {f} with ABI size of {d} exceeds stack frame size", .{
1467            ty.fmt(pt), ty.abiSize(zcu),
1468        });
1469    };
1470    const abi_align = ty.abiAlignment(zcu);
1471
1472    cg.stack_alignment = cg.stack_alignment.max(abi_align);
1473
1474    const offset: u32 = @intCast(abi_align.forward(cg.stack_size));
1475    defer cg.stack_size = offset + abi_size;
1476
1477    return .{ .stack_offset = .{ .value = offset, .references = 1 } };
1478}
1479
1480/// From a given AIR instruction generates a pointer to the stack where
1481/// the value of its type will live.
1482/// This is different from allocStack where this will use the pointer's alignment
1483/// if it is set, to ensure the stack alignment will be set correctly.
1484fn allocStackPtr(cg: *CodeGen, inst: Air.Inst.Index) !WValue {
1485    const pt = cg.pt;
1486    const zcu = pt.zcu;
1487    const ptr_ty = cg.typeOfIndex(inst);
1488    const pointee_ty = ptr_ty.childType(zcu);
1489
1490    if (cg.initial_stack_value == .none) {
1491        try cg.initializeStack();
1492    }
1493
1494    if (!pointee_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
1495        return cg.allocStack(Type.usize); // create a value containing just the stack pointer.
1496    }
1497
1498    const abi_alignment = ptr_ty.ptrAlignment(zcu);
1499    const abi_size = std.math.cast(u32, pointee_ty.abiSize(zcu)) orelse {
1500        return cg.fail("Type {f} with ABI size of {d} exceeds stack frame size", .{
1501            pointee_ty.fmt(pt), pointee_ty.abiSize(zcu),
1502        });
1503    };
1504    cg.stack_alignment = cg.stack_alignment.max(abi_alignment);
1505
1506    const offset: u32 = @intCast(abi_alignment.forward(cg.stack_size));
1507    defer cg.stack_size = offset + abi_size;
1508
1509    return .{ .stack_offset = .{ .value = offset, .references = 1 } };
1510}
1511
1512/// From given zig bitsize, returns the wasm bitsize
1513fn toWasmBits(bits: u16) ?u16 {
1514    return for ([_]u16{ 32, 64, 128 }) |wasm_bits| {
1515        if (bits <= wasm_bits) return wasm_bits;
1516    } else null;
1517}
1518
1519/// Performs a copy of bytes for a given type. Copying all bytes
1520/// from rhs to lhs.
1521fn memcpy(cg: *CodeGen, dst: WValue, src: WValue, len: WValue) !void {
1522    const len_known_neq_0 = switch (len) {
1523        .imm32 => |val| if (val != 0) true else return,
1524        .imm64 => |val| if (val != 0) true else return,
1525        else => false,
1526    };
1527    // When bulk_memory is enabled, we lower it to wasm's memcpy instruction.
1528    // If not, we lower it ourselves manually
1529    if (cg.target.cpu.has(.wasm, .bulk_memory)) {
1530        const len0_ok = cg.target.cpu.has(.wasm, .nontrapping_bulk_memory_len0);
1531        const emit_check = !(len0_ok or len_known_neq_0);
1532
1533        if (emit_check) {
1534            try cg.startBlock(.block, .empty);
1535
1536            // Even if `len` is zero, the spec requires an implementation to trap if `src + len` or
1537            // `dst + len` are out of memory bounds. This can easily happen in Zig in a case such
1538            // as:
1539            //
1540            // const dst: [*]u8 = undefined;
1541            // const src: [*]u8 = undefined;
1542            // var len: usize = runtime_zero();
1543            // @memcpy(dst[0..len], src[0..len]);
1544            //
1545            // So explicitly avoid using `memory.copy` in the `len == 0` case. Lovely design.
1546            try cg.emitWValue(len);
1547            try cg.addTag(.i32_eqz);
1548            try cg.addLabel(.br_if, 0);
1549        }
1550
1551        try cg.lowerToStack(dst);
1552        try cg.lowerToStack(src);
1553        try cg.emitWValue(len);
1554        try cg.addExtended(.memory_copy);
1555
1556        if (emit_check) {
1557            try cg.endBlock();
1558        }
1559
1560        return;
1561    }
1562
1563    // when the length is comptime-known, rather than a runtime value, we can optimize the generated code by having
1564    // the loop during codegen, rather than inserting a runtime loop into the binary.
1565    switch (len) {
1566        .imm32, .imm64 => blk: {
1567            const length = switch (len) {
1568                .imm32 => |val| val,
1569                .imm64 => |val| val,
1570                else => unreachable,
1571            };
1572            // if the size (length) is more than 32 bytes, we use a runtime loop instead to prevent
1573            // binary size bloat.
1574            if (length > 32) break :blk;
1575            var offset: u32 = 0;
1576            const lhs_base = dst.offset();
1577            const rhs_base = src.offset();
1578            while (offset < length) : (offset += 1) {
1579                // get dst's address to store the result
1580                try cg.emitWValue(dst);
1581                // load byte from src's address
1582                try cg.emitWValue(src);
1583                switch (cg.ptr_size) {
1584                    .wasm32 => {
1585                        try cg.addMemArg(.i32_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 });
1586                        try cg.addMemArg(.i32_store8, .{ .offset = lhs_base + offset, .alignment = 1 });
1587                    },
1588                    .wasm64 => {
1589                        try cg.addMemArg(.i64_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 });
1590                        try cg.addMemArg(.i64_store8, .{ .offset = lhs_base + offset, .alignment = 1 });
1591                    },
1592                }
1593            }
1594            return;
1595        },
1596        else => {},
1597    }
1598
1599    // allocate a local for the offset, and set it to 0.
1600    // This to ensure that inside loops we correctly re-set the counter.
1601    var offset = try cg.allocLocal(Type.usize); // local for counter
1602    defer offset.free(cg);
1603    switch (cg.ptr_size) {
1604        .wasm32 => try cg.addImm32(0),
1605        .wasm64 => try cg.addImm64(0),
1606    }
1607    try cg.addLocal(.local_set, offset.local.value);
1608
1609    // outer block to jump to when loop is done
1610    try cg.startBlock(.block, .empty);
1611    try cg.startBlock(.loop, .empty);
1612
1613    // loop condition (offset == length -> break)
1614    {
1615        try cg.emitWValue(offset);
1616        try cg.emitWValue(len);
1617        switch (cg.ptr_size) {
1618            .wasm32 => try cg.addTag(.i32_eq),
1619            .wasm64 => try cg.addTag(.i64_eq),
1620        }
1621        try cg.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
1622    }
1623
1624    // get dst ptr
1625    {
1626        try cg.emitWValue(dst);
1627        try cg.emitWValue(offset);
1628        switch (cg.ptr_size) {
1629            .wasm32 => try cg.addTag(.i32_add),
1630            .wasm64 => try cg.addTag(.i64_add),
1631        }
1632    }
1633
1634    // get src value and also store in dst
1635    {
1636        try cg.emitWValue(src);
1637        try cg.emitWValue(offset);
1638        switch (cg.ptr_size) {
1639            .wasm32 => {
1640                try cg.addTag(.i32_add);
1641                try cg.addMemArg(.i32_load8_u, .{ .offset = src.offset(), .alignment = 1 });
1642                try cg.addMemArg(.i32_store8, .{ .offset = dst.offset(), .alignment = 1 });
1643            },
1644            .wasm64 => {
1645                try cg.addTag(.i64_add);
1646                try cg.addMemArg(.i64_load8_u, .{ .offset = src.offset(), .alignment = 1 });
1647                try cg.addMemArg(.i64_store8, .{ .offset = dst.offset(), .alignment = 1 });
1648            },
1649        }
1650    }
1651
1652    // increment loop counter
1653    {
1654        try cg.emitWValue(offset);
1655        switch (cg.ptr_size) {
1656            .wasm32 => {
1657                try cg.addImm32(1);
1658                try cg.addTag(.i32_add);
1659            },
1660            .wasm64 => {
1661                try cg.addImm64(1);
1662                try cg.addTag(.i64_add);
1663            },
1664        }
1665        try cg.addLocal(.local_set, offset.local.value);
1666        try cg.addLabel(.br, 0); // jump to start of loop
1667    }
1668    try cg.endBlock(); // close off loop block
1669    try cg.endBlock(); // close off outer block
1670}
1671
1672fn ptrSize(cg: *const CodeGen) u16 {
1673    return @divExact(cg.target.ptrBitWidth(), 8);
1674}
1675
1676/// For a given `Type`, will return true when the type will be passed
1677/// by reference, rather than by value
1678fn isByRef(ty: Type, zcu: *const Zcu, target: *const std.Target) bool {
1679    const ip = &zcu.intern_pool;
1680    switch (ty.zigTypeTag(zcu)) {
1681        .type,
1682        .comptime_int,
1683        .comptime_float,
1684        .enum_literal,
1685        .undefined,
1686        .null,
1687        .@"opaque",
1688        => unreachable,
1689
1690        .noreturn,
1691        .void,
1692        .bool,
1693        .error_set,
1694        .@"fn",
1695        .@"anyframe",
1696        => return false,
1697
1698        .array,
1699        .frame,
1700        => return ty.hasRuntimeBitsIgnoreComptime(zcu),
1701        .@"union" => {
1702            if (zcu.typeToUnion(ty)) |union_obj| {
1703                if (union_obj.flagsUnordered(ip).layout == .@"packed") {
1704                    return ty.abiSize(zcu) > 8;
1705                }
1706            }
1707            return ty.hasRuntimeBitsIgnoreComptime(zcu);
1708        },
1709        .@"struct" => {
1710            if (zcu.typeToPackedStruct(ty)) |packed_struct| {
1711                return isByRef(Type.fromInterned(packed_struct.backingIntTypeUnordered(ip)), zcu, target);
1712            }
1713            return ty.hasRuntimeBitsIgnoreComptime(zcu);
1714        },
1715        .vector => return determineSimdStoreStrategy(ty, zcu, target) == .unrolled,
1716        .int => return ty.intInfo(zcu).bits > 64,
1717        .@"enum" => return ty.intInfo(zcu).bits > 64,
1718        .float => return ty.floatBits(target) > 64,
1719        .error_union => {
1720            const pl_ty = ty.errorUnionPayload(zcu);
1721            if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
1722                return false;
1723            }
1724            return true;
1725        },
1726        .optional => {
1727            if (ty.isPtrLikeOptional(zcu)) return false;
1728            const pl_type = ty.optionalChild(zcu);
1729            if (pl_type.zigTypeTag(zcu) == .error_set) return false;
1730            return pl_type.hasRuntimeBitsIgnoreComptime(zcu);
1731        },
1732        .pointer => {
1733            // Slices act like struct and will be passed by reference
1734            if (ty.isSlice(zcu)) return true;
1735            return false;
1736        },
1737    }
1738}
1739
1740const SimdStoreStrategy = enum {
1741    direct,
1742    unrolled,
1743};
1744
1745/// For a given vector type, returns the `SimdStoreStrategy`.
1746/// This means when a given type is 128 bits and either the simd128 or relaxed-simd
1747/// features are enabled, the function will return `.direct`. This would allow to store
1748/// it using a instruction, rather than an unrolled version.
1749pub fn determineSimdStoreStrategy(ty: Type, zcu: *const Zcu, target: *const std.Target) SimdStoreStrategy {
1750    assert(ty.zigTypeTag(zcu) == .vector);
1751    if (ty.bitSize(zcu) != 128) return .unrolled;
1752    if (target.cpu.has(.wasm, .relaxed_simd) or target.cpu.has(.wasm, .simd128)) {
1753        return .direct;
1754    }
1755    return .unrolled;
1756}
1757
1758/// Creates a new local for a pointer that points to memory with given offset.
1759/// This can be used to get a pointer to a struct field, error payload, etc.
1760/// By providing `modify` as action, it will modify the given `ptr_value` instead of making a new
1761/// local value to store the pointer. This allows for local re-use and improves binary size.
1762fn buildPointerOffset(cg: *CodeGen, ptr_value: WValue, offset: u64, action: enum { modify, new }) InnerError!WValue {
1763    // do not perform arithmetic when offset is 0.
1764    if (offset == 0 and ptr_value.offset() == 0 and action == .modify) return ptr_value;
1765    const result_ptr: WValue = switch (action) {
1766        .new => try cg.ensureAllocLocal(Type.usize),
1767        .modify => ptr_value,
1768    };
1769    try cg.emitWValue(ptr_value);
1770    if (offset + ptr_value.offset() > 0) {
1771        switch (cg.ptr_size) {
1772            .wasm32 => {
1773                try cg.addImm32(@intCast(offset + ptr_value.offset()));
1774                try cg.addTag(.i32_add);
1775            },
1776            .wasm64 => {
1777                try cg.addImm64(offset + ptr_value.offset());
1778                try cg.addTag(.i64_add);
1779            },
1780        }
1781    }
1782    try cg.addLocal(.local_set, result_ptr.local.value);
1783    return result_ptr;
1784}
1785
1786fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
1787    const air_tags = cg.air.instructions.items(.tag);
1788    return switch (air_tags[@intFromEnum(inst)]) {
1789        // No "scalarize" legalizations are enabled, so these instructions never appear.
1790        .legalize_vec_elem_val => unreachable,
1791        .legalize_vec_store_elem => unreachable,
1792        // No soft float legalizations are enabled.
1793        .legalize_compiler_rt_call => unreachable,
1794
1795        .inferred_alloc, .inferred_alloc_comptime => unreachable,
1796
1797        .add => cg.airBinOp(inst, .add),
1798        .add_sat => cg.airSatBinOp(inst, .add),
1799        .add_wrap => cg.airWrapBinOp(inst, .add),
1800        .sub => cg.airBinOp(inst, .sub),
1801        .sub_sat => cg.airSatBinOp(inst, .sub),
1802        .sub_wrap => cg.airWrapBinOp(inst, .sub),
1803        .mul => cg.airBinOp(inst, .mul),
1804        .mul_sat => cg.airSatMul(inst),
1805        .mul_wrap => cg.airWrapBinOp(inst, .mul),
1806        .div_float, .div_exact => cg.airDiv(inst),
1807        .div_trunc => cg.airDivTrunc(inst),
1808        .div_floor => cg.airDivFloor(inst),
1809        .bit_and => cg.airBinOp(inst, .@"and"),
1810        .bit_or => cg.airBinOp(inst, .@"or"),
1811        .bool_and => cg.airBinOp(inst, .@"and"),
1812        .bool_or => cg.airBinOp(inst, .@"or"),
1813        .rem => cg.airRem(inst),
1814        .mod => cg.airMod(inst),
1815        .shl => cg.airWrapBinOp(inst, .shl),
1816        .shl_exact => cg.airBinOp(inst, .shl),
1817        .shl_sat => cg.airShlSat(inst),
1818        .shr, .shr_exact => cg.airBinOp(inst, .shr),
1819        .xor => cg.airBinOp(inst, .xor),
1820        .max => cg.airMaxMin(inst, .fmax, .gt),
1821        .min => cg.airMaxMin(inst, .fmin, .lt),
1822        .mul_add => cg.airMulAdd(inst),
1823
1824        .sqrt => cg.airUnaryFloatOp(inst, .sqrt),
1825        .sin => cg.airUnaryFloatOp(inst, .sin),
1826        .cos => cg.airUnaryFloatOp(inst, .cos),
1827        .tan => cg.airUnaryFloatOp(inst, .tan),
1828        .exp => cg.airUnaryFloatOp(inst, .exp),
1829        .exp2 => cg.airUnaryFloatOp(inst, .exp2),
1830        .log => cg.airUnaryFloatOp(inst, .log),
1831        .log2 => cg.airUnaryFloatOp(inst, .log2),
1832        .log10 => cg.airUnaryFloatOp(inst, .log10),
1833        .floor => cg.airUnaryFloatOp(inst, .floor),
1834        .ceil => cg.airUnaryFloatOp(inst, .ceil),
1835        .round => cg.airUnaryFloatOp(inst, .round),
1836        .trunc_float => cg.airUnaryFloatOp(inst, .trunc),
1837        .neg => cg.airUnaryFloatOp(inst, .neg),
1838
1839        .abs => cg.airAbs(inst),
1840
1841        .add_with_overflow => cg.airAddSubWithOverflow(inst, .add),
1842        .sub_with_overflow => cg.airAddSubWithOverflow(inst, .sub),
1843        .shl_with_overflow => cg.airShlWithOverflow(inst),
1844        .mul_with_overflow => cg.airMulWithOverflow(inst),
1845
1846        .clz => cg.airClz(inst),
1847        .ctz => cg.airCtz(inst),
1848
1849        .cmp_eq => cg.airCmp(inst, .eq),
1850        .cmp_gte => cg.airCmp(inst, .gte),
1851        .cmp_gt => cg.airCmp(inst, .gt),
1852        .cmp_lte => cg.airCmp(inst, .lte),
1853        .cmp_lt => cg.airCmp(inst, .lt),
1854        .cmp_neq => cg.airCmp(inst, .neq),
1855
1856        .cmp_vector => cg.airCmpVector(inst),
1857        .cmp_lt_errors_len => cg.airCmpLtErrorsLen(inst),
1858
1859        .array_elem_val => cg.airArrayElemVal(inst),
1860        .array_to_slice => cg.airArrayToSlice(inst),
1861        .alloc => cg.airAlloc(inst),
1862        .arg => cg.airArg(inst),
1863        .bitcast => cg.airBitcast(inst),
1864        .block => cg.airBlock(inst),
1865        .trap => cg.airTrap(inst),
1866        .breakpoint => cg.airBreakpoint(inst),
1867        .br => cg.airBr(inst),
1868        .repeat => cg.airRepeat(inst),
1869        .switch_dispatch => cg.airSwitchDispatch(inst),
1870        .cond_br => cg.airCondBr(inst),
1871        .intcast => cg.airIntcast(inst),
1872        .fptrunc => cg.airFptrunc(inst),
1873        .fpext => cg.airFpext(inst),
1874        .int_from_float => cg.airIntFromFloat(inst),
1875        .float_from_int => cg.airFloatFromInt(inst),
1876        .get_union_tag => cg.airGetUnionTag(inst),
1877
1878        .@"try" => cg.airTry(inst),
1879        .try_cold => cg.airTry(inst),
1880        .try_ptr => cg.airTryPtr(inst),
1881        .try_ptr_cold => cg.airTryPtr(inst),
1882
1883        .dbg_stmt => cg.airDbgStmt(inst),
1884        .dbg_empty_stmt => try cg.finishAir(inst, .none, &.{}),
1885        .dbg_inline_block => cg.airDbgInlineBlock(inst),
1886        .dbg_var_ptr => cg.airDbgVar(inst, .local_var, true),
1887        .dbg_var_val => cg.airDbgVar(inst, .local_var, false),
1888        .dbg_arg_inline => cg.airDbgVar(inst, .arg, false),
1889
1890        .call => cg.airCall(inst, .auto),
1891        .call_always_tail => cg.airCall(inst, .always_tail),
1892        .call_never_tail => cg.airCall(inst, .never_tail),
1893        .call_never_inline => cg.airCall(inst, .never_inline),
1894
1895        .is_err => cg.airIsErr(inst, .i32_ne, .value),
1896        .is_non_err => cg.airIsErr(inst, .i32_eq, .value),
1897        .is_err_ptr => cg.airIsErr(inst, .i32_ne, .ptr),
1898        .is_non_err_ptr => cg.airIsErr(inst, .i32_eq, .ptr),
1899
1900        .is_null => cg.airIsNull(inst, .i32_eq, .value),
1901        .is_non_null => cg.airIsNull(inst, .i32_ne, .value),
1902        .is_null_ptr => cg.airIsNull(inst, .i32_eq, .ptr),
1903        .is_non_null_ptr => cg.airIsNull(inst, .i32_ne, .ptr),
1904
1905        .load => cg.airLoad(inst),
1906        .loop => cg.airLoop(inst),
1907        .memset => cg.airMemset(inst, false),
1908        .memset_safe => cg.airMemset(inst, true),
1909        .not => cg.airNot(inst),
1910        .optional_payload => cg.airOptionalPayload(inst),
1911        .optional_payload_ptr => cg.airOptionalPayloadPtr(inst),
1912        .optional_payload_ptr_set => cg.airOptionalPayloadPtrSet(inst),
1913        .ptr_add => cg.airPtrBinOp(inst, .add),
1914        .ptr_sub => cg.airPtrBinOp(inst, .sub),
1915        .ptr_elem_ptr => cg.airPtrElemPtr(inst),
1916        .ptr_elem_val => cg.airPtrElemVal(inst),
1917        .ret => cg.airRet(inst),
1918        .ret_safe => cg.airRet(inst), // TODO
1919        .ret_ptr => cg.airRetPtr(inst),
1920        .ret_load => cg.airRetLoad(inst),
1921        .splat => cg.airSplat(inst),
1922        .select => cg.airSelect(inst),
1923        .shuffle_one => cg.airShuffleOne(inst),
1924        .shuffle_two => cg.airShuffleTwo(inst),
1925        .reduce => cg.airReduce(inst),
1926        .aggregate_init => cg.airAggregateInit(inst),
1927        .union_init => cg.airUnionInit(inst),
1928        .prefetch => cg.airPrefetch(inst),
1929        .popcount => cg.airPopcount(inst),
1930        .byte_swap => cg.airByteSwap(inst),
1931        .bit_reverse => cg.airBitReverse(inst),
1932
1933        .slice => cg.airSlice(inst),
1934        .slice_len => cg.airSliceLen(inst),
1935        .slice_elem_val => cg.airSliceElemVal(inst),
1936        .slice_elem_ptr => cg.airSliceElemPtr(inst),
1937        .slice_ptr => cg.airSlicePtr(inst),
1938        .ptr_slice_len_ptr => cg.airPtrSliceFieldPtr(inst, cg.ptrSize()),
1939        .ptr_slice_ptr_ptr => cg.airPtrSliceFieldPtr(inst, 0),
1940        .store => cg.airStore(inst, false),
1941        .store_safe => cg.airStore(inst, true),
1942
1943        .set_union_tag => cg.airSetUnionTag(inst),
1944        .struct_field_ptr => cg.airStructFieldPtr(inst),
1945        .struct_field_ptr_index_0 => cg.airStructFieldPtrIndex(inst, 0),
1946        .struct_field_ptr_index_1 => cg.airStructFieldPtrIndex(inst, 1),
1947        .struct_field_ptr_index_2 => cg.airStructFieldPtrIndex(inst, 2),
1948        .struct_field_ptr_index_3 => cg.airStructFieldPtrIndex(inst, 3),
1949        .struct_field_val => cg.airStructFieldVal(inst),
1950        .field_parent_ptr => cg.airFieldParentPtr(inst),
1951
1952        .switch_br => cg.airSwitchBr(inst, false),
1953        .loop_switch_br => cg.airSwitchBr(inst, true),
1954        .trunc => cg.airTrunc(inst),
1955        .unreach => cg.airUnreachable(inst),
1956
1957        .wrap_optional => cg.airWrapOptional(inst),
1958        .unwrap_errunion_payload => cg.airUnwrapErrUnionPayload(inst, false),
1959        .unwrap_errunion_payload_ptr => cg.airUnwrapErrUnionPayload(inst, true),
1960        .unwrap_errunion_err => cg.airUnwrapErrUnionError(inst, false),
1961        .unwrap_errunion_err_ptr => cg.airUnwrapErrUnionError(inst, true),
1962        .wrap_errunion_payload => cg.airWrapErrUnionPayload(inst),
1963        .wrap_errunion_err => cg.airWrapErrUnionErr(inst),
1964        .errunion_payload_ptr_set => cg.airErrUnionPayloadPtrSet(inst),
1965        .error_name => cg.airErrorName(inst),
1966
1967        .wasm_memory_size => cg.airWasmMemorySize(inst),
1968        .wasm_memory_grow => cg.airWasmMemoryGrow(inst),
1969
1970        .memcpy, .memmove => cg.airMemcpy(inst),
1971
1972        .ret_addr => cg.airRetAddr(inst),
1973        .tag_name => cg.airTagName(inst),
1974
1975        .error_set_has_value => cg.airErrorSetHasValue(inst),
1976        .frame_addr => cg.airFrameAddress(inst),
1977
1978        .runtime_nav_ptr => cg.airRuntimeNavPtr(inst),
1979
1980        .assembly,
1981
1982        .err_return_trace,
1983        .set_err_return_trace,
1984        .save_err_return_trace_index,
1985        .is_named_enum_value,
1986        .addrspace_cast,
1987        .c_va_arg,
1988        .c_va_copy,
1989        .c_va_end,
1990        .c_va_start,
1991        => |tag| return cg.fail("TODO: Implement wasm inst: {s}", .{@tagName(tag)}),
1992
1993        .atomic_load => cg.airAtomicLoad(inst),
1994        .atomic_store_unordered,
1995        .atomic_store_monotonic,
1996        .atomic_store_release,
1997        .atomic_store_seq_cst,
1998        // in WebAssembly, all atomic instructions are sequentially ordered.
1999        => cg.airAtomicStore(inst),
2000        .atomic_rmw => cg.airAtomicRmw(inst),
2001        .cmpxchg_weak => cg.airCmpxchg(inst),
2002        .cmpxchg_strong => cg.airCmpxchg(inst),
2003
2004        .add_optimized,
2005        .sub_optimized,
2006        .mul_optimized,
2007        .div_float_optimized,
2008        .div_trunc_optimized,
2009        .div_floor_optimized,
2010        .div_exact_optimized,
2011        .rem_optimized,
2012        .mod_optimized,
2013        .neg_optimized,
2014        .cmp_lt_optimized,
2015        .cmp_lte_optimized,
2016        .cmp_eq_optimized,
2017        .cmp_gte_optimized,
2018        .cmp_gt_optimized,
2019        .cmp_neq_optimized,
2020        .cmp_vector_optimized,
2021        .reduce_optimized,
2022        .int_from_float_optimized,
2023        => return cg.fail("TODO implement optimized float mode", .{}),
2024
2025        .add_safe,
2026        .sub_safe,
2027        .mul_safe,
2028        .intcast_safe,
2029        .int_from_float_safe,
2030        .int_from_float_optimized_safe,
2031        => return cg.fail("TODO implement safety_checked_instructions", .{}),
2032
2033        .work_item_id,
2034        .work_group_size,
2035        .work_group_id,
2036        => unreachable,
2037    };
2038}
2039
2040fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
2041    const zcu = cg.pt.zcu;
2042    const ip = &zcu.intern_pool;
2043
2044    for (body) |inst| {
2045        if (cg.liveness.isUnused(inst) and !cg.air.mustLower(inst, ip)) {
2046            continue;
2047        }
2048        const old_bookkeeping_value = cg.air_bookkeeping;
2049        try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, Air.Liveness.bpi);
2050        try cg.genInst(inst);
2051
2052        if (std.debug.runtime_safety and cg.air_bookkeeping < old_bookkeeping_value + 1) {
2053            std.debug.panic("Missing call to `finishAir` in AIR instruction %{d} ('{t}')", .{
2054                inst,
2055                cg.air.instructions.items(.tag)[@intFromEnum(inst)],
2056            });
2057        }
2058    }
2059}
2060
2061fn airRet(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
2062    const zcu = cg.pt.zcu;
2063    const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
2064    const operand = try cg.resolveInst(un_op);
2065    const fn_info = zcu.typeToFunc(zcu.navValue(cg.owner_nav).typeOf(zcu)).?;
2066    const ret_ty = Type.fromInterned(fn_info.return_type);
2067
2068    // result must be stored in the stack and we return a pointer
2069    // to the stack instead
2070    if (cg.return_value != .none) {
2071        try cg.store(cg.return_value, operand, ret_ty, 0);
2072    } else if (fn_info.cc == .wasm_mvp and ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
2073        switch (abi.classifyType(ret_ty, zcu)) {
2074            .direct => |scalar_type| {
2075                assert(!abi.lowerAsDoubleI64(scalar_type, zcu));
2076                if (!isByRef(ret_ty, zcu, cg.target)) {
2077                    try cg.emitWValue(operand);
2078                } else {
2079                    _ = try cg.load(operand, scalar_type, 0);
2080                }
2081            },
2082            .indirect => unreachable,
2083        }
2084    } else {
2085        if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu) and ret_ty.isError(zcu)) {
2086            try cg.addImm32(0);
2087        } else {
2088            try cg.emitWValue(operand);
2089        }
2090    }
2091    try cg.restoreStackPointer();
2092    try cg.addTag(.@"return");
2093
2094    return cg.finishAir(inst, .none, &.{un_op});
2095}
2096
2097fn airRetPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
2098    const zcu = cg.pt.zcu;
2099    const child_type = cg.typeOfIndex(inst).childType(zcu);
2100
2101    const result = result: {
2102        if (!child_type.isFnOrHasRuntimeBitsIgnoreComptime(zcu)) {
2103            break :result try cg.allocStack(Type.usize); // create pointer to void
2104        }
2105
2106        const fn_info = zcu.typeToFunc(zcu.navValue(cg.owner_nav).typeOf(zcu)).?;
2107        if (firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), zcu, cg.target)) {
2108            break :result cg.return_value;
2109        }
2110
2111        break :result try cg.allocStackPtr(inst);
2112    };
2113
2114    return cg.finishAir(inst, result, &.{});
2115}
2116
2117fn airRetLoad(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
2118    const zcu = cg.pt.zcu;
2119    const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
2120    const operand = try cg.resolveInst(un_op);
2121    const ret_ty = cg.typeOf(un_op).childType(zcu);
2122
2123    const fn_info = zcu.typeToFunc(zcu.navValue(cg.owner_nav).typeOf(zcu)).?;
2124    if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
2125        if (ret_ty.isError(zcu)) {
2126            try cg.addImm32(0);
2127        }
2128    } else if (!firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), zcu, cg.target)) {
2129        // leave on the stack
2130        _ = try cg.load(operand, ret_ty, 0);
2131    }
2132
2133    try cg.restoreStackPointer();
2134    try cg.addTag(.@"return");
2135    return cg.finishAir(inst, .none, &.{un_op});
2136}
2137
2138fn airCall(cg: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) InnerError!void {
2139    if (modifier == .always_tail) return cg.fail("TODO implement tail calls for wasm", .{});
2140    const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
2141    const extra = cg.air.extraData(Air.Call, pl_op.payload);
2142    const args: []const Air.Inst.Ref = @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.args_len]);
2143    const ty = cg.typeOf(pl_op.operand);
2144
2145    const pt = cg.pt;
2146    const zcu = pt.zcu;
2147    const ip = &zcu.intern_pool;
2148    const fn_ty = switch (ty.zigTypeTag(zcu)) {
2149        .@"fn" => ty,
2150        .pointer => ty.childType(zcu),
2151        else => unreachable,
2152    };
2153    const ret_ty = fn_ty.fnReturnType(zcu);
2154    const fn_info = zcu.typeToFunc(fn_ty).?;
2155    const first_param_sret = firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), zcu, cg.target);
2156
2157    const callee: ?InternPool.Nav.Index = blk: {
2158        const func_val = (try cg.air.value(pl_op.operand, pt)) orelse break :blk null;
2159
2160        switch (ip.indexToKey(func_val.toIntern())) {
2161            inline .func, .@"extern" => |x| break :blk x.owner_nav,
2162            .ptr => |ptr| if (ptr.byte_offset == 0) switch (ptr.base_addr) {
2163                .nav => |nav| break :blk nav,
2164                else => {},
2165            },
2166            else => {},
2167        }
2168        return cg.fail("unable to lower callee to a function index", .{});
2169    };
2170
2171    const sret: WValue = if (first_param_sret) blk: {
2172        const sret_local = try cg.allocStack(ret_ty);
2173        try cg.lowerToStack(sret_local);
2174        break :blk sret_local;
2175    } else .none;
2176
2177    for (args) |arg| {
2178        const arg_val = try cg.resolveInst(arg);
2179
2180        const arg_ty = cg.typeOf(arg);
2181        if (!arg_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
2182
2183        try cg.lowerArg(zcu.typeToFunc(fn_ty).?.cc, arg_ty, arg_val);
2184    }
2185
2186    if (callee) |nav_index| {
2187        try cg.addInst(.{ .tag = .call_nav, .data = .{ .nav_index = nav_index } });
2188    } else {
2189        // in this case we call a function pointer
2190        // so load its value onto the stack
2191        assert(ty.zigTypeTag(zcu) == .pointer);
2192        const operand = try cg.resolveInst(pl_op.operand);
2193        try cg.emitWValue(operand);
2194
2195        try cg.mir_func_tys.put(cg.gpa, fn_ty.toIntern(), {});
2196        try cg.addInst(.{
2197            .tag = .call_indirect,
2198            .data = .{ .ip_index = fn_ty.toIntern() },
2199        });
2200    }
2201
2202    const result_value = result_value: {
2203        if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu) and !ret_ty.isError(zcu)) {
2204            break :result_value .none;
2205        } else if (ret_ty.isNoReturn(zcu)) {
2206            try cg.addTag(.@"unreachable");
2207            break :result_value .none;
2208        } else if (first_param_sret) {
2209            break :result_value sret;
2210        } else if (zcu.typeToFunc(fn_ty).?.cc == .wasm_mvp) {
2211            switch (abi.classifyType(ret_ty, zcu)) {
2212                .direct => |scalar_type| {
2213                    assert(!abi.lowerAsDoubleI64(scalar_type, zcu));
2214                    if (!isByRef(ret_ty, zcu, cg.target)) {
2215                        const result_local = try cg.allocLocal(ret_ty);
2216                        try cg.addLocal(.local_set, result_local.local.value);
2217                        break :result_value result_local;
2218                    } else {
2219                        const result_local = try cg.allocLocal(ret_ty);
2220                        try cg.addLocal(.local_set, result_local.local.value);
2221                        const result = try cg.allocStack(ret_ty);
2222                        try cg.store(result, result_local, scalar_type, 0);
2223                        break :result_value result;
2224                    }
2225                },
2226                .indirect => unreachable,
2227            }
2228        } else {
2229            const result_local = try cg.allocLocal(ret_ty);
2230            try cg.addLocal(.local_set, result_local.local.value);
2231            break :result_value result_local;
2232        }
2233    };
2234
2235    var bt = try cg.iterateBigTomb(inst, 1 + args.len);
2236    bt.feed(pl_op.operand);
2237    for (args) |arg| bt.feed(arg);
2238    return bt.finishAir(result_value);
2239}
2240
2241fn airAlloc(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
2242    const value = try cg.allocStackPtr(inst);
2243    return cg.finishAir(inst, value, &.{});
2244}
2245
2246fn airStore(cg: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void {
2247    const pt = cg.pt;
2248    const zcu = pt.zcu;
2249    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
2250
2251    const lhs = try cg.resolveInst(bin_op.lhs);
2252    const rhs = try cg.resolveInst(bin_op.rhs);
2253    const ptr_ty = cg.typeOf(bin_op.lhs);
2254    const ptr_info = ptr_ty.ptrInfo(zcu);
2255    const ty = ptr_ty.childType(zcu);
2256
2257    if (!safety and bin_op.rhs == .undef) {
2258        return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs });
2259    }
2260
2261    if (ptr_info.packed_offset.host_size == 0) {
2262        try cg.store(lhs, rhs, ty, 0);
2263    } else {
2264        // at this point we have a non-natural alignment, we must
2265        // load the value, and then shift+or the rhs into the result location.
2266        const host_size = ptr_info.packed_offset.host_size * 8;
2267        const host_ty = try pt.intType(.unsigned, host_size);
2268        const bit_size: u16 = @intCast(ty.bitSize(zcu));
2269        const bit_offset = ptr_info.packed_offset.bit_offset;
2270
2271        const mask_val = try cg.resolveValue(val: {
2272            const limbs = try cg.gpa.alloc(
2273                std.math.big.Limb,
2274                std.math.big.int.calcTwosCompLimbCount(host_size) + 1,
2275            );
2276            defer cg.gpa.free(limbs);
2277
2278            var mask_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined };
2279            mask_bigint.setTwosCompIntLimit(.max, .unsigned, host_size);
2280
2281            if (bit_size != host_size) {
2282                mask_bigint.shiftRight(mask_bigint.toConst(), host_size - bit_size);
2283            }
2284            if (bit_offset != 0) {
2285                mask_bigint.shiftLeft(mask_bigint.toConst(), bit_offset);
2286            }
2287            mask_bigint.bitNotWrap(mask_bigint.toConst(), .unsigned, host_size);
2288
2289            break :val try pt.intValue_big(host_ty, mask_bigint.toConst());
2290        });
2291
2292        const shift_val: WValue = if (33 <= host_size and host_size <= 64)
2293            .{ .imm64 = bit_offset }
2294        else
2295            .{ .imm32 = bit_offset };
2296
2297        if (host_size <= 64) {
2298            try cg.emitWValue(lhs);
2299        }
2300        const loaded = if (host_size <= 64)
2301            try cg.load(lhs, host_ty, 0)
2302        else
2303            lhs;
2304        const anded = try cg.binOp(loaded, mask_val, host_ty, .@"and");
2305        const extended_value = try cg.intcast(rhs, ty, host_ty);
2306        const shifted_value = if (bit_offset > 0)
2307            try cg.binOp(extended_value, shift_val, host_ty, .shl)
2308        else
2309            extended_value;
2310        const result = try cg.binOp(anded, shifted_value, host_ty, .@"or");
2311        if (host_size <= 64) {
2312            try cg.store(.stack, result, host_ty, lhs.offset());
2313        } else {
2314            try cg.store(lhs, result, host_ty, lhs.offset());
2315        }
2316    }
2317
2318    return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs });
2319}
2320
2321fn store(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerError!void {
2322    assert(!(lhs != .stack and rhs == .stack));
2323    const pt = cg.pt;
2324    const zcu = pt.zcu;
2325    const abi_size = ty.abiSize(zcu);
2326
2327    if (!ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
2328
2329    switch (ty.zigTypeTag(zcu)) {
2330        .error_union => {
2331            const pl_ty = ty.errorUnionPayload(zcu);
2332            if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
2333                return cg.store(lhs, rhs, Type.anyerror, offset);
2334            }
2335
2336            const len = @as(u32, @intCast(abi_size));
2337            assert(offset == 0);
2338            return cg.memcpy(lhs, rhs, .{ .imm32 = len });
2339        },
2340        .optional => {
2341            if (ty.isPtrLikeOptional(zcu)) {
2342                return cg.store(lhs, rhs, Type.usize, offset);
2343            }
2344            const pl_ty = ty.optionalChild(zcu);
2345            if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
2346                return cg.store(lhs, rhs, Type.u8, offset);
2347            }
2348            if (pl_ty.zigTypeTag(zcu) == .error_set) {
2349                return cg.store(lhs, rhs, Type.anyerror, offset);
2350            }
2351
2352            const len = @as(u32, @intCast(abi_size));
2353            assert(offset == 0);
2354            return cg.memcpy(lhs, rhs, .{ .imm32 = len });
2355        },
2356        .@"struct", .array, .@"union" => if (isByRef(ty, zcu, cg.target)) {
2357            const len = @as(u32, @intCast(abi_size));
2358            assert(offset == 0);
2359            return cg.memcpy(lhs, rhs, .{ .imm32 = len });
2360        },
2361        .vector => switch (determineSimdStoreStrategy(ty, zcu, cg.target)) {
2362            .unrolled => {
2363                const len: u32 = @intCast(abi_size);
2364                return cg.memcpy(lhs, rhs, .{ .imm32 = len });
2365            },
2366            .direct => {
2367                try cg.emitWValue(lhs);
2368                try cg.lowerToStack(rhs);
2369                // TODO: Add helper functions for simd opcodes
2370                const extra_index: u32 = @intCast(cg.mir_extra.items.len);
2371                // stores as := opcode, offset, alignment (opcode::memarg)
2372                try cg.mir_extra.appendSlice(cg.gpa, &[_]u32{
2373                    @intFromEnum(std.wasm.SimdOpcode.v128_store),
2374                    offset + lhs.offset(),
2375                    @intCast(ty.abiAlignment(zcu).toByteUnits() orelse 0),
2376                });
2377                return cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } });
2378            },
2379        },
2380        .pointer => {
2381            if (ty.isSlice(zcu)) {
2382                assert(offset == 0);
2383                // store pointer first
2384                // lower it to the stack so we do not have to store rhs into a local first
2385                try cg.emitWValue(lhs);
2386                const ptr_local = try cg.load(rhs, Type.usize, 0);
2387                try cg.store(.stack, ptr_local, Type.usize, 0 + lhs.offset());
2388
2389                // retrieve length from rhs, and store that alongside lhs as well
2390                try cg.emitWValue(lhs);
2391                const len_local = try cg.load(rhs, Type.usize, cg.ptrSize());
2392                try cg.store(.stack, len_local, Type.usize, cg.ptrSize() + lhs.offset());
2393                return;
2394            }
2395        },
2396        .int, .@"enum", .float => if (abi_size > 8 and abi_size <= 16) {
2397            assert(offset == 0);
2398            try cg.emitWValue(lhs);
2399            const lsb = try cg.load(rhs, Type.u64, 0);
2400            try cg.store(.stack, lsb, Type.u64, 0 + lhs.offset());
2401
2402            try cg.emitWValue(lhs);
2403            const msb = try cg.load(rhs, Type.u64, 8);
2404            try cg.store(.stack, msb, Type.u64, 8 + lhs.offset());
2405            return;
2406        } else if (abi_size > 16) {
2407            assert(offset == 0);
2408            try cg.memcpy(lhs, rhs, .{ .imm32 = @as(u32, @intCast(ty.abiSize(zcu))) });
2409        },
2410        else => if (abi_size > 8) {
2411            return cg.fail("TODO: `store` for type `{f}` with abisize `{d}`", .{ ty.fmt(pt), abi_size });
2412        },
2413    }
2414    try cg.emitWValue(lhs);
2415    // In this case we're actually interested in storing the stack position
2416    // into lhs, so we calculate that and emit that instead
2417    try cg.lowerToStack(rhs);
2418
2419    const valtype = typeToValtype(ty, zcu, cg.target);
2420    const opcode = buildOpcode(.{
2421        .valtype1 = valtype,
2422        .width = @as(u8, @intCast(abi_size * 8)),
2423        .op = .store,
2424    });
2425
2426    // store rhs value at stack pointer's location in memory
2427    try cg.addMemArg(
2428        Mir.Inst.Tag.fromOpcode(opcode),
2429        .{
2430            .offset = offset + lhs.offset(),
2431            .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?),
2432        },
2433    );
2434}
2435
2436fn airLoad(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
2437    const pt = cg.pt;
2438    const zcu = pt.zcu;
2439    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
2440    const operand = try cg.resolveInst(ty_op.operand);
2441    const ty = ty_op.ty.toType();
2442    const ptr_ty = cg.typeOf(ty_op.operand);
2443    const ptr_info = ptr_ty.ptrInfo(zcu);
2444
2445    if (!ty.hasRuntimeBitsIgnoreComptime(zcu)) return cg.finishAir(inst, .none, &.{ty_op.operand});
2446
2447    const result = result: {
2448        if (isByRef(ty, zcu, cg.target)) {
2449            const new_local = try cg.allocStack(ty);
2450            try cg.store(new_local, operand, ty, 0);
2451            break :result new_local;
2452        }
2453
2454        if (ptr_info.packed_offset.host_size == 0) {
2455            const loaded = try cg.load(operand, ty, 0);
2456            const ty_size = ty.abiSize(zcu);
2457            if (ty.isAbiInt(zcu) and ty_size * 8 > ty.bitSize(zcu)) {
2458                const int_elem_ty = try pt.intType(.unsigned, @intCast(ty_size * 8));
2459                break :result try cg.trunc(loaded, ty, int_elem_ty);
2460            } else {
2461                break :result loaded;
2462            }
2463        } else {
2464            const int_elem_ty = try pt.intType(.unsigned, ptr_info.packed_offset.host_size * 8);
2465            const shift_val: WValue = if (ptr_info.packed_offset.host_size <= 4)
2466                .{ .imm32 = ptr_info.packed_offset.bit_offset }
2467            else if (ptr_info.packed_offset.host_size <= 8)
2468                .{ .imm64 = ptr_info.packed_offset.bit_offset }
2469            else
2470                .{ .imm32 = ptr_info.packed_offset.bit_offset };
2471
2472            const stack_loaded = if (ptr_info.packed_offset.host_size <= 8)
2473                try cg.load(operand, int_elem_ty, 0)
2474            else
2475                operand;
2476            const shifted = try cg.binOp(stack_loaded, shift_val, int_elem_ty, .shr);
2477            break :result try cg.trunc(shifted, ty, int_elem_ty);
2478        }
2479    };
2480    return cg.finishAir(inst, result, &.{ty_op.operand});
2481}
2482
2483/// Loads an operand from the linear memory section.
2484/// NOTE: Leaves the value on the stack.
2485fn load(cg: *CodeGen, operand: WValue, ty: Type, offset: u32) InnerError!WValue {
2486    const zcu = cg.pt.zcu;
2487    // load local's value from memory by its stack position
2488    try cg.emitWValue(operand);
2489
2490    if (ty.zigTypeTag(zcu) == .vector) {
2491        // TODO: Add helper functions for simd opcodes
2492        const extra_index: u32 = @intCast(cg.mir_extra.items.len);
2493        // stores as := opcode, offset, alignment (opcode::memarg)
2494        try cg.mir_extra.appendSlice(cg.gpa, &[_]u32{
2495            @intFromEnum(std.wasm.SimdOpcode.v128_load),
2496            offset + operand.offset(),
2497            @intCast(ty.abiAlignment(zcu).toByteUnits().?),
2498        });
2499        try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } });
2500        return .stack;
2501    }
2502
2503    const abi_size: u8 = @intCast(ty.abiSize(zcu));
2504    const opcode = buildOpcode(.{
2505        .valtype1 = typeToValtype(ty, zcu, cg.target),
2506        .width = abi_size * 8,
2507        .op = .load,
2508        .signedness = if (ty.isSignedInt(zcu)) .signed else .unsigned,
2509    });
2510
2511    try cg.addMemArg(
2512        Mir.Inst.Tag.fromOpcode(opcode),
2513        .{
2514            .offset = offset + operand.offset(),
2515            .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?),
2516        },
2517    );
2518
2519    return .stack;
2520}
2521
2522fn airArg(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
2523    const pt = cg.pt;
2524    const zcu = pt.zcu;
2525    const arg_index = cg.arg_index;
2526    const arg = cg.args[arg_index];
2527    const cc = zcu.typeToFunc(zcu.navValue(cg.owner_nav).typeOf(zcu)).?.cc;
2528    const arg_ty = cg.typeOfIndex(inst);
2529    if (cc == .wasm_mvp) {
2530        switch (abi.classifyType(arg_ty, zcu)) {
2531            .direct => |scalar_ty| if (!abi.lowerAsDoubleI64(scalar_ty, zcu)) {
2532                cg.arg_index += 1;
2533            } else {
2534                cg.arg_index += 2;
2535                const result = try cg.allocStack(arg_ty);
2536                try cg.store(result, arg, Type.u64, 0);
2537                try cg.store(result, cg.args[arg_index + 1], Type.u64, 8);
2538                return cg.finishAir(inst, result, &.{});
2539            },
2540            .indirect => cg.arg_index += 1,
2541        }
2542    } else {
2543        cg.arg_index += 1;
2544    }
2545
2546    return cg.finishAir(inst, arg, &.{});
2547}
2548
2549fn airBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void {
2550    const zcu = cg.pt.zcu;
2551    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
2552    const lhs = try cg.resolveInst(bin_op.lhs);
2553    const rhs = try cg.resolveInst(bin_op.rhs);
2554    const lhs_ty = cg.typeOf(bin_op.lhs);
2555    const rhs_ty = cg.typeOf(bin_op.rhs);
2556
2557    // For certain operations, such as shifting, the types are different.
2558    // When converting this to a WebAssembly type, they *must* match to perform
2559    // an operation. For this reason we verify if the WebAssembly type is different, in which
2560    // case we first coerce the operands to the same type before performing the operation.
2561    // For big integers we can ignore this as we will call into compiler-rt which handles this.
2562    const result = switch (op) {
2563        .shr, .shl => result: {
2564            if (lhs_ty.isVector(zcu) and !rhs_ty.isVector(zcu)) {
2565                return cg.fail("TODO: implement vector '{s}' with scalar rhs", .{@tagName(op)});
2566            }
2567
2568            const lhs_wasm_bits = toWasmBits(@intCast(lhs_ty.bitSize(zcu))) orelse {
2569                return cg.fail("TODO: implement '{s}' for types larger than 128 bits", .{@tagName(op)});
2570            };
2571            const rhs_wasm_bits = toWasmBits(@intCast(rhs_ty.bitSize(zcu))).?;
2572            const new_rhs = if (lhs_wasm_bits != rhs_wasm_bits and lhs_wasm_bits != 128)
2573                try (try cg.intcast(rhs, rhs_ty, lhs_ty)).toLocal(cg, lhs_ty)
2574            else
2575                rhs;
2576            break :result try cg.binOp(lhs, new_rhs, lhs_ty, op);
2577        },
2578        else => try cg.binOp(lhs, rhs, lhs_ty, op),
2579    };
2580
2581    return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs });
2582}
2583
2584/// Performs a binary operation on the given `WValue`'s
2585/// NOTE: THis leaves the value on top of the stack.
2586fn binOp(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue {
2587    const pt = cg.pt;
2588    const zcu = pt.zcu;
2589    assert(!(lhs != .stack and rhs == .stack));
2590
2591    if (ty.isAnyFloat()) {
2592        const float_op = FloatOp.fromOp(op);
2593        return cg.floatOp(float_op, ty, &.{ lhs, rhs });
2594    }
2595
2596    if (isByRef(ty, zcu, cg.target)) {
2597        if (ty.zigTypeTag(zcu) == .int) {
2598            return cg.binOpBigInt(lhs, rhs, ty, op);
2599        } else {
2600            return cg.fail("TODO: Implement binary operation for type: {f}", .{ty.fmt(pt)});
2601        }
2602    }
2603
2604    const opcode: std.wasm.Opcode = buildOpcode(.{
2605        .op = op,
2606        .valtype1 = typeToValtype(ty, zcu, cg.target),
2607        .signedness = if (ty.isSignedInt(zcu)) .signed else .unsigned,
2608    });
2609    try cg.emitWValue(lhs);
2610    try cg.emitWValue(rhs);
2611
2612    try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode));
2613
2614    return .stack;
2615}
2616
2617fn binOpBigInt(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue {
2618    const zcu = cg.pt.zcu;
2619    const int_info = ty.intInfo(zcu);
2620    if (int_info.bits > 128) {
2621        return cg.fail("TODO: Implement binary operation for big integers larger than 128 bits", .{});
2622    }
2623
2624    switch (op) {
2625        .mul => return cg.callIntrinsic(.__multi3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }),
2626        .div => switch (int_info.signedness) {
2627            .signed => return cg.callIntrinsic(.__divti3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }),
2628            .unsigned => return cg.callIntrinsic(.__udivti3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }),
2629        },
2630        .rem => switch (int_info.signedness) {
2631            .signed => return cg.callIntrinsic(.__modti3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }),
2632            .unsigned => return cg.callIntrinsic(.__umodti3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }),
2633        },
2634        .shr => switch (int_info.signedness) {
2635            .signed => return cg.callIntrinsic(.__ashrti3, &.{ ty.toIntern(), .i32_type }, ty, &.{ lhs, rhs }),
2636            .unsigned => return cg.callIntrinsic(.__lshrti3, &.{ ty.toIntern(), .i32_type }, ty, &.{ lhs, rhs }),
2637        },
2638        .shl => return cg.callIntrinsic(.__ashlti3, &.{ ty.toIntern(), .i32_type }, ty, &.{ lhs, rhs }),
2639        .@"and", .@"or", .xor => {
2640            const result = try cg.allocStack(ty);
2641            try cg.emitWValue(result);
2642            const lhs_lsb = try cg.load(lhs, Type.u64, 0);
2643            const rhs_lsb = try cg.load(rhs, Type.u64, 0);
2644            const op_lsb = try cg.binOp(lhs_lsb, rhs_lsb, Type.u64, op);
2645            try cg.store(.stack, op_lsb, Type.u64, result.offset());
2646
2647            try cg.emitWValue(result);
2648            const lhs_msb = try cg.load(lhs, Type.u64, 8);
2649            const rhs_msb = try cg.load(rhs, Type.u64, 8);
2650            const op_msb = try cg.binOp(lhs_msb, rhs_msb, Type.u64, op);
2651            try cg.store(.stack, op_msb, Type.u64, result.offset() + 8);
2652            return result;
2653        },
2654        .add, .sub => {
2655            const result = try cg.allocStack(ty);
2656            var lhs_lsb = try (try cg.load(lhs, Type.u64, 0)).toLocal(cg, Type.u64);
2657            defer lhs_lsb.free(cg);
2658            var rhs_lsb = try (try cg.load(rhs, Type.u64, 0)).toLocal(cg, Type.u64);
2659            defer rhs_lsb.free(cg);
2660            var op_lsb = try (try cg.binOp(lhs_lsb, rhs_lsb, Type.u64, op)).toLocal(cg, Type.u64);
2661            defer op_lsb.free(cg);
2662
2663            const lhs_msb = try cg.load(lhs, Type.u64, 8);
2664            const rhs_msb = try cg.load(rhs, Type.u64, 8);
2665            const op_msb = try cg.binOp(lhs_msb, rhs_msb, Type.u64, op);
2666
2667            const lt = if (op == .add) blk: {
2668                break :blk try cg.cmp(op_lsb, rhs_lsb, Type.u64, .lt);
2669            } else if (op == .sub) blk: {
2670                break :blk try cg.cmp(lhs_lsb, rhs_lsb, Type.u64, .lt);
2671            } else unreachable;
2672            const tmp = try cg.intcast(lt, Type.u32, Type.u64);
2673            var tmp_op = try (try cg.binOp(op_msb, tmp, Type.u64, op)).toLocal(cg, Type.u64);
2674            defer tmp_op.free(cg);
2675
2676            try cg.store(result, op_lsb, Type.u64, 0);
2677            try cg.store(result, tmp_op, Type.u64, 8);
2678            return result;
2679        },
2680        else => return cg.fail("TODO: Implement binary operation for big integers: '{s}'", .{@tagName(op)}),
2681    }
2682}
2683
2684const FloatOp = enum {
2685    add,
2686    ceil,
2687    cos,
2688    div,
2689    exp,
2690    exp2,
2691    fabs,
2692    floor,
2693    fma,
2694    fmax,
2695    fmin,
2696    fmod,
2697    log,
2698    log10,
2699    log2,
2700    mul,
2701    neg,
2702    round,
2703    sin,
2704    sqrt,
2705    sub,
2706    tan,
2707    trunc,
2708
2709    pub fn fromOp(op: Op) FloatOp {
2710        return switch (op) {
2711            .add => .add,
2712            .ceil => .ceil,
2713            .div => .div,
2714            .abs => .fabs,
2715            .floor => .floor,
2716            .max => .fmax,
2717            .min => .fmin,
2718            .mul => .mul,
2719            .neg => .neg,
2720            .nearest => .round,
2721            .sqrt => .sqrt,
2722            .sub => .sub,
2723            .trunc => .trunc,
2724            .rem => .fmod,
2725            else => unreachable,
2726        };
2727    }
2728
2729    pub fn toOp(float_op: FloatOp) ?Op {
2730        return switch (float_op) {
2731            .add => .add,
2732            .ceil => .ceil,
2733            .div => .div,
2734            .fabs => .abs,
2735            .floor => .floor,
2736            .fmax => .max,
2737            .fmin => .min,
2738            .mul => .mul,
2739            .neg => .neg,
2740            .round => .nearest,
2741            .sqrt => .sqrt,
2742            .sub => .sub,
2743            .trunc => .trunc,
2744
2745            .cos,
2746            .exp,
2747            .exp2,
2748            .fma,
2749            .fmod,
2750            .log,
2751            .log10,
2752            .log2,
2753            .sin,
2754            .tan,
2755            => null,
2756        };
2757    }
2758
2759    fn intrinsic(op: FloatOp, bits: u16) Mir.Intrinsic {
2760        return switch (op) {
2761            inline .add, .sub, .div, .mul => |ct_op| switch (bits) {
2762                inline 16, 80, 128 => |ct_bits| @field(
2763                    Mir.Intrinsic,
2764                    "__" ++ @tagName(ct_op) ++ compilerRtFloatAbbrev(ct_bits) ++ "f3",
2765                ),
2766                else => unreachable,
2767            },
2768
2769            inline .ceil,
2770            .fabs,
2771            .floor,
2772            .fmax,
2773            .fmin,
2774            .round,
2775            .sqrt,
2776            .trunc,
2777            => |ct_op| switch (bits) {
2778                inline 16, 80, 128 => |ct_bits| @field(
2779                    Mir.Intrinsic,
2780                    libcFloatPrefix(ct_bits) ++ @tagName(ct_op) ++ libcFloatSuffix(ct_bits),
2781                ),
2782                else => unreachable,
2783            },
2784
2785            inline .cos,
2786            .exp,
2787            .exp2,
2788            .fma,
2789            .fmod,
2790            .log,
2791            .log10,
2792            .log2,
2793            .sin,
2794            .tan,
2795            => |ct_op| switch (bits) {
2796                inline 16, 32, 64, 80, 128 => |ct_bits| @field(
2797                    Mir.Intrinsic,
2798                    libcFloatPrefix(ct_bits) ++ @tagName(ct_op) ++ libcFloatSuffix(ct_bits),
2799                ),
2800                else => unreachable,
2801            },
2802
2803            .neg => unreachable,
2804        };
2805    }
2806};
2807
2808fn airAbs(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
2809    const pt = cg.pt;
2810    const zcu = pt.zcu;
2811    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
2812    const operand = try cg.resolveInst(ty_op.operand);
2813    const ty = cg.typeOf(ty_op.operand);
2814    const scalar_ty = ty.scalarType(zcu);
2815
2816    switch (scalar_ty.zigTypeTag(zcu)) {
2817        .int => if (ty.zigTypeTag(zcu) == .vector) {
2818            return cg.fail("TODO implement airAbs for {f}", .{ty.fmt(pt)});
2819        } else {
2820            const int_bits = ty.intInfo(zcu).bits;
2821            const wasm_bits = toWasmBits(int_bits) orelse {
2822                return cg.fail("TODO: airAbs for signed integers larger than '{d}' bits", .{int_bits});
2823            };
2824
2825            switch (wasm_bits) {
2826                32 => {
2827                    try cg.emitWValue(operand);
2828
2829                    try cg.addImm32(31);
2830                    try cg.addTag(.i32_shr_s);
2831
2832                    var tmp = try cg.allocLocal(ty);
2833                    defer tmp.free(cg);
2834                    try cg.addLocal(.local_tee, tmp.local.value);
2835
2836                    try cg.emitWValue(operand);
2837                    try cg.addTag(.i32_xor);
2838                    try cg.emitWValue(tmp);
2839                    try cg.addTag(.i32_sub);
2840                    return cg.finishAir(inst, .stack, &.{ty_op.operand});
2841                },
2842                64 => {
2843                    try cg.emitWValue(operand);
2844
2845                    try cg.addImm64(63);
2846                    try cg.addTag(.i64_shr_s);
2847
2848                    var tmp = try cg.allocLocal(ty);
2849                    defer tmp.free(cg);
2850                    try cg.addLocal(.local_tee, tmp.local.value);
2851
2852                    try cg.emitWValue(operand);
2853                    try cg.addTag(.i64_xor);
2854                    try cg.emitWValue(tmp);
2855                    try cg.addTag(.i64_sub);
2856                    return cg.finishAir(inst, .stack, &.{ty_op.operand});
2857                },
2858                128 => {
2859                    const mask = try cg.allocStack(Type.u128);
2860                    try cg.emitWValue(mask);
2861                    try cg.emitWValue(mask);
2862
2863                    _ = try cg.load(operand, Type.u64, 8);
2864                    try cg.addImm64(63);
2865                    try cg.addTag(.i64_shr_s);
2866
2867                    var tmp = try cg.allocLocal(Type.u64);
2868                    defer tmp.free(cg);
2869                    try cg.addLocal(.local_tee, tmp.local.value);
2870                    try cg.store(.stack, .stack, Type.u64, mask.offset() + 0);
2871                    try cg.emitWValue(tmp);
2872                    try cg.store(.stack, .stack, Type.u64, mask.offset() + 8);
2873
2874                    const a = try cg.binOpBigInt(operand, mask, Type.u128, .xor);
2875                    const b = try cg.binOpBigInt(a, mask, Type.u128, .sub);
2876
2877                    return cg.finishAir(inst, b, &.{ty_op.operand});
2878                },
2879                else => unreachable,
2880            }
2881        },
2882        .float => {
2883            const result = try cg.floatOp(.fabs, ty, &.{operand});
2884            return cg.finishAir(inst, result, &.{ty_op.operand});
2885        },
2886        else => unreachable,
2887    }
2888}
2889
2890fn airUnaryFloatOp(cg: *CodeGen, inst: Air.Inst.Index, op: FloatOp) InnerError!void {
2891    const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
2892    const operand = try cg.resolveInst(un_op);
2893    const ty = cg.typeOf(un_op);
2894
2895    const result = try cg.floatOp(op, ty, &.{operand});
2896    return cg.finishAir(inst, result, &.{un_op});
2897}
2898
2899fn floatOp(cg: *CodeGen, float_op: FloatOp, ty: Type, args: []const WValue) InnerError!WValue {
2900    const zcu = cg.pt.zcu;
2901    if (ty.zigTypeTag(zcu) == .vector) {
2902        return cg.fail("TODO: Implement floatOps for vectors", .{});
2903    }
2904
2905    const float_bits = ty.floatBits(cg.target);
2906
2907    if (float_op == .neg) {
2908        return cg.floatNeg(ty, args[0]);
2909    }
2910
2911    if (float_bits == 32 or float_bits == 64) {
2912        if (float_op.toOp()) |op| {
2913            for (args) |operand| {
2914                try cg.emitWValue(operand);
2915            }
2916            const opcode = buildOpcode(.{ .op = op, .valtype1 = typeToValtype(ty, zcu, cg.target) });
2917            try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode));
2918            return .stack;
2919        }
2920    }
2921
2922    const intrinsic = float_op.intrinsic(float_bits);
2923
2924    // fma requires three operands
2925    var param_types_buffer: [3]InternPool.Index = .{ ty.ip_index, ty.ip_index, ty.ip_index };
2926    const param_types = param_types_buffer[0..args.len];
2927    return cg.callIntrinsic(intrinsic, param_types, ty, args);
2928}
2929
2930/// NOTE: The result value remains on top of the stack.
2931fn floatNeg(cg: *CodeGen, ty: Type, arg: WValue) InnerError!WValue {
2932    const float_bits = ty.floatBits(cg.target);
2933    switch (float_bits) {
2934        16 => {
2935            try cg.emitWValue(arg);
2936            try cg.addImm32(0x8000);
2937            try cg.addTag(.i32_xor);
2938            return .stack;
2939        },
2940        32, 64 => {
2941            try cg.emitWValue(arg);
2942            const val_type: std.wasm.Valtype = if (float_bits == 32) .f32 else .f64;
2943            const opcode = buildOpcode(.{ .op = .neg, .valtype1 = val_type });
2944            try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode));
2945            return .stack;
2946        },
2947        80, 128 => {
2948            const result = try cg.allocStack(ty);
2949            try cg.emitWValue(result);
2950            try cg.emitWValue(arg);
2951            try cg.addMemArg(.i64_load, .{ .offset = 0 + arg.offset(), .alignment = 2 });
2952            try cg.addMemArg(.i64_store, .{ .offset = 0 + result.offset(), .alignment = 2 });
2953
2954            try cg.emitWValue(result);
2955            try cg.emitWValue(arg);
2956            try cg.addMemArg(.i64_load, .{ .offset = 8 + arg.offset(), .alignment = 2 });
2957
2958            if (float_bits == 80) {
2959                try cg.addImm64(0x8000);
2960                try cg.addTag(.i64_xor);
2961                try cg.addMemArg(.i64_store16, .{ .offset = 8 + result.offset(), .alignment = 2 });
2962            } else {
2963                try cg.addImm64(0x8000000000000000);
2964                try cg.addTag(.i64_xor);
2965                try cg.addMemArg(.i64_store, .{ .offset = 8 + result.offset(), .alignment = 2 });
2966            }
2967            return result;
2968        },
2969        else => unreachable,
2970    }
2971}
2972
2973fn airWrapBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void {
2974    const zcu = cg.pt.zcu;
2975    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
2976
2977    const lhs = try cg.resolveInst(bin_op.lhs);
2978    const rhs = try cg.resolveInst(bin_op.rhs);
2979    const lhs_ty = cg.typeOf(bin_op.lhs);
2980    const rhs_ty = cg.typeOf(bin_op.rhs);
2981
2982    if (lhs_ty.isVector(zcu)) {
2983        if ((op == .shr or op == .shl) and !rhs_ty.isVector(zcu)) {
2984            return cg.fail("TODO: implement wrapping vector '{s}' with scalar rhs", .{@tagName(op)});
2985        } else {
2986            return cg.fail("TODO: implement wrapping '{s}' for vectors", .{@tagName(op)});
2987        }
2988    }
2989
2990    // For certain operations, such as shifting, the types are different.
2991    // When converting this to a WebAssembly type, they *must* match to perform
2992    // an operation. For this reason we verify if the WebAssembly type is different, in which
2993    // case we first coerce the operands to the same type before performing the operation.
2994    // For big integers we can ignore this as we will call into compiler-rt which handles this.
2995    const result = switch (op) {
2996        .shr, .shl => result: {
2997            const lhs_wasm_bits = toWasmBits(@intCast(lhs_ty.bitSize(zcu))) orelse {
2998                return cg.fail("TODO: implement '{s}' for types larger than 128 bits", .{@tagName(op)});
2999            };
3000            const rhs_wasm_bits = toWasmBits(@intCast(rhs_ty.bitSize(zcu))).?;
3001            const new_rhs = if (lhs_wasm_bits != rhs_wasm_bits and lhs_wasm_bits != 128)
3002                try (try cg.intcast(rhs, rhs_ty, lhs_ty)).toLocal(cg, lhs_ty)
3003            else
3004                rhs;
3005            break :result try cg.wrapBinOp(lhs, new_rhs, lhs_ty, op);
3006        },
3007        else => try cg.wrapBinOp(lhs, rhs, lhs_ty, op),
3008    };
3009
3010    return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs });
3011}
3012
3013/// Performs a wrapping binary operation.
3014/// Asserts rhs is not a stack value when lhs also isn't.
3015/// NOTE: Leaves the result on the stack when its Type is <= 64 bits
3016fn wrapBinOp(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue {
3017    const bin_local = try cg.binOp(lhs, rhs, ty, op);
3018    return cg.wrapOperand(bin_local, ty);
3019}
3020
3021/// Wraps an operand based on a given type's bitsize.
3022/// Asserts `Type` is <= 128 bits.
3023/// NOTE: When the Type is <= 64 bits, leaves the value on top of the stack, if wrapping was needed.
3024fn wrapOperand(cg: *CodeGen, operand: WValue, ty: Type) InnerError!WValue {
3025    const zcu = cg.pt.zcu;
3026    assert(ty.abiSize(zcu) <= 16);
3027    const int_bits: u16 = @intCast(ty.bitSize(zcu)); // TODO use ty.intInfo(zcu).bits
3028    const wasm_bits = toWasmBits(int_bits) orelse {
3029        return cg.fail("TODO: Implement wrapOperand for bitsize '{d}'", .{int_bits});
3030    };
3031
3032    if (wasm_bits == int_bits) return operand;
3033
3034    switch (wasm_bits) {
3035        32 => {
3036            try cg.emitWValue(operand);
3037            if (ty.isSignedInt(zcu)) {
3038                try cg.addImm32(32 - int_bits);
3039                try cg.addTag(.i32_shl);
3040                try cg.addImm32(32 - int_bits);
3041                try cg.addTag(.i32_shr_s);
3042            } else {
3043                try cg.addImm32(~@as(u32, 0) >> @intCast(32 - int_bits));
3044                try cg.addTag(.i32_and);
3045            }
3046            return .stack;
3047        },
3048        64 => {
3049            try cg.emitWValue(operand);
3050            if (ty.isSignedInt(zcu)) {
3051                try cg.addImm64(64 - int_bits);
3052                try cg.addTag(.i64_shl);
3053                try cg.addImm64(64 - int_bits);
3054                try cg.addTag(.i64_shr_s);
3055            } else {
3056                try cg.addImm64(~@as(u64, 0) >> @intCast(64 - int_bits));
3057                try cg.addTag(.i64_and);
3058            }
3059            return .stack;
3060        },
3061        128 => {
3062            assert(operand != .stack);
3063            const result = try cg.allocStack(ty);
3064
3065            try cg.emitWValue(result);
3066            _ = try cg.load(operand, Type.u64, 0);
3067            try cg.store(.stack, .stack, Type.u64, result.offset());
3068
3069            try cg.emitWValue(result);
3070            _ = try cg.load(operand, Type.u64, 8);
3071            if (ty.isSignedInt(zcu)) {
3072                try cg.addImm64(128 - int_bits);
3073                try cg.addTag(.i64_shl);
3074                try cg.addImm64(128 - int_bits);
3075                try cg.addTag(.i64_shr_s);
3076            } else {
3077                try cg.addImm64(~@as(u64, 0) >> @intCast(128 - int_bits));
3078                try cg.addTag(.i64_and);
3079            }
3080            try cg.store(.stack, .stack, Type.u64, result.offset() + 8);
3081
3082            return result;
3083        },
3084        else => unreachable,
3085    }
3086}
3087
3088fn lowerPtr(cg: *CodeGen, ptr_val: InternPool.Index, prev_offset: u64) InnerError!WValue {
3089    const pt = cg.pt;
3090    const zcu = pt.zcu;
3091    const ptr = zcu.intern_pool.indexToKey(ptr_val).ptr;
3092    const offset: u64 = prev_offset + ptr.byte_offset;
3093    return switch (ptr.base_addr) {
3094        .nav => |nav| return .{ .nav_ref = .{ .nav_index = nav, .offset = @intCast(offset) } },
3095        .uav => |uav| return .{ .uav_ref = .{ .ip_index = uav.val, .offset = @intCast(offset), .orig_ptr_ty = uav.orig_ty } },
3096        .int => return cg.lowerConstant(try pt.intValue(Type.usize, offset), Type.usize),
3097        .eu_payload => |eu_ptr| try cg.lowerPtr(
3098            eu_ptr,
3099            offset + codegen.errUnionPayloadOffset(
3100                Value.fromInterned(eu_ptr).typeOf(zcu).childType(zcu),
3101                zcu,
3102            ),
3103        ),
3104        .opt_payload => |opt_ptr| return cg.lowerPtr(opt_ptr, offset),
3105        .field => |field| {
3106            const base_ptr = Value.fromInterned(field.base);
3107            const base_ty = base_ptr.typeOf(zcu).childType(zcu);
3108            const field_off: u64 = switch (base_ty.zigTypeTag(zcu)) {
3109                .pointer => off: {
3110                    assert(base_ty.isSlice(zcu));
3111                    break :off switch (field.index) {
3112                        Value.slice_ptr_index => 0,
3113                        Value.slice_len_index => @divExact(cg.target.ptrBitWidth(), 8),
3114                        else => unreachable,
3115                    };
3116                },
3117                .@"struct" => switch (base_ty.containerLayout(zcu)) {
3118                    .auto => base_ty.structFieldOffset(@intCast(field.index), zcu),
3119                    .@"extern", .@"packed" => unreachable,
3120                },
3121                .@"union" => switch (base_ty.containerLayout(zcu)) {
3122                    .auto => base_ty.structFieldOffset(@intCast(field.index), zcu),
3123                    .@"extern", .@"packed" => unreachable,
3124                },
3125                else => unreachable,
3126            };
3127            return cg.lowerPtr(field.base, offset + field_off);
3128        },
3129        .arr_elem, .comptime_field, .comptime_alloc => unreachable,
3130    };
3131}
3132
3133/// Asserts that `isByRef` returns `false` for `ty`.
3134fn lowerConstant(cg: *CodeGen, val: Value, ty: Type) InnerError!WValue {
3135    const pt = cg.pt;
3136    const zcu = pt.zcu;
3137    assert(!isByRef(ty, zcu, cg.target));
3138    const ip = &zcu.intern_pool;
3139    if (val.isUndef(zcu)) return cg.emitUndefined(ty);
3140
3141    switch (ip.indexToKey(val.ip_index)) {
3142        .int_type,
3143        .ptr_type,
3144        .array_type,
3145        .vector_type,
3146        .opt_type,
3147        .anyframe_type,
3148        .error_union_type,
3149        .simple_type,
3150        .struct_type,
3151        .tuple_type,
3152        .union_type,
3153        .opaque_type,
3154        .enum_type,
3155        .func_type,
3156        .error_set_type,
3157        .inferred_error_set_type,
3158        => unreachable, // types, not values
3159
3160        .undef => unreachable, // handled above
3161        .simple_value => |simple_value| switch (simple_value) {
3162            .undefined,
3163            .void,
3164            .null,
3165            .empty_tuple,
3166            .@"unreachable",
3167            => unreachable, // non-runtime values
3168            .false, .true => return .{ .imm32 = switch (simple_value) {
3169                .false => 0,
3170                .true => 1,
3171                else => unreachable,
3172            } },
3173        },
3174        .variable,
3175        .@"extern",
3176        .func,
3177        .enum_literal,
3178        .empty_enum_value,
3179        => unreachable, // non-runtime values
3180        .int => {
3181            const int_info = ty.intInfo(zcu);
3182            switch (int_info.signedness) {
3183                .signed => switch (int_info.bits) {
3184                    0...32 => return .{ .imm32 = @bitCast(@as(i32, @intCast(val.toSignedInt(zcu)))) },
3185                    33...64 => return .{ .imm64 = @bitCast(val.toSignedInt(zcu)) },
3186                    else => unreachable,
3187                },
3188                .unsigned => switch (int_info.bits) {
3189                    0...32 => return .{ .imm32 = @intCast(val.toUnsignedInt(zcu)) },
3190                    33...64 => return .{ .imm64 = val.toUnsignedInt(zcu) },
3191                    else => unreachable,
3192                },
3193            }
3194        },
3195        .err => |err| {
3196            const int = try pt.getErrorValue(err.name);
3197            return .{ .imm32 = int };
3198        },
3199        .error_union => |error_union| {
3200            const err_int_ty = try pt.errorIntType();
3201            const err_ty, const err_val = switch (error_union.val) {
3202                .err_name => |err_name| .{
3203                    ty.errorUnionSet(zcu),
3204                    Value.fromInterned(try pt.intern(.{ .err = .{
3205                        .ty = ty.errorUnionSet(zcu).toIntern(),
3206                        .name = err_name,
3207                    } })),
3208                },
3209                .payload => .{
3210                    err_int_ty,
3211                    try pt.intValue(err_int_ty, 0),
3212                },
3213            };
3214            const payload_type = ty.errorUnionPayload(zcu);
3215            if (!payload_type.hasRuntimeBitsIgnoreComptime(zcu)) {
3216                // We use the error type directly as the type.
3217                return cg.lowerConstant(err_val, err_ty);
3218            }
3219
3220            return cg.fail("Wasm TODO: lowerConstant error union with non-zero-bit payload type", .{});
3221        },
3222        .enum_tag => |enum_tag| {
3223            const int_tag_ty = ip.typeOf(enum_tag.int);
3224            return cg.lowerConstant(Value.fromInterned(enum_tag.int), Type.fromInterned(int_tag_ty));
3225        },
3226        .float => |float| switch (float.storage) {
3227            .f16 => |f16_val| return .{ .imm32 = @as(u16, @bitCast(f16_val)) },
3228            .f32 => |f32_val| return .{ .float32 = f32_val },
3229            .f64 => |f64_val| return .{ .float64 = f64_val },
3230            else => unreachable,
3231        },
3232        .slice => unreachable, // isByRef == true
3233        .ptr => return cg.lowerPtr(val.toIntern(), 0),
3234        .opt => if (ty.optionalReprIsPayload(zcu)) {
3235            const pl_ty = ty.optionalChild(zcu);
3236            if (val.optionalValue(zcu)) |payload| {
3237                return cg.lowerConstant(payload, pl_ty);
3238            } else {
3239                return .{ .imm32 = 0 };
3240            }
3241        } else {
3242            return .{ .imm32 = @intFromBool(!val.isNull(zcu)) };
3243        },
3244        .aggregate => switch (ip.indexToKey(ty.ip_index)) {
3245            .array_type => return cg.fail("Wasm TODO: LowerConstant for {f}", .{ty.fmt(pt)}),
3246            .vector_type => {
3247                assert(determineSimdStoreStrategy(ty, zcu, cg.target) == .direct);
3248                var buf: [16]u8 = undefined;
3249                val.writeToMemory(pt, &buf) catch unreachable;
3250                return cg.storeSimdImmd(buf);
3251            },
3252            .struct_type => {
3253                const struct_type = ip.loadStructType(ty.toIntern());
3254                // non-packed structs are not handled in this function because they
3255                // are by-ref types.
3256                assert(struct_type.layout == .@"packed");
3257                var buf: [8]u8 = .{0} ** 8; // zero the buffer so we do not read 0xaa as integer
3258                val.writeToPackedMemory(ty, pt, &buf, 0) catch unreachable;
3259                const backing_int_ty = Type.fromInterned(struct_type.backingIntTypeUnordered(ip));
3260                const int_val = try pt.intValue(
3261                    backing_int_ty,
3262                    mem.readInt(u64, &buf, .little),
3263                );
3264                return cg.lowerConstant(int_val, backing_int_ty);
3265            },
3266            else => unreachable,
3267        },
3268        .un => {
3269            const int_type = try pt.intType(.unsigned, @intCast(ty.bitSize(zcu)));
3270
3271            var buf: [8]u8 = .{0} ** 8; // zero the buffer so we do not read 0xaa as integer
3272            val.writeToPackedMemory(ty, pt, &buf, 0) catch unreachable;
3273            const int_val = try pt.intValue(
3274                int_type,
3275                mem.readInt(u64, &buf, .little),
3276            );
3277            return cg.lowerConstant(int_val, int_type);
3278        },
3279        .memoized_call => unreachable,
3280    }
3281}
3282
3283/// Stores the value as a 128bit-immediate value by storing it inside
3284/// the list and returning the index into this list as `WValue`.
3285fn storeSimdImmd(cg: *CodeGen, value: [16]u8) !WValue {
3286    const index = @as(u32, @intCast(cg.simd_immediates.items.len));
3287    try cg.simd_immediates.append(cg.gpa, value);
3288    return .{ .imm128 = index };
3289}
3290
3291fn emitUndefined(cg: *CodeGen, ty: Type) InnerError!WValue {
3292    const zcu = cg.pt.zcu;
3293    const ip = &zcu.intern_pool;
3294    switch (ty.zigTypeTag(zcu)) {
3295        .bool, .error_set => return .{ .imm32 = 0xaaaaaaaa },
3296        .int, .@"enum" => switch (ty.intInfo(zcu).bits) {
3297            0...32 => return .{ .imm32 = 0xaaaaaaaa },
3298            33...64 => return .{ .imm64 = 0xaaaaaaaaaaaaaaaa },
3299            else => unreachable,
3300        },
3301        .float => switch (ty.floatBits(cg.target)) {
3302            16 => return .{ .imm32 = 0xaaaaaaaa },
3303            32 => return .{ .float32 = @as(f32, @bitCast(@as(u32, 0xaaaaaaaa))) },
3304            64 => return .{ .float64 = @as(f64, @bitCast(@as(u64, 0xaaaaaaaaaaaaaaaa))) },
3305            else => unreachable,
3306        },
3307        .pointer => switch (cg.ptr_size) {
3308            .wasm32 => return .{ .imm32 = 0xaaaaaaaa },
3309            .wasm64 => return .{ .imm64 = 0xaaaaaaaaaaaaaaaa },
3310        },
3311        .optional => {
3312            const pl_ty = ty.optionalChild(zcu);
3313            if (ty.optionalReprIsPayload(zcu)) {
3314                return cg.emitUndefined(pl_ty);
3315            }
3316            return .{ .imm32 = 0xaaaaaaaa };
3317        },
3318        .error_union => {
3319            return .{ .imm32 = 0xaaaaaaaa };
3320        },
3321        .@"struct" => {
3322            const packed_struct = zcu.typeToPackedStruct(ty).?;
3323            return cg.emitUndefined(Type.fromInterned(packed_struct.backingIntTypeUnordered(ip)));
3324        },
3325        .@"union" => switch (ty.containerLayout(zcu)) {
3326            .@"packed" => switch (ty.bitSize(zcu)) {
3327                0...32 => return .{ .imm32 = 0xaaaaaaaa },
3328                33...64 => return .{ .imm64 = 0xaaaaaaaaaaaaaaaa },
3329                else => unreachable,
3330            },
3331            else => unreachable,
3332        },
3333        else => return cg.fail("Wasm TODO: emitUndefined for type: {t}\n", .{ty.zigTypeTag(zcu)}),
3334    }
3335}
3336
3337fn airBlock(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3338    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
3339    const extra = cg.air.extraData(Air.Block, ty_pl.payload);
3340    try cg.lowerBlock(inst, ty_pl.ty.toType(), @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.body_len]));
3341}
3342
3343fn lowerBlock(cg: *CodeGen, inst: Air.Inst.Index, block_ty: Type, body: []const Air.Inst.Index) InnerError!void {
3344    const zcu = cg.pt.zcu;
3345    // if wasm_block_ty is non-empty, we create a register to store the temporary value
3346    const block_result: WValue = if (block_ty.hasRuntimeBitsIgnoreComptime(zcu))
3347        try cg.allocLocal(block_ty)
3348    else
3349        .none;
3350
3351    try cg.startBlock(.block, .empty);
3352    // Here we set the current block idx, so breaks know the depth to jump
3353    // to when breaking out.
3354    try cg.blocks.putNoClobber(cg.gpa, inst, .{
3355        .label = cg.block_depth,
3356        .value = block_result,
3357    });
3358
3359    try cg.genBody(body);
3360    try cg.endBlock();
3361
3362    const liveness = cg.liveness.getBlock(inst);
3363    try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.deaths.len);
3364
3365    return cg.finishAir(inst, block_result, &.{});
3366}
3367
3368/// appends a new wasm block to the code section and increases the `block_depth` by 1
3369fn startBlock(cg: *CodeGen, block_tag: std.wasm.Opcode, block_type: std.wasm.BlockType) !void {
3370    cg.block_depth += 1;
3371    try cg.addInst(.{
3372        .tag = Mir.Inst.Tag.fromOpcode(block_tag),
3373        .data = .{ .block_type = block_type },
3374    });
3375}
3376
3377/// Ends the current wasm block and decreases the `block_depth` by 1
3378fn endBlock(cg: *CodeGen) !void {
3379    try cg.addTag(.end);
3380    cg.block_depth -= 1;
3381}
3382
3383fn airLoop(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3384    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
3385    const loop = cg.air.extraData(Air.Block, ty_pl.payload);
3386    const body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[loop.end..][0..loop.data.body_len]);
3387
3388    // result type of loop is always 'noreturn', meaning we can always
3389    // emit the wasm type 'block_empty'.
3390    try cg.startBlock(.loop, .empty);
3391
3392    try cg.loops.putNoClobber(cg.gpa, inst, cg.block_depth);
3393    defer assert(cg.loops.remove(inst));
3394
3395    try cg.genBody(body);
3396    try cg.endBlock();
3397
3398    return cg.finishAir(inst, .none, &.{});
3399}
3400
3401fn airCondBr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3402    const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
3403    const condition = try cg.resolveInst(pl_op.operand);
3404    const extra = cg.air.extraData(Air.CondBr, pl_op.payload);
3405    const then_body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.then_body_len]);
3406    const else_body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[extra.end + then_body.len ..][0..extra.data.else_body_len]);
3407    const liveness_condbr = cg.liveness.getCondBr(inst);
3408
3409    // result type is always noreturn, so use `block_empty` as type.
3410    try cg.startBlock(.block, .empty);
3411    // emit the conditional value
3412    try cg.emitWValue(condition);
3413
3414    // we inserted the block in front of the condition
3415    // so now check if condition matches. If not, break outside this block
3416    // and continue with the then codepath
3417    try cg.addLabel(.br_if, 0);
3418
3419    try cg.branches.ensureUnusedCapacity(cg.gpa, 2);
3420    {
3421        cg.branches.appendAssumeCapacity(.{});
3422        try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, @as(u32, @intCast(liveness_condbr.else_deaths.len)));
3423        defer {
3424            var else_stack = cg.branches.pop().?;
3425            else_stack.deinit(cg.gpa);
3426        }
3427        try cg.genBody(else_body);
3428        try cg.endBlock();
3429    }
3430
3431    // Outer block that matches the condition
3432    {
3433        cg.branches.appendAssumeCapacity(.{});
3434        try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, @as(u32, @intCast(liveness_condbr.then_deaths.len)));
3435        defer {
3436            var then_stack = cg.branches.pop().?;
3437            then_stack.deinit(cg.gpa);
3438        }
3439        try cg.genBody(then_body);
3440    }
3441
3442    return cg.finishAir(inst, .none, &.{});
3443}
3444
3445fn airCmp(cg: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) InnerError!void {
3446    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
3447
3448    const lhs = try cg.resolveInst(bin_op.lhs);
3449    const rhs = try cg.resolveInst(bin_op.rhs);
3450    const operand_ty = cg.typeOf(bin_op.lhs);
3451    const result = try cg.cmp(lhs, rhs, operand_ty, op);
3452    return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs });
3453}
3454
3455/// Compares two operands.
3456/// Asserts rhs is not a stack value when the lhs isn't a stack value either
3457/// NOTE: This leaves the result on top of the stack, rather than a new local.
3458fn cmp(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: std.math.CompareOperator) InnerError!WValue {
3459    assert(!(lhs != .stack and rhs == .stack));
3460    const zcu = cg.pt.zcu;
3461    if (ty.zigTypeTag(zcu) == .optional and !ty.optionalReprIsPayload(zcu)) {
3462        const payload_ty = ty.optionalChild(zcu);
3463        if (payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
3464            // When we hit this case, we must check the value of optionals
3465            // that are not pointers. This means first checking against non-null for
3466            // both lhs and rhs, as well as checking the payload are matching of lhs and rhs
3467            return cg.cmpOptionals(lhs, rhs, ty, op);
3468        }
3469    } else if (ty.isAnyFloat()) {
3470        return cg.cmpFloat(ty, lhs, rhs, op);
3471    } else if (isByRef(ty, zcu, cg.target)) {
3472        return cg.cmpBigInt(lhs, rhs, ty, op);
3473    }
3474
3475    const signedness: std.builtin.Signedness = blk: {
3476        // by default we tell the operand type is unsigned (i.e. bools and enum values)
3477        if (ty.zigTypeTag(zcu) != .int) break :blk .unsigned;
3478
3479        // incase of an actual integer, we emit the correct signedness
3480        break :blk ty.intInfo(zcu).signedness;
3481    };
3482
3483    // ensure that when we compare pointers, we emit
3484    // the true pointer of a stack value, rather than the stack pointer.
3485    try cg.lowerToStack(lhs);
3486    try cg.lowerToStack(rhs);
3487
3488    const opcode: std.wasm.Opcode = buildOpcode(.{
3489        .valtype1 = typeToValtype(ty, zcu, cg.target),
3490        .op = switch (op) {
3491            .lt => .lt,
3492            .lte => .le,
3493            .eq => .eq,
3494            .neq => .ne,
3495            .gte => .ge,
3496            .gt => .gt,
3497        },
3498        .signedness = signedness,
3499    });
3500    try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode));
3501
3502    return .stack;
3503}
3504
3505/// Compares two floats.
3506/// NOTE: Leaves the result of the comparison on top of the stack.
3507fn cmpFloat(cg: *CodeGen, ty: Type, lhs: WValue, rhs: WValue, cmp_op: std.math.CompareOperator) InnerError!WValue {
3508    const float_bits = ty.floatBits(cg.target);
3509
3510    const op: Op = switch (cmp_op) {
3511        .lt => .lt,
3512        .lte => .le,
3513        .eq => .eq,
3514        .neq => .ne,
3515        .gte => .ge,
3516        .gt => .gt,
3517    };
3518
3519    switch (float_bits) {
3520        16 => {
3521            _ = try cg.fpext(lhs, Type.f16, Type.f32);
3522            _ = try cg.fpext(rhs, Type.f16, Type.f32);
3523            const opcode = buildOpcode(.{ .op = op, .valtype1 = .f32 });
3524            try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode));
3525            return .stack;
3526        },
3527        32, 64 => {
3528            try cg.emitWValue(lhs);
3529            try cg.emitWValue(rhs);
3530            const val_type: std.wasm.Valtype = if (float_bits == 32) .f32 else .f64;
3531            const opcode = buildOpcode(.{ .op = op, .valtype1 = val_type });
3532            try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode));
3533            return .stack;
3534        },
3535        80, 128 => {
3536            const intrinsic = floatCmpIntrinsic(cmp_op, float_bits);
3537            const result = try cg.callIntrinsic(intrinsic, &.{ ty.ip_index, ty.ip_index }, Type.bool, &.{ lhs, rhs });
3538            return cg.cmp(result, .{ .imm32 = 0 }, Type.i32, cmp_op);
3539        },
3540        else => unreachable,
3541    }
3542}
3543
3544fn airCmpVector(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3545    _ = inst;
3546    return cg.fail("TODO implement airCmpVector for wasm", .{});
3547}
3548
3549fn airCmpLtErrorsLen(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3550    const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
3551    const operand = try cg.resolveInst(un_op);
3552
3553    try cg.emitWValue(operand);
3554    const pt = cg.pt;
3555    const err_int_ty = try pt.errorIntType();
3556    try cg.addTag(.errors_len);
3557    const result = try cg.cmp(.stack, .stack, err_int_ty, .lt);
3558
3559    return cg.finishAir(inst, result, &.{un_op});
3560}
3561
3562fn airBr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3563    const br = cg.air.instructions.items(.data)[@intFromEnum(inst)].br;
3564    const block = cg.blocks.get(br.block_inst).?;
3565
3566    // if operand has codegen bits we should break with a value
3567    if (block.value != .none) {
3568        const operand = try cg.resolveInst(br.operand);
3569        try cg.lowerToStack(operand);
3570        try cg.addLocal(.local_set, block.value.local.value);
3571    }
3572
3573    // We map every block to its block index.
3574    // We then determine how far we have to jump to it by subtracting it from current block depth
3575    const idx: u32 = cg.block_depth - block.label;
3576    try cg.addLabel(.br, idx);
3577
3578    return cg.finishAir(inst, .none, &.{br.operand});
3579}
3580
3581fn airRepeat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3582    const repeat = cg.air.instructions.items(.data)[@intFromEnum(inst)].repeat;
3583    const loop_label = cg.loops.get(repeat.loop_inst).?;
3584
3585    const idx: u32 = cg.block_depth - loop_label;
3586    try cg.addLabel(.br, idx);
3587
3588    return cg.finishAir(inst, .none, &.{});
3589}
3590
3591fn airNot(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3592    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
3593
3594    const operand = try cg.resolveInst(ty_op.operand);
3595    const operand_ty = cg.typeOf(ty_op.operand);
3596    const pt = cg.pt;
3597    const zcu = pt.zcu;
3598
3599    const result = result: {
3600        if (operand_ty.zigTypeTag(zcu) == .bool) {
3601            try cg.emitWValue(operand);
3602            try cg.addTag(.i32_eqz);
3603            const not_tmp = try cg.allocLocal(operand_ty);
3604            try cg.addLocal(.local_set, not_tmp.local.value);
3605            break :result not_tmp;
3606        } else {
3607            const int_info = operand_ty.intInfo(zcu);
3608            const wasm_bits = toWasmBits(int_info.bits) orelse {
3609                return cg.fail("TODO: Implement binary NOT for {f}", .{operand_ty.fmt(pt)});
3610            };
3611
3612            switch (wasm_bits) {
3613                32 => {
3614                    try cg.emitWValue(operand);
3615                    try cg.addImm32(switch (int_info.signedness) {
3616                        .unsigned => ~@as(u32, 0) >> @intCast(32 - int_info.bits),
3617                        .signed => ~@as(u32, 0),
3618                    });
3619                    try cg.addTag(.i32_xor);
3620                    break :result .stack;
3621                },
3622                64 => {
3623                    try cg.emitWValue(operand);
3624                    try cg.addImm64(switch (int_info.signedness) {
3625                        .unsigned => ~@as(u64, 0) >> @intCast(64 - int_info.bits),
3626                        .signed => ~@as(u64, 0),
3627                    });
3628                    try cg.addTag(.i64_xor);
3629                    break :result .stack;
3630                },
3631                128 => {
3632                    const ptr = try cg.allocStack(operand_ty);
3633
3634                    try cg.emitWValue(ptr);
3635                    _ = try cg.load(operand, Type.u64, 0);
3636                    try cg.addImm64(~@as(u64, 0));
3637                    try cg.addTag(.i64_xor);
3638                    try cg.store(.stack, .stack, Type.u64, ptr.offset());
3639
3640                    try cg.emitWValue(ptr);
3641                    _ = try cg.load(operand, Type.u64, 8);
3642                    try cg.addImm64(switch (int_info.signedness) {
3643                        .unsigned => ~@as(u64, 0) >> @intCast(128 - int_info.bits),
3644                        .signed => ~@as(u64, 0),
3645                    });
3646                    try cg.addTag(.i64_xor);
3647                    try cg.store(.stack, .stack, Type.u64, ptr.offset() + 8);
3648
3649                    break :result ptr;
3650                },
3651                else => unreachable,
3652            }
3653        }
3654    };
3655    return cg.finishAir(inst, result, &.{ty_op.operand});
3656}
3657
3658fn airTrap(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3659    try cg.addTag(.@"unreachable");
3660    return cg.finishAir(inst, .none, &.{});
3661}
3662
3663fn airBreakpoint(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3664    // unsupported by wasm itfunc. Can be implemented once we support DWARF
3665    // for wasm
3666    try cg.addTag(.@"unreachable");
3667    return cg.finishAir(inst, .none, &.{});
3668}
3669
3670fn airUnreachable(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3671    try cg.addTag(.@"unreachable");
3672    return cg.finishAir(inst, .none, &.{});
3673}
3674
3675fn airBitcast(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3676    const zcu = cg.pt.zcu;
3677    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
3678    const operand = try cg.resolveInst(ty_op.operand);
3679    const wanted_ty = cg.typeOfIndex(inst);
3680    const given_ty = cg.typeOf(ty_op.operand);
3681
3682    const bit_size = given_ty.bitSize(zcu);
3683    const needs_wrapping = (given_ty.isSignedInt(zcu) != wanted_ty.isSignedInt(zcu)) and
3684        bit_size != 32 and bit_size != 64 and bit_size != 128;
3685
3686    const result = result: {
3687        if (given_ty.isAnyFloat() or wanted_ty.isAnyFloat()) {
3688            break :result try cg.bitcast(wanted_ty, given_ty, operand);
3689        }
3690
3691        if (isByRef(given_ty, zcu, cg.target) and !isByRef(wanted_ty, zcu, cg.target)) {
3692            const loaded_memory = try cg.load(operand, wanted_ty, 0);
3693            if (needs_wrapping) {
3694                break :result try cg.wrapOperand(loaded_memory, wanted_ty);
3695            } else {
3696                break :result loaded_memory;
3697            }
3698        }
3699        if (!isByRef(given_ty, zcu, cg.target) and isByRef(wanted_ty, zcu, cg.target)) {
3700            const stack_memory = try cg.allocStack(wanted_ty);
3701            try cg.store(stack_memory, operand, given_ty, 0);
3702            if (needs_wrapping) {
3703                break :result try cg.wrapOperand(stack_memory, wanted_ty);
3704            } else {
3705                break :result stack_memory;
3706            }
3707        }
3708
3709        if (needs_wrapping) {
3710            break :result try cg.wrapOperand(operand, wanted_ty);
3711        }
3712
3713        break :result switch (operand) {
3714            // for stack offset, return a pointer to this offset.
3715            .stack_offset => try cg.buildPointerOffset(operand, 0, .new),
3716            else => cg.reuseOperand(ty_op.operand, operand),
3717        };
3718    };
3719    return cg.finishAir(inst, result, &.{ty_op.operand});
3720}
3721
3722fn bitcast(cg: *CodeGen, wanted_ty: Type, given_ty: Type, operand: WValue) InnerError!WValue {
3723    const zcu = cg.pt.zcu;
3724    // if we bitcast a float to or from an integer we must use the 'reinterpret' instruction
3725    if (!(wanted_ty.isAnyFloat() or given_ty.isAnyFloat())) return operand;
3726    if (wanted_ty.ip_index == .f16_type or given_ty.ip_index == .f16_type) return operand;
3727    if (wanted_ty.bitSize(zcu) > 64) return operand;
3728    assert((wanted_ty.isInt(zcu) and given_ty.isAnyFloat()) or (wanted_ty.isAnyFloat() and given_ty.isInt(zcu)));
3729
3730    const opcode = buildOpcode(.{
3731        .op = .reinterpret,
3732        .valtype1 = typeToValtype(wanted_ty, zcu, cg.target),
3733        .valtype2 = typeToValtype(given_ty, zcu, cg.target),
3734    });
3735    try cg.emitWValue(operand);
3736    try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode));
3737    return .stack;
3738}
3739
3740fn airStructFieldPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3741    const zcu = cg.pt.zcu;
3742    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
3743    const extra = cg.air.extraData(Air.StructField, ty_pl.payload);
3744
3745    const struct_ptr = try cg.resolveInst(extra.data.struct_operand);
3746    const struct_ptr_ty = cg.typeOf(extra.data.struct_operand);
3747    const struct_ty = struct_ptr_ty.childType(zcu);
3748    const result = try cg.structFieldPtr(inst, extra.data.struct_operand, struct_ptr, struct_ptr_ty, struct_ty, extra.data.field_index);
3749    return cg.finishAir(inst, result, &.{extra.data.struct_operand});
3750}
3751
3752fn airStructFieldPtrIndex(cg: *CodeGen, inst: Air.Inst.Index, index: u32) InnerError!void {
3753    const zcu = cg.pt.zcu;
3754    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
3755    const struct_ptr = try cg.resolveInst(ty_op.operand);
3756    const struct_ptr_ty = cg.typeOf(ty_op.operand);
3757    const struct_ty = struct_ptr_ty.childType(zcu);
3758
3759    const result = try cg.structFieldPtr(inst, ty_op.operand, struct_ptr, struct_ptr_ty, struct_ty, index);
3760    return cg.finishAir(inst, result, &.{ty_op.operand});
3761}
3762
3763fn structFieldPtr(
3764    cg: *CodeGen,
3765    inst: Air.Inst.Index,
3766    ref: Air.Inst.Ref,
3767    struct_ptr: WValue,
3768    struct_ptr_ty: Type,
3769    struct_ty: Type,
3770    index: u32,
3771) InnerError!WValue {
3772    const pt = cg.pt;
3773    const zcu = pt.zcu;
3774    const result_ty = cg.typeOfIndex(inst);
3775    const struct_ptr_ty_info = struct_ptr_ty.ptrInfo(zcu);
3776
3777    const offset = switch (struct_ty.containerLayout(zcu)) {
3778        .@"packed" => switch (struct_ty.zigTypeTag(zcu)) {
3779            .@"struct" => offset: {
3780                if (result_ty.ptrInfo(zcu).packed_offset.host_size != 0) {
3781                    break :offset @as(u32, 0);
3782                }
3783                const struct_type = zcu.typeToStruct(struct_ty).?;
3784                break :offset @divExact(zcu.structPackedFieldBitOffset(struct_type, index) + struct_ptr_ty_info.packed_offset.bit_offset, 8);
3785            },
3786            .@"union" => 0,
3787            else => unreachable,
3788        },
3789        else => struct_ty.structFieldOffset(index, zcu),
3790    };
3791    // save a load and store when we can simply reuse the operand
3792    if (offset == 0) {
3793        return cg.reuseOperand(ref, struct_ptr);
3794    }
3795    switch (struct_ptr) {
3796        .stack_offset => |stack_offset| {
3797            return .{ .stack_offset = .{ .value = stack_offset.value + @as(u32, @intCast(offset)), .references = 1 } };
3798        },
3799        else => return cg.buildPointerOffset(struct_ptr, offset, .new),
3800    }
3801}
3802
3803fn airStructFieldVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
3804    const pt = cg.pt;
3805    const zcu = pt.zcu;
3806    const ip = &zcu.intern_pool;
3807    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
3808    const struct_field = cg.air.extraData(Air.StructField, ty_pl.payload).data;
3809
3810    const struct_ty = cg.typeOf(struct_field.struct_operand);
3811    const operand = try cg.resolveInst(struct_field.struct_operand);
3812    const field_index = struct_field.field_index;
3813    const field_ty = struct_ty.fieldType(field_index, zcu);
3814    if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) return cg.finishAir(inst, .none, &.{struct_field.struct_operand});
3815
3816    const result: WValue = switch (struct_ty.containerLayout(zcu)) {
3817        .@"packed" => switch (struct_ty.zigTypeTag(zcu)) {
3818            .@"struct" => result: {
3819                const packed_struct = zcu.typeToPackedStruct(struct_ty).?;
3820                const offset = zcu.structPackedFieldBitOffset(packed_struct, field_index);
3821                const backing_ty = Type.fromInterned(packed_struct.backingIntTypeUnordered(ip));
3822                const host_bits = backing_ty.intInfo(zcu).bits;
3823
3824                const const_wvalue: WValue = if (33 <= host_bits and host_bits <= 64)
3825                    .{ .imm64 = offset }
3826                else
3827                    .{ .imm32 = offset };
3828
3829                // for first field we don't require any shifting
3830                const shifted_value = if (offset == 0)
3831                    operand
3832                else
3833                    try cg.binOp(operand, const_wvalue, backing_ty, .shr);
3834
3835                if (field_ty.zigTypeTag(zcu) == .float) {
3836                    const int_type = try pt.intType(.unsigned, @as(u16, @intCast(field_ty.bitSize(zcu))));
3837                    const truncated = try cg.trunc(shifted_value, int_type, backing_ty);
3838                    break :result try cg.bitcast(field_ty, int_type, truncated);
3839                } else if (field_ty.isPtrAtRuntime(zcu) and packed_struct.field_types.len == 1) {
3840                    // In this case we do not have to perform any transformations,
3841                    // we can simply reuse the operand.
3842                    break :result cg.reuseOperand(struct_field.struct_operand, operand);
3843                } else if (field_ty.isPtrAtRuntime(zcu)) {
3844                    const int_type = try pt.intType(.unsigned, @as(u16, @intCast(field_ty.bitSize(zcu))));
3845                    break :result try cg.trunc(shifted_value, int_type, backing_ty);
3846                }
3847                break :result try cg.trunc(shifted_value, field_ty, backing_ty);
3848            },
3849            .@"union" => result: {
3850                if (isByRef(struct_ty, zcu, cg.target)) {
3851                    if (!isByRef(field_ty, zcu, cg.target)) {
3852                        break :result try cg.load(operand, field_ty, 0);
3853                    } else {
3854                        const new_stack_val = try cg.allocStack(field_ty);
3855                        try cg.store(new_stack_val, operand, field_ty, 0);
3856                        break :result new_stack_val;
3857                    }
3858                }
3859
3860                const union_int_type = try pt.intType(.unsigned, @as(u16, @intCast(struct_ty.bitSize(zcu))));
3861                if (field_ty.zigTypeTag(zcu) == .float) {
3862                    const int_type = try pt.intType(.unsigned, @as(u16, @intCast(field_ty.bitSize(zcu))));
3863                    const truncated = try cg.trunc(operand, int_type, union_int_type);
3864                    break :result try cg.bitcast(field_ty, int_type, truncated);
3865                } else if (field_ty.isPtrAtRuntime(zcu)) {
3866                    const int_type = try pt.intType(.unsigned, @as(u16, @intCast(field_ty.bitSize(zcu))));
3867                    break :result try cg.trunc(operand, int_type, union_int_type);
3868                }
3869                break :result try cg.trunc(operand, field_ty, union_int_type);
3870            },
3871            else => unreachable,
3872        },
3873        else => result: {
3874            const offset = std.math.cast(u32, struct_ty.structFieldOffset(field_index, zcu)) orelse {
3875                return cg.fail("Field type '{f}' too big to fit into stack frame", .{field_ty.fmt(pt)});
3876            };
3877            if (isByRef(field_ty, zcu, cg.target)) {
3878                switch (operand) {
3879                    .stack_offset => |stack_offset| {
3880                        break :result .{ .stack_offset = .{ .value = stack_offset.value + offset, .references = 1 } };
3881                    },
3882                    else => break :result try cg.buildPointerOffset(operand, offset, .new),
3883                }
3884            }
3885            break :result try cg.load(operand, field_ty, offset);
3886        },
3887    };
3888
3889    return cg.finishAir(inst, result, &.{struct_field.struct_operand});
3890}
3891
3892fn airSwitchBr(cg: *CodeGen, inst: Air.Inst.Index, is_dispatch_loop: bool) InnerError!void {
3893    const pt = cg.pt;
3894    const zcu = pt.zcu;
3895
3896    const switch_br = cg.air.unwrapSwitch(inst);
3897    const target_ty = cg.typeOf(switch_br.operand);
3898
3899    assert(target_ty.hasRuntimeBitsIgnoreComptime(zcu));
3900
3901    // swap target value with placeholder local, for dispatching
3902    const target = if (is_dispatch_loop) target: {
3903        const initial_target = try cg.resolveInst(switch_br.operand);
3904        const target: WValue = try cg.allocLocal(target_ty);
3905        try cg.lowerToStack(initial_target);
3906        try cg.addLocal(.local_set, target.local.value);
3907
3908        try cg.startBlock(.loop, .empty); // dispatch loop start
3909        try cg.blocks.putNoClobber(cg.gpa, inst, .{
3910            .label = cg.block_depth,
3911            .value = target,
3912        });
3913
3914        break :target target;
3915    } else try cg.resolveInst(switch_br.operand);
3916
3917    const liveness = try cg.liveness.getSwitchBr(cg.gpa, inst, switch_br.cases_len + 1);
3918    defer cg.gpa.free(liveness.deaths);
3919
3920    const has_else_body = switch_br.else_body_len != 0;
3921    const branch_count = switch_br.cases_len + 1; // if else branch is missing, we trap when failing all conditions
3922    try cg.branches.ensureUnusedCapacity(cg.gpa, switch_br.cases_len + @intFromBool(has_else_body));
3923
3924    if (switch_br.cases_len == 0) {
3925        assert(has_else_body);
3926
3927        var it = switch_br.iterateCases();
3928        const else_body = it.elseBody();
3929
3930        cg.branches.appendAssumeCapacity(.{});
3931        const else_deaths = liveness.deaths.len - 1;
3932        try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.deaths[else_deaths].len);
3933        defer {
3934            var else_branch = cg.branches.pop().?;
3935            else_branch.deinit(cg.gpa);
3936        }
3937        try cg.genBody(else_body);
3938
3939        if (is_dispatch_loop) {
3940            try cg.endBlock(); // dispatch loop end
3941        }
3942        return cg.finishAir(inst, .none, &.{});
3943    }
3944
3945    var min: ?Value = null;
3946    var max: ?Value = null;
3947    var branching_size: u32 = 0; // single item +1, range +2
3948
3949    {
3950        var cases_it = switch_br.iterateCases();
3951        while (cases_it.next()) |case| {
3952            for (case.items) |item| {
3953                const val = Value.fromInterned(item.toInterned().?);
3954                if (min == null or val.compareHetero(.lt, min.?, zcu)) min = val;
3955                if (max == null or val.compareHetero(.gt, max.?, zcu)) max = val;
3956                branching_size += 1;
3957            }
3958            for (case.ranges) |range| {
3959                const low = Value.fromInterned(range[0].toInterned().?);
3960                if (min == null or low.compareHetero(.lt, min.?, zcu)) min = low;
3961                const high = Value.fromInterned(range[1].toInterned().?);
3962                if (max == null or high.compareHetero(.gt, max.?, zcu)) max = high;
3963                branching_size += 2;
3964            }
3965        }
3966    }
3967
3968    var min_space: Value.BigIntSpace = undefined;
3969    const min_bigint = min.?.toBigInt(&min_space, zcu);
3970    var max_space: Value.BigIntSpace = undefined;
3971    const max_bigint = max.?.toBigInt(&max_space, zcu);
3972    const limbs = try cg.gpa.alloc(
3973        std.math.big.Limb,
3974        @max(min_bigint.limbs.len, max_bigint.limbs.len) + 1,
3975    );
3976    defer cg.gpa.free(limbs);
3977
3978    const width_maybe: ?u32 = width: {
3979        var width_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined };
3980        width_bigint.sub(max_bigint, min_bigint);
3981        width_bigint.addScalar(width_bigint.toConst(), 1);
3982        break :width width_bigint.toConst().toInt(u32) catch null;
3983    };
3984
3985    try cg.startBlock(.block, .empty); // whole switch block start
3986
3987    for (0..branch_count) |_| {
3988        try cg.startBlock(.block, .empty);
3989    }
3990
3991    // Heuristic on deciding when to use .br_table instead of .br_if jump table
3992    // 1. Differences between lowest and highest values should fit into u32
3993    // 2. .br_table should be applied for "dense" switch, we test it by checking .br_if jumps will need more instructions
3994    // 3. Do not use .br_table for tiny switches
3995    const use_br_table = cond: {
3996        const width = width_maybe orelse break :cond false;
3997        if (width > 2 * branching_size) break :cond false;
3998        if (width < 2 or branch_count < 2) break :cond false;
3999        break :cond true;
4000    };
4001
4002    if (use_br_table) {
4003        const width = width_maybe.?;
4004
4005        const br_value_original = try cg.binOp(target, try cg.resolveValue(min.?), target_ty, .sub);
4006        _ = try cg.intcast(br_value_original, target_ty, Type.u32);
4007
4008        const jump_table: Mir.JumpTable = .{ .length = width + 1 };
4009        const table_extra_index = try cg.addExtra(jump_table);
4010        try cg.addInst(.{ .tag = .br_table, .data = .{ .payload = table_extra_index } });
4011
4012        const branch_list = try cg.mir_extra.addManyAsSlice(cg.gpa, width + 1);
4013        @memset(branch_list, branch_count - 1);
4014
4015        var cases_it = switch_br.iterateCases();
4016        while (cases_it.next()) |case| {
4017            for (case.items) |item| {
4018                const val = Value.fromInterned(item.toInterned().?);
4019                var val_space: Value.BigIntSpace = undefined;
4020                const val_bigint = val.toBigInt(&val_space, zcu);
4021                var index_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined };
4022                index_bigint.sub(val_bigint, min_bigint);
4023                branch_list[index_bigint.toConst().toInt(u32) catch unreachable] = case.idx;
4024            }
4025            for (case.ranges) |range| {
4026                var low_space: Value.BigIntSpace = undefined;
4027                const low_bigint = Value.fromInterned(range[0].toInterned().?).toBigInt(&low_space, zcu);
4028                var high_space: Value.BigIntSpace = undefined;
4029                const high_bigint = Value.fromInterned(range[1].toInterned().?).toBigInt(&high_space, zcu);
4030                var index_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined };
4031                index_bigint.sub(low_bigint, min_bigint);
4032                const start = index_bigint.toConst().toInt(u32) catch unreachable;
4033                index_bigint.sub(high_bigint, min_bigint);
4034                const end = (index_bigint.toConst().toInt(u32) catch unreachable) + 1;
4035                @memset(branch_list[start..end], case.idx);
4036            }
4037        }
4038    } else {
4039        var cases_it = switch_br.iterateCases();
4040        while (cases_it.next()) |case| {
4041            for (case.items) |ref| {
4042                const val = try cg.resolveInst(ref);
4043                _ = try cg.cmp(target, val, target_ty, .eq);
4044                try cg.addLabel(.br_if, case.idx); // item match found
4045            }
4046            for (case.ranges) |range| {
4047                const low = try cg.resolveInst(range[0]);
4048                const high = try cg.resolveInst(range[1]);
4049
4050                const gte = try cg.cmp(target, low, target_ty, .gte);
4051                const lte = try cg.cmp(target, high, target_ty, .lte);
4052                _ = try cg.binOp(gte, lte, Type.bool, .@"and");
4053                try cg.addLabel(.br_if, case.idx); // range match found
4054            }
4055        }
4056        try cg.addLabel(.br, branch_count - 1);
4057    }
4058
4059    var cases_it = switch_br.iterateCases();
4060    while (cases_it.next()) |case| {
4061        try cg.endBlock();
4062
4063        cg.branches.appendAssumeCapacity(.{});
4064        try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.deaths[case.idx].len);
4065        defer {
4066            var case_branch = cg.branches.pop().?;
4067            case_branch.deinit(cg.gpa);
4068        }
4069        try cg.genBody(case.body);
4070
4071        try cg.addLabel(.br, branch_count - case.idx - 1); // matching case found and executed => exit switch
4072    }
4073
4074    try cg.endBlock();
4075    if (has_else_body) {
4076        const else_body = cases_it.elseBody();
4077
4078        cg.branches.appendAssumeCapacity(.{});
4079        const else_deaths = liveness.deaths.len - 1;
4080        try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.deaths[else_deaths].len);
4081        defer {
4082            var else_branch = cg.branches.pop().?;
4083            else_branch.deinit(cg.gpa);
4084        }
4085        try cg.genBody(else_body);
4086    } else {
4087        try cg.addTag(.@"unreachable");
4088    }
4089
4090    try cg.endBlock(); // whole switch block end
4091
4092    if (is_dispatch_loop) {
4093        try cg.endBlock(); // dispatch loop end
4094    }
4095
4096    return cg.finishAir(inst, .none, &.{});
4097}
4098
4099fn airSwitchDispatch(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4100    const br = cg.air.instructions.items(.data)[@intFromEnum(inst)].br;
4101    const switch_loop = cg.blocks.get(br.block_inst).?;
4102
4103    const operand = try cg.resolveInst(br.operand);
4104    try cg.lowerToStack(operand);
4105    try cg.addLocal(.local_set, switch_loop.value.local.value);
4106
4107    const idx: u32 = cg.block_depth - switch_loop.label;
4108    try cg.addLabel(.br, idx);
4109
4110    return cg.finishAir(inst, .none, &.{br.operand});
4111}
4112
4113fn airIsErr(cg: *CodeGen, inst: Air.Inst.Index, opcode: std.wasm.Opcode, op_kind: enum { value, ptr }) InnerError!void {
4114    const zcu = cg.pt.zcu;
4115    const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
4116    const operand = try cg.resolveInst(un_op);
4117    const err_union_ty = cg.typeOf(un_op);
4118    const pl_ty = err_union_ty.errorUnionPayload(zcu);
4119
4120    const result: WValue = result: {
4121        if (err_union_ty.errorUnionSet(zcu).errorSetIsEmpty(zcu)) {
4122            switch (opcode) {
4123                .i32_ne => break :result .{ .imm32 = 0 },
4124                .i32_eq => break :result .{ .imm32 = 1 },
4125                else => unreachable,
4126            }
4127        }
4128
4129        try cg.emitWValue(operand);
4130        if (op_kind == .ptr or pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
4131            try cg.addMemArg(.i32_load16_u, .{
4132                .offset = operand.offset() + @as(u32, @intCast(errUnionErrorOffset(pl_ty, zcu))),
4133                .alignment = @intCast(Type.anyerror.abiAlignment(zcu).toByteUnits().?),
4134            });
4135        }
4136
4137        // Compare the error value with '0'
4138        try cg.addImm32(0);
4139        try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode));
4140        break :result .stack;
4141    };
4142    return cg.finishAir(inst, result, &.{un_op});
4143}
4144
4145/// E!T -> T op_is_ptr == false
4146/// *(E!T) -> *T op_is_prt == true
4147fn airUnwrapErrUnionPayload(cg: *CodeGen, inst: Air.Inst.Index, op_is_ptr: bool) InnerError!void {
4148    const zcu = cg.pt.zcu;
4149    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4150
4151    const operand = try cg.resolveInst(ty_op.operand);
4152    const op_ty = cg.typeOf(ty_op.operand);
4153    const eu_ty = if (op_is_ptr) op_ty.childType(zcu) else op_ty;
4154    const payload_ty = eu_ty.errorUnionPayload(zcu);
4155
4156    const result: WValue = result: {
4157        if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
4158            if (op_is_ptr) {
4159                break :result cg.reuseOperand(ty_op.operand, operand);
4160            } else {
4161                break :result .none;
4162            }
4163        }
4164
4165        const pl_offset: u32 = @intCast(errUnionPayloadOffset(payload_ty, zcu));
4166        if (op_is_ptr or isByRef(payload_ty, zcu, cg.target)) {
4167            break :result try cg.buildPointerOffset(operand, pl_offset, .new);
4168        } else {
4169            assert(isByRef(eu_ty, zcu, cg.target));
4170            break :result try cg.load(operand, payload_ty, pl_offset);
4171        }
4172    };
4173    return cg.finishAir(inst, result, &.{ty_op.operand});
4174}
4175
4176/// E!T -> E op_is_ptr == false
4177/// *(E!T) -> E op_is_prt == true
4178/// NOTE: op_is_ptr will not change return type
4179fn airUnwrapErrUnionError(cg: *CodeGen, inst: Air.Inst.Index, op_is_ptr: bool) InnerError!void {
4180    const zcu = cg.pt.zcu;
4181    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4182
4183    const operand = try cg.resolveInst(ty_op.operand);
4184    const op_ty = cg.typeOf(ty_op.operand);
4185    const eu_ty = if (op_is_ptr) op_ty.childType(zcu) else op_ty;
4186    const payload_ty = eu_ty.errorUnionPayload(zcu);
4187
4188    const result: WValue = result: {
4189        if (eu_ty.errorUnionSet(zcu).errorSetIsEmpty(zcu)) {
4190            break :result .{ .imm32 = 0 };
4191        }
4192
4193        const err_offset: u32 = @intCast(errUnionErrorOffset(payload_ty, zcu));
4194        if (op_is_ptr or isByRef(eu_ty, zcu, cg.target)) {
4195            break :result try cg.load(operand, Type.anyerror, err_offset);
4196        } else {
4197            assert(!payload_ty.hasRuntimeBitsIgnoreComptime(zcu));
4198            break :result cg.reuseOperand(ty_op.operand, operand);
4199        }
4200    };
4201    return cg.finishAir(inst, result, &.{ty_op.operand});
4202}
4203
4204fn airWrapErrUnionPayload(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4205    const zcu = cg.pt.zcu;
4206    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4207
4208    const operand = try cg.resolveInst(ty_op.operand);
4209    const err_ty = cg.typeOfIndex(inst);
4210
4211    const pl_ty = cg.typeOf(ty_op.operand);
4212    const result = result: {
4213        if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
4214            break :result cg.reuseOperand(ty_op.operand, operand);
4215        }
4216
4217        const err_union = try cg.allocStack(err_ty);
4218        const payload_ptr = try cg.buildPointerOffset(err_union, @as(u32, @intCast(errUnionPayloadOffset(pl_ty, zcu))), .new);
4219        try cg.store(payload_ptr, operand, pl_ty, 0);
4220
4221        // ensure we also write '0' to the error part, so any present stack value gets overwritten by it.
4222        try cg.emitWValue(err_union);
4223        try cg.addImm32(0);
4224        const err_val_offset: u32 = @intCast(errUnionErrorOffset(pl_ty, zcu));
4225        try cg.addMemArg(.i32_store16, .{
4226            .offset = err_union.offset() + err_val_offset,
4227            .alignment = 2,
4228        });
4229        break :result err_union;
4230    };
4231    return cg.finishAir(inst, result, &.{ty_op.operand});
4232}
4233
4234fn airWrapErrUnionErr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4235    const zcu = cg.pt.zcu;
4236    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4237
4238    const operand = try cg.resolveInst(ty_op.operand);
4239    const err_ty = ty_op.ty.toType();
4240    const pl_ty = err_ty.errorUnionPayload(zcu);
4241
4242    const result = result: {
4243        if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
4244            break :result cg.reuseOperand(ty_op.operand, operand);
4245        }
4246
4247        const err_union = try cg.allocStack(err_ty);
4248        // store error value
4249        try cg.store(err_union, operand, Type.anyerror, @intCast(errUnionErrorOffset(pl_ty, zcu)));
4250
4251        // write 'undefined' to the payload
4252        const payload_ptr = try cg.buildPointerOffset(err_union, @as(u32, @intCast(errUnionPayloadOffset(pl_ty, zcu))), .new);
4253        const len = @as(u32, @intCast(err_ty.errorUnionPayload(zcu).abiSize(zcu)));
4254        try cg.memset(Type.u8, payload_ptr, .{ .imm32 = len }, .{ .imm32 = 0xaa });
4255
4256        break :result err_union;
4257    };
4258    return cg.finishAir(inst, result, &.{ty_op.operand});
4259}
4260
4261fn airIntcast(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4262    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4263
4264    const ty = ty_op.ty.toType();
4265    const operand = try cg.resolveInst(ty_op.operand);
4266    const operand_ty = cg.typeOf(ty_op.operand);
4267    const zcu = cg.pt.zcu;
4268    if (ty.zigTypeTag(zcu) == .vector or operand_ty.zigTypeTag(zcu) == .vector) {
4269        return cg.fail("todo Wasm intcast for vectors", .{});
4270    }
4271    if (ty.abiSize(zcu) > 16 or operand_ty.abiSize(zcu) > 16) {
4272        return cg.fail("todo Wasm intcast for bitsize > 128", .{});
4273    }
4274
4275    const op_bits = toWasmBits(@intCast(operand_ty.bitSize(zcu))).?;
4276    const wanted_bits = toWasmBits(@intCast(ty.bitSize(zcu))).?;
4277    const result = if (op_bits == wanted_bits)
4278        cg.reuseOperand(ty_op.operand, operand)
4279    else
4280        try cg.intcast(operand, operand_ty, ty);
4281
4282    return cg.finishAir(inst, result, &.{ty_op.operand});
4283}
4284
4285/// Upcasts or downcasts an integer based on the given and wanted types,
4286/// and stores the result in a new operand.
4287/// Asserts type's bitsize <= 128
4288/// NOTE: May leave the result on the top of the stack.
4289fn intcast(cg: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerError!WValue {
4290    const zcu = cg.pt.zcu;
4291    const given_bitsize = @as(u16, @intCast(given.bitSize(zcu)));
4292    const wanted_bitsize = @as(u16, @intCast(wanted.bitSize(zcu)));
4293    assert(given_bitsize <= 128);
4294    assert(wanted_bitsize <= 128);
4295
4296    const op_bits = toWasmBits(given_bitsize).?;
4297    const wanted_bits = toWasmBits(wanted_bitsize).?;
4298    if (op_bits == wanted_bits) {
4299        return operand;
4300    }
4301
4302    if (op_bits == 64 and wanted_bits == 32) {
4303        try cg.emitWValue(operand);
4304        try cg.addTag(.i32_wrap_i64);
4305        return .stack;
4306    } else if (op_bits == 32 and wanted_bits == 64) {
4307        try cg.emitWValue(operand);
4308        try cg.addTag(if (wanted.isSignedInt(zcu)) .i64_extend_i32_s else .i64_extend_i32_u);
4309        return .stack;
4310    } else if (wanted_bits == 128) {
4311        // for 128bit integers we store the integer in the virtual stack, rather than a local
4312        const stack_ptr = try cg.allocStack(wanted);
4313        try cg.emitWValue(stack_ptr);
4314
4315        // for 32 bit integers, we first coerce the value into a 64 bit integer before storing it
4316        // meaning less store operations are required.
4317        const lhs = if (op_bits == 32) blk: {
4318            const sign_ty = if (wanted.isSignedInt(zcu)) Type.i64 else Type.u64;
4319            break :blk try (try cg.intcast(operand, given, sign_ty)).toLocal(cg, sign_ty);
4320        } else operand;
4321
4322        // store lsb first
4323        try cg.store(.stack, lhs, Type.u64, 0 + stack_ptr.offset());
4324
4325        // For signed integers we shift lsb by 63 (64bit integer - 1 sign bit) and store remaining value
4326        if (wanted.isSignedInt(zcu)) {
4327            try cg.emitWValue(stack_ptr);
4328            const shr = try cg.binOp(lhs, .{ .imm64 = 63 }, Type.i64, .shr);
4329            try cg.store(.stack, shr, Type.u64, 8 + stack_ptr.offset());
4330        } else {
4331            // Ensure memory of msb is zero'd
4332            try cg.store(stack_ptr, .{ .imm64 = 0 }, Type.u64, 8);
4333        }
4334        return stack_ptr;
4335    } else return cg.load(operand, wanted, 0);
4336}
4337
4338fn airIsNull(cg: *CodeGen, inst: Air.Inst.Index, opcode: std.wasm.Opcode, op_kind: enum { value, ptr }) InnerError!void {
4339    const zcu = cg.pt.zcu;
4340    const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
4341    const operand = try cg.resolveInst(un_op);
4342
4343    const op_ty = cg.typeOf(un_op);
4344    const optional_ty = if (op_kind == .ptr) op_ty.childType(zcu) else op_ty;
4345    const result = try cg.isNull(operand, optional_ty, opcode);
4346    return cg.finishAir(inst, result, &.{un_op});
4347}
4348
4349/// For a given type and operand, checks if it's considered `null`.
4350/// NOTE: Leaves the result on the stack
4351fn isNull(cg: *CodeGen, operand: WValue, optional_ty: Type, opcode: std.wasm.Opcode) InnerError!WValue {
4352    const pt = cg.pt;
4353    const zcu = pt.zcu;
4354    try cg.emitWValue(operand);
4355    const payload_ty = optional_ty.optionalChild(zcu);
4356    if (!optional_ty.optionalReprIsPayload(zcu)) {
4357        // When payload is zero-bits, we can treat operand as a value, rather than
4358        // a pointer to the stack value
4359        if (payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
4360            const offset = std.math.cast(u32, payload_ty.abiSize(zcu)) orelse {
4361                return cg.fail("Optional type {f} too big to fit into stack frame", .{optional_ty.fmt(pt)});
4362            };
4363            try cg.addMemArg(.i32_load8_u, .{ .offset = operand.offset() + offset, .alignment = 1 });
4364        }
4365    } else if (payload_ty.isSlice(zcu)) {
4366        switch (cg.ptr_size) {
4367            .wasm32 => try cg.addMemArg(.i32_load, .{ .offset = operand.offset(), .alignment = 4 }),
4368            .wasm64 => try cg.addMemArg(.i64_load, .{ .offset = operand.offset(), .alignment = 8 }),
4369        }
4370    }
4371
4372    // Compare the null value with '0'
4373    try cg.addImm32(0);
4374    try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode));
4375
4376    return .stack;
4377}
4378
4379fn airOptionalPayload(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4380    const zcu = cg.pt.zcu;
4381    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4382    const opt_ty = cg.typeOf(ty_op.operand);
4383    const payload_ty = cg.typeOfIndex(inst);
4384    if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
4385        return cg.finishAir(inst, .none, &.{ty_op.operand});
4386    }
4387
4388    const result = result: {
4389        const operand = try cg.resolveInst(ty_op.operand);
4390        if (opt_ty.optionalReprIsPayload(zcu)) break :result cg.reuseOperand(ty_op.operand, operand);
4391
4392        if (isByRef(payload_ty, zcu, cg.target)) {
4393            break :result try cg.buildPointerOffset(operand, 0, .new);
4394        }
4395
4396        break :result try cg.load(operand, payload_ty, 0);
4397    };
4398    return cg.finishAir(inst, result, &.{ty_op.operand});
4399}
4400
4401fn airOptionalPayloadPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4402    const zcu = cg.pt.zcu;
4403    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4404    const operand = try cg.resolveInst(ty_op.operand);
4405    const opt_ty = cg.typeOf(ty_op.operand).childType(zcu);
4406
4407    const result = result: {
4408        const payload_ty = opt_ty.optionalChild(zcu);
4409        if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu) or opt_ty.optionalReprIsPayload(zcu)) {
4410            break :result cg.reuseOperand(ty_op.operand, operand);
4411        }
4412
4413        break :result try cg.buildPointerOffset(operand, 0, .new);
4414    };
4415    return cg.finishAir(inst, result, &.{ty_op.operand});
4416}
4417
4418fn airOptionalPayloadPtrSet(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4419    const pt = cg.pt;
4420    const zcu = pt.zcu;
4421    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4422    const operand = try cg.resolveInst(ty_op.operand);
4423    const opt_ty = cg.typeOf(ty_op.operand).childType(zcu);
4424    const payload_ty = opt_ty.optionalChild(zcu);
4425
4426    if (opt_ty.optionalReprIsPayload(zcu)) {
4427        return cg.finishAir(inst, operand, &.{ty_op.operand});
4428    }
4429
4430    const offset = std.math.cast(u32, payload_ty.abiSize(zcu)) orelse {
4431        return cg.fail("Optional type {f} too big to fit into stack frame", .{opt_ty.fmt(pt)});
4432    };
4433
4434    try cg.emitWValue(operand);
4435    try cg.addImm32(1);
4436    try cg.addMemArg(.i32_store8, .{ .offset = operand.offset() + offset, .alignment = 1 });
4437
4438    const result = try cg.buildPointerOffset(operand, 0, .new);
4439    return cg.finishAir(inst, result, &.{ty_op.operand});
4440}
4441
4442fn airWrapOptional(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4443    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4444    const payload_ty = cg.typeOf(ty_op.operand);
4445    const pt = cg.pt;
4446    const zcu = pt.zcu;
4447
4448    const result = result: {
4449        if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
4450            const non_null_bit = try cg.allocStack(Type.u1);
4451            try cg.emitWValue(non_null_bit);
4452            try cg.addImm32(1);
4453            try cg.addMemArg(.i32_store8, .{ .offset = non_null_bit.offset(), .alignment = 1 });
4454            break :result non_null_bit;
4455        }
4456
4457        const operand = try cg.resolveInst(ty_op.operand);
4458        const op_ty = cg.typeOfIndex(inst);
4459        if (op_ty.optionalReprIsPayload(zcu)) {
4460            break :result cg.reuseOperand(ty_op.operand, operand);
4461        }
4462        const offset = std.math.cast(u32, payload_ty.abiSize(zcu)) orelse {
4463            return cg.fail("Optional type {f} too big to fit into stack frame", .{op_ty.fmt(pt)});
4464        };
4465
4466        // Create optional type, set the non-null bit, and store the operand inside the optional type
4467        const result_ptr = try cg.allocStack(op_ty);
4468        try cg.emitWValue(result_ptr);
4469        try cg.addImm32(1);
4470        try cg.addMemArg(.i32_store8, .{ .offset = result_ptr.offset() + offset, .alignment = 1 });
4471
4472        const payload_ptr = try cg.buildPointerOffset(result_ptr, 0, .new);
4473        try cg.store(payload_ptr, operand, payload_ty, 0);
4474        break :result result_ptr;
4475    };
4476
4477    return cg.finishAir(inst, result, &.{ty_op.operand});
4478}
4479
4480fn airSlice(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4481    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
4482    const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data;
4483
4484    const lhs = try cg.resolveInst(bin_op.lhs);
4485    const rhs = try cg.resolveInst(bin_op.rhs);
4486    const slice_ty = cg.typeOfIndex(inst);
4487
4488    const slice = try cg.allocStack(slice_ty);
4489    try cg.store(slice, lhs, Type.usize, 0);
4490    try cg.store(slice, rhs, Type.usize, cg.ptrSize());
4491
4492    return cg.finishAir(inst, slice, &.{ bin_op.lhs, bin_op.rhs });
4493}
4494
4495fn airSliceLen(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4496    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4497
4498    const operand = try cg.resolveInst(ty_op.operand);
4499    return cg.finishAir(inst, try cg.sliceLen(operand), &.{ty_op.operand});
4500}
4501
4502fn airSliceElemVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4503    const zcu = cg.pt.zcu;
4504    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
4505
4506    const slice_ty = cg.typeOf(bin_op.lhs);
4507    const slice = try cg.resolveInst(bin_op.lhs);
4508    const index = try cg.resolveInst(bin_op.rhs);
4509    const elem_ty = slice_ty.childType(zcu);
4510    const elem_size = elem_ty.abiSize(zcu);
4511
4512    // load pointer onto stack
4513    _ = try cg.load(slice, Type.usize, 0);
4514
4515    // calculate index into slice
4516    try cg.emitWValue(index);
4517    try cg.addImm32(@intCast(elem_size));
4518    try cg.addTag(.i32_mul);
4519    try cg.addTag(.i32_add);
4520
4521    const elem_result = if (isByRef(elem_ty, zcu, cg.target))
4522        .stack
4523    else
4524        try cg.load(.stack, elem_ty, 0);
4525
4526    return cg.finishAir(inst, elem_result, &.{ bin_op.lhs, bin_op.rhs });
4527}
4528
4529fn airSliceElemPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4530    const zcu = cg.pt.zcu;
4531    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
4532    const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data;
4533
4534    const elem_ty = ty_pl.ty.toType().childType(zcu);
4535    const elem_size = elem_ty.abiSize(zcu);
4536
4537    const slice = try cg.resolveInst(bin_op.lhs);
4538    const index = try cg.resolveInst(bin_op.rhs);
4539
4540    _ = try cg.load(slice, Type.usize, 0);
4541
4542    // calculate index into slice
4543    try cg.emitWValue(index);
4544    try cg.addImm32(@intCast(elem_size));
4545    try cg.addTag(.i32_mul);
4546    try cg.addTag(.i32_add);
4547
4548    return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs });
4549}
4550
4551fn airSlicePtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4552    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4553    const operand = try cg.resolveInst(ty_op.operand);
4554    return cg.finishAir(inst, try cg.slicePtr(operand), &.{ty_op.operand});
4555}
4556
4557fn slicePtr(cg: *CodeGen, operand: WValue) InnerError!WValue {
4558    const ptr = try cg.load(operand, Type.usize, 0);
4559    return ptr.toLocal(cg, Type.usize);
4560}
4561
4562fn sliceLen(cg: *CodeGen, operand: WValue) InnerError!WValue {
4563    const len = try cg.load(operand, Type.usize, cg.ptrSize());
4564    return len.toLocal(cg, Type.usize);
4565}
4566
4567fn airTrunc(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4568    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4569
4570    const operand = try cg.resolveInst(ty_op.operand);
4571    const wanted_ty: Type = ty_op.ty.toType();
4572    const op_ty = cg.typeOf(ty_op.operand);
4573    const zcu = cg.pt.zcu;
4574
4575    if (wanted_ty.zigTypeTag(zcu) == .vector or op_ty.zigTypeTag(zcu) == .vector) {
4576        return cg.fail("TODO: trunc for vectors", .{});
4577    }
4578
4579    const result = if (op_ty.bitSize(zcu) == wanted_ty.bitSize(zcu))
4580        cg.reuseOperand(ty_op.operand, operand)
4581    else
4582        try cg.trunc(operand, wanted_ty, op_ty);
4583
4584    return cg.finishAir(inst, result, &.{ty_op.operand});
4585}
4586
4587/// Truncates a given operand to a given type, discarding any overflown bits.
4588/// NOTE: Resulting value is left on the stack.
4589fn trunc(cg: *CodeGen, operand: WValue, wanted_ty: Type, given_ty: Type) InnerError!WValue {
4590    const zcu = cg.pt.zcu;
4591    const given_bits = @as(u16, @intCast(given_ty.bitSize(zcu)));
4592    if (toWasmBits(given_bits) == null) {
4593        return cg.fail("TODO: Implement wasm integer truncation for integer bitsize: {d}", .{given_bits});
4594    }
4595
4596    var result = try cg.intcast(operand, given_ty, wanted_ty);
4597    const wanted_bits = @as(u16, @intCast(wanted_ty.bitSize(zcu)));
4598    const wasm_bits = toWasmBits(wanted_bits).?;
4599    if (wasm_bits != wanted_bits) {
4600        result = try cg.wrapOperand(result, wanted_ty);
4601    }
4602    return result;
4603}
4604
4605fn airArrayToSlice(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4606    const zcu = cg.pt.zcu;
4607    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4608
4609    const operand = try cg.resolveInst(ty_op.operand);
4610    const array_ty = cg.typeOf(ty_op.operand).childType(zcu);
4611    const slice_ty = ty_op.ty.toType();
4612
4613    // create a slice on the stack
4614    const slice_local = try cg.allocStack(slice_ty);
4615
4616    // store the array ptr in the slice
4617    if (array_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
4618        try cg.store(slice_local, operand, Type.usize, 0);
4619    }
4620
4621    // store the length of the array in the slice
4622    const array_len: u32 = @intCast(array_ty.arrayLen(zcu));
4623    try cg.store(slice_local, .{ .imm32 = array_len }, Type.usize, cg.ptrSize());
4624
4625    return cg.finishAir(inst, slice_local, &.{ty_op.operand});
4626}
4627
4628fn airPtrElemVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4629    const zcu = cg.pt.zcu;
4630    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
4631
4632    const ptr_ty = cg.typeOf(bin_op.lhs);
4633    const ptr = try cg.resolveInst(bin_op.lhs);
4634    const index = try cg.resolveInst(bin_op.rhs);
4635    const elem_ty = ptr_ty.childType(zcu);
4636    const elem_size = elem_ty.abiSize(zcu);
4637
4638    // load pointer onto the stack
4639    if (ptr_ty.isSlice(zcu)) {
4640        _ = try cg.load(ptr, Type.usize, 0);
4641    } else {
4642        try cg.lowerToStack(ptr);
4643    }
4644
4645    // calculate index into slice
4646    try cg.emitWValue(index);
4647    try cg.addImm32(@intCast(elem_size));
4648    try cg.addTag(.i32_mul);
4649    try cg.addTag(.i32_add);
4650
4651    const elem_result = if (isByRef(elem_ty, zcu, cg.target))
4652        .stack
4653    else
4654        try cg.load(.stack, elem_ty, 0);
4655
4656    return cg.finishAir(inst, elem_result, &.{ bin_op.lhs, bin_op.rhs });
4657}
4658
4659fn airPtrElemPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4660    const zcu = cg.pt.zcu;
4661    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
4662    const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data;
4663
4664    const ptr_ty = cg.typeOf(bin_op.lhs);
4665    const elem_ty = ty_pl.ty.toType().childType(zcu);
4666    const elem_size = elem_ty.abiSize(zcu);
4667
4668    const ptr = try cg.resolveInst(bin_op.lhs);
4669    const index = try cg.resolveInst(bin_op.rhs);
4670
4671    // load pointer onto the stack
4672    if (ptr_ty.isSlice(zcu)) {
4673        _ = try cg.load(ptr, Type.usize, 0);
4674    } else {
4675        try cg.lowerToStack(ptr);
4676    }
4677
4678    // calculate index into ptr
4679    try cg.emitWValue(index);
4680    try cg.addImm32(@intCast(elem_size));
4681    try cg.addTag(.i32_mul);
4682    try cg.addTag(.i32_add);
4683
4684    return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs });
4685}
4686
4687fn airPtrBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void {
4688    const zcu = cg.pt.zcu;
4689    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
4690    const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data;
4691
4692    const ptr = try cg.resolveInst(bin_op.lhs);
4693    const offset = try cg.resolveInst(bin_op.rhs);
4694    const ptr_ty = cg.typeOf(bin_op.lhs);
4695    const pointee_ty = switch (ptr_ty.ptrSize(zcu)) {
4696        .one => ptr_ty.childType(zcu).childType(zcu), // ptr to array, so get array element type
4697        else => ptr_ty.childType(zcu),
4698    };
4699
4700    const valtype = typeToValtype(Type.usize, zcu, cg.target);
4701    const mul_opcode = buildOpcode(.{ .valtype1 = valtype, .op = .mul });
4702    const bin_opcode = buildOpcode(.{ .valtype1 = valtype, .op = op });
4703
4704    try cg.lowerToStack(ptr);
4705    try cg.emitWValue(offset);
4706    try cg.addImm32(@intCast(pointee_ty.abiSize(zcu)));
4707    try cg.addTag(Mir.Inst.Tag.fromOpcode(mul_opcode));
4708    try cg.addTag(Mir.Inst.Tag.fromOpcode(bin_opcode));
4709
4710    return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs });
4711}
4712
4713fn airMemset(cg: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void {
4714    const zcu = cg.pt.zcu;
4715    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
4716
4717    const ptr = try cg.resolveInst(bin_op.lhs);
4718    const ptr_ty = cg.typeOf(bin_op.lhs);
4719    const value = try cg.resolveInst(bin_op.rhs);
4720    const len = switch (ptr_ty.ptrSize(zcu)) {
4721        .slice => try cg.sliceLen(ptr),
4722        .one => @as(WValue, .{ .imm32 = @as(u32, @intCast(ptr_ty.childType(zcu).arrayLen(zcu))) }),
4723        .c, .many => unreachable,
4724    };
4725
4726    const elem_ty = if (ptr_ty.ptrSize(zcu) == .one)
4727        ptr_ty.childType(zcu).childType(zcu)
4728    else
4729        ptr_ty.childType(zcu);
4730
4731    if (!safety and bin_op.rhs == .undef) {
4732        return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs });
4733    }
4734
4735    const dst_ptr = try cg.sliceOrArrayPtr(ptr, ptr_ty);
4736    try cg.memset(elem_ty, dst_ptr, len, value);
4737
4738    return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs });
4739}
4740
4741/// Sets a region of memory at `ptr` to the value of `value`
4742/// When the user has enabled the bulk_memory feature, we lower
4743/// this to wasm's memset instruction. When the feature is not present,
4744/// we implement it manually.
4745fn memset(cg: *CodeGen, elem_ty: Type, ptr: WValue, len: WValue, value: WValue) InnerError!void {
4746    const zcu = cg.pt.zcu;
4747    const abi_size = @as(u32, @intCast(elem_ty.abiSize(zcu)));
4748
4749    // When bulk_memory is enabled, we lower it to wasm's memset instruction.
4750    // If not, we lower it ourselves.
4751    if (cg.target.cpu.has(.wasm, .bulk_memory) and abi_size == 1) {
4752        const len0_ok = cg.target.cpu.has(.wasm, .nontrapping_bulk_memory_len0);
4753
4754        if (!len0_ok) {
4755            try cg.startBlock(.block, .empty);
4756
4757            // Even if `len` is zero, the spec requires an implementation to trap if `ptr + len` is
4758            // out of memory bounds. This can easily happen in Zig in a case such as:
4759            //
4760            // const ptr: [*]u8 = undefined;
4761            // var len: usize = runtime_zero();
4762            // @memset(ptr[0..len], 42);
4763            //
4764            // So explicitly avoid using `memory.fill` in the `len == 0` case. Lovely design.
4765            try cg.emitWValue(len);
4766            try cg.addTag(.i32_eqz);
4767            try cg.addLabel(.br_if, 0);
4768        }
4769
4770        try cg.lowerToStack(ptr);
4771        try cg.emitWValue(value);
4772        try cg.emitWValue(len);
4773        try cg.addExtended(.memory_fill);
4774
4775        if (!len0_ok) {
4776            try cg.endBlock();
4777        }
4778
4779        return;
4780    }
4781
4782    const final_len: WValue = switch (len) {
4783        .imm32 => |val| .{ .imm32 = val * abi_size },
4784        .imm64 => |val| .{ .imm64 = val * abi_size },
4785        else => if (abi_size != 1) blk: {
4786            const new_len = try cg.ensureAllocLocal(Type.usize);
4787            try cg.emitWValue(len);
4788            switch (cg.ptr_size) {
4789                .wasm32 => {
4790                    try cg.emitWValue(.{ .imm32 = abi_size });
4791                    try cg.addTag(.i32_mul);
4792                },
4793                .wasm64 => {
4794                    try cg.emitWValue(.{ .imm64 = abi_size });
4795                    try cg.addTag(.i64_mul);
4796                },
4797            }
4798            try cg.addLocal(.local_set, new_len.local.value);
4799            break :blk new_len;
4800        } else len,
4801    };
4802
4803    var end_ptr = try cg.allocLocal(Type.usize);
4804    defer end_ptr.free(cg);
4805    var new_ptr = try cg.buildPointerOffset(ptr, 0, .new);
4806    defer new_ptr.free(cg);
4807
4808    // get the loop conditional: if current pointer address equals final pointer's address
4809    try cg.lowerToStack(ptr);
4810    try cg.emitWValue(final_len);
4811    switch (cg.ptr_size) {
4812        .wasm32 => try cg.addTag(.i32_add),
4813        .wasm64 => try cg.addTag(.i64_add),
4814    }
4815    try cg.addLocal(.local_set, end_ptr.local.value);
4816
4817    // outer block to jump to when loop is done
4818    try cg.startBlock(.block, .empty);
4819    try cg.startBlock(.loop, .empty);
4820
4821    // check for condition for loop end
4822    try cg.emitWValue(new_ptr);
4823    try cg.emitWValue(end_ptr);
4824    switch (cg.ptr_size) {
4825        .wasm32 => try cg.addTag(.i32_eq),
4826        .wasm64 => try cg.addTag(.i64_eq),
4827    }
4828    try cg.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
4829
4830    // store the value at the current position of the pointer
4831    try cg.store(new_ptr, value, elem_ty, 0);
4832
4833    // move the pointer to the next element
4834    try cg.emitWValue(new_ptr);
4835    switch (cg.ptr_size) {
4836        .wasm32 => {
4837            try cg.emitWValue(.{ .imm32 = abi_size });
4838            try cg.addTag(.i32_add);
4839        },
4840        .wasm64 => {
4841            try cg.emitWValue(.{ .imm64 = abi_size });
4842            try cg.addTag(.i64_add);
4843        },
4844    }
4845    try cg.addLocal(.local_set, new_ptr.local.value);
4846
4847    // end of loop
4848    try cg.addLabel(.br, 0); // jump to start of loop
4849    try cg.endBlock();
4850    try cg.endBlock();
4851}
4852
4853fn airArrayElemVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4854    const zcu = cg.pt.zcu;
4855    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
4856
4857    const array_ty = cg.typeOf(bin_op.lhs);
4858    const array = try cg.resolveInst(bin_op.lhs);
4859    const index = try cg.resolveInst(bin_op.rhs);
4860    const elem_ty = array_ty.childType(zcu);
4861    const elem_size = elem_ty.abiSize(zcu);
4862
4863    if (isByRef(array_ty, zcu, cg.target)) {
4864        try cg.lowerToStack(array);
4865        try cg.emitWValue(index);
4866        try cg.addImm32(@intCast(elem_size));
4867        try cg.addTag(.i32_mul);
4868        try cg.addTag(.i32_add);
4869    } else {
4870        assert(array_ty.zigTypeTag(zcu) == .vector);
4871
4872        switch (index) {
4873            inline .imm32, .imm64 => |lane| {
4874                const opcode: std.wasm.SimdOpcode = switch (elem_ty.bitSize(zcu)) {
4875                    8 => if (elem_ty.isSignedInt(zcu)) .i8x16_extract_lane_s else .i8x16_extract_lane_u,
4876                    16 => if (elem_ty.isSignedInt(zcu)) .i16x8_extract_lane_s else .i16x8_extract_lane_u,
4877                    32 => if (elem_ty.isInt(zcu)) .i32x4_extract_lane else .f32x4_extract_lane,
4878                    64 => if (elem_ty.isInt(zcu)) .i64x2_extract_lane else .f64x2_extract_lane,
4879                    else => unreachable,
4880                };
4881
4882                var operands = [_]u32{ @intFromEnum(opcode), @as(u8, @intCast(lane)) };
4883
4884                try cg.emitWValue(array);
4885
4886                const extra_index: u32 = @intCast(cg.mir_extra.items.len);
4887                try cg.mir_extra.appendSlice(cg.gpa, &operands);
4888                try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } });
4889
4890                return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs });
4891            },
4892            else => {
4893                const stack_vec = try cg.allocStack(array_ty);
4894                try cg.store(stack_vec, array, array_ty, 0);
4895
4896                // Is a non-unrolled vector (v128)
4897                try cg.lowerToStack(stack_vec);
4898                try cg.emitWValue(index);
4899                try cg.addImm32(@intCast(elem_size));
4900                try cg.addTag(.i32_mul);
4901                try cg.addTag(.i32_add);
4902            },
4903        }
4904    }
4905
4906    const elem_result = if (isByRef(elem_ty, zcu, cg.target))
4907        .stack
4908    else
4909        try cg.load(.stack, elem_ty, 0);
4910
4911    return cg.finishAir(inst, elem_result, &.{ bin_op.lhs, bin_op.rhs });
4912}
4913
4914fn airIntFromFloat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4915    const zcu = cg.pt.zcu;
4916    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4917
4918    const operand = try cg.resolveInst(ty_op.operand);
4919    const op_ty = cg.typeOf(ty_op.operand);
4920    const op_bits = op_ty.floatBits(cg.target);
4921
4922    const dest_ty = cg.typeOfIndex(inst);
4923    const dest_info = dest_ty.intInfo(zcu);
4924
4925    if (dest_info.bits > 128) {
4926        return cg.fail("TODO: intFromFloat for integers/floats with bitsize {}", .{dest_info.bits});
4927    }
4928
4929    if ((op_bits != 32 and op_bits != 64) or dest_info.bits > 64) {
4930        const dest_bitsize = if (dest_info.bits <= 32) 32 else std.math.ceilPowerOfTwoAssert(u16, dest_info.bits);
4931
4932        const intrinsic = switch (dest_info.signedness) {
4933            inline .signed, .unsigned => |ct_s| switch (op_bits) {
4934                inline 16, 32, 64, 80, 128 => |ct_op_bits| switch (dest_bitsize) {
4935                    inline 32, 64, 128 => |ct_dest_bits| @field(
4936                        Mir.Intrinsic,
4937                        "__fix" ++ switch (ct_s) {
4938                            .signed => "",
4939                            .unsigned => "uns",
4940                        } ++
4941                            compilerRtFloatAbbrev(ct_op_bits) ++ "f" ++
4942                            compilerRtIntAbbrev(ct_dest_bits) ++ "i",
4943                    ),
4944                    else => unreachable,
4945                },
4946                else => unreachable,
4947            },
4948        };
4949        const result = try cg.callIntrinsic(intrinsic, &.{op_ty.ip_index}, dest_ty, &.{operand});
4950        return cg.finishAir(inst, result, &.{ty_op.operand});
4951    }
4952
4953    try cg.emitWValue(operand);
4954    const op = buildOpcode(.{
4955        .op = .trunc,
4956        .valtype1 = typeToValtype(dest_ty, zcu, cg.target),
4957        .valtype2 = typeToValtype(op_ty, zcu, cg.target),
4958        .signedness = dest_info.signedness,
4959    });
4960    try cg.addTag(Mir.Inst.Tag.fromOpcode(op));
4961    const result = try cg.wrapOperand(.stack, dest_ty);
4962    return cg.finishAir(inst, result, &.{ty_op.operand});
4963}
4964
4965fn airFloatFromInt(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
4966    const zcu = cg.pt.zcu;
4967    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
4968
4969    const operand = try cg.resolveInst(ty_op.operand);
4970    const op_ty = cg.typeOf(ty_op.operand);
4971    const op_info = op_ty.intInfo(zcu);
4972
4973    const dest_ty = cg.typeOfIndex(inst);
4974    const dest_bits = dest_ty.floatBits(cg.target);
4975
4976    if (op_info.bits > 128) {
4977        return cg.fail("TODO: floatFromInt for integers/floats with bitsize {d} bits", .{op_info.bits});
4978    }
4979
4980    if (op_info.bits > 64 or (dest_bits > 64 or dest_bits < 32)) {
4981        const op_bitsize = if (op_info.bits <= 32) 32 else std.math.ceilPowerOfTwoAssert(u16, op_info.bits);
4982
4983        const intrinsic = switch (op_info.signedness) {
4984            inline .signed, .unsigned => |ct_s| switch (op_bitsize) {
4985                inline 32, 64, 128 => |ct_int_bits| switch (dest_bits) {
4986                    inline 16, 32, 64, 80, 128 => |ct_float_bits| @field(
4987                        Mir.Intrinsic,
4988                        "__float" ++ switch (ct_s) {
4989                            .signed => "",
4990                            .unsigned => "un",
4991                        } ++
4992                            compilerRtIntAbbrev(ct_int_bits) ++ "i" ++
4993                            compilerRtFloatAbbrev(ct_float_bits) ++ "f",
4994                    ),
4995                    else => unreachable,
4996                },
4997                else => unreachable,
4998            },
4999        };
5000
5001        const result = try cg.callIntrinsic(intrinsic, &.{op_ty.ip_index}, dest_ty, &.{operand});
5002        return cg.finishAir(inst, result, &.{ty_op.operand});
5003    }
5004
5005    try cg.emitWValue(operand);
5006    const op = buildOpcode(.{
5007        .op = .convert,
5008        .valtype1 = typeToValtype(dest_ty, zcu, cg.target),
5009        .valtype2 = typeToValtype(op_ty, zcu, cg.target),
5010        .signedness = op_info.signedness,
5011    });
5012    try cg.addTag(Mir.Inst.Tag.fromOpcode(op));
5013
5014    return cg.finishAir(inst, .stack, &.{ty_op.operand});
5015}
5016
5017fn airSplat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5018    const zcu = cg.pt.zcu;
5019    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
5020    const operand = try cg.resolveInst(ty_op.operand);
5021    const ty = cg.typeOfIndex(inst);
5022    const elem_ty = ty.childType(zcu);
5023
5024    if (determineSimdStoreStrategy(ty, zcu, cg.target) == .direct) blk: {
5025        switch (operand) {
5026            // when the operand lives in the linear memory section, we can directly
5027            // load and splat the value at once. Meaning we do not first have to load
5028            // the scalar value onto the stack.
5029            .stack_offset, .nav_ref, .uav_ref => {
5030                const opcode = switch (elem_ty.bitSize(zcu)) {
5031                    8 => @intFromEnum(std.wasm.SimdOpcode.v128_load8_splat),
5032                    16 => @intFromEnum(std.wasm.SimdOpcode.v128_load16_splat),
5033                    32 => @intFromEnum(std.wasm.SimdOpcode.v128_load32_splat),
5034                    64 => @intFromEnum(std.wasm.SimdOpcode.v128_load64_splat),
5035                    else => break :blk, // Cannot make use of simd-instructions
5036                };
5037                try cg.emitWValue(operand);
5038                const extra_index: u32 = @intCast(cg.mir_extra.items.len);
5039                // stores as := opcode, offset, alignment (opcode::memarg)
5040                try cg.mir_extra.appendSlice(cg.gpa, &[_]u32{
5041                    opcode,
5042                    operand.offset(),
5043                    @intCast(elem_ty.abiAlignment(zcu).toByteUnits().?),
5044                });
5045                try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } });
5046                return cg.finishAir(inst, .stack, &.{ty_op.operand});
5047            },
5048            .local => {
5049                const opcode = switch (elem_ty.bitSize(zcu)) {
5050                    8 => @intFromEnum(std.wasm.SimdOpcode.i8x16_splat),
5051                    16 => @intFromEnum(std.wasm.SimdOpcode.i16x8_splat),
5052                    32 => if (elem_ty.isInt(zcu)) @intFromEnum(std.wasm.SimdOpcode.i32x4_splat) else @intFromEnum(std.wasm.SimdOpcode.f32x4_splat),
5053                    64 => if (elem_ty.isInt(zcu)) @intFromEnum(std.wasm.SimdOpcode.i64x2_splat) else @intFromEnum(std.wasm.SimdOpcode.f64x2_splat),
5054                    else => break :blk, // Cannot make use of simd-instructions
5055                };
5056                try cg.emitWValue(operand);
5057                const extra_index: u32 = @intCast(cg.mir_extra.items.len);
5058                try cg.mir_extra.append(cg.gpa, opcode);
5059                try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } });
5060                return cg.finishAir(inst, .stack, &.{ty_op.operand});
5061            },
5062            else => unreachable,
5063        }
5064    }
5065    const elem_size = elem_ty.bitSize(zcu);
5066    const vector_len = @as(usize, @intCast(ty.vectorLen(zcu)));
5067    if ((!std.math.isPowerOfTwo(elem_size) or elem_size % 8 != 0) and vector_len > 1) {
5068        return cg.fail("TODO: WebAssembly `@splat` for arbitrary element bitsize {d}", .{elem_size});
5069    }
5070
5071    const result = try cg.allocStack(ty);
5072    const elem_byte_size = @as(u32, @intCast(elem_ty.abiSize(zcu)));
5073    var index: usize = 0;
5074    var offset: u32 = 0;
5075    while (index < vector_len) : (index += 1) {
5076        try cg.store(result, operand, elem_ty, offset);
5077        offset += elem_byte_size;
5078    }
5079
5080    return cg.finishAir(inst, result, &.{ty_op.operand});
5081}
5082
5083fn airSelect(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5084    const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
5085    const operand = try cg.resolveInst(pl_op.operand);
5086
5087    _ = operand;
5088    return cg.fail("TODO: Implement wasm airSelect", .{});
5089}
5090
5091fn airShuffleOne(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5092    const pt = cg.pt;
5093    const zcu = pt.zcu;
5094
5095    const unwrapped = cg.air.unwrapShuffleOne(zcu, inst);
5096    const result_ty = unwrapped.result_ty;
5097    const mask = unwrapped.mask;
5098    const operand = try cg.resolveInst(unwrapped.operand);
5099
5100    const elem_ty = result_ty.childType(zcu);
5101    const elem_size = elem_ty.abiSize(zcu);
5102
5103    // TODO: this function could have an `i8x16_shuffle` fast path like `airShuffleTwo` if we were
5104    // to lower the comptime-known operands to a non-by-ref vector value.
5105
5106    // TODO: this is incorrect if either operand or the result is *not* by-ref, which is possible.
5107    // I tried to fix it, but I couldn't make much sense of how this backend handles memory.
5108    if (!isByRef(result_ty, zcu, cg.target) or
5109        !isByRef(cg.typeOf(unwrapped.operand), zcu, cg.target)) return cg.fail("TODO: handle mixed by-ref shuffle", .{});
5110
5111    const dest_alloc = try cg.allocStack(result_ty);
5112    for (mask, 0..) |mask_elem, out_idx| {
5113        try cg.emitWValue(dest_alloc);
5114        const elem_val = switch (mask_elem.unwrap()) {
5115            .elem => |idx| try cg.load(operand, elem_ty, @intCast(elem_size * idx)),
5116            .value => |val| try cg.lowerConstant(.fromInterned(val), elem_ty),
5117        };
5118        try cg.store(.stack, elem_val, elem_ty, @intCast(dest_alloc.offset() + elem_size * out_idx));
5119    }
5120    return cg.finishAir(inst, dest_alloc, &.{unwrapped.operand});
5121}
5122
5123fn airShuffleTwo(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5124    const pt = cg.pt;
5125    const zcu = pt.zcu;
5126
5127    const unwrapped = cg.air.unwrapShuffleTwo(zcu, inst);
5128    const result_ty = unwrapped.result_ty;
5129    const mask = unwrapped.mask;
5130    const operand_a = try cg.resolveInst(unwrapped.operand_a);
5131    const operand_b = try cg.resolveInst(unwrapped.operand_b);
5132
5133    const a_ty = cg.typeOf(unwrapped.operand_a);
5134    const b_ty = cg.typeOf(unwrapped.operand_b);
5135    const elem_ty = result_ty.childType(zcu);
5136    const elem_size = elem_ty.abiSize(zcu);
5137
5138    // WASM has `i8x16_shuffle`, which we can apply if the element type bit size is a multiple of 8
5139    // and the input and output vectors have a bit size of 128 (and are hence not by-ref). Otherwise,
5140    // we fall back to a naive loop lowering.
5141    if (!isByRef(a_ty, zcu, cg.target) and
5142        !isByRef(b_ty, zcu, cg.target) and
5143        !isByRef(result_ty, zcu, cg.target) and
5144        elem_ty.bitSize(zcu) % 8 == 0)
5145    {
5146        var lane_map: [16]u8 align(4) = undefined;
5147        const lanes_per_elem: usize = @intCast(elem_ty.bitSize(zcu) / 8);
5148        for (mask, 0..) |mask_elem, out_idx| {
5149            const out_first_lane = out_idx * lanes_per_elem;
5150            const in_first_lane = switch (mask_elem.unwrap()) {
5151                .a_elem => |i| i * lanes_per_elem,
5152                .b_elem => |i| i * lanes_per_elem + 16,
5153                .undef => 0, // doesn't matter
5154            };
5155            for (lane_map[out_first_lane..][0..lanes_per_elem], in_first_lane..) |*out, in| {
5156                out.* = @intCast(in);
5157            }
5158        }
5159        try cg.emitWValue(operand_a);
5160        try cg.emitWValue(operand_b);
5161        const extra_index: u32 = @intCast(cg.mir_extra.items.len);
5162        try cg.mir_extra.appendSlice(cg.gpa, &.{
5163            @intFromEnum(std.wasm.SimdOpcode.i8x16_shuffle),
5164            @bitCast(lane_map[0..4].*),
5165            @bitCast(lane_map[4..8].*),
5166            @bitCast(lane_map[8..12].*),
5167            @bitCast(lane_map[12..].*),
5168        });
5169        try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } });
5170        return cg.finishAir(inst, .stack, &.{ unwrapped.operand_a, unwrapped.operand_b });
5171    }
5172
5173    // TODO: this is incorrect if either operand or the result is *not* by-ref, which is possible.
5174    // I tried to fix it, but I couldn't make much sense of how this backend handles memory.
5175    if (!isByRef(result_ty, zcu, cg.target) or
5176        !isByRef(a_ty, zcu, cg.target) or
5177        !isByRef(b_ty, zcu, cg.target)) return cg.fail("TODO: handle mixed by-ref shuffle", .{});
5178
5179    const dest_alloc = try cg.allocStack(result_ty);
5180    for (mask, 0..) |mask_elem, out_idx| {
5181        try cg.emitWValue(dest_alloc);
5182        const elem_val = switch (mask_elem.unwrap()) {
5183            .a_elem => |idx| try cg.load(operand_a, elem_ty, @intCast(elem_size * idx)),
5184            .b_elem => |idx| try cg.load(operand_b, elem_ty, @intCast(elem_size * idx)),
5185            .undef => try cg.emitUndefined(elem_ty),
5186        };
5187        try cg.store(.stack, elem_val, elem_ty, @intCast(dest_alloc.offset() + elem_size * out_idx));
5188    }
5189    return cg.finishAir(inst, dest_alloc, &.{ unwrapped.operand_a, unwrapped.operand_b });
5190}
5191
5192fn airReduce(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5193    const reduce = cg.air.instructions.items(.data)[@intFromEnum(inst)].reduce;
5194    const operand = try cg.resolveInst(reduce.operand);
5195
5196    _ = operand;
5197    return cg.fail("TODO: Implement wasm airReduce", .{});
5198}
5199
5200fn airAggregateInit(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5201    const pt = cg.pt;
5202    const zcu = pt.zcu;
5203    const ip = &zcu.intern_pool;
5204    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
5205    const result_ty = cg.typeOfIndex(inst);
5206    const len = @as(usize, @intCast(result_ty.arrayLen(zcu)));
5207    const elements: []const Air.Inst.Ref = @ptrCast(cg.air.extra.items[ty_pl.payload..][0..len]);
5208
5209    const result: WValue = result_value: {
5210        switch (result_ty.zigTypeTag(zcu)) {
5211            .array => {
5212                const result = try cg.allocStack(result_ty);
5213                const elem_ty = result_ty.childType(zcu);
5214                const elem_size = @as(u32, @intCast(elem_ty.abiSize(zcu)));
5215                const sentinel = result_ty.sentinel(zcu);
5216
5217                // When the element type is by reference, we must copy the entire
5218                // value. It is therefore safer to move the offset pointer and store
5219                // each value individually, instead of using store offsets.
5220                if (isByRef(elem_ty, zcu, cg.target)) {
5221                    // copy stack pointer into a temporary local, which is
5222                    // moved for each element to store each value in the right position.
5223                    const offset = try cg.buildPointerOffset(result, 0, .new);
5224                    for (elements, 0..) |elem, elem_index| {
5225                        const elem_val = try cg.resolveInst(elem);
5226                        try cg.store(offset, elem_val, elem_ty, 0);
5227
5228                        if (elem_index < elements.len - 1 or sentinel != null) {
5229                            _ = try cg.buildPointerOffset(offset, elem_size, .modify);
5230                        }
5231                    }
5232                    if (sentinel) |s| {
5233                        const val = try cg.resolveValue(s);
5234                        try cg.store(offset, val, elem_ty, 0);
5235                    }
5236                } else {
5237                    var offset: u32 = 0;
5238                    for (elements) |elem| {
5239                        const elem_val = try cg.resolveInst(elem);
5240                        try cg.store(result, elem_val, elem_ty, offset);
5241                        offset += elem_size;
5242                    }
5243                    if (sentinel) |s| {
5244                        const val = try cg.resolveValue(s);
5245                        try cg.store(result, val, elem_ty, offset);
5246                    }
5247                }
5248                break :result_value result;
5249            },
5250            .@"struct" => switch (result_ty.containerLayout(zcu)) {
5251                .@"packed" => {
5252                    if (isByRef(result_ty, zcu, cg.target)) {
5253                        return cg.fail("TODO: airAggregateInit for packed structs larger than 64 bits", .{});
5254                    }
5255                    const packed_struct = zcu.typeToPackedStruct(result_ty).?;
5256                    const field_types = packed_struct.field_types;
5257                    const backing_type = Type.fromInterned(packed_struct.backingIntTypeUnordered(ip));
5258
5259                    // ensure the result is zero'd
5260                    const result = try cg.allocLocal(backing_type);
5261                    if (backing_type.bitSize(zcu) <= 32)
5262                        try cg.addImm32(0)
5263                    else
5264                        try cg.addImm64(0);
5265                    try cg.addLocal(.local_set, result.local.value);
5266
5267                    var current_bit: u16 = 0;
5268                    for (elements, 0..) |elem, elem_index| {
5269                        const field_ty = Type.fromInterned(field_types.get(ip)[elem_index]);
5270                        if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
5271
5272                        const shift_val: WValue = if (backing_type.bitSize(zcu) <= 32)
5273                            .{ .imm32 = current_bit }
5274                        else
5275                            .{ .imm64 = current_bit };
5276
5277                        const value = try cg.resolveInst(elem);
5278                        const value_bit_size: u16 = @intCast(field_ty.bitSize(zcu));
5279                        const int_ty = try pt.intType(.unsigned, value_bit_size);
5280
5281                        // load our current result on stack so we can perform all transformations
5282                        // using only stack values. Saving the cost of loads and stores.
5283                        try cg.emitWValue(result);
5284                        const bitcasted = try cg.bitcast(int_ty, field_ty, value);
5285                        const extended_val = try cg.intcast(bitcasted, int_ty, backing_type);
5286                        // no need to shift any values when the current offset is 0
5287                        const shifted = if (current_bit != 0) shifted: {
5288                            break :shifted try cg.binOp(extended_val, shift_val, backing_type, .shl);
5289                        } else extended_val;
5290                        // we ignore the result as we keep it on the stack to assign it directly to `result`
5291                        _ = try cg.binOp(.stack, shifted, backing_type, .@"or");
5292                        try cg.addLocal(.local_set, result.local.value);
5293                        current_bit += value_bit_size;
5294                    }
5295                    break :result_value result;
5296                },
5297                else => {
5298                    const result = try cg.allocStack(result_ty);
5299                    const offset = try cg.buildPointerOffset(result, 0, .new); // pointer to offset
5300                    var prev_field_offset: u64 = 0;
5301                    for (elements, 0..) |elem, elem_index| {
5302                        if (try result_ty.structFieldValueComptime(pt, elem_index) != null) continue;
5303
5304                        const elem_ty = result_ty.fieldType(elem_index, zcu);
5305                        const field_offset = result_ty.structFieldOffset(elem_index, zcu);
5306                        _ = try cg.buildPointerOffset(offset, @intCast(field_offset - prev_field_offset), .modify);
5307                        prev_field_offset = field_offset;
5308
5309                        const value = try cg.resolveInst(elem);
5310                        try cg.store(offset, value, elem_ty, 0);
5311                    }
5312
5313                    break :result_value result;
5314                },
5315            },
5316            .vector => return cg.fail("TODO: Wasm backend: implement airAggregateInit for vectors", .{}),
5317            else => unreachable,
5318        }
5319    };
5320
5321    if (elements.len <= Air.Liveness.bpi - 1) {
5322        var buf = [1]Air.Inst.Ref{.none} ** (Air.Liveness.bpi - 1);
5323        @memcpy(buf[0..elements.len], elements);
5324        return cg.finishAir(inst, result, &buf);
5325    }
5326    var bt = try cg.iterateBigTomb(inst, elements.len);
5327    for (elements) |arg| bt.feed(arg);
5328    return bt.finishAir(result);
5329}
5330
5331fn airUnionInit(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5332    const pt = cg.pt;
5333    const zcu = pt.zcu;
5334    const ip = &zcu.intern_pool;
5335    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
5336    const extra = cg.air.extraData(Air.UnionInit, ty_pl.payload).data;
5337
5338    const result = result: {
5339        const union_ty = cg.typeOfIndex(inst);
5340        const layout = union_ty.unionGetLayout(zcu);
5341        const union_obj = zcu.typeToUnion(union_ty).?;
5342        const field_ty = Type.fromInterned(union_obj.field_types.get(ip)[extra.field_index]);
5343        const field_name = union_obj.loadTagType(ip).names.get(ip)[extra.field_index];
5344
5345        const tag_int = blk: {
5346            const tag_ty = union_ty.unionTagTypeHypothetical(zcu);
5347            const enum_field_index = tag_ty.enumFieldIndex(field_name, zcu).?;
5348            const tag_val = try pt.enumValueFieldIndex(tag_ty, enum_field_index);
5349            break :blk try cg.lowerConstant(tag_val, tag_ty);
5350        };
5351        if (layout.payload_size == 0) {
5352            if (layout.tag_size == 0) {
5353                break :result .none;
5354            }
5355            assert(!isByRef(union_ty, zcu, cg.target));
5356            break :result tag_int;
5357        }
5358
5359        if (isByRef(union_ty, zcu, cg.target)) {
5360            const result_ptr = try cg.allocStack(union_ty);
5361            const payload = try cg.resolveInst(extra.init);
5362            if (layout.tag_align.compare(.gte, layout.payload_align)) {
5363                if (isByRef(field_ty, zcu, cg.target)) {
5364                    const payload_ptr = try cg.buildPointerOffset(result_ptr, layout.tag_size, .new);
5365                    try cg.store(payload_ptr, payload, field_ty, 0);
5366                } else {
5367                    try cg.store(result_ptr, payload, field_ty, @intCast(layout.tag_size));
5368                }
5369
5370                if (layout.tag_size > 0) {
5371                    try cg.store(result_ptr, tag_int, Type.fromInterned(union_obj.enum_tag_ty), 0);
5372                }
5373            } else {
5374                try cg.store(result_ptr, payload, field_ty, 0);
5375                if (layout.tag_size > 0) {
5376                    try cg.store(
5377                        result_ptr,
5378                        tag_int,
5379                        Type.fromInterned(union_obj.enum_tag_ty),
5380                        @intCast(layout.payload_size),
5381                    );
5382                }
5383            }
5384            break :result result_ptr;
5385        } else {
5386            const operand = try cg.resolveInst(extra.init);
5387            const union_int_type = try pt.intType(.unsigned, @as(u16, @intCast(union_ty.bitSize(zcu))));
5388            if (field_ty.zigTypeTag(zcu) == .float) {
5389                const int_type = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu)));
5390                const bitcasted = try cg.bitcast(field_ty, int_type, operand);
5391                break :result try cg.trunc(bitcasted, int_type, union_int_type);
5392            } else if (field_ty.isPtrAtRuntime(zcu)) {
5393                const int_type = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu)));
5394                break :result try cg.intcast(operand, int_type, union_int_type);
5395            }
5396            break :result try cg.intcast(operand, field_ty, union_int_type);
5397        }
5398    };
5399
5400    return cg.finishAir(inst, result, &.{extra.init});
5401}
5402
5403fn airPrefetch(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5404    const prefetch = cg.air.instructions.items(.data)[@intFromEnum(inst)].prefetch;
5405    return cg.finishAir(inst, .none, &.{prefetch.ptr});
5406}
5407
5408fn airWasmMemorySize(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5409    const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
5410
5411    try cg.addLabel(.memory_size, pl_op.payload);
5412    return cg.finishAir(inst, .stack, &.{pl_op.operand});
5413}
5414
5415fn airWasmMemoryGrow(cg: *CodeGen, inst: Air.Inst.Index) !void {
5416    const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
5417
5418    const operand = try cg.resolveInst(pl_op.operand);
5419    try cg.emitWValue(operand);
5420    try cg.addLabel(.memory_grow, pl_op.payload);
5421    return cg.finishAir(inst, .stack, &.{pl_op.operand});
5422}
5423
5424fn cmpOptionals(cg: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: std.math.CompareOperator) InnerError!WValue {
5425    const zcu = cg.pt.zcu;
5426    assert(operand_ty.hasRuntimeBitsIgnoreComptime(zcu));
5427    assert(op == .eq or op == .neq);
5428    const payload_ty = operand_ty.optionalChild(zcu);
5429    assert(!isByRef(payload_ty, zcu, cg.target));
5430
5431    var result = try cg.allocLocal(Type.i32);
5432    defer result.free(cg);
5433
5434    var lhs_null = try cg.allocLocal(Type.i32);
5435    defer lhs_null.free(cg);
5436
5437    try cg.startBlock(.block, .empty);
5438
5439    try cg.addImm32(if (op == .eq) 0 else 1);
5440    try cg.addLocal(.local_set, result.local.value);
5441
5442    _ = try cg.isNull(lhs, operand_ty, .i32_eq);
5443    try cg.addLocal(.local_tee, lhs_null.local.value);
5444    _ = try cg.isNull(rhs, operand_ty, .i32_eq);
5445    try cg.addTag(.i32_ne);
5446    try cg.addLabel(.br_if, 0); // only one is null
5447
5448    try cg.addImm32(if (op == .eq) 1 else 0);
5449    try cg.addLocal(.local_set, result.local.value);
5450
5451    try cg.addLocal(.local_get, lhs_null.local.value);
5452    try cg.addLabel(.br_if, 0); // both are null
5453
5454    _ = try cg.load(lhs, payload_ty, 0);
5455    _ = try cg.load(rhs, payload_ty, 0);
5456    _ = try cg.cmp(.stack, .stack, payload_ty, op);
5457    try cg.addLocal(.local_set, result.local.value);
5458
5459    try cg.endBlock();
5460
5461    try cg.addLocal(.local_get, result.local.value);
5462
5463    return .stack;
5464}
5465
5466/// Compares big integers by checking both its high bits and low bits.
5467/// NOTE: Leaves the result of the comparison on top of the stack.
5468/// TODO: Lower this to compiler_rt call when bitsize > 128
5469fn cmpBigInt(cg: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: std.math.CompareOperator) InnerError!WValue {
5470    const zcu = cg.pt.zcu;
5471    assert(operand_ty.abiSize(zcu) >= 16);
5472    assert(!(lhs != .stack and rhs == .stack));
5473    if (operand_ty.bitSize(zcu) > 128) {
5474        return cg.fail("TODO: Support cmpBigInt for integer bitsize: '{d}'", .{operand_ty.bitSize(zcu)});
5475    }
5476
5477    var lhs_msb = try (try cg.load(lhs, Type.u64, 8)).toLocal(cg, Type.u64);
5478    defer lhs_msb.free(cg);
5479    var rhs_msb = try (try cg.load(rhs, Type.u64, 8)).toLocal(cg, Type.u64);
5480    defer rhs_msb.free(cg);
5481
5482    switch (op) {
5483        .eq, .neq => {
5484            const xor_high = try cg.binOp(lhs_msb, rhs_msb, Type.u64, .xor);
5485            const lhs_lsb = try cg.load(lhs, Type.u64, 0);
5486            const rhs_lsb = try cg.load(rhs, Type.u64, 0);
5487            const xor_low = try cg.binOp(lhs_lsb, rhs_lsb, Type.u64, .xor);
5488            const or_result = try cg.binOp(xor_high, xor_low, Type.u64, .@"or");
5489
5490            switch (op) {
5491                .eq => return cg.cmp(or_result, .{ .imm64 = 0 }, Type.u64, .eq),
5492                .neq => return cg.cmp(or_result, .{ .imm64 = 0 }, Type.u64, .neq),
5493                else => unreachable,
5494            }
5495        },
5496        else => {
5497            const ty = if (operand_ty.isSignedInt(zcu)) Type.i64 else Type.u64;
5498            // leave those value on top of the stack for '.select'
5499            const lhs_lsb = try cg.load(lhs, Type.u64, 0);
5500            const rhs_lsb = try cg.load(rhs, Type.u64, 0);
5501            _ = try cg.cmp(lhs_lsb, rhs_lsb, Type.u64, op);
5502            _ = try cg.cmp(lhs_msb, rhs_msb, ty, op);
5503            _ = try cg.cmp(lhs_msb, rhs_msb, ty, .eq);
5504            try cg.addTag(.select);
5505        },
5506    }
5507
5508    return .stack;
5509}
5510
5511fn airSetUnionTag(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5512    const pt = cg.pt;
5513    const zcu = pt.zcu;
5514    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
5515    const un_ty = cg.typeOf(bin_op.lhs).childType(zcu);
5516    const tag_ty = cg.typeOf(bin_op.rhs);
5517    const layout = un_ty.unionGetLayout(zcu);
5518    if (layout.tag_size == 0) return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs });
5519
5520    const union_ptr = try cg.resolveInst(bin_op.lhs);
5521    const new_tag = try cg.resolveInst(bin_op.rhs);
5522    if (layout.payload_size == 0) {
5523        try cg.store(union_ptr, new_tag, tag_ty, 0);
5524        return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs });
5525    }
5526
5527    // when the tag alignment is smaller than the payload, the field will be stored
5528    // after the payload.
5529    const offset: u32 = if (layout.tag_align.compare(.lt, layout.payload_align)) blk: {
5530        break :blk @intCast(layout.payload_size);
5531    } else 0;
5532    try cg.store(union_ptr, new_tag, tag_ty, offset);
5533    return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs });
5534}
5535
5536fn airGetUnionTag(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5537    const zcu = cg.pt.zcu;
5538    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
5539
5540    const un_ty = cg.typeOf(ty_op.operand);
5541    const tag_ty = cg.typeOfIndex(inst);
5542    const layout = un_ty.unionGetLayout(zcu);
5543    if (layout.tag_size == 0) return cg.finishAir(inst, .none, &.{ty_op.operand});
5544
5545    const operand = try cg.resolveInst(ty_op.operand);
5546    // when the tag alignment is smaller than the payload, the field will be stored
5547    // after the payload.
5548    const offset: u32 = if (layout.tag_align.compare(.lt, layout.payload_align))
5549        @intCast(layout.payload_size)
5550    else
5551        0;
5552    const result = try cg.load(operand, tag_ty, offset);
5553    return cg.finishAir(inst, result, &.{ty_op.operand});
5554}
5555
5556fn airFpext(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5557    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
5558
5559    const dest_ty = cg.typeOfIndex(inst);
5560    const operand = try cg.resolveInst(ty_op.operand);
5561    const result = try cg.fpext(operand, cg.typeOf(ty_op.operand), dest_ty);
5562    return cg.finishAir(inst, result, &.{ty_op.operand});
5563}
5564
5565/// Extends a float from a given `Type` to a larger wanted `Type`, leaving the
5566/// result on the stack.
5567fn fpext(cg: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerError!WValue {
5568    const given_bits = given.floatBits(cg.target);
5569    const wanted_bits = wanted.floatBits(cg.target);
5570
5571    const intrinsic: Mir.Intrinsic = switch (given_bits) {
5572        16 => switch (wanted_bits) {
5573            32 => {
5574                assert(.stack == try cg.callIntrinsic(.__extendhfsf2, &.{.f16_type}, Type.f32, &.{operand}));
5575                return .stack;
5576            },
5577            64 => {
5578                assert(.stack == try cg.callIntrinsic(.__extendhfsf2, &.{.f16_type}, Type.f32, &.{operand}));
5579                try cg.addTag(.f64_promote_f32);
5580                return .stack;
5581            },
5582            80 => .__extendhfxf2,
5583            128 => .__extendhftf2,
5584            else => unreachable,
5585        },
5586        32 => switch (wanted_bits) {
5587            64 => {
5588                try cg.emitWValue(operand);
5589                try cg.addTag(.f64_promote_f32);
5590                return .stack;
5591            },
5592            80 => .__extendsfxf2,
5593            128 => .__extendsftf2,
5594            else => unreachable,
5595        },
5596        64 => switch (wanted_bits) {
5597            80 => .__extenddfxf2,
5598            128 => .__extenddftf2,
5599            else => unreachable,
5600        },
5601        80 => switch (wanted_bits) {
5602            128 => .__extendxftf2,
5603            else => unreachable,
5604        },
5605        else => unreachable,
5606    };
5607    return cg.callIntrinsic(intrinsic, &.{given.ip_index}, wanted, &.{operand});
5608}
5609
5610fn airFptrunc(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5611    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
5612
5613    const dest_ty = cg.typeOfIndex(inst);
5614    const operand = try cg.resolveInst(ty_op.operand);
5615    const result = try cg.fptrunc(operand, cg.typeOf(ty_op.operand), dest_ty);
5616    return cg.finishAir(inst, result, &.{ty_op.operand});
5617}
5618
5619/// Truncates a float from a given `Type` to its wanted `Type`, leaving the
5620/// result on the stack.
5621fn fptrunc(cg: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerError!WValue {
5622    const given_bits = given.floatBits(cg.target);
5623    const wanted_bits = wanted.floatBits(cg.target);
5624
5625    const intrinsic: Mir.Intrinsic = switch (given_bits) {
5626        32 => switch (wanted_bits) {
5627            16 => {
5628                return cg.callIntrinsic(.__truncsfhf2, &.{.f32_type}, Type.f16, &.{operand});
5629            },
5630            else => unreachable,
5631        },
5632        64 => switch (wanted_bits) {
5633            16 => {
5634                try cg.emitWValue(operand);
5635                try cg.addTag(.f32_demote_f64);
5636                return cg.callIntrinsic(.__truncsfhf2, &.{.f32_type}, Type.f16, &.{.stack});
5637            },
5638            32 => {
5639                try cg.emitWValue(operand);
5640                try cg.addTag(.f32_demote_f64);
5641                return .stack;
5642            },
5643            else => unreachable,
5644        },
5645        80 => switch (wanted_bits) {
5646            16 => .__truncxfhf2,
5647            32 => .__truncxfsf2,
5648            64 => .__truncxfdf2,
5649            else => unreachable,
5650        },
5651        128 => switch (wanted_bits) {
5652            16 => .__trunctfhf2,
5653            32 => .__trunctfsf2,
5654            64 => .__trunctfdf2,
5655            80 => .__trunctfxf2,
5656            else => unreachable,
5657        },
5658        else => unreachable,
5659    };
5660    return cg.callIntrinsic(intrinsic, &.{given.ip_index}, wanted, &.{operand});
5661}
5662
5663fn airErrUnionPayloadPtrSet(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5664    const zcu = cg.pt.zcu;
5665    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
5666
5667    const err_set_ty = cg.typeOf(ty_op.operand).childType(zcu);
5668    const payload_ty = err_set_ty.errorUnionPayload(zcu);
5669    const operand = try cg.resolveInst(ty_op.operand);
5670
5671    // set error-tag to '0' to annotate error union is non-error
5672    try cg.store(
5673        operand,
5674        .{ .imm32 = 0 },
5675        Type.anyerror,
5676        @intCast(errUnionErrorOffset(payload_ty, zcu)),
5677    );
5678
5679    const result = result: {
5680        if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
5681            break :result cg.reuseOperand(ty_op.operand, operand);
5682        }
5683
5684        break :result try cg.buildPointerOffset(operand, @as(u32, @intCast(errUnionPayloadOffset(payload_ty, zcu))), .new);
5685    };
5686    return cg.finishAir(inst, result, &.{ty_op.operand});
5687}
5688
5689fn airFieldParentPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5690    const pt = cg.pt;
5691    const zcu = pt.zcu;
5692    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
5693    const extra = cg.air.extraData(Air.FieldParentPtr, ty_pl.payload).data;
5694
5695    const field_ptr = try cg.resolveInst(extra.field_ptr);
5696    const parent_ptr_ty = cg.typeOfIndex(inst);
5697    const parent_ty = parent_ptr_ty.childType(zcu);
5698    const field_ptr_ty = cg.typeOf(extra.field_ptr);
5699    const field_index = extra.field_index;
5700    const field_offset = switch (parent_ty.containerLayout(zcu)) {
5701        .auto, .@"extern" => parent_ty.structFieldOffset(field_index, zcu),
5702        .@"packed" => offset: {
5703            const parent_ptr_offset = parent_ptr_ty.ptrInfo(zcu).packed_offset.bit_offset;
5704            const field_offset = if (zcu.typeToStruct(parent_ty)) |loaded_struct| zcu.structPackedFieldBitOffset(loaded_struct, field_index) else 0;
5705            const field_ptr_offset = field_ptr_ty.ptrInfo(zcu).packed_offset.bit_offset;
5706            break :offset @divExact(parent_ptr_offset + field_offset - field_ptr_offset, 8);
5707        },
5708    };
5709
5710    const result = if (field_offset != 0) result: {
5711        const base = try cg.buildPointerOffset(field_ptr, 0, .new);
5712        try cg.addLocal(.local_get, base.local.value);
5713        try cg.addImm32(@intCast(field_offset));
5714        try cg.addTag(.i32_sub);
5715        try cg.addLocal(.local_set, base.local.value);
5716        break :result base;
5717    } else cg.reuseOperand(extra.field_ptr, field_ptr);
5718
5719    return cg.finishAir(inst, result, &.{extra.field_ptr});
5720}
5721
5722fn sliceOrArrayPtr(cg: *CodeGen, ptr: WValue, ptr_ty: Type) InnerError!WValue {
5723    const zcu = cg.pt.zcu;
5724    if (ptr_ty.isSlice(zcu)) {
5725        return cg.slicePtr(ptr);
5726    } else {
5727        return ptr;
5728    }
5729}
5730
5731fn airMemcpy(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5732    const zcu = cg.pt.zcu;
5733    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
5734    const dst = try cg.resolveInst(bin_op.lhs);
5735    const dst_ty = cg.typeOf(bin_op.lhs);
5736    const ptr_elem_ty = dst_ty.childType(zcu);
5737    const src = try cg.resolveInst(bin_op.rhs);
5738    const src_ty = cg.typeOf(bin_op.rhs);
5739    const len = switch (dst_ty.ptrSize(zcu)) {
5740        .slice => blk: {
5741            const slice_len = try cg.sliceLen(dst);
5742            if (ptr_elem_ty.abiSize(zcu) != 1) {
5743                try cg.emitWValue(slice_len);
5744                try cg.emitWValue(.{ .imm32 = @as(u32, @intCast(ptr_elem_ty.abiSize(zcu))) });
5745                try cg.addTag(.i32_mul);
5746                try cg.addLocal(.local_set, slice_len.local.value);
5747            }
5748            break :blk slice_len;
5749        },
5750        .one => @as(WValue, .{
5751            .imm32 = @as(u32, @intCast(ptr_elem_ty.arrayLen(zcu) * ptr_elem_ty.childType(zcu).abiSize(zcu))),
5752        }),
5753        .c, .many => unreachable,
5754    };
5755    const dst_ptr = try cg.sliceOrArrayPtr(dst, dst_ty);
5756    const src_ptr = try cg.sliceOrArrayPtr(src, src_ty);
5757    try cg.memcpy(dst_ptr, src_ptr, len);
5758
5759    return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs });
5760}
5761
5762fn airRetAddr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5763    // TODO: Implement this properly once stack serialization is solved
5764    return cg.finishAir(inst, switch (cg.ptr_size) {
5765        .wasm32 => .{ .imm32 = 0 },
5766        .wasm64 => .{ .imm64 = 0 },
5767    }, &.{});
5768}
5769
5770fn airPopcount(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5771    const pt = cg.pt;
5772    const zcu = pt.zcu;
5773    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
5774
5775    const operand = try cg.resolveInst(ty_op.operand);
5776    const op_ty = cg.typeOf(ty_op.operand);
5777
5778    if (op_ty.zigTypeTag(zcu) == .vector) {
5779        return cg.fail("TODO: Implement @popCount for vectors", .{});
5780    }
5781
5782    const int_info = op_ty.intInfo(zcu);
5783    const bits = int_info.bits;
5784    const wasm_bits = toWasmBits(bits) orelse {
5785        return cg.fail("TODO: Implement @popCount for integers with bitsize '{d}'", .{bits});
5786    };
5787
5788    switch (wasm_bits) {
5789        32 => {
5790            try cg.emitWValue(operand);
5791            if (op_ty.isSignedInt(zcu) and bits != wasm_bits) {
5792                _ = try cg.wrapOperand(.stack, try pt.intType(.unsigned, bits));
5793            }
5794            try cg.addTag(.i32_popcnt);
5795        },
5796        64 => {
5797            try cg.emitWValue(operand);
5798            if (op_ty.isSignedInt(zcu) and bits != wasm_bits) {
5799                _ = try cg.wrapOperand(.stack, try pt.intType(.unsigned, bits));
5800            }
5801            try cg.addTag(.i64_popcnt);
5802            try cg.addTag(.i32_wrap_i64);
5803            try cg.emitWValue(operand);
5804        },
5805        128 => {
5806            _ = try cg.load(operand, Type.u64, 0);
5807            try cg.addTag(.i64_popcnt);
5808            _ = try cg.load(operand, Type.u64, 8);
5809            if (op_ty.isSignedInt(zcu) and bits != wasm_bits) {
5810                _ = try cg.wrapOperand(.stack, try pt.intType(.unsigned, bits - 64));
5811            }
5812            try cg.addTag(.i64_popcnt);
5813            try cg.addTag(.i64_add);
5814            try cg.addTag(.i32_wrap_i64);
5815        },
5816        else => unreachable,
5817    }
5818
5819    return cg.finishAir(inst, .stack, &.{ty_op.operand});
5820}
5821
5822fn airBitReverse(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5823    const zcu = cg.pt.zcu;
5824    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
5825
5826    const operand = try cg.resolveInst(ty_op.operand);
5827    const ty = cg.typeOf(ty_op.operand);
5828
5829    if (ty.zigTypeTag(zcu) == .vector) {
5830        return cg.fail("TODO: Implement @bitReverse for vectors", .{});
5831    }
5832
5833    const int_info = ty.intInfo(zcu);
5834    const bits = int_info.bits;
5835    const wasm_bits = toWasmBits(bits) orelse {
5836        return cg.fail("TODO: Implement @bitReverse for integers with bitsize '{d}'", .{bits});
5837    };
5838
5839    switch (wasm_bits) {
5840        32 => {
5841            const intrin_ret = try cg.callIntrinsic(
5842                .__bitreversesi2,
5843                &.{.u32_type},
5844                Type.u32,
5845                &.{operand},
5846            );
5847            const result = if (bits == 32)
5848                intrin_ret
5849            else
5850                try cg.binOp(intrin_ret, .{ .imm32 = 32 - bits }, ty, .shr);
5851            return cg.finishAir(inst, result, &.{ty_op.operand});
5852        },
5853        64 => {
5854            const intrin_ret = try cg.callIntrinsic(
5855                .__bitreversedi2,
5856                &.{.u64_type},
5857                Type.u64,
5858                &.{operand},
5859            );
5860            const result = if (bits == 64)
5861                intrin_ret
5862            else
5863                try cg.binOp(intrin_ret, .{ .imm64 = 64 - bits }, ty, .shr);
5864            return cg.finishAir(inst, result, &.{ty_op.operand});
5865        },
5866        128 => {
5867            const result = try cg.allocStack(ty);
5868
5869            try cg.emitWValue(result);
5870            const first_half = try cg.load(operand, Type.u64, 8);
5871            const intrin_ret_first = try cg.callIntrinsic(
5872                .__bitreversedi2,
5873                &.{.u64_type},
5874                Type.u64,
5875                &.{first_half},
5876            );
5877            try cg.emitWValue(intrin_ret_first);
5878            if (bits < 128) {
5879                try cg.emitWValue(.{ .imm64 = 128 - bits });
5880                try cg.addTag(.i64_shr_u);
5881            }
5882            try cg.emitWValue(result);
5883            const second_half = try cg.load(operand, Type.u64, 0);
5884            const intrin_ret_second = try cg.callIntrinsic(
5885                .__bitreversedi2,
5886                &.{.u64_type},
5887                Type.u64,
5888                &.{second_half},
5889            );
5890            try cg.emitWValue(intrin_ret_second);
5891            if (bits == 128) {
5892                try cg.store(.stack, .stack, Type.u64, result.offset() + 8);
5893                try cg.store(.stack, .stack, Type.u64, result.offset());
5894            } else {
5895                var tmp = try cg.allocLocal(Type.u64);
5896                defer tmp.free(cg);
5897                try cg.addLocal(.local_tee, tmp.local.value);
5898                try cg.emitWValue(.{ .imm64 = 128 - bits });
5899                if (ty.isSignedInt(zcu)) {
5900                    try cg.addTag(.i64_shr_s);
5901                } else {
5902                    try cg.addTag(.i64_shr_u);
5903                }
5904                try cg.store(.stack, .stack, Type.u64, result.offset() + 8);
5905                try cg.addLocal(.local_get, tmp.local.value);
5906                try cg.emitWValue(.{ .imm64 = bits - 64 });
5907                try cg.addTag(.i64_shl);
5908                try cg.addTag(.i64_or);
5909                try cg.store(.stack, .stack, Type.u64, result.offset());
5910            }
5911            return cg.finishAir(inst, result, &.{ty_op.operand});
5912        },
5913        else => unreachable,
5914    }
5915}
5916
5917fn airErrorName(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
5918    const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
5919    const operand = try cg.resolveInst(un_op);
5920    // Each entry to this table is a slice (ptr+len).
5921    // The operand in this instruction represents the index within this table.
5922    // This means to get the final name, we emit the base pointer and then perform
5923    // pointer arithmetic to find the pointer to this slice and return that.
5924    //
5925    // As the names are global and the slice elements are constant, we do not have
5926    // to make a copy of the ptr+value but can point towards them directly.
5927    const pt = cg.pt;
5928    const name_ty = Type.slice_const_u8_sentinel_0;
5929    const abi_size = name_ty.abiSize(pt.zcu);
5930
5931    // Lowers to a i32.const or i64.const with the error table memory address.
5932    cg.error_name_table_ref_count += 1;
5933    try cg.addTag(.error_name_table_ref);
5934    try cg.emitWValue(operand);
5935    switch (cg.ptr_size) {
5936        .wasm32 => {
5937            try cg.addImm32(@intCast(abi_size));
5938            try cg.addTag(.i32_mul);
5939            try cg.addTag(.i32_add);
5940        },
5941        .wasm64 => {
5942            try cg.addImm64(abi_size);
5943            try cg.addTag(.i64_mul);
5944            try cg.addTag(.i64_add);
5945        },
5946    }
5947
5948    return cg.finishAir(inst, .stack, &.{un_op});
5949}
5950
5951fn airPtrSliceFieldPtr(cg: *CodeGen, inst: Air.Inst.Index, offset: u32) InnerError!void {
5952    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
5953    const slice_ptr = try cg.resolveInst(ty_op.operand);
5954    const result = try cg.buildPointerOffset(slice_ptr, offset, .new);
5955    return cg.finishAir(inst, result, &.{ty_op.operand});
5956}
5957
5958/// NOTE: Allocates place for result on virtual stack, when integer size > 64 bits
5959fn intZeroValue(cg: *CodeGen, ty: Type) InnerError!WValue {
5960    const zcu = cg.pt.zcu;
5961    const int_info = ty.intInfo(zcu);
5962    const wasm_bits = toWasmBits(int_info.bits) orelse {
5963        return cg.fail("TODO: Implement intZeroValue for integer bitsize: {d}", .{int_info.bits});
5964    };
5965    switch (wasm_bits) {
5966        32 => return .{ .imm32 = 0 },
5967        64 => return .{ .imm64 = 0 },
5968        128 => {
5969            const result = try cg.allocStack(ty);
5970            try cg.store(result, .{ .imm64 = 0 }, Type.u64, 0);
5971            try cg.store(result, .{ .imm64 = 0 }, Type.u64, 8);
5972            return result;
5973        },
5974        else => unreachable,
5975    }
5976}
5977
5978fn airAddSubWithOverflow(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void {
5979    assert(op == .add or op == .sub);
5980    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
5981    const extra = cg.air.extraData(Air.Bin, ty_pl.payload).data;
5982
5983    const lhs = try cg.resolveInst(extra.lhs);
5984    const rhs = try cg.resolveInst(extra.rhs);
5985    const ty = cg.typeOf(extra.lhs);
5986    const pt = cg.pt;
5987    const zcu = pt.zcu;
5988
5989    if (ty.zigTypeTag(zcu) == .vector) {
5990        return cg.fail("TODO: Implement overflow arithmetic for vectors", .{});
5991    }
5992
5993    const int_info = ty.intInfo(zcu);
5994    const is_signed = int_info.signedness == .signed;
5995    if (int_info.bits > 128) {
5996        return cg.fail("TODO: Implement {{add/sub}}_with_overflow for integer bitsize: {d}", .{int_info.bits});
5997    }
5998
5999    const op_result = try cg.wrapBinOp(lhs, rhs, ty, op);
6000    var op_tmp = try op_result.toLocal(cg, ty);
6001    defer op_tmp.free(cg);
6002
6003    const cmp_op: std.math.CompareOperator = switch (op) {
6004        .add => .lt,
6005        .sub => .gt,
6006        else => unreachable,
6007    };
6008    const overflow_bit = if (is_signed) blk: {
6009        const zero = try intZeroValue(cg, ty);
6010        const rhs_is_neg = try cg.cmp(rhs, zero, ty, .lt);
6011        const overflow_cmp = try cg.cmp(op_tmp, lhs, ty, cmp_op);
6012        break :blk try cg.cmp(rhs_is_neg, overflow_cmp, Type.u1, .neq);
6013    } else try cg.cmp(op_tmp, lhs, ty, cmp_op);
6014    var bit_tmp = try overflow_bit.toLocal(cg, Type.u1);
6015    defer bit_tmp.free(cg);
6016
6017    const result = try cg.allocStack(cg.typeOfIndex(inst));
6018    const offset: u32 = @intCast(ty.abiSize(zcu));
6019    try cg.store(result, op_tmp, ty, 0);
6020    try cg.store(result, bit_tmp, Type.u1, offset);
6021
6022    return cg.finishAir(inst, result, &.{ extra.lhs, extra.rhs });
6023}
6024
6025fn airShlWithOverflow(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6026    const pt = cg.pt;
6027    const zcu = pt.zcu;
6028    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
6029    const extra = cg.air.extraData(Air.Bin, ty_pl.payload).data;
6030
6031    const lhs = try cg.resolveInst(extra.lhs);
6032    const rhs = try cg.resolveInst(extra.rhs);
6033    const ty = cg.typeOf(extra.lhs);
6034    const rhs_ty = cg.typeOf(extra.rhs);
6035
6036    if (ty.isVector(zcu)) {
6037        if (!rhs_ty.isVector(zcu)) {
6038            return cg.fail("TODO: implement vector 'shl_with_overflow' with scalar rhs", .{});
6039        } else {
6040            return cg.fail("TODO: implement vector 'shl_with_overflow'", .{});
6041        }
6042    }
6043
6044    const int_info = ty.intInfo(zcu);
6045    const wasm_bits = toWasmBits(int_info.bits) orelse {
6046        return cg.fail("TODO: implement 'shl_with_overflow' for integer bitsize: {d}", .{int_info.bits});
6047    };
6048
6049    // Ensure rhs is coerced to lhs as they must have the same WebAssembly types
6050    // before we can perform any binary operation.
6051    const rhs_wasm_bits = toWasmBits(rhs_ty.intInfo(zcu).bits).?;
6052    // If wasm_bits == 128, compiler-rt expects i32 for shift
6053    const rhs_final = if (wasm_bits != rhs_wasm_bits and wasm_bits == 64) blk: {
6054        const rhs_casted = try cg.intcast(rhs, rhs_ty, ty);
6055        break :blk try rhs_casted.toLocal(cg, ty);
6056    } else rhs;
6057
6058    var shl = try (try cg.wrapBinOp(lhs, rhs_final, ty, .shl)).toLocal(cg, ty);
6059    defer shl.free(cg);
6060
6061    const overflow_bit = blk: {
6062        const shr = try cg.binOp(shl, rhs_final, ty, .shr);
6063        break :blk try cg.cmp(shr, lhs, ty, .neq);
6064    };
6065    var overflow_local = try overflow_bit.toLocal(cg, Type.u1);
6066    defer overflow_local.free(cg);
6067
6068    const result = try cg.allocStack(cg.typeOfIndex(inst));
6069    const offset: u32 = @intCast(ty.abiSize(zcu));
6070    try cg.store(result, shl, ty, 0);
6071    try cg.store(result, overflow_local, Type.u1, offset);
6072
6073    return cg.finishAir(inst, result, &.{ extra.lhs, extra.rhs });
6074}
6075
6076fn airMulWithOverflow(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6077    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
6078    const extra = cg.air.extraData(Air.Bin, ty_pl.payload).data;
6079
6080    const lhs = try cg.resolveInst(extra.lhs);
6081    const rhs = try cg.resolveInst(extra.rhs);
6082    const ty = cg.typeOf(extra.lhs);
6083    const pt = cg.pt;
6084    const zcu = pt.zcu;
6085
6086    if (ty.zigTypeTag(zcu) == .vector) {
6087        return cg.fail("TODO: Implement overflow arithmetic for vectors", .{});
6088    }
6089
6090    // We store the bit if it's overflowed or not in this. As it's zero-initialized
6091    // we only need to update it if an overflow (or underflow) occurred.
6092    var overflow_bit = try cg.ensureAllocLocal(Type.u1);
6093    defer overflow_bit.free(cg);
6094
6095    const int_info = ty.intInfo(zcu);
6096    const wasm_bits = toWasmBits(int_info.bits) orelse {
6097        return cg.fail("TODO: Implement `@mulWithOverflow` for integer bitsize: {d}", .{int_info.bits});
6098    };
6099
6100    const zero: WValue = switch (wasm_bits) {
6101        32 => .{ .imm32 = 0 },
6102        64, 128 => .{ .imm64 = 0 },
6103        else => unreachable,
6104    };
6105
6106    // for 32 bit integers we upcast it to a 64bit integer
6107    const mul = if (wasm_bits == 32) blk: {
6108        const new_ty = if (int_info.signedness == .signed) Type.i64 else Type.u64;
6109        const lhs_upcast = try cg.intcast(lhs, ty, new_ty);
6110        const rhs_upcast = try cg.intcast(rhs, ty, new_ty);
6111        const bin_op = try (try cg.binOp(lhs_upcast, rhs_upcast, new_ty, .mul)).toLocal(cg, new_ty);
6112        const res = try (try cg.trunc(bin_op, ty, new_ty)).toLocal(cg, ty);
6113        const res_upcast = try cg.intcast(res, ty, new_ty);
6114        _ = try cg.cmp(res_upcast, bin_op, new_ty, .neq);
6115        try cg.addLocal(.local_set, overflow_bit.local.value);
6116        break :blk res;
6117    } else if (wasm_bits == 64) blk: {
6118        const new_ty = if (int_info.signedness == .signed) Type.i128 else Type.u128;
6119        const lhs_upcast = try cg.intcast(lhs, ty, new_ty);
6120        const rhs_upcast = try cg.intcast(rhs, ty, new_ty);
6121        const bin_op = try (try cg.binOp(lhs_upcast, rhs_upcast, new_ty, .mul)).toLocal(cg, new_ty);
6122        const res = try (try cg.trunc(bin_op, ty, new_ty)).toLocal(cg, ty);
6123        const res_upcast = try cg.intcast(res, ty, new_ty);
6124        _ = try cg.cmp(res_upcast, bin_op, new_ty, .neq);
6125        try cg.addLocal(.local_set, overflow_bit.local.value);
6126        break :blk res;
6127    } else if (int_info.bits == 128 and int_info.signedness == .unsigned) blk: {
6128        var lhs_lsb = try (try cg.load(lhs, Type.u64, 0)).toLocal(cg, Type.u64);
6129        defer lhs_lsb.free(cg);
6130        var lhs_msb = try (try cg.load(lhs, Type.u64, 8)).toLocal(cg, Type.u64);
6131        defer lhs_msb.free(cg);
6132        var rhs_lsb = try (try cg.load(rhs, Type.u64, 0)).toLocal(cg, Type.u64);
6133        defer rhs_lsb.free(cg);
6134        var rhs_msb = try (try cg.load(rhs, Type.u64, 8)).toLocal(cg, Type.u64);
6135        defer rhs_msb.free(cg);
6136
6137        const cross_1 = try cg.callIntrinsic(
6138            .__multi3,
6139            &[_]InternPool.Index{.i64_type} ** 4,
6140            Type.i128,
6141            &.{ lhs_msb, zero, rhs_lsb, zero },
6142        );
6143        const cross_2 = try cg.callIntrinsic(
6144            .__multi3,
6145            &[_]InternPool.Index{.i64_type} ** 4,
6146            Type.i128,
6147            &.{ rhs_msb, zero, lhs_lsb, zero },
6148        );
6149        const mul_lsb = try cg.callIntrinsic(
6150            .__multi3,
6151            &[_]InternPool.Index{.i64_type} ** 4,
6152            Type.i128,
6153            &.{ rhs_lsb, zero, lhs_lsb, zero },
6154        );
6155
6156        const rhs_msb_not_zero = try cg.cmp(rhs_msb, zero, Type.u64, .neq);
6157        const lhs_msb_not_zero = try cg.cmp(lhs_msb, zero, Type.u64, .neq);
6158        const both_msb_not_zero = try cg.binOp(rhs_msb_not_zero, lhs_msb_not_zero, Type.bool, .@"and");
6159        const cross_1_msb = try cg.load(cross_1, Type.u64, 8);
6160        const cross_1_msb_not_zero = try cg.cmp(cross_1_msb, zero, Type.u64, .neq);
6161        const cond_1 = try cg.binOp(both_msb_not_zero, cross_1_msb_not_zero, Type.bool, .@"or");
6162        const cross_2_msb = try cg.load(cross_2, Type.u64, 8);
6163        const cross_2_msb_not_zero = try cg.cmp(cross_2_msb, zero, Type.u64, .neq);
6164        const cond_2 = try cg.binOp(cond_1, cross_2_msb_not_zero, Type.bool, .@"or");
6165
6166        const cross_1_lsb = try cg.load(cross_1, Type.u64, 0);
6167        const cross_2_lsb = try cg.load(cross_2, Type.u64, 0);
6168        const cross_add = try cg.binOp(cross_1_lsb, cross_2_lsb, Type.u64, .add);
6169
6170        var mul_lsb_msb = try (try cg.load(mul_lsb, Type.u64, 8)).toLocal(cg, Type.u64);
6171        defer mul_lsb_msb.free(cg);
6172        var all_add = try (try cg.binOp(cross_add, mul_lsb_msb, Type.u64, .add)).toLocal(cg, Type.u64);
6173        defer all_add.free(cg);
6174        const add_overflow = try cg.cmp(all_add, mul_lsb_msb, Type.u64, .lt);
6175
6176        // result for overflow bit
6177        _ = try cg.binOp(cond_2, add_overflow, Type.bool, .@"or");
6178        try cg.addLocal(.local_set, overflow_bit.local.value);
6179
6180        const tmp_result = try cg.allocStack(Type.u128);
6181        try cg.emitWValue(tmp_result);
6182        const mul_lsb_lsb = try cg.load(mul_lsb, Type.u64, 0);
6183        try cg.store(.stack, mul_lsb_lsb, Type.u64, tmp_result.offset());
6184        try cg.store(tmp_result, all_add, Type.u64, 8);
6185        break :blk tmp_result;
6186    } else if (int_info.bits == 128 and int_info.signedness == .signed) blk: {
6187        const overflow_ret = try cg.allocStack(Type.i32);
6188        const res = try cg.callIntrinsic(
6189            .__muloti4,
6190            &[_]InternPool.Index{ .i128_type, .i128_type, .usize_type },
6191            Type.i128,
6192            &.{ lhs, rhs, overflow_ret },
6193        );
6194        _ = try cg.load(overflow_ret, Type.i32, 0);
6195        try cg.addLocal(.local_set, overflow_bit.local.value);
6196        break :blk res;
6197    } else return cg.fail("TODO: @mulWithOverflow for {f}", .{ty.fmt(pt)});
6198    var bin_op_local = try mul.toLocal(cg, ty);
6199    defer bin_op_local.free(cg);
6200
6201    const result = try cg.allocStack(cg.typeOfIndex(inst));
6202    const offset: u32 = @intCast(ty.abiSize(zcu));
6203    try cg.store(result, bin_op_local, ty, 0);
6204    try cg.store(result, overflow_bit, Type.u1, offset);
6205
6206    return cg.finishAir(inst, result, &.{ extra.lhs, extra.rhs });
6207}
6208
6209fn airMaxMin(
6210    cg: *CodeGen,
6211    inst: Air.Inst.Index,
6212    op: enum { fmax, fmin },
6213    cmp_op: std.math.CompareOperator,
6214) InnerError!void {
6215    const zcu = cg.pt.zcu;
6216    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
6217
6218    const ty = cg.typeOfIndex(inst);
6219    if (ty.zigTypeTag(zcu) == .vector) {
6220        return cg.fail("TODO: `@maximum` and `@minimum` for vectors", .{});
6221    }
6222
6223    if (ty.abiSize(zcu) > 16) {
6224        return cg.fail("TODO: `@maximum` and `@minimum` for types larger than 16 bytes", .{});
6225    }
6226
6227    const lhs = try cg.resolveInst(bin_op.lhs);
6228    const rhs = try cg.resolveInst(bin_op.rhs);
6229
6230    if (ty.zigTypeTag(zcu) == .float) {
6231        const intrinsic = switch (op) {
6232            inline .fmin, .fmax => |ct_op| switch (ty.floatBits(cg.target)) {
6233                inline 16, 32, 64, 80, 128 => |bits| @field(
6234                    Mir.Intrinsic,
6235                    libcFloatPrefix(bits) ++ @tagName(ct_op) ++ libcFloatSuffix(bits),
6236                ),
6237                else => unreachable,
6238            },
6239        };
6240        const result = try cg.callIntrinsic(intrinsic, &.{ ty.ip_index, ty.ip_index }, ty, &.{ lhs, rhs });
6241        try cg.lowerToStack(result);
6242    } else {
6243        // operands to select from
6244        try cg.lowerToStack(lhs);
6245        try cg.lowerToStack(rhs);
6246        _ = try cg.cmp(lhs, rhs, ty, cmp_op);
6247
6248        // based on the result from comparison, return operand 0 or 1.
6249        try cg.addTag(.select);
6250    }
6251
6252    return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs });
6253}
6254
6255fn airMulAdd(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6256    const zcu = cg.pt.zcu;
6257    const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
6258    const bin_op = cg.air.extraData(Air.Bin, pl_op.payload).data;
6259
6260    const ty = cg.typeOfIndex(inst);
6261    if (ty.zigTypeTag(zcu) == .vector) {
6262        return cg.fail("TODO: `@mulAdd` for vectors", .{});
6263    }
6264
6265    const addend = try cg.resolveInst(pl_op.operand);
6266    const lhs = try cg.resolveInst(bin_op.lhs);
6267    const rhs = try cg.resolveInst(bin_op.rhs);
6268
6269    const result = if (ty.floatBits(cg.target) == 16) fl_result: {
6270        const rhs_ext = try cg.fpext(rhs, ty, Type.f32);
6271        const lhs_ext = try cg.fpext(lhs, ty, Type.f32);
6272        const addend_ext = try cg.fpext(addend, ty, Type.f32);
6273        // call to compiler-rt `fn fmaf(f32, f32, f32) f32`
6274        const result = try cg.callIntrinsic(
6275            .fmaf,
6276            &.{ .f32_type, .f32_type, .f32_type },
6277            Type.f32,
6278            &.{ rhs_ext, lhs_ext, addend_ext },
6279        );
6280        break :fl_result try cg.fptrunc(result, Type.f32, ty);
6281    } else result: {
6282        const mul_result = try cg.binOp(lhs, rhs, ty, .mul);
6283        break :result try cg.binOp(mul_result, addend, ty, .add);
6284    };
6285
6286    return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs, pl_op.operand });
6287}
6288
6289fn airClz(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6290    const zcu = cg.pt.zcu;
6291    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
6292
6293    const ty = cg.typeOf(ty_op.operand);
6294    if (ty.zigTypeTag(zcu) == .vector) {
6295        return cg.fail("TODO: `@clz` for vectors", .{});
6296    }
6297
6298    const operand = try cg.resolveInst(ty_op.operand);
6299    const int_info = ty.intInfo(zcu);
6300    const wasm_bits = toWasmBits(int_info.bits) orelse {
6301        return cg.fail("TODO: `@clz` for integers with bitsize '{d}'", .{int_info.bits});
6302    };
6303
6304    switch (wasm_bits) {
6305        32 => {
6306            if (int_info.signedness == .signed) {
6307                const mask = ~@as(u32, 0) >> @intCast(32 - int_info.bits);
6308                _ = try cg.binOp(operand, .{ .imm32 = mask }, ty, .@"and");
6309            } else {
6310                try cg.emitWValue(operand);
6311            }
6312            try cg.addTag(.i32_clz);
6313        },
6314        64 => {
6315            if (int_info.signedness == .signed) {
6316                const mask = ~@as(u64, 0) >> @intCast(64 - int_info.bits);
6317                _ = try cg.binOp(operand, .{ .imm64 = mask }, ty, .@"and");
6318            } else {
6319                try cg.emitWValue(operand);
6320            }
6321            try cg.addTag(.i64_clz);
6322            try cg.addTag(.i32_wrap_i64);
6323        },
6324        128 => {
6325            var msb = try (try cg.load(operand, Type.u64, 8)).toLocal(cg, Type.u64);
6326            defer msb.free(cg);
6327
6328            try cg.emitWValue(msb);
6329            try cg.addTag(.i64_clz);
6330            _ = try cg.load(operand, Type.u64, 0);
6331            try cg.addTag(.i64_clz);
6332            try cg.emitWValue(.{ .imm64 = 64 });
6333            try cg.addTag(.i64_add);
6334            _ = try cg.cmp(msb, .{ .imm64 = 0 }, Type.u64, .neq);
6335            try cg.addTag(.select);
6336            try cg.addTag(.i32_wrap_i64);
6337        },
6338        else => unreachable,
6339    }
6340
6341    if (wasm_bits != int_info.bits) {
6342        try cg.emitWValue(.{ .imm32 = wasm_bits - int_info.bits });
6343        try cg.addTag(.i32_sub);
6344    }
6345
6346    return cg.finishAir(inst, .stack, &.{ty_op.operand});
6347}
6348
6349fn airCtz(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6350    const zcu = cg.pt.zcu;
6351    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
6352
6353    const ty = cg.typeOf(ty_op.operand);
6354
6355    if (ty.zigTypeTag(zcu) == .vector) {
6356        return cg.fail("TODO: `@ctz` for vectors", .{});
6357    }
6358
6359    const operand = try cg.resolveInst(ty_op.operand);
6360    const int_info = ty.intInfo(zcu);
6361    const wasm_bits = toWasmBits(int_info.bits) orelse {
6362        return cg.fail("TODO: `@clz` for integers with bitsize '{d}'", .{int_info.bits});
6363    };
6364
6365    switch (wasm_bits) {
6366        32 => {
6367            if (wasm_bits != int_info.bits) {
6368                const val: u32 = @as(u32, 1) << @as(u5, @intCast(int_info.bits));
6369                // leave value on the stack
6370                _ = try cg.binOp(operand, .{ .imm32 = val }, ty, .@"or");
6371            } else try cg.emitWValue(operand);
6372            try cg.addTag(.i32_ctz);
6373        },
6374        64 => {
6375            if (wasm_bits != int_info.bits) {
6376                const val: u64 = @as(u64, 1) << @as(u6, @intCast(int_info.bits));
6377                // leave value on the stack
6378                _ = try cg.binOp(operand, .{ .imm64 = val }, ty, .@"or");
6379            } else try cg.emitWValue(operand);
6380            try cg.addTag(.i64_ctz);
6381            try cg.addTag(.i32_wrap_i64);
6382        },
6383        128 => {
6384            var lsb = try (try cg.load(operand, Type.u64, 0)).toLocal(cg, Type.u64);
6385            defer lsb.free(cg);
6386
6387            try cg.emitWValue(lsb);
6388            try cg.addTag(.i64_ctz);
6389            _ = try cg.load(operand, Type.u64, 8);
6390            if (wasm_bits != int_info.bits) {
6391                try cg.addImm64(@as(u64, 1) << @as(u6, @intCast(int_info.bits - 64)));
6392                try cg.addTag(.i64_or);
6393            }
6394            try cg.addTag(.i64_ctz);
6395            try cg.addImm64(64);
6396            if (wasm_bits != int_info.bits) {
6397                try cg.addTag(.i64_or);
6398            } else {
6399                try cg.addTag(.i64_add);
6400            }
6401            _ = try cg.cmp(lsb, .{ .imm64 = 0 }, Type.u64, .neq);
6402            try cg.addTag(.select);
6403            try cg.addTag(.i32_wrap_i64);
6404        },
6405        else => unreachable,
6406    }
6407
6408    return cg.finishAir(inst, .stack, &.{ty_op.operand});
6409}
6410
6411fn airDbgStmt(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6412    const dbg_stmt = cg.air.instructions.items(.data)[@intFromEnum(inst)].dbg_stmt;
6413    try cg.addInst(.{ .tag = .dbg_line, .data = .{
6414        .payload = try cg.addExtra(Mir.DbgLineColumn{
6415            .line = dbg_stmt.line,
6416            .column = dbg_stmt.column,
6417        }),
6418    } });
6419    return cg.finishAir(inst, .none, &.{});
6420}
6421
6422fn airDbgInlineBlock(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6423    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
6424    const extra = cg.air.extraData(Air.DbgInlineBlock, ty_pl.payload);
6425    // TODO
6426    try cg.lowerBlock(inst, ty_pl.ty.toType(), @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.body_len]));
6427}
6428
6429fn airDbgVar(
6430    cg: *CodeGen,
6431    inst: Air.Inst.Index,
6432    local_tag: link.File.Dwarf.WipNav.LocalVarTag,
6433    is_ptr: bool,
6434) InnerError!void {
6435    _ = is_ptr;
6436    _ = local_tag;
6437    return cg.finishAir(inst, .none, &.{});
6438}
6439
6440fn airTry(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6441    const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
6442    const err_union = try cg.resolveInst(pl_op.operand);
6443    const extra = cg.air.extraData(Air.Try, pl_op.payload);
6444    const body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.body_len]);
6445    const err_union_ty = cg.typeOf(pl_op.operand);
6446    const result = try lowerTry(cg, inst, err_union, body, err_union_ty, false);
6447    return cg.finishAir(inst, result, &.{pl_op.operand});
6448}
6449
6450fn airTryPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6451    const zcu = cg.pt.zcu;
6452    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
6453    const extra = cg.air.extraData(Air.TryPtr, ty_pl.payload);
6454    const err_union_ptr = try cg.resolveInst(extra.data.ptr);
6455    const body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.body_len]);
6456    const err_union_ty = cg.typeOf(extra.data.ptr).childType(zcu);
6457    const result = try lowerTry(cg, inst, err_union_ptr, body, err_union_ty, true);
6458    return cg.finishAir(inst, result, &.{extra.data.ptr});
6459}
6460
6461fn lowerTry(
6462    cg: *CodeGen,
6463    inst: Air.Inst.Index,
6464    err_union: WValue,
6465    body: []const Air.Inst.Index,
6466    err_union_ty: Type,
6467    operand_is_ptr: bool,
6468) InnerError!WValue {
6469    const zcu = cg.pt.zcu;
6470
6471    const pl_ty = err_union_ty.errorUnionPayload(zcu);
6472    const pl_has_bits = pl_ty.hasRuntimeBitsIgnoreComptime(zcu);
6473
6474    if (!err_union_ty.errorUnionSet(zcu).errorSetIsEmpty(zcu)) {
6475        // Block we can jump out of when error is not set
6476        try cg.startBlock(.block, .empty);
6477
6478        // check if the error tag is set for the error union.
6479        try cg.emitWValue(err_union);
6480        if (pl_has_bits or operand_is_ptr) {
6481            const err_offset: u32 = @intCast(errUnionErrorOffset(pl_ty, zcu));
6482            try cg.addMemArg(.i32_load16_u, .{
6483                .offset = err_union.offset() + err_offset,
6484                .alignment = @intCast(Type.anyerror.abiAlignment(zcu).toByteUnits().?),
6485            });
6486        }
6487        try cg.addTag(.i32_eqz);
6488        try cg.addLabel(.br_if, 0); // jump out of block when error is '0'
6489
6490        const liveness = cg.liveness.getCondBr(inst);
6491        try cg.branches.append(cg.gpa, .{});
6492        try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.else_deaths.len + liveness.then_deaths.len);
6493        defer {
6494            var branch = cg.branches.pop().?;
6495            branch.deinit(cg.gpa);
6496        }
6497        try cg.genBody(body);
6498        try cg.endBlock();
6499    }
6500
6501    // if we reach here it means error was not set, and we want the payload
6502    if (!pl_has_bits and !operand_is_ptr) {
6503        return .none;
6504    }
6505
6506    const pl_offset: u32 = @intCast(errUnionPayloadOffset(pl_ty, zcu));
6507    if (operand_is_ptr or isByRef(pl_ty, zcu, cg.target)) {
6508        return buildPointerOffset(cg, err_union, pl_offset, .new);
6509    }
6510    const payload = try cg.load(err_union, pl_ty, pl_offset);
6511    return payload.toLocal(cg, pl_ty);
6512}
6513
6514fn airByteSwap(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6515    const zcu = cg.pt.zcu;
6516    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
6517
6518    const ty = cg.typeOfIndex(inst);
6519    const operand = try cg.resolveInst(ty_op.operand);
6520
6521    if (ty.zigTypeTag(zcu) == .vector) {
6522        return cg.fail("TODO: @byteSwap for vectors", .{});
6523    }
6524    const int_info = ty.intInfo(zcu);
6525    const wasm_bits = toWasmBits(int_info.bits) orelse {
6526        return cg.fail("TODO: @byteSwap for integers with bitsize {d}", .{int_info.bits});
6527    };
6528
6529    // bytes are no-op
6530    if (int_info.bits == 8) {
6531        return cg.finishAir(inst, cg.reuseOperand(ty_op.operand, operand), &.{ty_op.operand});
6532    }
6533
6534    const result = result: {
6535        switch (wasm_bits) {
6536            32 => {
6537                const intrin_ret = try cg.callIntrinsic(
6538                    .__bswapsi2,
6539                    &.{.u32_type},
6540                    Type.u32,
6541                    &.{operand},
6542                );
6543                break :result if (int_info.bits == 32)
6544                    intrin_ret
6545                else
6546                    try cg.binOp(intrin_ret, .{ .imm32 = 32 - int_info.bits }, ty, .shr);
6547            },
6548            64 => {
6549                const intrin_ret = try cg.callIntrinsic(
6550                    .__bswapdi2,
6551                    &.{.u64_type},
6552                    Type.u64,
6553                    &.{operand},
6554                );
6555                break :result if (int_info.bits == 64)
6556                    intrin_ret
6557                else
6558                    try cg.binOp(intrin_ret, .{ .imm64 = 64 - int_info.bits }, ty, .shr);
6559            },
6560            else => return cg.fail("TODO: @byteSwap for integers with bitsize {d}", .{int_info.bits}),
6561        }
6562    };
6563    return cg.finishAir(inst, result, &.{ty_op.operand});
6564}
6565
6566fn airDiv(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6567    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
6568
6569    const ty = cg.typeOfIndex(inst);
6570    const lhs = try cg.resolveInst(bin_op.lhs);
6571    const rhs = try cg.resolveInst(bin_op.rhs);
6572
6573    const result = try cg.binOp(lhs, rhs, ty, .div);
6574    return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs });
6575}
6576
6577fn airDivTrunc(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6578    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
6579
6580    const ty = cg.typeOfIndex(inst);
6581    const lhs = try cg.resolveInst(bin_op.lhs);
6582    const rhs = try cg.resolveInst(bin_op.rhs);
6583
6584    const div_result = try cg.binOp(lhs, rhs, ty, .div);
6585
6586    if (ty.isAnyFloat()) {
6587        const trunc_result = try cg.floatOp(.trunc, ty, &.{div_result});
6588        return cg.finishAir(inst, trunc_result, &.{ bin_op.lhs, bin_op.rhs });
6589    }
6590
6591    return cg.finishAir(inst, div_result, &.{ bin_op.lhs, bin_op.rhs });
6592}
6593
6594fn airDivFloor(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6595    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
6596
6597    const zcu = cg.pt.zcu;
6598    const ty = cg.typeOfIndex(inst);
6599    const lhs = try cg.resolveInst(bin_op.lhs);
6600    const rhs = try cg.resolveInst(bin_op.rhs);
6601
6602    if (ty.isUnsignedInt(zcu)) {
6603        _ = try cg.binOp(lhs, rhs, ty, .div);
6604    } else if (ty.isSignedInt(zcu)) {
6605        const int_bits = ty.intInfo(zcu).bits;
6606        const wasm_bits = toWasmBits(int_bits) orelse {
6607            return cg.fail("TODO: `@divFloor` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits});
6608        };
6609
6610        if (wasm_bits > 64) {
6611            return cg.fail("TODO: `@divFloor` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits});
6612        }
6613
6614        const zero: WValue = switch (wasm_bits) {
6615            32 => .{ .imm32 = 0 },
6616            64 => .{ .imm64 = 0 },
6617            else => unreachable,
6618        };
6619
6620        // tee leaves the value on the stack and stores it in a local.
6621        const quotient = try cg.allocLocal(ty);
6622        _ = try cg.binOp(lhs, rhs, ty, .div);
6623        try cg.addLocal(.local_tee, quotient.local.value);
6624
6625        // select takes a 32 bit value as the condition, so in the 64 bit case we use eqz to narrow
6626        // the 64 bit value we want to use as the condition to 32 bits.
6627        // This also inverts the condition (non 0 => 0, 0 => 1), so we put the adjusted and
6628        // non-adjusted quotients on the stack in the opposite order for 32 vs 64 bits.
6629        if (wasm_bits == 64) {
6630            try cg.emitWValue(quotient);
6631        }
6632
6633        // 0 if the signs of rhs_wasm and lhs_wasm are the same, 1 otherwise.
6634        _ = try cg.binOp(lhs, rhs, ty, .xor);
6635        _ = try cg.cmp(.stack, zero, ty, .lt);
6636
6637        switch (wasm_bits) {
6638            32 => {
6639                try cg.addTag(.i32_sub);
6640                try cg.emitWValue(quotient);
6641            },
6642            64 => {
6643                try cg.addTag(.i64_extend_i32_u);
6644                try cg.addTag(.i64_sub);
6645            },
6646            else => unreachable,
6647        }
6648
6649        _ = try cg.binOp(lhs, rhs, ty, .rem);
6650
6651        if (wasm_bits == 64) {
6652            try cg.addTag(.i64_eqz);
6653        }
6654
6655        try cg.addTag(.select);
6656
6657        // We need to zero the high bits because N bit comparisons consider all 32 or 64 bits, and
6658        // expect all but the lowest N bits to be 0.
6659        // TODO: Should we be zeroing the high bits here or should we be ignoring the high bits
6660        // when performing comparisons?
6661        if (int_bits != wasm_bits) {
6662            _ = try cg.wrapOperand(.stack, ty);
6663        }
6664    } else {
6665        const float_bits = ty.floatBits(cg.target);
6666        if (float_bits > 64) {
6667            return cg.fail("TODO: `@divFloor` for floats with bitsize: {d}", .{float_bits});
6668        }
6669        const is_f16 = float_bits == 16;
6670
6671        const lhs_wasm = if (is_f16) try cg.fpext(lhs, Type.f16, Type.f32) else lhs;
6672        const rhs_wasm = if (is_f16) try cg.fpext(rhs, Type.f16, Type.f32) else rhs;
6673
6674        try cg.emitWValue(lhs_wasm);
6675        try cg.emitWValue(rhs_wasm);
6676
6677        switch (float_bits) {
6678            16, 32 => {
6679                try cg.addTag(.f32_div);
6680                try cg.addTag(.f32_floor);
6681            },
6682            64 => {
6683                try cg.addTag(.f64_div);
6684                try cg.addTag(.f64_floor);
6685            },
6686            else => unreachable,
6687        }
6688
6689        if (is_f16) {
6690            _ = try cg.fptrunc(.stack, Type.f32, Type.f16);
6691        }
6692    }
6693
6694    return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs });
6695}
6696
6697fn airRem(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6698    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
6699
6700    const ty = cg.typeOfIndex(inst);
6701    const lhs = try cg.resolveInst(bin_op.lhs);
6702    const rhs = try cg.resolveInst(bin_op.rhs);
6703
6704    const result = try cg.binOp(lhs, rhs, ty, .rem);
6705
6706    return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs });
6707}
6708
6709/// Remainder after floor division, defined by:
6710/// @divFloor(a, b) * b + @mod(a, b) = a
6711fn airMod(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6712    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
6713
6714    const pt = cg.pt;
6715    const zcu = pt.zcu;
6716    const ty = cg.typeOfIndex(inst);
6717    const lhs = try cg.resolveInst(bin_op.lhs);
6718    const rhs = try cg.resolveInst(bin_op.rhs);
6719
6720    const result = result: {
6721        if (ty.isUnsignedInt(zcu)) {
6722            break :result try cg.binOp(lhs, rhs, ty, .rem);
6723        }
6724        if (ty.isSignedInt(zcu)) {
6725            // The wasm rem instruction gives the remainder after truncating division (rounding towards
6726            // 0), equivalent to @rem.
6727            // We make use of the fact that:
6728            // @mod(a, b) = @rem(@rem(a, b) + b, b)
6729            const int_bits = ty.intInfo(zcu).bits;
6730            const wasm_bits = toWasmBits(int_bits) orelse {
6731                return cg.fail("TODO: `@mod` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits});
6732            };
6733
6734            if (wasm_bits > 64) {
6735                return cg.fail("TODO: `@mod` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits});
6736            }
6737
6738            _ = try cg.binOp(lhs, rhs, ty, .rem);
6739            _ = try cg.binOp(.stack, rhs, ty, .add);
6740            break :result try cg.binOp(.stack, rhs, ty, .rem);
6741        }
6742        if (ty.isAnyFloat()) {
6743            const rem = try cg.binOp(lhs, rhs, ty, .rem);
6744            const add = try cg.binOp(rem, rhs, ty, .add);
6745            break :result try cg.binOp(add, rhs, ty, .rem);
6746        }
6747        return cg.fail("TODO: @mod for {f}", .{ty.fmt(pt)});
6748    };
6749
6750    return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs });
6751}
6752
6753fn airSatMul(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6754    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
6755
6756    const pt = cg.pt;
6757    const zcu = pt.zcu;
6758    const ty = cg.typeOfIndex(inst);
6759    const int_info = ty.intInfo(zcu);
6760    const is_signed = int_info.signedness == .signed;
6761
6762    const lhs = try cg.resolveInst(bin_op.lhs);
6763    const rhs = try cg.resolveInst(bin_op.rhs);
6764    const wasm_bits = toWasmBits(int_info.bits) orelse {
6765        return cg.fail("TODO: mul_sat for {f}", .{ty.fmt(pt)});
6766    };
6767
6768    switch (wasm_bits) {
6769        32 => {
6770            const upcast_ty: Type = if (is_signed) Type.i64 else Type.u64;
6771            const lhs_up = try cg.intcast(lhs, ty, upcast_ty);
6772            const rhs_up = try cg.intcast(rhs, ty, upcast_ty);
6773            var mul_res = try (try cg.binOp(lhs_up, rhs_up, upcast_ty, .mul)).toLocal(cg, upcast_ty);
6774            defer mul_res.free(cg);
6775            if (is_signed) {
6776                const imm_max: WValue = .{ .imm64 = ~@as(u64, 0) >> @intCast(64 - (int_info.bits - 1)) };
6777                try cg.emitWValue(mul_res);
6778                try cg.emitWValue(imm_max);
6779                _ = try cg.cmp(mul_res, imm_max, upcast_ty, .lt);
6780                try cg.addTag(.select);
6781
6782                var tmp = try cg.allocLocal(upcast_ty);
6783                defer tmp.free(cg);
6784                try cg.addLocal(.local_set, tmp.local.value);
6785
6786                const imm_min: WValue = .{ .imm64 = ~@as(u64, 0) << @intCast(int_info.bits - 1) };
6787                try cg.emitWValue(tmp);
6788                try cg.emitWValue(imm_min);
6789                _ = try cg.cmp(tmp, imm_min, upcast_ty, .gt);
6790                try cg.addTag(.select);
6791            } else {
6792                const imm_max: WValue = .{ .imm64 = ~@as(u64, 0) >> @intCast(64 - int_info.bits) };
6793                try cg.emitWValue(mul_res);
6794                try cg.emitWValue(imm_max);
6795                _ = try cg.cmp(mul_res, imm_max, upcast_ty, .lt);
6796                try cg.addTag(.select);
6797            }
6798            try cg.addTag(.i32_wrap_i64);
6799        },
6800        64 => {
6801            if (!(int_info.bits == 64 and int_info.signedness == .signed)) {
6802                return cg.fail("TODO: mul_sat for {f}", .{ty.fmt(pt)});
6803            }
6804            const overflow_ret = try cg.allocStack(Type.i32);
6805            _ = try cg.callIntrinsic(
6806                .__mulodi4,
6807                &[_]InternPool.Index{ .i64_type, .i64_type, .usize_type },
6808                Type.i64,
6809                &.{ lhs, rhs, overflow_ret },
6810            );
6811            const xor = try cg.binOp(lhs, rhs, Type.i64, .xor);
6812            const sign_v = try cg.binOp(xor, .{ .imm64 = 63 }, Type.i64, .shr);
6813            _ = try cg.binOp(sign_v, .{ .imm64 = ~@as(u63, 0) }, Type.i64, .xor);
6814            _ = try cg.load(overflow_ret, Type.i32, 0);
6815            try cg.addTag(.i32_eqz);
6816            try cg.addTag(.select);
6817        },
6818        128 => {
6819            if (!(int_info.bits == 128 and int_info.signedness == .signed)) {
6820                return cg.fail("TODO: mul_sat for {f}", .{ty.fmt(pt)});
6821            }
6822            const overflow_ret = try cg.allocStack(Type.i32);
6823            const ret = try cg.callIntrinsic(
6824                .__muloti4,
6825                &[_]InternPool.Index{ .i128_type, .i128_type, .usize_type },
6826                Type.i128,
6827                &.{ lhs, rhs, overflow_ret },
6828            );
6829            try cg.lowerToStack(ret);
6830            const xor = try cg.binOp(lhs, rhs, Type.i128, .xor);
6831            const sign_v = try cg.binOp(xor, .{ .imm32 = 127 }, Type.i128, .shr);
6832
6833            // xor ~@as(u127, 0)
6834            try cg.emitWValue(sign_v);
6835            const lsb = try cg.load(sign_v, Type.u64, 0);
6836            _ = try cg.binOp(lsb, .{ .imm64 = ~@as(u64, 0) }, Type.u64, .xor);
6837            try cg.store(.stack, .stack, Type.u64, sign_v.offset());
6838            try cg.emitWValue(sign_v);
6839            const msb = try cg.load(sign_v, Type.u64, 8);
6840            _ = try cg.binOp(msb, .{ .imm64 = ~@as(u63, 0) }, Type.u64, .xor);
6841            try cg.store(.stack, .stack, Type.u64, sign_v.offset() + 8);
6842
6843            try cg.lowerToStack(sign_v);
6844            _ = try cg.load(overflow_ret, Type.i32, 0);
6845            try cg.addTag(.i32_eqz);
6846            try cg.addTag(.select);
6847        },
6848        else => unreachable,
6849    }
6850    return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs });
6851}
6852
6853fn airSatBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void {
6854    assert(op == .add or op == .sub);
6855    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
6856
6857    const zcu = cg.pt.zcu;
6858    const ty = cg.typeOfIndex(inst);
6859    const lhs = try cg.resolveInst(bin_op.lhs);
6860    const rhs = try cg.resolveInst(bin_op.rhs);
6861
6862    const int_info = ty.intInfo(zcu);
6863    const is_signed = int_info.signedness == .signed;
6864
6865    if (int_info.bits > 64) {
6866        return cg.fail("TODO: saturating arithmetic for integers with bitsize '{d}'", .{int_info.bits});
6867    }
6868
6869    if (is_signed) {
6870        const result = try signedSat(cg, lhs, rhs, ty, op);
6871        return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs });
6872    }
6873
6874    const wasm_bits = toWasmBits(int_info.bits).?;
6875    var bin_result = try (try cg.binOp(lhs, rhs, ty, op)).toLocal(cg, ty);
6876    defer bin_result.free(cg);
6877    if (wasm_bits != int_info.bits and op == .add) {
6878        const val: u64 = @as(u64, @intCast((@as(u65, 1) << @as(u7, @intCast(int_info.bits))) - 1));
6879        const imm_val: WValue = switch (wasm_bits) {
6880            32 => .{ .imm32 = @intCast(val) },
6881            64 => .{ .imm64 = val },
6882            else => unreachable,
6883        };
6884
6885        try cg.emitWValue(bin_result);
6886        try cg.emitWValue(imm_val);
6887        _ = try cg.cmp(bin_result, imm_val, ty, .lt);
6888    } else {
6889        switch (wasm_bits) {
6890            32 => try cg.addImm32(if (op == .add) std.math.maxInt(u32) else 0),
6891            64 => try cg.addImm64(if (op == .add) std.math.maxInt(u64) else 0),
6892            else => unreachable,
6893        }
6894        try cg.emitWValue(bin_result);
6895        _ = try cg.cmp(bin_result, lhs, ty, if (op == .add) .lt else .gt);
6896    }
6897
6898    try cg.addTag(.select);
6899    return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs });
6900}
6901
6902fn signedSat(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue {
6903    const pt = cg.pt;
6904    const zcu = pt.zcu;
6905    const int_info = ty.intInfo(zcu);
6906    const wasm_bits = toWasmBits(int_info.bits).?;
6907    const is_wasm_bits = wasm_bits == int_info.bits;
6908    const ext_ty = if (!is_wasm_bits) try pt.intType(int_info.signedness, wasm_bits) else ty;
6909
6910    const max_val: u64 = @as(u64, @intCast((@as(u65, 1) << @as(u7, @intCast(int_info.bits - 1))) - 1));
6911    const min_val: i64 = (-@as(i64, @intCast(@as(u63, @intCast(max_val))))) - 1;
6912    const max_wvalue: WValue = switch (wasm_bits) {
6913        32 => .{ .imm32 = @truncate(max_val) },
6914        64 => .{ .imm64 = max_val },
6915        else => unreachable,
6916    };
6917    const min_wvalue: WValue = switch (wasm_bits) {
6918        32 => .{ .imm32 = @bitCast(@as(i32, @truncate(min_val))) },
6919        64 => .{ .imm64 = @bitCast(min_val) },
6920        else => unreachable,
6921    };
6922
6923    var bin_result = try (try cg.binOp(lhs, rhs, ext_ty, op)).toLocal(cg, ext_ty);
6924    if (!is_wasm_bits) {
6925        defer bin_result.free(cg); // not returned in this branch
6926        try cg.emitWValue(bin_result);
6927        try cg.emitWValue(max_wvalue);
6928        _ = try cg.cmp(bin_result, max_wvalue, ext_ty, .lt);
6929        try cg.addTag(.select);
6930        try cg.addLocal(.local_set, bin_result.local.value); // re-use local
6931
6932        try cg.emitWValue(bin_result);
6933        try cg.emitWValue(min_wvalue);
6934        _ = try cg.cmp(bin_result, min_wvalue, ext_ty, .gt);
6935        try cg.addTag(.select);
6936        try cg.addLocal(.local_set, bin_result.local.value); // re-use local
6937        return (try cg.wrapOperand(bin_result, ty)).toLocal(cg, ty);
6938    } else {
6939        const zero: WValue = switch (wasm_bits) {
6940            32 => .{ .imm32 = 0 },
6941            64 => .{ .imm64 = 0 },
6942            else => unreachable,
6943        };
6944        try cg.emitWValue(max_wvalue);
6945        try cg.emitWValue(min_wvalue);
6946        _ = try cg.cmp(bin_result, zero, ty, .lt);
6947        try cg.addTag(.select);
6948        try cg.emitWValue(bin_result);
6949        // leave on stack
6950        const cmp_zero_result = try cg.cmp(rhs, zero, ty, if (op == .add) .lt else .gt);
6951        const cmp_bin_result = try cg.cmp(bin_result, lhs, ty, .lt);
6952        _ = try cg.binOp(cmp_zero_result, cmp_bin_result, Type.u32, .xor); // comparisons always return i32, so provide u32 as type to xor.
6953        try cg.addTag(.select);
6954        try cg.addLocal(.local_set, bin_result.local.value); // re-use local
6955        return bin_result;
6956    }
6957}
6958
6959fn airShlSat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
6960    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
6961
6962    const pt = cg.pt;
6963    const zcu = pt.zcu;
6964
6965    if (cg.typeOf(bin_op.lhs).isVector(zcu) and !cg.typeOf(bin_op.rhs).isVector(zcu)) {
6966        return cg.fail("TODO: implement vector 'shl_sat' with scalar rhs", .{});
6967    }
6968
6969    const ty = cg.typeOfIndex(inst);
6970    const int_info = ty.intInfo(zcu);
6971    const is_signed = int_info.signedness == .signed;
6972    if (int_info.bits > 64) {
6973        return cg.fail("TODO: Saturating shifting left for integers with bitsize '{d}'", .{int_info.bits});
6974    }
6975
6976    const lhs = try cg.resolveInst(bin_op.lhs);
6977    const rhs = try cg.resolveInst(bin_op.rhs);
6978    const wasm_bits = toWasmBits(int_info.bits).?;
6979    const result = try cg.allocLocal(ty);
6980
6981    if (wasm_bits == int_info.bits) {
6982        var shl = try (try cg.binOp(lhs, rhs, ty, .shl)).toLocal(cg, ty);
6983        defer shl.free(cg);
6984        var shr = try (try cg.binOp(shl, rhs, ty, .shr)).toLocal(cg, ty);
6985        defer shr.free(cg);
6986
6987        switch (wasm_bits) {
6988            32 => blk: {
6989                if (!is_signed) {
6990                    try cg.addImm32(std.math.maxInt(u32));
6991                    break :blk;
6992                }
6993                try cg.addImm32(@bitCast(@as(i32, std.math.minInt(i32))));
6994                try cg.addImm32(@bitCast(@as(i32, std.math.maxInt(i32))));
6995                _ = try cg.cmp(lhs, .{ .imm32 = 0 }, ty, .lt);
6996                try cg.addTag(.select);
6997            },
6998            64 => blk: {
6999                if (!is_signed) {
7000                    try cg.addImm64(std.math.maxInt(u64));
7001                    break :blk;
7002                }
7003                try cg.addImm64(@bitCast(@as(i64, std.math.minInt(i64))));
7004                try cg.addImm64(@bitCast(@as(i64, std.math.maxInt(i64))));
7005                _ = try cg.cmp(lhs, .{ .imm64 = 0 }, ty, .lt);
7006                try cg.addTag(.select);
7007            },
7008            else => unreachable,
7009        }
7010        try cg.emitWValue(shl);
7011        _ = try cg.cmp(lhs, shr, ty, .neq);
7012        try cg.addTag(.select);
7013        try cg.addLocal(.local_set, result.local.value);
7014    } else {
7015        const shift_size = wasm_bits - int_info.bits;
7016        const shift_value: WValue = switch (wasm_bits) {
7017            32 => .{ .imm32 = shift_size },
7018            64 => .{ .imm64 = shift_size },
7019            else => unreachable,
7020        };
7021        const ext_ty = try pt.intType(int_info.signedness, wasm_bits);
7022
7023        var shl_res = try (try cg.binOp(lhs, shift_value, ext_ty, .shl)).toLocal(cg, ext_ty);
7024        defer shl_res.free(cg);
7025        var shl = try (try cg.binOp(shl_res, rhs, ext_ty, .shl)).toLocal(cg, ext_ty);
7026        defer shl.free(cg);
7027        var shr = try (try cg.binOp(shl, rhs, ext_ty, .shr)).toLocal(cg, ext_ty);
7028        defer shr.free(cg);
7029
7030        switch (wasm_bits) {
7031            32 => blk: {
7032                if (!is_signed) {
7033                    try cg.addImm32(std.math.maxInt(u32));
7034                    break :blk;
7035                }
7036
7037                try cg.addImm32(@bitCast(@as(i32, std.math.minInt(i32))));
7038                try cg.addImm32(@bitCast(@as(i32, std.math.maxInt(i32))));
7039                _ = try cg.cmp(shl_res, .{ .imm32 = 0 }, ext_ty, .lt);
7040                try cg.addTag(.select);
7041            },
7042            64 => blk: {
7043                if (!is_signed) {
7044                    try cg.addImm64(std.math.maxInt(u64));
7045                    break :blk;
7046                }
7047
7048                try cg.addImm64(@bitCast(@as(i64, std.math.minInt(i64))));
7049                try cg.addImm64(@bitCast(@as(i64, std.math.maxInt(i64))));
7050                _ = try cg.cmp(shl_res, .{ .imm64 = 0 }, ext_ty, .lt);
7051                try cg.addTag(.select);
7052            },
7053            else => unreachable,
7054        }
7055        try cg.emitWValue(shl);
7056        _ = try cg.cmp(shl_res, shr, ext_ty, .neq);
7057        try cg.addTag(.select);
7058        try cg.addLocal(.local_set, result.local.value);
7059        var shift_result = try cg.binOp(result, shift_value, ext_ty, .shr);
7060        if (is_signed) {
7061            shift_result = try cg.wrapOperand(shift_result, ty);
7062        }
7063        try cg.addLocal(.local_set, result.local.value);
7064    }
7065
7066    return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs });
7067}
7068
7069/// Calls a compiler-rt intrinsic by creating an undefined symbol,
7070/// then lowering the arguments and calling the symbol as a function call.
7071/// This function call assumes the C-ABI.
7072/// Asserts arguments are not stack values when the return value is
7073/// passed as the first parameter.
7074/// May leave the return value on the stack.
7075fn callIntrinsic(
7076    cg: *CodeGen,
7077    intrinsic: Mir.Intrinsic,
7078    param_types: []const InternPool.Index,
7079    return_type: Type,
7080    args: []const WValue,
7081) InnerError!WValue {
7082    assert(param_types.len == args.len);
7083    const zcu = cg.pt.zcu;
7084
7085    // Always pass over C-ABI
7086
7087    const want_sret_param = firstParamSRet(.{ .wasm_mvp = .{} }, return_type, zcu, cg.target);
7088    // if we want return as first param, we allocate a pointer to stack,
7089    // and emit it as our first argument
7090    const sret = if (want_sret_param) blk: {
7091        const sret_local = try cg.allocStack(return_type);
7092        try cg.lowerToStack(sret_local);
7093        break :blk sret_local;
7094    } else .none;
7095
7096    // Lower all arguments to the stack before we call our function
7097    for (args, 0..) |arg, arg_i| {
7098        assert(!(want_sret_param and arg == .stack));
7099        assert(Type.fromInterned(param_types[arg_i]).hasRuntimeBitsIgnoreComptime(zcu));
7100        try cg.lowerArg(.{ .wasm_mvp = .{} }, Type.fromInterned(param_types[arg_i]), arg);
7101    }
7102
7103    try cg.addInst(.{ .tag = .call_intrinsic, .data = .{ .intrinsic = intrinsic } });
7104
7105    if (!return_type.hasRuntimeBitsIgnoreComptime(zcu)) {
7106        return .none;
7107    } else if (return_type.isNoReturn(zcu)) {
7108        try cg.addTag(.@"unreachable");
7109        return .none;
7110    } else if (want_sret_param) {
7111        return sret;
7112    } else {
7113        return .stack;
7114    }
7115}
7116
7117fn airTagName(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
7118    const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
7119    const operand = try cg.resolveInst(un_op);
7120    const enum_ty = cg.typeOf(un_op);
7121
7122    const result_ptr = try cg.allocStack(cg.typeOfIndex(inst));
7123    try cg.lowerToStack(result_ptr);
7124    try cg.emitWValue(operand);
7125    try cg.addInst(.{ .tag = .call_tag_name, .data = .{ .ip_index = enum_ty.toIntern() } });
7126
7127    return cg.finishAir(inst, result_ptr, &.{un_op});
7128}
7129
7130fn airErrorSetHasValue(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
7131    const zcu = cg.pt.zcu;
7132    const ip = &zcu.intern_pool;
7133    const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
7134
7135    const operand = try cg.resolveInst(ty_op.operand);
7136    const error_set_ty = ty_op.ty.toType();
7137    const result = try cg.allocLocal(Type.bool);
7138
7139    const names = error_set_ty.errorSetNames(zcu);
7140    var values = try std.array_list.Managed(u32).initCapacity(cg.gpa, names.len);
7141    defer values.deinit();
7142
7143    var lowest: ?u32 = null;
7144    var highest: ?u32 = null;
7145    for (0..names.len) |name_index| {
7146        const err_int = ip.getErrorValueIfExists(names.get(ip)[name_index]).?;
7147        if (lowest) |*l| {
7148            if (err_int < l.*) {
7149                l.* = err_int;
7150            }
7151        } else {
7152            lowest = err_int;
7153        }
7154        if (highest) |*h| {
7155            if (err_int > h.*) {
7156                highest = err_int;
7157            }
7158        } else {
7159            highest = err_int;
7160        }
7161
7162        values.appendAssumeCapacity(err_int);
7163    }
7164
7165    // start block for 'true' branch
7166    try cg.startBlock(.block, .empty);
7167    // start block for 'false' branch
7168    try cg.startBlock(.block, .empty);
7169    // block for the jump table itself
7170    try cg.startBlock(.block, .empty);
7171
7172    // lower operand to determine jump table target
7173    try cg.emitWValue(operand);
7174    try cg.addImm32(lowest.?);
7175    try cg.addTag(.i32_sub);
7176
7177    // Account for default branch so always add '1'
7178    const depth = @as(u32, @intCast(highest.? - lowest.? + 1));
7179    const jump_table: Mir.JumpTable = .{ .length = depth };
7180    const table_extra_index = try cg.addExtra(jump_table);
7181    try cg.addInst(.{ .tag = .br_table, .data = .{ .payload = table_extra_index } });
7182    try cg.mir_extra.ensureUnusedCapacity(cg.gpa, depth);
7183
7184    var value: u32 = lowest.?;
7185    while (value <= highest.?) : (value += 1) {
7186        const idx: u32 = blk: {
7187            for (values.items) |val| {
7188                if (val == value) break :blk 1;
7189            }
7190            break :blk 0;
7191        };
7192        cg.mir_extra.appendAssumeCapacity(idx);
7193    }
7194    try cg.endBlock();
7195
7196    // 'false' branch (i.e. error set does not have value
7197    // ensure we set local to 0 in case the local was re-used.
7198    try cg.addImm32(0);
7199    try cg.addLocal(.local_set, result.local.value);
7200    try cg.addLabel(.br, 1);
7201    try cg.endBlock();
7202
7203    // 'true' branch
7204    try cg.addImm32(1);
7205    try cg.addLocal(.local_set, result.local.value);
7206    try cg.addLabel(.br, 0);
7207    try cg.endBlock();
7208
7209    return cg.finishAir(inst, result, &.{ty_op.operand});
7210}
7211
7212inline fn useAtomicFeature(cg: *const CodeGen) bool {
7213    return cg.target.cpu.has(.wasm, .atomics);
7214}
7215
7216fn airCmpxchg(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
7217    const zcu = cg.pt.zcu;
7218    const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
7219    const extra = cg.air.extraData(Air.Cmpxchg, ty_pl.payload).data;
7220
7221    const ptr_ty = cg.typeOf(extra.ptr);
7222    const ty = ptr_ty.childType(zcu);
7223    const result_ty = cg.typeOfIndex(inst);
7224
7225    const ptr_operand = try cg.resolveInst(extra.ptr);
7226    const expected_val = try cg.resolveInst(extra.expected_value);
7227    const new_val = try cg.resolveInst(extra.new_value);
7228
7229    const cmp_result = try cg.allocLocal(Type.bool);
7230
7231    const ptr_val = if (cg.useAtomicFeature()) val: {
7232        const val_local = try cg.allocLocal(ty);
7233        try cg.emitWValue(ptr_operand);
7234        try cg.lowerToStack(expected_val);
7235        try cg.lowerToStack(new_val);
7236        try cg.addAtomicMemArg(switch (ty.abiSize(zcu)) {
7237            1 => .i32_atomic_rmw8_cmpxchg_u,
7238            2 => .i32_atomic_rmw16_cmpxchg_u,
7239            4 => .i32_atomic_rmw_cmpxchg,
7240            8 => .i32_atomic_rmw_cmpxchg,
7241            else => |size| return cg.fail("TODO: implement `@cmpxchg` for types with abi size '{d}'", .{size}),
7242        }, .{
7243            .offset = ptr_operand.offset(),
7244            .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?),
7245        });
7246        try cg.addLocal(.local_tee, val_local.local.value);
7247        _ = try cg.cmp(.stack, expected_val, ty, .eq);
7248        try cg.addLocal(.local_set, cmp_result.local.value);
7249        break :val val_local;
7250    } else val: {
7251        if (ty.abiSize(zcu) > 8) {
7252            return cg.fail("TODO: Implement `@cmpxchg` for types larger than abi size of 8 bytes", .{});
7253        }
7254        const ptr_val = try WValue.toLocal(try cg.load(ptr_operand, ty, 0), cg, ty);
7255
7256        try cg.lowerToStack(ptr_operand);
7257        try cg.lowerToStack(new_val);
7258        try cg.emitWValue(ptr_val);
7259        _ = try cg.cmp(ptr_val, expected_val, ty, .eq);
7260        try cg.addLocal(.local_tee, cmp_result.local.value);
7261        try cg.addTag(.select);
7262        try cg.store(.stack, .stack, ty, 0);
7263
7264        break :val ptr_val;
7265    };
7266
7267    const result = if (isByRef(result_ty, zcu, cg.target)) val: {
7268        try cg.emitWValue(cmp_result);
7269        try cg.addImm32(~@as(u32, 0));
7270        try cg.addTag(.i32_xor);
7271        try cg.addImm32(1);
7272        try cg.addTag(.i32_and);
7273        const and_result = try WValue.toLocal(.stack, cg, Type.bool);
7274        const result_ptr = try cg.allocStack(result_ty);
7275        try cg.store(result_ptr, and_result, Type.bool, @as(u32, @intCast(ty.abiSize(zcu))));
7276        try cg.store(result_ptr, ptr_val, ty, 0);
7277        break :val result_ptr;
7278    } else val: {
7279        try cg.addImm32(0);
7280        try cg.emitWValue(ptr_val);
7281        try cg.emitWValue(cmp_result);
7282        try cg.addTag(.select);
7283        break :val .stack;
7284    };
7285
7286    return cg.finishAir(inst, result, &.{ extra.ptr, extra.expected_value, extra.new_value });
7287}
7288
7289fn airAtomicLoad(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
7290    const zcu = cg.pt.zcu;
7291    const atomic_load = cg.air.instructions.items(.data)[@intFromEnum(inst)].atomic_load;
7292    const ptr = try cg.resolveInst(atomic_load.ptr);
7293    const ty = cg.typeOfIndex(inst);
7294
7295    if (cg.useAtomicFeature()) {
7296        const tag: std.wasm.AtomicsOpcode = switch (ty.abiSize(zcu)) {
7297            1 => .i32_atomic_load8_u,
7298            2 => .i32_atomic_load16_u,
7299            4 => .i32_atomic_load,
7300            8 => .i64_atomic_load,
7301            else => |size| return cg.fail("TODO: @atomicLoad for types with abi size {d}", .{size}),
7302        };
7303        try cg.emitWValue(ptr);
7304        try cg.addAtomicMemArg(tag, .{
7305            .offset = ptr.offset(),
7306            .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?),
7307        });
7308    } else {
7309        _ = try cg.load(ptr, ty, 0);
7310    }
7311
7312    return cg.finishAir(inst, .stack, &.{atomic_load.ptr});
7313}
7314
7315fn airAtomicRmw(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
7316    const zcu = cg.pt.zcu;
7317    const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
7318    const extra = cg.air.extraData(Air.AtomicRmw, pl_op.payload).data;
7319
7320    const ptr = try cg.resolveInst(pl_op.operand);
7321    const operand = try cg.resolveInst(extra.operand);
7322    const ty = cg.typeOfIndex(inst);
7323    const op: std.builtin.AtomicRmwOp = extra.op();
7324
7325    if (cg.useAtomicFeature()) {
7326        switch (op) {
7327            .Max,
7328            .Min,
7329            .Nand,
7330            => {
7331                const tmp = try cg.load(ptr, ty, 0);
7332                const value = try tmp.toLocal(cg, ty);
7333
7334                // create a loop to cmpxchg the new value
7335                try cg.startBlock(.loop, .empty);
7336
7337                try cg.emitWValue(ptr);
7338                try cg.emitWValue(value);
7339                if (op == .Nand) {
7340                    const wasm_bits = toWasmBits(@intCast(ty.bitSize(zcu))).?;
7341
7342                    const and_res = try cg.binOp(value, operand, ty, .@"and");
7343                    if (wasm_bits == 32)
7344                        try cg.addImm32(~@as(u32, 0))
7345                    else if (wasm_bits == 64)
7346                        try cg.addImm64(~@as(u64, 0))
7347                    else
7348                        return cg.fail("TODO: `@atomicRmw` with operator `Nand` for types larger than 64 bits", .{});
7349                    _ = try cg.binOp(and_res, .stack, ty, .xor);
7350                } else {
7351                    try cg.emitWValue(value);
7352                    try cg.emitWValue(operand);
7353                    _ = try cg.cmp(value, operand, ty, if (op == .Max) .gt else .lt);
7354                    try cg.addTag(.select);
7355                }
7356                try cg.addAtomicMemArg(
7357                    switch (ty.abiSize(zcu)) {
7358                        1 => .i32_atomic_rmw8_cmpxchg_u,
7359                        2 => .i32_atomic_rmw16_cmpxchg_u,
7360                        4 => .i32_atomic_rmw_cmpxchg,
7361                        8 => .i64_atomic_rmw_cmpxchg,
7362                        else => return cg.fail("TODO: implement `@atomicRmw` with operation `{s}` for types larger than 64 bits", .{@tagName(op)}),
7363                    },
7364                    .{
7365                        .offset = ptr.offset(),
7366                        .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?),
7367                    },
7368                );
7369                const select_res = try cg.allocLocal(ty);
7370                try cg.addLocal(.local_tee, select_res.local.value);
7371                _ = try cg.cmp(.stack, value, ty, .neq); // leave on stack so we can use it for br_if
7372
7373                try cg.emitWValue(select_res);
7374                try cg.addLocal(.local_set, value.local.value);
7375
7376                try cg.addLabel(.br_if, 0);
7377                try cg.endBlock();
7378                return cg.finishAir(inst, value, &.{ pl_op.operand, extra.operand });
7379            },
7380
7381            // the other operations have their own instructions for Wasm.
7382            else => {
7383                try cg.emitWValue(ptr);
7384                try cg.emitWValue(operand);
7385                const tag: std.wasm.AtomicsOpcode = switch (ty.abiSize(zcu)) {
7386                    1 => switch (op) {
7387                        .Xchg => .i32_atomic_rmw8_xchg_u,
7388                        .Add => .i32_atomic_rmw8_add_u,
7389                        .Sub => .i32_atomic_rmw8_sub_u,
7390                        .And => .i32_atomic_rmw8_and_u,
7391                        .Or => .i32_atomic_rmw8_or_u,
7392                        .Xor => .i32_atomic_rmw8_xor_u,
7393                        else => unreachable,
7394                    },
7395                    2 => switch (op) {
7396                        .Xchg => .i32_atomic_rmw16_xchg_u,
7397                        .Add => .i32_atomic_rmw16_add_u,
7398                        .Sub => .i32_atomic_rmw16_sub_u,
7399                        .And => .i32_atomic_rmw16_and_u,
7400                        .Or => .i32_atomic_rmw16_or_u,
7401                        .Xor => .i32_atomic_rmw16_xor_u,
7402                        else => unreachable,
7403                    },
7404                    4 => switch (op) {
7405                        .Xchg => .i32_atomic_rmw_xchg,
7406                        .Add => .i32_atomic_rmw_add,
7407                        .Sub => .i32_atomic_rmw_sub,
7408                        .And => .i32_atomic_rmw_and,
7409                        .Or => .i32_atomic_rmw_or,
7410                        .Xor => .i32_atomic_rmw_xor,
7411                        else => unreachable,
7412                    },
7413                    8 => switch (op) {
7414                        .Xchg => .i64_atomic_rmw_xchg,
7415                        .Add => .i64_atomic_rmw_add,
7416                        .Sub => .i64_atomic_rmw_sub,
7417                        .And => .i64_atomic_rmw_and,
7418                        .Or => .i64_atomic_rmw_or,
7419                        .Xor => .i64_atomic_rmw_xor,
7420                        else => unreachable,
7421                    },
7422                    else => |size| return cg.fail("TODO: Implement `@atomicRmw` for types with abi size {d}", .{size}),
7423                };
7424                try cg.addAtomicMemArg(tag, .{
7425                    .offset = ptr.offset(),
7426                    .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?),
7427                });
7428                return cg.finishAir(inst, .stack, &.{ pl_op.operand, extra.operand });
7429            },
7430        }
7431    } else {
7432        const loaded = try cg.load(ptr, ty, 0);
7433        const result = try loaded.toLocal(cg, ty);
7434
7435        switch (op) {
7436            .Xchg => {
7437                try cg.store(ptr, operand, ty, 0);
7438            },
7439            .Add,
7440            .Sub,
7441            .And,
7442            .Or,
7443            .Xor,
7444            => {
7445                try cg.emitWValue(ptr);
7446                _ = try cg.binOp(result, operand, ty, switch (op) {
7447                    .Add => .add,
7448                    .Sub => .sub,
7449                    .And => .@"and",
7450                    .Or => .@"or",
7451                    .Xor => .xor,
7452                    else => unreachable,
7453                });
7454                if (ty.isInt(zcu) and (op == .Add or op == .Sub)) {
7455                    _ = try cg.wrapOperand(.stack, ty);
7456                }
7457                try cg.store(.stack, .stack, ty, ptr.offset());
7458            },
7459            .Max,
7460            .Min,
7461            => {
7462                try cg.emitWValue(ptr);
7463                try cg.emitWValue(result);
7464                try cg.emitWValue(operand);
7465                _ = try cg.cmp(result, operand, ty, if (op == .Max) .gt else .lt);
7466                try cg.addTag(.select);
7467                try cg.store(.stack, .stack, ty, ptr.offset());
7468            },
7469            .Nand => {
7470                const wasm_bits = toWasmBits(@intCast(ty.bitSize(zcu))).?;
7471
7472                try cg.emitWValue(ptr);
7473                const and_res = try cg.binOp(result, operand, ty, .@"and");
7474                if (wasm_bits == 32)
7475                    try cg.addImm32(~@as(u32, 0))
7476                else if (wasm_bits == 64)
7477                    try cg.addImm64(~@as(u64, 0))
7478                else
7479                    return cg.fail("TODO: `@atomicRmw` with operator `Nand` for types larger than 64 bits", .{});
7480                _ = try cg.binOp(and_res, .stack, ty, .xor);
7481                try cg.store(.stack, .stack, ty, ptr.offset());
7482            },
7483        }
7484
7485        return cg.finishAir(inst, result, &.{ pl_op.operand, extra.operand });
7486    }
7487}
7488
7489fn airAtomicStore(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
7490    const zcu = cg.pt.zcu;
7491    const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
7492
7493    const ptr = try cg.resolveInst(bin_op.lhs);
7494    const operand = try cg.resolveInst(bin_op.rhs);
7495    const ptr_ty = cg.typeOf(bin_op.lhs);
7496    const ty = ptr_ty.childType(zcu);
7497
7498    if (cg.useAtomicFeature()) {
7499        const tag: std.wasm.AtomicsOpcode = switch (ty.abiSize(zcu)) {
7500            1 => .i32_atomic_store8,
7501            2 => .i32_atomic_store16,
7502            4 => .i32_atomic_store,
7503            8 => .i64_atomic_store,
7504            else => |size| return cg.fail("TODO: @atomicLoad for types with abi size {d}", .{size}),
7505        };
7506        try cg.emitWValue(ptr);
7507        try cg.lowerToStack(operand);
7508        try cg.addAtomicMemArg(tag, .{
7509            .offset = ptr.offset(),
7510            .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?),
7511        });
7512    } else {
7513        try cg.store(ptr, operand, ty, 0);
7514    }
7515
7516    return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs });
7517}
7518
7519fn airFrameAddress(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
7520    if (cg.initial_stack_value == .none) {
7521        try cg.initializeStack();
7522    }
7523    try cg.emitWValue(cg.bottom_stack_value);
7524    return cg.finishAir(inst, .stack, &.{});
7525}
7526
7527fn airRuntimeNavPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
7528    const ty_nav = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_nav;
7529    const mod = cg.pt.zcu.navFileScope(cg.owner_nav).mod.?;
7530    if (mod.single_threaded) {
7531        const result: WValue = .{ .nav_ref = .{
7532            .nav_index = ty_nav.nav,
7533            .offset = 0,
7534        } };
7535        return cg.finishAir(inst, result, &.{});
7536    }
7537    return cg.fail("TODO: thread-local variables", .{});
7538}
7539
7540fn typeOf(cg: *CodeGen, inst: Air.Inst.Ref) Type {
7541    const zcu = cg.pt.zcu;
7542    return cg.air.typeOf(inst, &zcu.intern_pool);
7543}
7544
7545fn typeOfIndex(cg: *CodeGen, inst: Air.Inst.Index) Type {
7546    const zcu = cg.pt.zcu;
7547    return cg.air.typeOfIndex(inst, &zcu.intern_pool);
7548}
7549
7550fn floatCmpIntrinsic(op: std.math.CompareOperator, bits: u16) Mir.Intrinsic {
7551    return switch (op) {
7552        .lt => switch (bits) {
7553            80 => .__ltxf2,
7554            128 => .__lttf2,
7555            else => unreachable,
7556        },
7557        .lte => switch (bits) {
7558            80 => .__lexf2,
7559            128 => .__letf2,
7560            else => unreachable,
7561        },
7562        .eq => switch (bits) {
7563            80 => .__eqxf2,
7564            128 => .__eqtf2,
7565            else => unreachable,
7566        },
7567        .neq => switch (bits) {
7568            80 => .__nexf2,
7569            128 => .__netf2,
7570            else => unreachable,
7571        },
7572        .gte => switch (bits) {
7573            80 => .__gexf2,
7574            128 => .__getf2,
7575            else => unreachable,
7576        },
7577        .gt => switch (bits) {
7578            80 => .__gtxf2,
7579            128 => .__gttf2,
7580            else => unreachable,
7581        },
7582    };
7583}