Commit 771523c675

Jacob Young <jacobly0@users.noreply.github.com>
2025-07-27 12:50:20
aarch64: implement var args
1 parent da408bd
Changed files (4)
src
test
behavior
src/codegen/aarch64/encoding.zig
@@ -10089,26 +10089,26 @@ pub const Instruction = packed union {
                             },
                         } } } };
                     },
-                    .signed_offset => |signed_offset| {
-                        assert(signed_offset.base.format.integer == .doubleword);
-                        return .{ .load_store = .{ .register_pair_offset = .{ .integer = .{
+                    .pre_index => |pre_index| {
+                        assert(pre_index.base.format.integer == .doubleword);
+                        return .{ .load_store = .{ .register_pair_pre_indexed = .{ .integer = .{
                             .ldp = .{
                                 .Rt = t1.alias.encode(.{}),
-                                .Rn = signed_offset.base.alias.encode(.{ .sp = true }),
+                                .Rn = pre_index.base.alias.encode(.{ .sp = true }),
                                 .Rt2 = t2.alias.encode(.{}),
-                                .imm7 = @intCast(@shrExact(signed_offset.offset, @as(u2, 2) + @intFromEnum(sf))),
+                                .imm7 = @intCast(@shrExact(pre_index.index, @as(u2, 2) + @intFromEnum(sf))),
                                 .sf = sf,
                             },
                         } } } };
                     },
-                    .pre_index => |pre_index| {
-                        assert(pre_index.base.format.integer == .doubleword);
-                        return .{ .load_store = .{ .register_pair_pre_indexed = .{ .integer = .{
+                    .signed_offset => |signed_offset| {
+                        assert(signed_offset.base.format.integer == .doubleword);
+                        return .{ .load_store = .{ .register_pair_offset = .{ .integer = .{
                             .ldp = .{
                                 .Rt = t1.alias.encode(.{}),
-                                .Rn = pre_index.base.alias.encode(.{ .sp = true }),
+                                .Rn = signed_offset.base.alias.encode(.{ .sp = true }),
                                 .Rt2 = t2.alias.encode(.{}),
-                                .imm7 = @intCast(@shrExact(pre_index.index, @as(u2, 2) + @intFromEnum(sf))),
+                                .imm7 = @intCast(@shrExact(signed_offset.offset, @as(u2, 2) + @intFromEnum(sf))),
                                 .sf = sf,
                             },
                         } } } };
@@ -11473,26 +11473,26 @@ pub const Instruction = packed union {
                             },
                         } } } };
                     },
-                    .signed_offset => |signed_offset| {
-                        assert(signed_offset.base.format.integer == .doubleword);
-                        return .{ .load_store = .{ .register_pair_offset = .{ .integer = .{
+                    .pre_index => |pre_index| {
+                        assert(pre_index.base.format.integer == .doubleword);
+                        return .{ .load_store = .{ .register_pair_pre_indexed = .{ .integer = .{
                             .stp = .{
                                 .Rt = t1.alias.encode(.{}),
-                                .Rn = signed_offset.base.alias.encode(.{ .sp = true }),
+                                .Rn = pre_index.base.alias.encode(.{ .sp = true }),
                                 .Rt2 = t2.alias.encode(.{}),
-                                .imm7 = @intCast(@shrExact(signed_offset.offset, @as(u2, 2) + @intFromEnum(sf))),
+                                .imm7 = @intCast(@shrExact(pre_index.index, @as(u2, 2) + @intFromEnum(sf))),
                                 .sf = sf,
                             },
                         } } } };
                     },
-                    .pre_index => |pre_index| {
-                        assert(pre_index.base.format.integer == .doubleword);
-                        return .{ .load_store = .{ .register_pair_pre_indexed = .{ .integer = .{
+                    .signed_offset => |signed_offset| {
+                        assert(signed_offset.base.format.integer == .doubleword);
+                        return .{ .load_store = .{ .register_pair_offset = .{ .integer = .{
                             .stp = .{
                                 .Rt = t1.alias.encode(.{}),
-                                .Rn = pre_index.base.alias.encode(.{ .sp = true }),
+                                .Rn = signed_offset.base.alias.encode(.{ .sp = true }),
                                 .Rt2 = t2.alias.encode(.{}),
-                                .imm7 = @intCast(@shrExact(pre_index.index, @as(u2, 2) + @intFromEnum(sf))),
+                                .imm7 = @intCast(@shrExact(signed_offset.offset, @as(u2, 2) + @intFromEnum(sf))),
                                 .sf = sf,
                             },
                         } } } };
src/codegen/aarch64/Select.zig
@@ -28,10 +28,15 @@ literal_relocs: std.ArrayListUnmanaged(codegen.aarch64.Mir.Reloc.Literal),
 
 // Stack Frame
 returns: bool,
-va_list: struct {
-    __stack: Value.Indirect,
-    __gr_top: Value.Indirect,
-    __vr_top: Value.Indirect,
+va_list: union(enum) { // variadic-argument state; the active tag is switched on when lowering calls, c_va_start and c_va_arg
+    other: Value.Indirect, // c_va_start computes `base + offset` of this into the single result register
+    sysv: struct { // c_va_start defines the result's fields at byte offsets 0, 8, 16, 24, 28 from these, in declaration order
+        __stack: Value.Indirect, // `base + offset` becomes the 8-byte field at offset 0
+        __gr_top: Value.Indirect, // `base + offset` becomes the 8-byte field at offset 8
+        __vr_top: Value.Indirect, // `base + offset` becomes the 8-byte field at offset 16
+        __gr_offs: i32, // bit-cast to u32 and moved into the 4-byte field at offset 24
+        __vr_offs: i32, // bit-cast to u32 and moved into the 4-byte field at offset 28
+    },
 },
 stack_size: u24,
 stack_align: InternPool.Alignment,
@@ -408,13 +413,7 @@ pub fn analyze(isel: *Select, air_body: []const Air.Inst.Index) !void {
 
             air_body_index += 1;
         },
-        .breakpoint,
-        .dbg_stmt,
-        .dbg_empty_stmt,
-        .dbg_var_ptr,
-        .dbg_var_val,
-        .dbg_arg_inline,
-        => {
+        .breakpoint, .dbg_stmt, .dbg_empty_stmt, .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline, .c_va_end => {
             air_body_index += 1;
             air_inst_index = air_body[air_body_index];
             continue :air_tag air_tags[@intFromEnum(air_inst_index)];
@@ -428,23 +427,43 @@ pub fn analyze(isel: *Select, air_body: []const Air.Inst.Index) !void {
             const extra = isel.air.extraData(Air.Call, pl_op.payload);
             const args: []const Air.Inst.Ref = @ptrCast(isel.air.extra.items[extra.end..][0..extra.data.args_len]);
             isel.saved_registers.insert(.lr);
+            const callee_ty = isel.air.typeOf(pl_op.operand, ip); // type of the call operand (the callee)
+            const func_info = switch (ip.indexToKey(callee_ty.toIntern())) { // function-type info, used below to tell declared parameters from extra arguments
+                else => unreachable, // the callee is asserted to be a function or a pointer
+                .func_type => |func_type| func_type, // callee is a function value
+                .ptr_type => |ptr_type| ip.indexToKey(ptr_type.child).func_type, // callee is a pointer: take the function type it points to
+            };
 
             try isel.analyzeUse(pl_op.operand);
             var param_it: CallAbiIterator = .init;
-            for (args) |arg| {
+            for (args, 0..) |arg, arg_index| {
                 const restore_values_len = isel.values.items.len;
                 defer isel.values.shrinkRetainingCapacity(restore_values_len);
-                const param_vi = try param_it.param(isel, isel.air.typeOf(arg, ip)) orelse continue;
-                const param_parent = param_vi.parent(isel);
-                switch (switch (param_parent) {
-                    .unallocated, .stack_slot => param_parent,
+                const param_vi = param_vi: { // classify this argument; a null result skips the argument (`orelse continue`)
+                    const param_ty = isel.air.typeOf(arg, ip);
+                    if (arg_index >= func_info.param_types.len) { // argument beyond the declared parameter list
+                        assert(func_info.is_var_args); // extra arguments are only expected for variadic callees
+                        switch (isel.va_list) {
+                            .other => break :param_vi try param_it.nonSysvVarArg(isel, param_ty), // non-sysv: extra arguments use the dedicated classifier
+                            .sysv => {}, // sysv: fall through to the regular classifier below
+                        }
+                    }
+                    break :param_vi try param_it.param(isel, param_ty);
+                } orelse continue;
+                defer param_vi.deref(isel); // drop the reference once this argument has been analyzed
+                const passed_vi = switch (param_vi.parent(isel)) {
+                    .unallocated, .stack_slot => param_vi,
                     .value, .constant => unreachable,
-                    .address => |address_vi| address_vi.parent(isel),
-                }) {
+                    .address => |address_vi| address_vi,
+                };
+                switch (passed_vi.parent(isel)) {
                     .unallocated => {},
                     .stack_slot => |stack_slot| {
                         assert(stack_slot.base == .sp);
-                        isel.stack_size = @max(isel.stack_size, stack_slot.offset);
+                        isel.stack_size = @max(
+                            isel.stack_size,
+                            stack_slot.offset + @as(u24, @intCast(passed_vi.size(isel))),
+                        );
                     },
                     .value, .constant, .address => unreachable,
                 }
@@ -802,7 +821,7 @@ pub fn analyze(isel: *Select, air_body: []const Air.Inst.Index) !void {
             air_inst_index = air_body[air_body_index];
             continue :air_tag air_tags[@intFromEnum(air_inst_index)];
         },
-        .set_err_return_trace, .c_va_end => {
+        .set_err_return_trace => {
             const un_op = air_data[@intFromEnum(air_inst_index)].un_op;
 
             try isel.analyzeUse(un_op);
@@ -2474,6 +2493,7 @@ pub fn body(isel: *Select, air_body: []const Air.Inst.Index) error{ OutOfMemory,
             }
             if (air.next()) |next_air_tag| continue :air_tag next_air_tag;
         },
+        .inferred_alloc, .inferred_alloc_comptime => unreachable,
         .assembly => {
             const ty_pl = air.data(air.inst_index).ty_pl;
             const extra = isel.air.extraData(Air.Asm, ty_pl.payload);
@@ -3389,6 +3409,12 @@ pub fn body(isel: *Select, air_body: []const Air.Inst.Index) error{ OutOfMemory,
             const pl_op = air.data(air.inst_index).pl_op;
             const extra = isel.air.extraData(Air.Call, pl_op.payload);
             const args: []const Air.Inst.Ref = @ptrCast(isel.air.extra.items[extra.end..][0..extra.data.args_len]);
+            const callee_ty = isel.air.typeOf(pl_op.operand, ip); // type of the call operand (the callee)
+            const func_info = switch (ip.indexToKey(callee_ty.toIntern())) { // function-type info, used below to tell declared parameters from extra arguments
+                else => unreachable, // the callee is asserted to be a function or a pointer
+                .func_type => |func_type| func_type, // callee is a function value
+                .ptr_type => |ptr_type| ip.indexToKey(ptr_type.child).func_type, // callee is a pointer: take the function type it points to
+            };
 
             try call.prepareReturn(isel);
             const maybe_def_ret_vi = isel.live_values.fetchRemove(air.inst_index);
@@ -3455,41 +3481,50 @@ pub fn body(isel: *Select, air_body: []const Air.Inst.Index) error{ OutOfMemory,
                 ret_addr_vi.hint(isel).?,
             );
             var param_it: CallAbiIterator = .init;
-            for (args) |arg| {
-                const param_vi = try param_it.param(isel, isel.air.typeOf(arg, ip)) orelse continue;
+            for (args, 0..) |arg, arg_index| {
+                const param_ty = isel.air.typeOf(arg, ip); // also used further down when storing the argument to a stack slot
+                const param_vi = param_vi: { // classify this argument; a null result skips the argument (`orelse continue`)
+                    if (arg_index >= func_info.param_types.len) { // argument beyond the declared parameter list
+                        assert(func_info.is_var_args); // extra arguments are only expected for variadic callees
+                        switch (isel.va_list) {
+                            .other => break :param_vi try param_it.nonSysvVarArg(isel, param_ty), // non-sysv: extra arguments use the dedicated classifier
+                            .sysv => {}, // sysv: fall through to the regular classifier below
+                        }
+                    }
+                    break :param_vi try param_it.param(isel, param_ty);
+                } orelse continue;
                 defer param_vi.deref(isel);
                 const arg_vi = try isel.use(arg);
-                const passed_vi = switch (param_vi.parent(isel)) {
-                    .unallocated, .stack_slot => param_vi,
-                    .value, .constant => unreachable,
-                    .address => |address_vi| {
-                        try call.paramAddress(isel, arg_vi, address_vi.hint(isel).?);
-                        continue;
+                switch (param_vi.parent(isel)) {
+                    .unallocated => if (param_vi.hint(isel)) |param_ra| {
+                        try call.paramLiveOut(isel, arg_vi, param_ra);
+                    } else {
+                        var param_part_it = param_vi.parts(isel);
+                        var arg_part_it = arg_vi.parts(isel);
+                        if (arg_part_it.only()) |_| {
+                            try isel.values.ensureUnusedCapacity(gpa, param_part_it.remaining);
+                            arg_vi.setParts(isel, param_part_it.remaining);
+                            while (param_part_it.next()) |param_part_vi| _ = arg_vi.addPart(
+                                isel,
+                                param_part_vi.get(isel).offset_from_parent,
+                                param_part_vi.size(isel),
+                            );
+                            param_part_it = param_vi.parts(isel);
+                            arg_part_it = arg_vi.parts(isel);
+                        }
+                        while (param_part_it.next()) |param_part_vi| {
+                            const arg_part_vi = arg_part_it.next().?;
+                            assert(arg_part_vi.get(isel).offset_from_parent ==
+                                param_part_vi.get(isel).offset_from_parent);
+                            assert(arg_part_vi.size(isel) == param_part_vi.size(isel));
+                            try call.paramLiveOut(isel, arg_part_vi, param_part_vi.hint(isel).?);
+                        }
                     },
-                };
-                if (passed_vi.hint(isel)) |param_ra| {
-                    try call.paramLiveOut(isel, arg_vi, param_ra);
-                } else {
-                    var param_part_it = passed_vi.parts(isel);
-                    var arg_part_it = arg_vi.parts(isel);
-                    if (arg_part_it.only()) |_| {
-                        try isel.values.ensureUnusedCapacity(gpa, param_part_it.remaining);
-                        arg_vi.setParts(isel, param_part_it.remaining);
-                        while (param_part_it.next()) |param_part_vi| _ = arg_vi.addPart(
-                            isel,
-                            param_part_vi.get(isel).offset_from_parent,
-                            param_part_vi.size(isel),
-                        );
-                        param_part_it = passed_vi.parts(isel);
-                        arg_part_it = arg_vi.parts(isel);
-                    }
-                    while (param_part_it.next()) |param_part_vi| {
-                        const arg_part_vi = arg_part_it.next().?;
-                        assert(arg_part_vi.get(isel).offset_from_parent ==
-                            param_part_vi.get(isel).offset_from_parent);
-                        assert(arg_part_vi.size(isel) == param_part_vi.size(isel));
-                        try call.paramLiveOut(isel, arg_part_vi, param_part_vi.hint(isel).?);
-                    }
+                    .stack_slot => |stack_slot| try arg_vi.store(isel, param_ty, stack_slot.base, .{
+                        .offset = @intCast(stack_slot.offset),
+                    }),
+                    .value, .constant => unreachable,
+                    .address => |address_vi| try call.paramAddress(isel, arg_vi, address_vi.hint(isel).?),
                 }
             }
             try call.finishParams(isel);
@@ -4828,9 +4863,7 @@ pub fn body(isel: *Select, air_body: []const Air.Inst.Index) error{ OutOfMemory,
 
             if (air.next()) |next_air_tag| continue :air_tag next_air_tag;
         },
-        .dbg_stmt => {
-            if (air.next()) |next_air_tag| continue :air_tag next_air_tag;
-        },
+        .dbg_stmt => if (air.next()) |next_air_tag| continue :air_tag next_air_tag,
         .dbg_empty_stmt => {
             try isel.emit(.nop());
             if (air.next()) |next_air_tag| continue :air_tag next_air_tag;
@@ -7079,6 +7112,7 @@ pub fn body(isel: *Select, air_body: []const Air.Inst.Index) error{ OutOfMemory,
             }
             if (air.next()) |next_air_tag| continue :air_tag next_air_tag;
         },
+        .wasm_memory_size, .wasm_memory_grow => unreachable,
         .cmp_lt_errors_len => {
             if (isel.live_values.fetchRemove(air.inst_index)) |is_vi| unused: {
                 defer is_vi.value.deref(isel);
@@ -7135,16 +7169,266 @@ pub fn body(isel: *Select, air_body: []const Air.Inst.Index) error{ OutOfMemory,
             }
             if (air.next()) |next_air_tag| continue :air_tag next_air_tag;
         },
-        .inferred_alloc,
-        .inferred_alloc_comptime,
-        .int_from_float_safe,
-        .int_from_float_optimized_safe,
-        .wasm_memory_size,
-        .wasm_memory_grow,
-        .work_item_id,
-        .work_group_size,
-        .work_group_id,
-        => unreachable,
+        .c_va_arg => { // lowers AIR `c_va_arg`: yields a value of type `ty_op.ty` read through the va_list pointer in `ty_op.operand`
+            const maybe_arg_vi = isel.live_values.fetchRemove(air.inst_index); // null when this instruction has no entry in live_values
+            defer if (maybe_arg_vi) |arg_vi| arg_vi.value.deref(isel);
+            const ty_op = air.data(air.inst_index).ty_op;
+            const ty = ty_op.ty.toType();
+            var param_it: CallAbiIterator = .init; // fresh iterator: `ty` is classified on its own, independent of any call site
+            const param_vi = try param_it.param(isel, ty);
+            defer param_vi.?.deref(isel);
+            const passed_vi = switch (param_vi.?.parent(isel)) { // the value that is actually transferred
+                .unallocated => param_vi.?,
+                .stack_slot, .value, .constant => unreachable,
+                .address => |address_vi| address_vi, // parameter has an address parent: work with that address value instead
+            };
+            const passed_size: u5 = @intCast(passed_vi.alignment(isel).forward(passed_vi.size(isel))); // size rounded up to the alignment; must fit in 5 bits
+            const passed_is_vector = passed_vi.isVector(isel); // selects between the two offset pairs (16/28 vs 8/24) in the sysv path
+
+            const va_list_ptr_vi = try isel.use(ty_op.operand);
+            const va_list_ptr_mat = try va_list_ptr_vi.matReg(isel);
+            const offs_ra = try isel.allocIntReg(); // scratch register for the 32-bit offset field
+            defer isel.freeReg(offs_ra);
+            const stack_ra = try isel.allocIntReg(); // scratch register for the pointer the argument is loaded from
+            defer isel.freeReg(stack_ra);
+
+            var part_vis: [2]Value.Index = undefined; // at most two parts are handled (see `else => unreachable` below)
+            var arg_part_ras: [2]?Register.Alias = @splat(null); // destination register per part; stays null when no register is defined
+            const parts_len = parts_len: {
+                var parts_len: u2 = 0;
+                var part_it = passed_vi.parts(isel);
+                while (part_it.next()) |part_vi| : (parts_len += 1) {
+                    part_vis[parts_len] = part_vi;
+                    const arg_vi = maybe_arg_vi orelse continue; // no result value: record the part but define no register
+                    const part_offset, const part_size = part_vi.position(isel);
+                    var arg_part_it = arg_vi.value.field(ty, part_offset, part_size);
+                    const arg_part_vi = try arg_part_it.only(isel);
+                    arg_part_ras[parts_len] = try arg_part_vi.?.defReg(isel);
+                }
+                break :parts_len parts_len;
+            };
+
+            const done_label = isel.instructions.items.len; // NOTE(review): branch offsets below are `(len + 1 - label) << 2`, which suggests instructions are emitted in reverse execution order — confirm
+            try isel.emit(.str(stack_ra.x(), .{ .unsigned_offset = .{ // store stack_ra to offset 0 of the va_list
+                .base = va_list_ptr_mat.ra.x(),
+                .offset = 0,
+            } }));
+            try isel.emit(switch (parts_len) { // load the part(s) from [stack_ra] with post-index by passed_size, or only bump stack_ra when no register was defined
+                else => unreachable,
+                1 => if (arg_part_ras[0]) |arg_part_ra| switch (part_vis[0].size(isel)) {
+                    else => unreachable,
+                    1 => if (arg_part_ra.isVector()) .ldr(arg_part_ra.b(), .{ .post_index = .{
+                        .base = stack_ra.x(),
+                        .index = passed_size,
+                    } }) else switch (part_vis[0].signedness(isel)) {
+                        .signed => .ldrsb(arg_part_ra.w(), .{ .post_index = .{
+                            .base = stack_ra.x(),
+                            .index = passed_size,
+                        } }),
+                        .unsigned => .ldrb(arg_part_ra.w(), .{ .post_index = .{
+                            .base = stack_ra.x(),
+                            .index = passed_size,
+                        } }),
+                    },
+                    2 => if (arg_part_ra.isVector()) .ldr(arg_part_ra.h(), .{ .post_index = .{
+                        .base = stack_ra.x(),
+                        .index = passed_size,
+                    } }) else switch (part_vis[0].signedness(isel)) {
+                        .signed => .ldrsh(arg_part_ra.w(), .{ .post_index = .{
+                            .base = stack_ra.x(),
+                            .index = passed_size,
+                        } }),
+                        .unsigned => .ldrh(arg_part_ra.w(), .{ .post_index = .{
+                            .base = stack_ra.x(),
+                            .index = passed_size,
+                        } }),
+                    },
+                    4 => .ldr(if (arg_part_ra.isVector()) arg_part_ra.s() else arg_part_ra.w(), .{ .post_index = .{
+                        .base = stack_ra.x(),
+                        .index = passed_size,
+                    } }),
+                    8 => .ldr(if (arg_part_ra.isVector()) arg_part_ra.d() else arg_part_ra.x(), .{ .post_index = .{
+                        .base = stack_ra.x(),
+                        .index = passed_size,
+                    } }),
+                    16 => .ldr(arg_part_ra.q(), .{ .post_index = .{
+                        .base = stack_ra.x(),
+                        .index = passed_size,
+                    } }),
+                } else .add(stack_ra.x(), stack_ra.x(), .{ .immediate = passed_size }),
+                2 => if (arg_part_ras[0] != null or arg_part_ras[1] != null) .ldp( // a part without a register is loaded into zr
+                    @as(Register.Alias, arg_part_ras[0] orelse .zr).x(),
+                    @as(Register.Alias, arg_part_ras[1] orelse .zr).x(),
+                    .{ .post_index = .{
+                        .base = stack_ra.x(),
+                        .index = passed_size,
+                    } },
+                ) else .add(stack_ra.x(), stack_ra.x(), .{ .immediate = passed_size }),
+            });
+            try isel.emit(.ldr(stack_ra.x(), .{ .unsigned_offset = .{ // load stack_ra from offset 0 of the va_list
+                .base = va_list_ptr_mat.ra.x(),
+                .offset = 0,
+            } }));
+            switch (isel.va_list) {
+                .other => {}, // nothing further: only the offset-0 sequence above is emitted
+                .sysv => { // additionally emits a sequence driven by the offset/top fields of the va_list
+                    const stack_label = isel.instructions.items.len; // target of the `tbz` and `b.gt` emitted below
+                    try isel.emit(.b( // unconditional branch to done_label
+                        @intCast((isel.instructions.items.len + 1 - done_label) << 2),
+                    ));
+                    switch (parts_len) {
+                        else => unreachable,
+                        1 => if (arg_part_ras[0]) |arg_part_ra| try isel.emit(switch (part_vis[0].size(isel)) { // load from [stack_ra + sign-extended offs_ra]
+                            else => unreachable,
+                            1 => if (arg_part_ra.isVector()) .ldr(arg_part_ra.b(), .{ .extended_register = .{
+                                .base = stack_ra.x(),
+                                .index = offs_ra.w(),
+                                .extend = .{ .sxtw = 0 },
+                            } }) else switch (part_vis[0].signedness(isel)) {
+                                .signed => .ldrsb(arg_part_ra.w(), .{ .extended_register = .{
+                                    .base = stack_ra.x(),
+                                    .index = offs_ra.w(),
+                                    .extend = .{ .sxtw = 0 },
+                                } }),
+                                .unsigned => .ldrb(arg_part_ra.w(), .{ .extended_register = .{
+                                    .base = stack_ra.x(),
+                                    .index = offs_ra.w(),
+                                    .extend = .{ .sxtw = 0 },
+                                } }),
+                            },
+                            2 => if (arg_part_ra.isVector()) .ldr(arg_part_ra.h(), .{ .extended_register = .{
+                                .base = stack_ra.x(),
+                                .index = offs_ra.w(),
+                                .extend = .{ .sxtw = 0 },
+                            } }) else switch (part_vis[0].signedness(isel)) {
+                                .signed => .ldrsh(arg_part_ra.w(), .{ .extended_register = .{
+                                    .base = stack_ra.x(),
+                                    .index = offs_ra.w(),
+                                    .extend = .{ .sxtw = 0 },
+                                } }),
+                                .unsigned => .ldrh(arg_part_ra.w(), .{ .extended_register = .{
+                                    .base = stack_ra.x(),
+                                    .index = offs_ra.w(),
+                                    .extend = .{ .sxtw = 0 },
+                                } }),
+                            },
+                            4 => .ldr(if (arg_part_ra.isVector()) arg_part_ra.s() else arg_part_ra.w(), .{ .extended_register = .{
+                                .base = stack_ra.x(),
+                                .index = offs_ra.w(),
+                                .extend = .{ .sxtw = 0 },
+                            } }),
+                            8 => .ldr(if (arg_part_ra.isVector()) arg_part_ra.d() else arg_part_ra.x(), .{ .extended_register = .{
+                                .base = stack_ra.x(),
+                                .index = offs_ra.w(),
+                                .extend = .{ .sxtw = 0 },
+                            } }),
+                            16 => .ldr(arg_part_ra.q(), .{ .extended_register = .{
+                                .base = stack_ra.x(),
+                                .index = offs_ra.w(),
+                                .extend = .{ .sxtw = 0 },
+                            } }),
+                        }),
+                        2 => if (arg_part_ras[0] != null or arg_part_ras[1] != null) { // two parts: a pair load from stack_ra plus an add of the sign-extended offset
+                            try isel.emit(.ldp(
+                                @as(Register.Alias, arg_part_ras[0] orelse .zr).x(),
+                                @as(Register.Alias, arg_part_ras[1] orelse .zr).x(),
+                                .{ .base = stack_ra.x() },
+                            ));
+                            try isel.emit(.add(stack_ra.x(), stack_ra.x(), .{ .extended_register = .{
+                                .register = offs_ra.w(),
+                                .extend = .{ .sxtw = 0 },
+                            } }));
+                        },
+                    }
+                    try isel.emit(.ldr(stack_ra.x(), .{ .unsigned_offset = .{
+                        .base = va_list_ptr_mat.ra.x(),
+                        .offset = if (passed_is_vector) 16 else 8, // same offsets c_va_start uses for __vr_top (16) and __gr_top (8)
+                    } }));
+                    try isel.emit(.@"b."( // branch to stack_label on the `gt` condition; flags are set by the `adds` emitted below
+                        .gt,
+                        @intCast((isel.instructions.items.len + 1 - stack_label) << 2),
+                    ));
+                    try isel.emit(.str(stack_ra.w(), .{ .unsigned_offset = .{ // write the 32-bit `adds` result back to the offset field
+                        .base = va_list_ptr_mat.ra.x(),
+                        .offset = if (passed_is_vector) 28 else 24, // same offsets c_va_start uses for __vr_offs (28) and __gr_offs (24)
+                    } }));
+                    try isel.emit(.adds(stack_ra.w(), offs_ra.w(), .{ .immediate = passed_size })); // stack_ra.w = offs + passed_size, setting flags
+                    try isel.emit(.tbz( // branch to stack_label when bit 31 (the sign bit) of the loaded offset is clear
+                        offs_ra.w(),
+                        31,
+                        @intCast((isel.instructions.items.len + 1 - stack_label) << 2),
+                    ));
+                    try isel.emit(.ldr(offs_ra.w(), .{ .unsigned_offset = .{ // load the 32-bit offset field
+                        .base = va_list_ptr_mat.ra.x(),
+                        .offset = if (passed_is_vector) 28 else 24, // same offsets c_va_start uses for __vr_offs (28) and __gr_offs (24)
+                    } }));
+                },
+            }
+            try va_list_ptr_mat.finish(isel);
+            if (air.next()) |next_air_tag| continue :air_tag next_air_tag;
+        },
+        .c_va_copy => { // lowers AIR `c_va_copy`: the result is a value of type `ty_op.ty` loaded through the pointer operand
+            if (isel.live_values.fetchRemove(air.inst_index)) |va_list_vi| { // nothing is emitted when the result has no entry in live_values
+                defer va_list_vi.value.deref(isel);
+                const ty_op = air.data(air.inst_index).ty_op;
+                const va_list_ptr_vi = try isel.use(ty_op.operand);
+                const va_list_ptr_mat = try va_list_ptr_vi.matReg(isel); // materialize the source pointer in a register
+                _ = try va_list_vi.value.load(isel, ty_op.ty.toType(), va_list_ptr_mat.ra, .{}); // load the whole value from that address; the return value is discarded
+                try va_list_ptr_mat.finish(isel);
+            }
+            if (air.next()) |next_air_tag| continue :air_tag next_air_tag;
+        },
+        .c_va_end => if (air.next()) |next_air_tag| continue :air_tag next_air_tag, // no instructions are emitted for c_va_end; just advance
+        .c_va_start => { // lowers AIR `c_va_start`: defines the result va_list from the state recorded in `isel.va_list`
+            if (isel.live_values.fetchRemove(air.inst_index)) |va_list_vi| { // nothing is emitted when the result has no entry in live_values
+                defer va_list_vi.value.deref(isel);
+                const ty = air.data(air.inst_index).ty;
+                switch (isel.va_list) {
+                    .other => |va_list| if (try va_list_vi.value.defReg(isel)) |va_list_ra| try isel.emit(.add( // single-register va_list: result = base + offset
+                        va_list_ra.x(),
+                        va_list.base.x(),
+                        .{ .immediate = @intCast(va_list.offset) },
+                    )),
+                    .sysv => |va_list| { // five fields, each defined only if `defReg` yields a register for it
+                        var vr_offs_it = va_list_vi.value.field(ty, 28, 4); // 4-byte field at offset 28
+                        const vr_offs_vi = try vr_offs_it.only(isel);
+                        if (try vr_offs_vi.?.defReg(isel)) |vr_offs_ra| try isel.movImmediate(
+                            vr_offs_ra.w(),
+                            @as(u32, @bitCast(va_list.__vr_offs)), // i32 reinterpreted as u32 for the immediate move
+                        );
+                        var gr_offs_it = va_list_vi.value.field(ty, 24, 4); // 4-byte field at offset 24
+                        const gr_offs_vi = try gr_offs_it.only(isel);
+                        if (try gr_offs_vi.?.defReg(isel)) |gr_offs_ra| try isel.movImmediate(
+                            gr_offs_ra.w(),
+                            @as(u32, @bitCast(va_list.__gr_offs)), // i32 reinterpreted as u32 for the immediate move
+                        );
+                        var vr_top_it = va_list_vi.value.field(ty, 16, 8); // 8-byte field at offset 16
+                        const vr_top_vi = try vr_top_it.only(isel);
+                        if (try vr_top_vi.?.defReg(isel)) |vr_top_ra| try isel.emit(.add( // field = __vr_top base + offset
+                            vr_top_ra.x(),
+                            va_list.__vr_top.base.x(),
+                            .{ .immediate = @intCast(va_list.__vr_top.offset) },
+                        ));
+                        var gr_top_it = va_list_vi.value.field(ty, 8, 8); // 8-byte field at offset 8
+                        const gr_top_vi = try gr_top_it.only(isel);
+                        if (try gr_top_vi.?.defReg(isel)) |gr_top_ra| try isel.emit(.add( // field = __gr_top base + offset
+                            gr_top_ra.x(),
+                            va_list.__gr_top.base.x(),
+                            .{ .immediate = @intCast(va_list.__gr_top.offset) },
+                        ));
+                        var stack_it = va_list_vi.value.field(ty, 0, 8); // 8-byte field at offset 0
+                        const stack_vi = try stack_it.only(isel);
+                        if (try stack_vi.?.defReg(isel)) |stack_ra| try isel.emit(.add( // field = __stack base + offset
+                            stack_ra.x(),
+                            va_list.__stack.base.x(),
+                            .{ .immediate = @intCast(va_list.__stack.offset) },
+                        ));
+                    },
+                }
+            }
+            if (air.next()) |next_air_tag| continue :air_tag next_air_tag;
+        },
+        .work_item_id, .work_group_size, .work_group_id => unreachable,
     }
     assert(air.body_index == 0);
 }
@@ -7225,7 +7509,7 @@ pub fn verify(isel: *Select, check_values: bool) void {
 pub fn layout(
     isel: *Select,
     incoming: CallAbiIterator,
-    have_va: bool,
+    is_sysv_var_args: bool,
     saved_gra_len: u7,
     saved_vra_len: u7,
     mod: *const Package.Module,
@@ -7236,8 +7520,6 @@ pub fn layout(
     wip_mir_log.debug("{f}<body>:\n", .{nav.fqn.fmt(ip)});
 
     const stack_size: u24 = @intCast(InternPool.Alignment.@"16".forward(isel.stack_size));
-    const stack_size_lo: u12 = @truncate(stack_size >> 0);
-    const stack_size_hi: u12 = @truncate(stack_size >> 12);
 
     var saves_buf: [10 + 8 + 8 + 2 + 8]struct {
         class: enum { integer, vector },
@@ -7315,7 +7597,7 @@ pub fn layout(
 
         // incoming vr arguments
         save_ra = if (mod.strip) incoming.nsrn else CallAbiIterator.nsrn_start;
-        while (save_ra != if (have_va) CallAbiIterator.nsrn_end else incoming.nsrn) : (save_ra = @enumFromInt(@intFromEnum(save_ra) + 1)) {
+        while (save_ra != if (is_sysv_var_args) CallAbiIterator.nsrn_end else incoming.nsrn) : (save_ra = @enumFromInt(@intFromEnum(save_ra) + 1)) {
             saves_size = std.mem.alignForward(u10, saves_size, 16);
             saves_buf[saves_len] = .{
                 .class = .vector,
@@ -7370,7 +7652,7 @@ pub fn layout(
             1 => saves_size += 8,
         }
         save_ra = if (mod.strip) incoming.ngrn else CallAbiIterator.ngrn_start;
-        while (save_ra != if (have_va) CallAbiIterator.ngrn_end else incoming.ngrn) : (save_ra = @enumFromInt(@intFromEnum(save_ra) + 1)) {
+        while (save_ra != if (is_sysv_var_args) CallAbiIterator.ngrn_end else incoming.ngrn) : (save_ra = @enumFromInt(@intFromEnum(save_ra) + 1)) {
             saves_size = std.mem.alignForward(u10, saves_size, 8);
             saves_buf[saves_len] = .{
                 .class = .integer,
@@ -7434,6 +7716,8 @@ pub fn layout(
             .fp
         else
             .ip0;
+        const stack_size_lo: u12 = @truncate(stack_size >> 0);
+        const stack_size_hi: u12 = @truncate(stack_size >> 12);
         if (mod.stack_check) {
             if (stack_size_hi > 2) {
                 try isel.movImmediate(.ip1, stack_size_hi);
@@ -7481,6 +7765,7 @@ pub fn layout(
     if (isel.returns) {
         try isel.emit(.ret(.lr));
         var save_index: usize = 0;
+        var first_offset: ?u10 = null;
         while (save_index < saves.len) {
             if (save_index + 2 <= saves.len and saves[save_index + 1].needs_restore and
                 saves[save_index + 0].class == saves[save_index + 1].class and
@@ -7489,46 +7774,51 @@ pub fn layout(
                 try isel.emit(.ldp(
                     saves[save_index + 0].register,
                     saves[save_index + 1].register,
-                    switch (saves[save_index + 0].offset) {
-                        0 => .{ .post_index = .{
-                            .base = .sp,
-                            .index = @intCast(saves_size),
-                        } },
-                        else => |offset| .{ .signed_offset = .{
+                    if (first_offset) |offset| .{ .signed_offset = .{
+                        .base = .sp,
+                        .offset = @intCast(saves[save_index + 0].offset - offset),
+                    } } else form: {
+                        first_offset = @intCast(saves[save_index + 0].offset);
+                        break :form .{ .post_index = .{
                             .base = .sp,
-                            .offset = @intCast(offset),
-                        } },
+                            .index = @intCast(saves_size - first_offset.?),
+                        } };
                     },
                 ));
                 save_index += 2;
             } else if (saves[save_index].needs_restore) {
                 try isel.emit(.ldr(
                     saves[save_index].register,
-                    switch (saves[save_index].offset) {
-                        0 => .{ .post_index = .{
-                            .base = .sp,
-                            .index = @intCast(saves_size),
-                        } },
-                        else => |offset| .{ .unsigned_offset = .{
+                    if (first_offset) |offset| .{ .unsigned_offset = .{
+                        .base = .sp,
+                        .offset = saves[save_index + 0].offset - offset,
+                    } } else form: {
+                        const offset = saves[save_index + 0].offset;
+                        first_offset = offset;
+                        break :form .{ .post_index = .{
                             .base = .sp,
-                            .offset = @intCast(offset),
-                        } },
+                            .index = @intCast(saves_size - offset),
+                        } };
                     },
                 ));
                 save_index += 1;
             } else save_index += 1;
         }
-        if (isel.stack_align != .@"16" or (stack_size_lo > 0 and stack_size_hi > 0)) {
-            try isel.emit(switch (frame_record_offset) {
-                0 => .add(.sp, .fp, .{ .immediate = 0 }),
-                else => |offset| .sub(.sp, .fp, .{ .immediate = offset }),
-            });
+        const offset = stack_size + first_offset.?;
+        const offset_lo: u12 = @truncate(offset >> 0);
+        const offset_hi: u12 = @truncate(offset >> 12);
+        if (isel.stack_align != .@"16" or (offset_lo > 0 and offset_hi > 0)) {
+            const fp_offset = @as(i11, first_offset.?) - frame_record_offset;
+            try isel.emit(if (fp_offset >= 0)
+                .add(.sp, .fp, .{ .immediate = @intCast(fp_offset) })
+            else
+                .sub(.sp, .fp, .{ .immediate = @intCast(-fp_offset) }));
         } else {
-            if (stack_size_hi > 0) try isel.emit(.add(.sp, .sp, .{
-                .shifted_immediate = .{ .immediate = stack_size_hi, .lsl = .@"12" },
+            if (offset_hi > 0) try isel.emit(.add(.sp, .sp, .{
+                .shifted_immediate = .{ .immediate = offset_hi, .lsl = .@"12" },
             }));
-            if (stack_size_lo > 0) try isel.emit(.add(.sp, .sp, .{
-                .immediate = stack_size_lo,
+            if (offset_lo > 0) try isel.emit(.add(.sp, .sp, .{
+                .immediate = offset_lo,
             }));
         }
         wip_mir_log.debug("{f}<epilogue>:\n", .{nav.fqn.fmt(ip)});
@@ -9493,6 +9783,11 @@ pub const Value = struct {
             return it.vi;
         }
 
+        pub fn peek(it: PartIterator) ?Value.Index { // `it` is received by value, so the caller's iterator is not advanced
+            var it_mut = it; // mutable local copy; presumably needed because `next` takes a mutable receiver
+            return it_mut.next(); // whatever `next` yields for the copy
+        }
+
         pub fn only(it: PartIterator) ?Value.Index {
             return if (it.remaining == 1) it.vi else null;
         }
@@ -11607,6 +11902,16 @@ pub const CallAbiIterator = struct {
         return wip_vi.ref(isel);
     }
 
+    pub fn nonSysvVarArg(it: *CallAbiIterator, isel: *Select, ty: ZigType) !?Value.Index { // calls `param` with `ngrn` and `nsrn` temporarily set to their end values
+        const ngrn = it.ngrn; // remember the current `ngrn`
+        defer it.ngrn = ngrn; // restored on every exit path, error returns included
+        it.ngrn = ngrn_end; // presumably makes `param` see no free integer argument registers - confirm in `param`
+        const nsrn = it.nsrn; // remember the current `nsrn`
+        defer it.nsrn = nsrn; // restored on every exit path, error returns included
+        it.nsrn = nsrn_end; // presumably makes `param` see no free vector argument registers - confirm in `param`
+        return it.param(isel, ty); // evaluated before the defers run; no field other than `ngrn`/`nsrn` is restored here
+    }
+
     pub fn ret(it: *CallAbiIterator, isel: *Select, ty: ZigType) !?Value.Index {
         const wip_vi = try it.param(isel, ty) orelse return null;
         switch (wip_vi.parent(isel)) {
src/codegen/aarch64.zig
@@ -19,8 +19,12 @@ pub fn generate(
 ) !Mir {
     const zcu = pt.zcu;
     const gpa = zcu.gpa;
+    const ip = &zcu.intern_pool;
     const func = zcu.funcInfo(func_index);
-    const func_type = zcu.intern_pool.indexToKey(func.ty).func_type;
+    const func_zir = func.zir_body_inst.resolveFull(ip).?;
+    const file = zcu.fileByIndex(func_zir.file);
+    const named_params_len = file.zir.?.getParamBody(func_zir.inst).len;
+    const func_type = ip.indexToKey(func.ty).func_type;
     assert(liveness.* == null);
 
     const mod = zcu.navFileScope(func.owner_nav).mod.?;
@@ -61,23 +65,32 @@ pub fn generate(
         .values = .empty,
     };
     defer isel.deinit();
+    const is_sysv = !isel.target.os.tag.isDarwin() and isel.target.os.tag != .windows;
+    const is_sysv_var_args = is_sysv and func_type.is_var_args;
 
     const air_main_body = air.getMainBody();
     var param_it: Select.CallAbiIterator = .init;
     const air_args = for (air_main_body, 0..) |air_inst_index, body_index| {
         if (air.instructions.items(.tag)[@intFromEnum(air_inst_index)] != .arg) break air_main_body[0..body_index];
-        const param_ty = air.instructions.items(.data)[@intFromEnum(air_inst_index)].arg.ty.toType();
-        const param_vi = try param_it.param(&isel, param_ty);
+        const arg = air.instructions.items(.data)[@intFromEnum(air_inst_index)].arg;
+        const param_ty = arg.ty.toType();
+        const param_vi = param_vi: {
+            if (arg.zir_param_index >= named_params_len) {
+                assert(func_type.is_var_args);
+                if (!is_sysv) break :param_vi try param_it.nonSysvVarArg(&isel, param_ty);
+            }
+            break :param_vi try param_it.param(&isel, param_ty);
+        };
         tracking_log.debug("${d} <- %{d}", .{ @intFromEnum(param_vi.?), @intFromEnum(air_inst_index) });
         try isel.live_values.putNoClobber(gpa, air_inst_index, param_vi.?);
     } else unreachable;
 
     const saved_gra_start = if (mod.strip) param_it.ngrn else Select.CallAbiIterator.ngrn_start;
-    const saved_gra_end = if (func_type.is_var_args) Select.CallAbiIterator.ngrn_end else param_it.ngrn;
+    const saved_gra_end = if (is_sysv_var_args) Select.CallAbiIterator.ngrn_end else param_it.ngrn;
     const saved_gra_len = @intFromEnum(saved_gra_end) - @intFromEnum(saved_gra_start);
 
     const saved_vra_start = if (mod.strip) param_it.nsrn else Select.CallAbiIterator.nsrn_start;
-    const saved_vra_end = if (func_type.is_var_args) Select.CallAbiIterator.nsrn_end else param_it.nsrn;
+    const saved_vra_end = if (is_sysv_var_args) Select.CallAbiIterator.nsrn_end else param_it.nsrn;
     const saved_vra_len = @intFromEnum(saved_vra_end) - @intFromEnum(saved_vra_start);
 
     const frame_record = 2;
@@ -85,11 +98,16 @@ pub fn generate(
         .base = .fp,
         .offset = 8 * std.mem.alignForward(u7, frame_record + saved_gra_len, 2),
     };
-    isel.va_list = .{
-        .__stack = named_stack_args.withOffset(param_it.nsaa),
-        .__gr_top = named_stack_args,
-        .__vr_top = .{ .base = .fp, .offset = 0 },
-    };
+    const stack_var_args = named_stack_args.withOffset(param_it.nsaa);
+    const gr_top = named_stack_args;
+    const vr_top: Select.Value.Indirect = .{ .base = .fp, .offset = 0 };
+    isel.va_list = if (is_sysv) .{ .sysv = .{
+        .__stack = stack_var_args,
+        .__gr_top = gr_top,
+        .__vr_top = vr_top,
+        .__gr_offs = @as(i32, @intFromEnum(Select.CallAbiIterator.ngrn_end) - @intFromEnum(param_it.ngrn)) * -8,
+        .__vr_offs = @as(i32, @intFromEnum(Select.CallAbiIterator.nsrn_end) - @intFromEnum(param_it.nsrn)) * -16,
+    } } else .{ .other = stack_var_args };
 
     // translate arg locations from caller-based to callee-based
     for (air_args) |air_inst_index| {
@@ -106,11 +124,9 @@ pub fn generate(
                 const first_passed_part_vi = part_it.next().?;
                 const hint_ra = first_passed_part_vi.hint(&isel).?;
                 passed_vi.setParent(&isel, .{ .stack_slot = if (hint_ra.isVector())
-                    isel.va_list.__vr_top.withOffset(@as(i8, -16) *
-                        (@intFromEnum(saved_vra_end) - @intFromEnum(hint_ra)))
+                    vr_top.withOffset(@as(i8, -16) * (@intFromEnum(saved_vra_end) - @intFromEnum(hint_ra)))
                 else
-                    isel.va_list.__gr_top.withOffset(@as(i8, -8) *
-                        (@intFromEnum(saved_gra_end) - @intFromEnum(hint_ra))) });
+                    gr_top.withOffset(@as(i8, -8) * (@intFromEnum(saved_gra_end) - @intFromEnum(hint_ra))) });
             },
             .stack_slot => |stack_slot| {
                 assert(stack_slot.base == .sp);
@@ -152,13 +168,7 @@ pub fn generate(
     isel.verify(true);
 
     const prologue = isel.instructions.items.len;
-    const epilogue = try isel.layout(
-        param_it,
-        func_type.is_var_args,
-        saved_gra_len,
-        saved_vra_len,
-        mod,
-    );
+    const epilogue = try isel.layout(param_it, is_sysv_var_args, saved_gra_len, saved_vra_len, mod);
 
     const instructions = try isel.instructions.toOwnedSlice(gpa);
     var mir: Mir = .{
test/behavior/var_args.zig
@@ -92,7 +92,6 @@ fn doNothingWithFirstArg(args: anytype) void {
 }
 
 test "simple variadic function" {
-    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
@@ -154,7 +153,6 @@ test "simple variadic function" {
 }
 
 test "coerce reference to var arg" {
-    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
@@ -234,7 +232,6 @@ test "variadic functions" {
 }
 
 test "copy VaList" {
-    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
@@ -269,7 +266,6 @@ test "copy VaList" {
 }
 
 test "unused VaList arg" {
-    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;