Commit 400500a3af

Andrew Kelley <andrew@ziglang.org>
2019-08-06 22:37:25
improve async function semantics
* add safety panic for resuming a function which is returning, pending an await
* remove IrInstructionResultPtr
* add IrInstructionReturnBegin. This does the early return in async functions; does nothing in normal functions.
* `await` gets a result location
* `analyze_fn_async` will call `analyze_fn_body` if necessary.
* async function frames have a result pointer field for themselves to access and one for the awaiter to supply before the atomic rmw. When returning, async functions copy the result to the awaiter result pointer, if it is non-null.
* async function frames have a stack trace pointer which is supplied by the awaiter before the atomic rmw. Later in the frame is a stack trace struct and addresses, which are used for its own calls and awaits.
* when awaiting an async function, if an early return occurred, the awaiter tail resumes the frame.
* when an async function returns, early return does a suspend (in IrInstructionReturnBegin) before copying the error return trace data, result, and running the defers. After the last defer runs, the frame will no longer be accessed.
* proper acquire/release atomic ordering attributes in async functions.
1 parent 20f63e5
src/all_types.hpp
@@ -1557,6 +1557,7 @@ enum PanicMsgId {
     PanicMsgIdBadReturn,
     PanicMsgIdResumedAnAwaitingFn,
     PanicMsgIdFrameTooSmall,
+    PanicMsgIdResumedFnPendingAwait,
 
     PanicMsgIdCount,
 };
@@ -1717,10 +1718,12 @@ struct CodeGen {
     LLVMTargetMachineRef target_machine;
     ZigLLVMDIFile *dummy_di_file;
     LLVMValueRef cur_ret_ptr;
+    LLVMValueRef cur_ret_ptr_ptr;
     LLVMValueRef cur_fn_val;
     LLVMValueRef cur_async_switch_instr;
     LLVMValueRef cur_async_resume_index_ptr;
     LLVMValueRef cur_async_awaiter_ptr;
+    LLVMValueRef cur_async_prev_val;
     LLVMBasicBlockRef cur_preamble_llvm_block;
     size_t cur_resume_block_count;
     LLVMValueRef cur_err_ret_trace_val_arg;
@@ -2223,6 +2226,7 @@ enum IrInstructionId {
     IrInstructionIdCallGen,
     IrInstructionIdConst,
     IrInstructionIdReturn,
+    IrInstructionIdReturnBegin,
     IrInstructionIdCast,
     IrInstructionIdResizeSlice,
     IrInstructionIdContainerInitList,
@@ -2326,7 +2330,6 @@ enum IrInstructionId {
     IrInstructionIdImplicitCast,
     IrInstructionIdResolveResult,
     IrInstructionIdResetResult,
-    IrInstructionIdResultPtr,
     IrInstructionIdOpaqueType,
     IrInstructionIdSetAlignStack,
     IrInstructionIdArgType,
@@ -2355,7 +2358,8 @@ enum IrInstructionId {
     IrInstructionIdUnionInitNamedField,
     IrInstructionIdSuspendBegin,
     IrInstructionIdSuspendFinish,
-    IrInstructionIdAwait,
+    IrInstructionIdAwaitSrc,
+    IrInstructionIdAwaitGen,
     IrInstructionIdCoroResume,
 };
 
@@ -2630,7 +2634,13 @@ struct IrInstructionConst {
 struct IrInstructionReturn {
     IrInstruction base;
 
-    IrInstruction *value;
+    IrInstruction *operand;
+};
+
+struct IrInstructionReturnBegin { // operand type of ir_render_return_begin, which emits nothing for non-async functions
+    IrInstruction base;
+
+    IrInstruction *operand; // value being returned; ir_render_return_begin tolerates nullptr here
 };
 
 enum CastOp {
@@ -3136,6 +3146,7 @@ struct IrInstructionTestErrSrc {
     IrInstruction base;
 
     bool resolve_err_set;
+    bool base_ptr_is_payload;
     IrInstruction *base_ptr;
 };
 
@@ -3603,10 +3614,18 @@ struct IrInstructionSuspendFinish {
     IrInstructionSuspendBegin *begin;
 };
 
-struct IrInstructionAwait {
+struct IrInstructionAwaitSrc { // replaces IrInstructionAwait; presumably the pre-analysis form of `await` -- TODO confirm
     IrInstruction base;
 
     IrInstruction *frame;
+    ResultLoc *result_loc; // result location for the awaited value, still in ResultLoc form
+};
+
+struct IrInstructionAwaitGen { // counterpart of IrInstructionAwaitSrc; presumably the post-analysis form -- TODO confirm
+    IrInstruction base;
+
+    IrInstruction *frame;
+    IrInstruction *result_loc; // result location expressed as an IR instruction rather than a ResultLoc
 };
 
 struct IrInstructionCoroResume {
src/analyze.cpp
@@ -3848,6 +3848,13 @@ static void analyze_fn_async(CodeGen *g, ZigFn *fn) {
 
         if (callee->type_entry->data.fn.fn_type_id.cc != CallingConventionUnspecified)
             continue;
+        if (callee->anal_state == FnAnalStateReady) {
+            analyze_fn_body(g, callee);
+            if (callee->anal_state == FnAnalStateInvalid) {
+                fn->anal_state = FnAnalStateInvalid;
+                return;
+            }
+        }
         assert(callee->anal_state == FnAnalStateComplete);
         analyze_fn_async(g, callee);
         if (callee->anal_state == FnAnalStateInvalid) {
@@ -5224,20 +5231,18 @@ static Error resolve_coro_frame(CodeGen *g, ZigType *frame_type) {
 
     FnTypeId *fn_type_id = &fn_type->data.fn.fn_type_id;
     ZigType *ptr_return_type = get_pointer_to_type(g, fn_type_id->return_type, false);
-    field_names.append("@ptr_result");
+    field_names.append("@result_ptr_callee");
+    field_types.append(ptr_return_type);
+
+    field_names.append("@result_ptr_awaiter");
     field_types.append(ptr_return_type);
 
     field_names.append("@result");
     field_types.append(fn_type_id->return_type);
 
     if (codegen_fn_has_err_ret_tracing_arg(g, fn_type_id->return_type)) {
-        (void)get_ptr_to_stack_trace_type(g); // populate g->stack_trace_type
-
-        field_names.append("@stack_trace");
-        field_types.append(g->stack_trace_type);
-
-        field_names.append("@instruction_addresses");
-        field_types.append(get_array_type(g, g->builtin_types.entry_usize, stack_trace_ptr_count));
+        field_names.append("@ptr_stack_trace");
+        field_types.append(get_ptr_to_stack_trace_type(g));
     }
 
     for (size_t arg_i = 0; arg_i < fn_type_id->param_count; arg_i += 1) {
@@ -5255,7 +5260,7 @@ static Error resolve_coro_frame(CodeGen *g, ZigType *frame_type) {
         field_types.append(param_type);
     }
 
-    if (codegen_fn_has_err_ret_tracing_stack(g, fn)) {
+    if (codegen_fn_has_err_ret_tracing_stack(g, fn, true)) {
         (void)get_ptr_to_stack_trace_type(g); // populate g->stack_trace_type
 
         field_names.append("@stack_trace");
@@ -7570,11 +7575,11 @@ static void resolve_llvm_types_any_frame(CodeGen *g, ZigType *any_frame_type, Re
 
     bool have_result_type = result_type != nullptr && type_has_bits(result_type);
     if (have_result_type) {
-        field_types.append(get_llvm_type(g, ptr_result_type)); // ptr_result
+        field_types.append(get_llvm_type(g, ptr_result_type)); // result_ptr_callee
+        field_types.append(get_llvm_type(g, ptr_result_type)); // result_ptr_awaiter
         field_types.append(get_llvm_type(g, result_type)); // result
         if (codegen_fn_has_err_ret_tracing_arg(g, result_type)) {
-            field_types.append(get_llvm_type(g, g->stack_trace_type)); // stack_trace
-            field_types.append(get_llvm_type(g, get_array_type(g, g->builtin_types.entry_usize, stack_trace_ptr_count))); // instruction_addresses
+            field_types.append(get_llvm_type(g, get_ptr_to_stack_trace_type(g))); // ptr_stack_trace
         }
     }
     LLVMStructSetBody(frame_header_type, field_types.items, field_types.length, false);
@@ -7607,7 +7612,15 @@ static void resolve_llvm_types_any_frame(CodeGen *g, ZigType *any_frame_type, Re
     if (have_result_type) {
         di_element_types.append(
             ZigLLVMCreateDebugMemberType(g->dbuilder,
-                ZigLLVMTypeToScope(any_frame_type->llvm_di_type), "ptr_result",
+                ZigLLVMTypeToScope(any_frame_type->llvm_di_type), "result_ptr_callee",
+                di_file, line,
+                8*LLVMABISizeOfType(g->target_data_ref, field_types.at(di_element_types.length)),
+                8*LLVMABIAlignmentOfType(g->target_data_ref, field_types.at(di_element_types.length)),
+                8*LLVMOffsetOfElement(g->target_data_ref, frame_header_type, di_element_types.length),
+                ZigLLVM_DIFlags_Zero, get_llvm_di_type(g, ptr_result_type)));
+        di_element_types.append(
+            ZigLLVMCreateDebugMemberType(g->dbuilder,
+                ZigLLVMTypeToScope(any_frame_type->llvm_di_type), "result_ptr_awaiter",
                 di_file, line,
                 8*LLVMABISizeOfType(g->target_data_ref, field_types.at(di_element_types.length)),
                 8*LLVMABIAlignmentOfType(g->target_data_ref, field_types.at(di_element_types.length)),
@@ -7625,20 +7638,12 @@ static void resolve_llvm_types_any_frame(CodeGen *g, ZigType *any_frame_type, Re
         if (codegen_fn_has_err_ret_tracing_arg(g, result_type)) {
             di_element_types.append(
                 ZigLLVMCreateDebugMemberType(g->dbuilder,
-                    ZigLLVMTypeToScope(any_frame_type->llvm_di_type), "stack_trace",
-                    di_file, line,
-                    8*LLVMABISizeOfType(g->target_data_ref, field_types.at(di_element_types.length)),
-                    8*LLVMABIAlignmentOfType(g->target_data_ref, field_types.at(di_element_types.length)),
-                    8*LLVMOffsetOfElement(g->target_data_ref, frame_header_type, di_element_types.length),
-                    ZigLLVM_DIFlags_Zero, get_llvm_di_type(g, g->stack_trace_type)));
-            di_element_types.append(
-                ZigLLVMCreateDebugMemberType(g->dbuilder,
-                    ZigLLVMTypeToScope(any_frame_type->llvm_di_type), "instruction_addresses",
+                    ZigLLVMTypeToScope(any_frame_type->llvm_di_type), "ptr_stack_trace",
                     di_file, line,
                     8*LLVMABISizeOfType(g->target_data_ref, field_types.at(di_element_types.length)),
                     8*LLVMABIAlignmentOfType(g->target_data_ref, field_types.at(di_element_types.length)),
                     8*LLVMOffsetOfElement(g->target_data_ref, frame_header_type, di_element_types.length),
-                    ZigLLVM_DIFlags_Zero, get_llvm_di_type(g, get_array_type(g, g->builtin_types.entry_usize, stack_trace_ptr_count))));
+                    ZigLLVM_DIFlags_Zero, get_llvm_di_type(g, get_ptr_to_stack_trace_type(g))));
         }
     };
 
src/codegen.cpp
@@ -24,6 +24,14 @@
 #include <stdio.h>
 #include <errno.h>
 
+enum ResumeId { // Reason codes for resuming an async frame; gen_resume encodes one as (all-ones - id) in the usize argument.
+    ResumeIdManual, // not referenced in this diff; presumably an explicit `resume` -- TODO confirm
+    ResumeIdReturn, // sent by ir_render_return when an async function tail-resumes its awaiter
+    ResumeIdCall, // sent by ir_render_call when it starts an async callee
+
+    ResumeIdAwaitEarlyReturn // must be last: gen_assert_resume_id range-checks (ULT) against it instead of testing equality
+};
+
 static void init_darwin_native(CodeGen *g) {
     char *osx_target = getenv("MACOSX_DEPLOYMENT_TARGET");
     char *ios_target = getenv("IPHONEOS_DEPLOYMENT_TARGET");
@@ -298,25 +306,25 @@ static LLVMLinkage to_llvm_linkage(GlobalLinkageId id) {
 }
 
 // label (grep this): [coro_frame_struct_layout]
-static uint32_t frame_index_trace_arg(CodeGen *g, FnTypeId *fn_type_id) {
-    // [0] *ReturnType
-    // [1] ReturnType
-    uint32_t return_field_count = type_has_bits(fn_type_id->return_type) ? 2 : 0;
+static uint32_t frame_index_trace_arg(CodeGen *g, ZigType *return_type) { // frame field index just past the return-value fields listed below
+    // [0] *ReturnType (callee's)
+    // [1] *ReturnType (awaiter's)
+    // [2] ReturnType
+    uint32_t return_field_count = type_has_bits(return_type) ? 3 : 0; // all three fields are omitted when the return type has no bits
     return coro_ret_start + return_field_count;
 }
 
 // label (grep this): [coro_frame_struct_layout]
-static uint32_t frame_index_arg(CodeGen *g, FnTypeId *fn_type_id) {
-    bool have_stack_trace = codegen_fn_has_err_ret_tracing_arg(g, fn_type_id->return_type);
-    // [0] StackTrace
-    // [1] [stack_trace_ptr_count]usize
-    uint32_t trace_field_count = have_stack_trace ? 2 : 0;
-    return frame_index_trace_arg(g, fn_type_id) + trace_field_count;
+static uint32_t frame_index_arg(CodeGen *g, ZigType *return_type) { // frame field index just past the optional *StackTrace field
+    bool have_stack_trace = codegen_fn_has_err_ret_tracing_arg(g, return_type);
+    // [0] *StackTrace
+    uint32_t trace_field_count = have_stack_trace ? 1 : 0; // the trace is now one pointer field instead of two inline fields
+    return frame_index_trace_arg(g, return_type) + trace_field_count;
 }
 
 // label (grep this): [coro_frame_struct_layout]
 static uint32_t frame_index_trace_stack(CodeGen *g, FnTypeId *fn_type_id) {
-    uint32_t result = frame_index_arg(g, fn_type_id);
+    uint32_t result = frame_index_arg(g, fn_type_id->return_type);
     for (size_t i = 0; i < fn_type_id->param_count; i += 1) {
         if (type_has_bits(fn_type_id->param_info->type)) {
             result += 1;
@@ -901,7 +909,7 @@ static Buf *panic_msg_buf(PanicMsgId msg_id) {
         case PanicMsgIdPtrCastNull:
             return buf_create_from_str("cast causes pointer to be null");
         case PanicMsgIdBadResume:
-            return buf_create_from_str("invalid resume of async function");
+            return buf_create_from_str("resumed an async function which already returned");
         case PanicMsgIdBadAwait:
             return buf_create_from_str("async function awaited twice");
         case PanicMsgIdBadReturn:
@@ -910,6 +918,8 @@ static Buf *panic_msg_buf(PanicMsgId msg_id) {
             return buf_create_from_str("awaiting function resumed");
         case PanicMsgIdFrameTooSmall:
             return buf_create_from_str("frame too small");
+        case PanicMsgIdResumedFnPendingAwait:
+            return buf_create_from_str("resumed an async function which can only be awaited");
     }
     zig_unreachable();
 }
@@ -1301,7 +1311,14 @@ static LLVMValueRef get_cur_err_ret_trace_val(CodeGen *g, Scope *scope) {
     if (g->cur_err_ret_trace_val_stack != nullptr) {
         return g->cur_err_ret_trace_val_stack;
     }
-    return g->cur_err_ret_trace_val_arg;
+    if (g->cur_err_ret_trace_val_arg != nullptr) {
+        if (fn_is_async(g->cur_fn)) {
+            return LLVMBuildLoad(g->builder, g->cur_err_ret_trace_val_arg, "");
+        } else {
+            return g->cur_err_ret_trace_val_arg;
+        }
+    }
+    return nullptr;
 }
 
 static void gen_safety_crash_for_err(CodeGen *g, LLVMValueRef err_val, Scope *scope) {
@@ -2023,99 +2040,191 @@ static LLVMValueRef ir_render_save_err_ret_addr(CodeGen *g, IrExecutable *execut
     return call_instruction;
 }
 
-static LLVMValueRef ir_render_return(CodeGen *g, IrExecutable *executable,
-        IrInstructionReturn *return_instruction)
+static void gen_assert_resume_id(CodeGen *g, IrInstruction *source_instr, ResumeId resume_id, PanicMsgId msg_id, // Emits a check that this function's 2nd parameter carries resume_id;
+        LLVMBasicBlockRef end_bb) // success branches to end_bb (a new "OkResume" block if nullptr), failure to a "BadResume" block.
+{
+    LLVMTypeRef usize_type_ref = g->builtin_types.entry_usize->llvm_type;
+    LLVMBasicBlockRef bad_resume_block = LLVMAppendBasicBlock(g->cur_fn_val, "BadResume");
+    if (end_bb == nullptr) end_bb = LLVMAppendBasicBlock(g->cur_fn_val, "OkResume"); // caller supplied no continuation block
+    LLVMValueRef ok_bit;
+    if (resume_id == ResumeIdAwaitEarlyReturn) { // range check: accept any argument below (all-ones - ResumeIdAwaitEarlyReturn)
+        LLVMValueRef last_value = LLVMBuildSub(g->builder, LLVMConstAllOnes(usize_type_ref),
+                LLVMConstInt(usize_type_ref, ResumeIdAwaitEarlyReturn, false), "");
+        ok_bit = LLVMBuildICmp(g->builder, LLVMIntULT, LLVMGetParam(g->cur_fn_val, 1), last_value, "");
+    } else { // exact match against (all-ones - resume_id), the encoding gen_resume produces
+        LLVMValueRef expected_value = LLVMBuildSub(g->builder, LLVMConstAllOnes(usize_type_ref),
+                LLVMConstInt(usize_type_ref, resume_id, false), "");
+        ok_bit = LLVMBuildICmp(g->builder, LLVMIntEQ, LLVMGetParam(g->cur_fn_val, 1), expected_value, "");
+    }
+    LLVMBuildCondBr(g->builder, ok_bit, end_bb, bad_resume_block); // terminates the block the builder was positioned in on entry
+
+    LLVMPositionBuilderAtEnd(g->builder, bad_resume_block);
+    gen_assertion(g, msg_id, source_instr); // failure path: delegated to gen_assertion with msg_id
+
+    LLVMPositionBuilderAtEnd(g->builder, end_bb); // leave the builder in end_bb so the caller keeps emitting the success path
+}
+
+static LLVMValueRef gen_resume(CodeGen *g, LLVMValueRef fn_val, LLVMValueRef target_frame_ptr, // Builds the call fn_val(target_frame_ptr, arg_val) with LLVMFastCallConv and returns the call instruction.
+        ResumeId resume_id, LLVMValueRef arg_val) // fn_val and arg_val may each be nullptr; the defaults are filled in below.
+{
+    LLVMTypeRef usize_type_ref = g->builtin_types.entry_usize->llvm_type;
+    if (fn_val == nullptr) { // no callee given: load the function pointer stored in the target frame
+        if (g->anyframe_fn_type == nullptr) {
+            (void)get_llvm_type(g, get_any_frame_type(g, nullptr)); // result discarded; presumably run for the side effect of setting g->anyframe_fn_type -- TODO confirm
+        }
+        LLVMValueRef fn_ptr_ptr = LLVMBuildStructGEP(g->builder, target_frame_ptr, coro_fn_ptr_index, "");
+        fn_val = LLVMBuildLoad(g->builder, fn_ptr_ptr, "");
+    }
+    if (arg_val == nullptr) { // default argument: resume_id encoded as (all-ones - resume_id)
+        arg_val = LLVMBuildSub(g->builder, LLVMConstAllOnes(usize_type_ref),
+                LLVMConstInt(usize_type_ref, resume_id, false), "");
+    } else { // an explicit argument is only allowed together with ResumeIdAwaitEarlyReturn
+        assert(resume_id == ResumeIdAwaitEarlyReturn);
+    }
+    LLVMValueRef args[] = {target_frame_ptr, arg_val};
+    return ZigLLVMBuildCall(g->builder, fn_val, args, 2, LLVMFastCallConv, ZigLLVM_FnInlineAuto, ""); // callers decide whether to mark it as a tail call
+}
+
+static LLVMValueRef ir_render_return_begin(CodeGen *g, IrExecutable *executable, // First half of an async return: publishes the result, swaps the awaiter slot, and may suspend.
+        IrInstructionReturnBegin *instruction) // Always returns nullptr; leaves the awaiter value in g->cur_async_prev_val for ir_render_return.
 {
+    if (!fn_is_async(g->cur_fn)) return nullptr; // nothing to emit for non-async functions
+
+    LLVMTypeRef usize_type_ref = g->builtin_types.entry_usize->llvm_type;
+
+    bool ret_type_has_bits = instruction->operand != nullptr &&
+        type_has_bits(instruction->operand->value.type);
+    ZigType *ret_type = ret_type_has_bits ? instruction->operand->value.type : nullptr;
+    if (ret_type_has_bits && !handle_is_ptr(ret_type)) {
+        // It's a scalar, so it didn't get written to the result ptr. Do that before the atomic rmw.
+        LLVMValueRef result_ptr = LLVMBuildLoad(g->builder, g->cur_ret_ptr_ptr, "");
+        LLVMBuildStore(g->builder, ir_llvm_value(g, instruction->operand), result_ptr);
+    }
+
+    // Prepare to be suspended. We might end up not having to suspend though.
+    LLVMBasicBlockRef resume_bb = LLVMAppendBasicBlock(g->cur_fn_val, "ReturnResume");
+    size_t new_block_index = g->cur_resume_block_count; // next unused case value of the resume switch
+    g->cur_resume_block_count += 1;
+    LLVMValueRef new_block_index_val = LLVMConstInt(usize_type_ref, new_block_index, false);
+    LLVMAddCase(g->cur_async_switch_instr, new_block_index_val, resume_bb); // a later resume with this index lands in resume_bb
+    LLVMBuildStore(g->builder, new_block_index_val, g->cur_async_resume_index_ptr);
+
+    LLVMValueRef zero = LLVMConstNull(usize_type_ref);
+    LLVMValueRef all_ones = LLVMConstAllOnes(usize_type_ref);
+    LLVMValueRef prev_val = LLVMBuildAtomicRMW(g->builder, LLVMAtomicRMWBinOpXchg, g->cur_async_awaiter_ptr,
+            all_ones, LLVMAtomicOrderingAcquire, g->is_single_threaded); // swap all-ones into the awaiter slot; the old value selects the path below
+
+    LLVMBasicBlockRef bad_return_block = LLVMAppendBasicBlock(g->cur_fn_val, "BadReturn");
+    LLVMBasicBlockRef early_return_block = LLVMAppendBasicBlock(g->cur_fn_val, "EarlyReturn");
+    LLVMBasicBlockRef resume_them_block = LLVMAppendBasicBlock(g->cur_fn_val, "ResumeThem");
+
+    LLVMValueRef switch_instr = LLVMBuildSwitch(g->builder, prev_val, resume_them_block, 2); // default: any other value goes to ResumeThem
+    LLVMBasicBlockRef switch_bb = LLVMGetInsertBlock(g->builder); // remembered as a phi predecessor
+
+    LLVMAddCase(switch_instr, zero, early_return_block);
+    LLVMAddCase(switch_instr, all_ones, bad_return_block);
+
+    // Something has gone horribly wrong, and this is an invalid second return.
+    LLVMPositionBuilderAtEnd(g->builder, bad_return_block);
+    gen_assertion(g, PanicMsgIdBadReturn, &instruction->base);
+
+    // The caller has not done an await yet. So we suspend at the return instruction, until a
+    // cancel or await is performed.
+    LLVMPositionBuilderAtEnd(g->builder, early_return_block);
+    LLVMBuildRetVoid(g->builder);
+
+    // Add a safety check for when getting resumed by the awaiter.
+    LLVMPositionBuilderAtEnd(g->builder, resume_bb);
+    LLVMBasicBlockRef after_resume_block = LLVMGetInsertBlock(g->builder); // same block as resume_bb; the conditional branch emitted next originates here
+    gen_assert_resume_id(g, &instruction->base, ResumeIdAwaitEarlyReturn, PanicMsgIdResumedFnPendingAwait,
+            resume_them_block); // on success branches to ResumeThem and leaves the builder positioned there
+
+    // We need to resume the caller by tail calling them.
+    // That will happen when rendering IrInstructionReturn after running the defers/errdefers.
+    // We either got here from Entry (function call) or from the switch above
+    g->cur_async_prev_val = LLVMBuildPhi(g->builder, usize_type_ref, ""); // merges the awaiter value from both predecessors
+    LLVMValueRef incoming_values[] = { LLVMGetParam(g->cur_fn_val, 1), prev_val }; // resumed path: the resume argument; direct path: the swapped-out value
+    LLVMBasicBlockRef incoming_blocks[] = { after_resume_block, switch_bb };
+    LLVMAddIncoming(g->cur_async_prev_val, incoming_values, incoming_blocks, 2);
+
+    return nullptr;
+}
+
+static LLVMValueRef ir_render_return(CodeGen *g, IrExecutable *executable, IrInstructionReturn *instruction) {
     if (fn_is_async(g->cur_fn)) {
         LLVMTypeRef usize_type_ref = g->builtin_types.entry_usize->llvm_type;
-        bool ret_type_has_bits = return_instruction->value != nullptr &&
-            type_has_bits(return_instruction->value->value.type);
-        ZigType *ret_type = ret_type_has_bits ? return_instruction->value->value.type : nullptr;
+        bool ret_type_has_bits = instruction->operand != nullptr &&
+            type_has_bits(instruction->operand->value.type);
+        ZigType *ret_type = ret_type_has_bits ? instruction->operand->value.type : nullptr;
 
-        if (ir_want_runtime_safety(g, &return_instruction->base)) {
+        if (ir_want_runtime_safety(g, &instruction->base)) {
             LLVMValueRef new_resume_index = LLVMConstAllOnes(usize_type_ref);
             LLVMBuildStore(g->builder, new_resume_index, g->cur_async_resume_index_ptr);
         }
 
-        LLVMValueRef result_ptr_as_usize;
         if (ret_type_has_bits) {
-            LLVMValueRef result_ptr_ptr = LLVMBuildStructGEP(g->builder, g->cur_ret_ptr, coro_ret_start, "");
-            LLVMValueRef result_ptr = LLVMBuildLoad(g->builder, result_ptr_ptr, "");
-            if (!handle_is_ptr(ret_type)) {
-                // It's a scalar, so it didn't get written to the result ptr. Do that now.
-                LLVMBuildStore(g->builder, ir_llvm_value(g, return_instruction->value), result_ptr);
-            }
-            result_ptr_as_usize = LLVMBuildPtrToInt(g->builder, result_ptr, usize_type_ref, "");
-        } else {
-            // For debug safety, this value has to be anything other than all 1's, which signals
-            // that it is being resumed. 0 is a bad choice since null pointers are special.
-            result_ptr_as_usize = ir_want_runtime_safety(g, &return_instruction->base) ?
-                LLVMConstInt(usize_type_ref, 1, false) : LLVMGetUndef(usize_type_ref);
+            // If the awaiter result pointer is non-null, we need to copy the result to there.
+            LLVMBasicBlockRef copy_block = LLVMAppendBasicBlock(g->cur_fn_val, "CopyResult");
+            LLVMBasicBlockRef copy_end_block = LLVMAppendBasicBlock(g->cur_fn_val, "CopyResultEnd");
+            LLVMValueRef awaiter_ret_ptr_ptr = LLVMBuildStructGEP(g->builder, g->cur_ret_ptr, coro_ret_start + 1, "");
+            LLVMValueRef awaiter_ret_ptr = LLVMBuildLoad(g->builder, awaiter_ret_ptr_ptr, "");
+            LLVMValueRef zero_ptr = LLVMConstNull(LLVMTypeOf(awaiter_ret_ptr));
+            LLVMValueRef need_copy_bit = LLVMBuildICmp(g->builder, LLVMIntNE, awaiter_ret_ptr, zero_ptr, "");
+            LLVMBuildCondBr(g->builder, need_copy_bit, copy_block, copy_end_block);
+
+            LLVMPositionBuilderAtEnd(g->builder, copy_block);
+            LLVMValueRef ret_ptr = LLVMBuildLoad(g->builder, g->cur_ret_ptr_ptr, "");
+            LLVMTypeRef ptr_u8 = LLVMPointerType(LLVMInt8Type(), 0);
+            LLVMValueRef dest_ptr_casted = LLVMBuildBitCast(g->builder, awaiter_ret_ptr, ptr_u8, "");
+            LLVMValueRef src_ptr_casted = LLVMBuildBitCast(g->builder, ret_ptr, ptr_u8, "");
+            bool is_volatile = false;
+            uint32_t abi_align = get_abi_alignment(g, ret_type);
+            LLVMValueRef byte_count_val = LLVMConstInt(usize_type_ref, type_size(g, ret_type), false);
+            ZigLLVMBuildMemCpy(g->builder,
+                    dest_ptr_casted, abi_align,
+                    src_ptr_casted, abi_align, byte_count_val, is_volatile);
+            LLVMBuildBr(g->builder, copy_end_block);
+
+            LLVMPositionBuilderAtEnd(g->builder, copy_end_block);
         }
-        LLVMValueRef zero = LLVMConstNull(usize_type_ref);
-        LLVMValueRef all_ones = LLVMConstAllOnes(usize_type_ref);
-        LLVMValueRef prev_val = LLVMBuildAtomicRMW(g->builder, LLVMAtomicRMWBinOpXchg, g->cur_async_awaiter_ptr,
-                all_ones, LLVMAtomicOrderingMonotonic, g->is_single_threaded);
-
-        LLVMBasicBlockRef bad_return_block = LLVMAppendBasicBlock(g->cur_fn_val, "BadReturn");
-        LLVMBasicBlockRef early_return_block = LLVMAppendBasicBlock(g->cur_fn_val, "EarlyReturn");
-        LLVMBasicBlockRef resume_them_block = LLVMAppendBasicBlock(g->cur_fn_val, "ResumeThem");
-
-        LLVMValueRef switch_instr = LLVMBuildSwitch(g->builder, prev_val, resume_them_block, 2);
-
-        LLVMAddCase(switch_instr, zero, early_return_block);
-        LLVMAddCase(switch_instr, all_ones, bad_return_block);
-
-        // Something has gone horribly wrong, and this is an invalid second return.
-        LLVMPositionBuilderAtEnd(g->builder, bad_return_block);
-        gen_assertion(g, PanicMsgIdBadReturn, &return_instruction->base);
-
-        // The caller will deal with fetching the result - we're done.
-        LLVMPositionBuilderAtEnd(g->builder, early_return_block);
-        LLVMBuildRetVoid(g->builder);
 
         // We need to resume the caller by tail calling them.
-        LLVMPositionBuilderAtEnd(g->builder, resume_them_block);
         ZigType *any_frame_type = get_any_frame_type(g, ret_type);
-        LLVMValueRef their_frame_ptr = LLVMBuildIntToPtr(g->builder, prev_val,
+        LLVMValueRef their_frame_ptr = LLVMBuildIntToPtr(g->builder, g->cur_async_prev_val,
                 get_llvm_type(g, any_frame_type), "");
-        LLVMValueRef fn_ptr_ptr = LLVMBuildStructGEP(g->builder, their_frame_ptr, coro_fn_ptr_index, "");
-        LLVMValueRef awaiter_fn = LLVMBuildLoad(g->builder, fn_ptr_ptr, "");
-        LLVMValueRef args[] = {their_frame_ptr, result_ptr_as_usize};
-        LLVMValueRef call_inst = ZigLLVMBuildCall(g->builder, awaiter_fn, args, 2, LLVMFastCallConv,
-                ZigLLVM_FnInlineAuto, "");
+        LLVMValueRef call_inst = gen_resume(g, nullptr, their_frame_ptr, ResumeIdReturn, nullptr);
         ZigLLVMSetTailCall(call_inst);
         LLVMBuildRetVoid(g->builder);
 
         return nullptr;
     }
     if (want_first_arg_sret(g, &g->cur_fn->type_entry->data.fn.fn_type_id)) {
-        if (return_instruction->value == nullptr) {
+        if (instruction->operand == nullptr) {
             LLVMBuildRetVoid(g->builder);
             return nullptr;
         }
         assert(g->cur_ret_ptr);
-        src_assert(return_instruction->value->value.special != ConstValSpecialRuntime,
-                return_instruction->base.source_node);
-        LLVMValueRef value = ir_llvm_value(g, return_instruction->value);
-        ZigType *return_type = return_instruction->value->value.type;
+        src_assert(instruction->operand->value.special != ConstValSpecialRuntime,
+                instruction->base.source_node);
+        LLVMValueRef value = ir_llvm_value(g, instruction->operand);
+        ZigType *return_type = instruction->operand->value.type;
         gen_assign_raw(g, g->cur_ret_ptr, get_pointer_to_type(g, return_type, false), value);
         LLVMBuildRetVoid(g->builder);
     } else if (g->cur_fn->type_entry->data.fn.fn_type_id.cc != CallingConventionAsync &&
             handle_is_ptr(g->cur_fn->type_entry->data.fn.fn_type_id.return_type))
     {
-        if (return_instruction->value == nullptr) {
+        if (instruction->operand == nullptr) {
             LLVMValueRef by_val_value = gen_load_untyped(g, g->cur_ret_ptr, 0, false, "");
             LLVMBuildRet(g->builder, by_val_value);
         } else {
-            LLVMValueRef value = ir_llvm_value(g, return_instruction->value);
+            LLVMValueRef value = ir_llvm_value(g, instruction->operand);
             LLVMValueRef by_val_value = gen_load_untyped(g, value, 0, false, "");
             LLVMBuildRet(g->builder, by_val_value);
         }
-    } else if (return_instruction->value == nullptr) {
+    } else if (instruction->operand == nullptr) {
         LLVMBuildRetVoid(g->builder);
     } else {
-        LLVMValueRef value = ir_llvm_value(g, return_instruction->value);
+        LLVMValueRef value = ir_llvm_value(g, instruction->operand);
         LLVMBuildRet(g->builder, value);
     }
     return nullptr;
@@ -3417,7 +3526,7 @@ static void set_call_instr_sret(CodeGen *g, LLVMValueRef call_instr) {
 static void render_async_spills(CodeGen *g) {
     ZigType *fn_type = g->cur_fn->type_entry;
     ZigType *import = get_scope_import(&g->cur_fn->fndef_scope->base);
-    uint32_t async_var_index = frame_index_arg(g, &fn_type->data.fn.fn_type_id);
+    uint32_t async_var_index = frame_index_arg(g, fn_type->data.fn.fn_type_id.return_type);
     for (size_t var_i = 0; var_i < g->cur_fn->variable_list.length; var_i += 1) {
         ZigVar *var = g->cur_fn->variable_list.at(var_i);
 
@@ -3450,7 +3559,7 @@ static void render_async_spills(CodeGen *g) {
         }
     }
     // label (grep this): [coro_frame_struct_layout]
-    if (codegen_fn_has_err_ret_tracing_stack(g, g->cur_fn)) {
+    if (codegen_fn_has_err_ret_tracing_stack(g, g->cur_fn, true)) {
         async_var_index += 2;
     }
     for (size_t alloca_i = 0; alloca_i < g->cur_fn->alloca_gen_list.length; alloca_i += 1) {
@@ -3553,7 +3662,7 @@ static LLVMValueRef ir_render_call(CodeGen *g, IrExecutable *executable, IrInstr
 
             if (ret_has_bits) {
                 // Use the result location which is inside the frame if this is an async call.
-                ret_ptr = LLVMBuildStructGEP(g->builder, frame_result_loc, coro_ret_start + 1, "");
+                ret_ptr = LLVMBuildStructGEP(g->builder, frame_result_loc, coro_ret_start + 2, "");
             }
         } else {
             LLVMValueRef frame_slice_ptr = ir_llvm_value(g, instruction->new_stack);
@@ -3590,17 +3699,26 @@ static LLVMValueRef ir_render_call(CodeGen *g, IrExecutable *executable, IrInstr
         frame_result_loc = ir_llvm_value(g, instruction->frame_result_loc);
         awaiter_init_val = LLVMBuildPtrToInt(g->builder, g->cur_ret_ptr, usize_type_ref, ""); // caller's own frame pointer
         if (ret_has_bits) {
-            if (result_loc != nullptr) {
+            if (result_loc == nullptr) {
+                // return type is a scalar, but we still need a pointer to it. Use the async fn frame.
+                ret_ptr = LLVMBuildStructGEP(g->builder, frame_result_loc, coro_ret_start + 2, "");
+            } else {
                 // Use the call instruction's result location.
                 ret_ptr = result_loc;
-            } else {
-                // return type is a scalar, but we still need a pointer to it. Use the async fn frame.
-                ret_ptr = LLVMBuildStructGEP(g->builder, frame_result_loc, coro_ret_start + 1, "");
             }
+
+            // Store a zero in the awaiter's result ptr to indicate we do not need a copy made.
+            LLVMValueRef awaiter_ret_ptr = LLVMBuildStructGEP(g->builder, frame_result_loc, coro_ret_start + 1, "");
+            LLVMValueRef zero_ptr = LLVMConstNull(LLVMGetElementType(LLVMTypeOf(awaiter_ret_ptr)));
+            LLVMBuildStore(g->builder, zero_ptr, awaiter_ret_ptr);
         }
 
-        // even if prefix_arg_err_ret_stack is true, let the async function do its
-        // error return tracing normally, and then we'll invoke merge_error_return_traces like normal.
+        if (prefix_arg_err_ret_stack) {
+            LLVMValueRef err_ret_trace_ptr_ptr = LLVMBuildStructGEP(g->builder, frame_result_loc,
+                    frame_index_trace_arg(g, src_return_type), "");
+            LLVMValueRef my_err_ret_trace_val = get_cur_err_ret_trace_val(g, instruction->base.scope);
+            LLVMBuildStore(g->builder, my_err_ret_trace_val, err_ret_trace_ptr_ptr);
+        }
     }
     if (instruction->is_async || callee_is_async) {
         assert(frame_result_loc != nullptr);
@@ -3652,7 +3770,7 @@ static LLVMValueRef ir_render_call(CodeGen *g, IrExecutable *executable, IrInstr
     LLVMValueRef result;
 
     if (instruction->is_async || callee_is_async) {
-        uint32_t arg_start_i = frame_index_arg(g, &fn_type->data.fn.fn_type_id);
+        uint32_t arg_start_i = frame_index_arg(g, fn_type->data.fn.fn_type_id.return_type);
 
         LLVMValueRef casted_frame;
         if (instruction->new_stack != nullptr) {
@@ -3678,8 +3796,7 @@ static LLVMValueRef ir_render_call(CodeGen *g, IrExecutable *executable, IrInstr
         }
     }
     if (instruction->is_async) {
-        LLVMValueRef args[] = {frame_result_loc, LLVMGetUndef(usize_type_ref)};
-        ZigLLVMBuildCall(g->builder, fn_val, args, 2, llvm_cc, fn_inline, "");
+        gen_resume(g, fn_val, frame_result_loc, ResumeIdCall, nullptr);
         if (instruction->new_stack != nullptr) {
             return frame_result_loc;
         }
@@ -3694,36 +3811,23 @@ static LLVMValueRef ir_render_call(CodeGen *g, IrExecutable *executable, IrInstr
         LLVMAddCase(g->cur_async_switch_instr, new_block_index_val, call_bb);
 
         LLVMBuildStore(g->builder, new_block_index_val, g->cur_async_resume_index_ptr);
-        LLVMValueRef args[] = {frame_result_loc, LLVMGetUndef(usize_type_ref)};
-        LLVMValueRef call_inst = ZigLLVMBuildCall(g->builder, fn_val, args, 2, llvm_cc, fn_inline, "");
+
+        LLVMValueRef call_inst = gen_resume(g, fn_val, frame_result_loc, ResumeIdCall, nullptr);
         ZigLLVMSetTailCall(call_inst);
         LLVMBuildRetVoid(g->builder);
 
         LLVMPositionBuilderAtEnd(g->builder, call_bb);
-        if (ir_want_runtime_safety(g, &instruction->base)) {
-            LLVMBasicBlockRef bad_resume_block = LLVMAppendBasicBlock(g->cur_fn_val, "BadResume");
-            LLVMBasicBlockRef ok_resume_block = LLVMAppendBasicBlock(g->cur_fn_val, "OkResume");
-            LLVMValueRef arg_val = LLVMGetParam(g->cur_fn_val, 1);
-            LLVMValueRef all_ones = LLVMConstAllOnes(usize_type_ref);
-            LLVMValueRef ok_bit = LLVMBuildICmp(g->builder, LLVMIntNE, arg_val, all_ones, "");
-            LLVMBuildCondBr(g->builder, ok_bit, ok_resume_block, bad_resume_block);
-
-            LLVMPositionBuilderAtEnd(g->builder, bad_resume_block);
-            gen_safety_crash(g, PanicMsgIdResumedAnAwaitingFn);
-
-            LLVMPositionBuilderAtEnd(g->builder, ok_resume_block);
-        }
-
+        gen_assert_resume_id(g, &instruction->base, ResumeIdReturn, PanicMsgIdResumedAnAwaitingFn, nullptr);
         render_async_var_decls(g, instruction->base.scope);
 
-        if (type_has_bits(src_return_type)) {
-            LLVMValueRef spilled_result_ptr = LLVMGetParam(g->cur_fn_val, 1);
-            LLVMValueRef casted_spilled_result_ptr = LLVMBuildIntToPtr(g->builder, spilled_result_ptr,
-                    get_llvm_type(g, ptr_result_type), "");
-            return get_handle_value(g, casted_spilled_result_ptr, src_return_type, ptr_result_type);
-        } else {
+        if (!type_has_bits(src_return_type))
             return nullptr;
-        }
+
+        if (result_loc != nullptr) 
+            return get_handle_value(g, result_loc, src_return_type, ptr_result_type);
+
+        LLVMValueRef result_ptr = LLVMBuildStructGEP(g->builder, frame_result_loc, coro_ret_start + 2, "");
+        return LLVMBuildLoad(g->builder, result_ptr, "");
     }
 
     if (instruction->new_stack == nullptr) {
@@ -5191,8 +5295,9 @@ static LLVMValueRef ir_render_suspend_finish(CodeGen *g, IrExecutable *executabl
     return nullptr;
 }
 
-static LLVMValueRef ir_render_await(CodeGen *g, IrExecutable *executable, IrInstructionAwait *instruction) {
+static LLVMValueRef ir_render_await(CodeGen *g, IrExecutable *executable, IrInstructionAwaitGen *instruction) {
     LLVMTypeRef usize_type_ref = g->builtin_types.entry_usize->llvm_type;
+    LLVMValueRef zero = LLVMConstNull(usize_type_ref);
     LLVMValueRef target_frame_ptr = ir_llvm_value(g, instruction->frame);
     ZigType *result_type = instruction->base.value.type;
     ZigType *ptr_result_type = get_pointer_to_type(g, result_type, true);
@@ -5208,86 +5313,75 @@ static LLVMValueRef ir_render_await(CodeGen *g, IrExecutable *executable, IrInst
     // At this point resuming the function will do the correct thing.
     // This code is as if it is running inside the suspend block.
 
+    // supply the awaiter return pointer
+    LLVMValueRef result_loc = (instruction->result_loc == nullptr) ?
+        nullptr : ir_llvm_value(g, instruction->result_loc);
+    if (type_has_bits(result_type)) {
+        LLVMValueRef awaiter_ret_ptr_ptr = LLVMBuildStructGEP(g->builder, target_frame_ptr, coro_ret_start + 1, "");
+        if (result_loc == nullptr) {
+            // no copy needed: store a null of the slot's own pointer type (`zero` is a usize and would not type-check)
+            LLVMBuildStore(g->builder, LLVMConstNull(LLVMGetElementType(LLVMTypeOf(awaiter_ret_ptr_ptr))), awaiter_ret_ptr_ptr);
+        } else {
+            LLVMBuildStore(g->builder, result_loc, awaiter_ret_ptr_ptr);
+        }
+    }
+
+    // supply the error return trace pointer
+    LLVMValueRef my_err_ret_trace_val = get_cur_err_ret_trace_val(g, instruction->base.scope);
+    if (my_err_ret_trace_val != nullptr) {
+        LLVMValueRef err_ret_trace_ptr_ptr = LLVMBuildStructGEP(g->builder, target_frame_ptr,
+                frame_index_trace_arg(g, result_type), "");
+        LLVMBuildStore(g->builder, my_err_ret_trace_val, err_ret_trace_ptr_ptr);
+    }
+
     // caller's own frame pointer
     LLVMValueRef awaiter_init_val = LLVMBuildPtrToInt(g->builder, g->cur_ret_ptr, usize_type_ref, "");
     LLVMValueRef awaiter_ptr = LLVMBuildStructGEP(g->builder, target_frame_ptr, coro_awaiter_index, "");
-    LLVMValueRef result_ptr_as_usize;
-    if (type_has_bits(result_type)) {
-        LLVMValueRef result_ptr_ptr = LLVMBuildStructGEP(g->builder, target_frame_ptr, coro_ret_start, "");
-        LLVMValueRef result_ptr = LLVMBuildLoad(g->builder, result_ptr_ptr, "");
-        result_ptr_as_usize = LLVMBuildPtrToInt(g->builder, result_ptr, usize_type_ref, "");
-    } else {
-        result_ptr_as_usize = LLVMGetUndef(usize_type_ref);
-    }
     LLVMValueRef prev_val = LLVMBuildAtomicRMW(g->builder, LLVMAtomicRMWBinOpXchg, awaiter_ptr, awaiter_init_val,
-            LLVMAtomicOrderingMonotonic, g->is_single_threaded);
+            LLVMAtomicOrderingRelease, g->is_single_threaded);
 
     LLVMBasicBlockRef bad_await_block = LLVMAppendBasicBlock(g->cur_fn_val, "BadAwait");
     LLVMBasicBlockRef complete_suspend_block = LLVMAppendBasicBlock(g->cur_fn_val, "CompleteSuspend");
+    LLVMBasicBlockRef early_return_block = LLVMAppendBasicBlock(g->cur_fn_val, "EarlyReturn");
 
-    LLVMValueRef zero = LLVMConstNull(usize_type_ref);
     LLVMValueRef all_ones = LLVMConstAllOnes(usize_type_ref);
     LLVMValueRef switch_instr = LLVMBuildSwitch(g->builder, prev_val, bad_await_block, 2);
-    LLVMBasicBlockRef predecessor_bb = LLVMGetInsertBlock(g->builder);
 
     LLVMAddCase(switch_instr, zero, complete_suspend_block);
-
-    // Early return: The async function has already completed. No need to suspend.
-    LLVMAddCase(switch_instr, all_ones, resume_bb);
+    LLVMAddCase(switch_instr, all_ones, early_return_block);
 
     // We discovered that another awaiter was already here.
     LLVMPositionBuilderAtEnd(g->builder, bad_await_block);
     gen_assertion(g, PanicMsgIdBadAwait, &instruction->base);
 
+    // Early return: The async function has already completed, but it is suspending before setting the result,
+    // populating the error return trace if applicable, and running the defers.
+    // Tail resume it now, so that it can complete.
+    LLVMPositionBuilderAtEnd(g->builder, early_return_block);
+    LLVMValueRef call_inst = gen_resume(g, nullptr, target_frame_ptr, ResumeIdAwaitEarlyReturn, awaiter_init_val);
+    ZigLLVMSetTailCall(call_inst);
+    LLVMBuildRetVoid(g->builder);
+
     // Rely on the target to resume us from suspension.
     LLVMPositionBuilderAtEnd(g->builder, complete_suspend_block);
     LLVMBuildRetVoid(g->builder);
 
     LLVMPositionBuilderAtEnd(g->builder, resume_bb);
-    // We either got here from Entry (function call) or from the switch above
-    LLVMValueRef spilled_result_ptr = LLVMBuildPhi(g->builder, usize_type_ref, "");
-    LLVMValueRef incoming_values[] = { LLVMGetParam(g->cur_fn_val, 1), result_ptr_as_usize };
-    LLVMBasicBlockRef incoming_blocks[] = { g->cur_preamble_llvm_block, predecessor_bb };
-    LLVMAddIncoming(spilled_result_ptr, incoming_values, incoming_blocks, 2);
-
-    if (ir_want_runtime_safety(g, &instruction->base)) {
-        LLVMBasicBlockRef bad_resume_block = LLVMAppendBasicBlock(g->cur_fn_val, "BadResume");
-        LLVMBasicBlockRef ok_resume_block = LLVMAppendBasicBlock(g->cur_fn_val, "OkResume");
-        LLVMValueRef all_ones = LLVMConstAllOnes(usize_type_ref);
-        LLVMValueRef ok_bit = LLVMBuildICmp(g->builder, LLVMIntNE, spilled_result_ptr, all_ones, "");
-        LLVMBuildCondBr(g->builder, ok_bit, ok_resume_block, bad_resume_block);
-
-        LLVMPositionBuilderAtEnd(g->builder, bad_resume_block);
-        gen_safety_crash(g, PanicMsgIdResumedAnAwaitingFn);
-
-        LLVMPositionBuilderAtEnd(g->builder, ok_resume_block);
-    }
-
-    render_async_var_decls(g, instruction->base.scope);
-
-    if (type_has_bits(result_type)) {
-        LLVMValueRef casted_spilled_result_ptr = LLVMBuildIntToPtr(g->builder, spilled_result_ptr,
-                get_llvm_type(g, ptr_result_type), "");
-        return get_handle_value(g, casted_spilled_result_ptr, result_type, ptr_result_type);
-    } else {
-        return nullptr;
+    gen_assert_resume_id(g, &instruction->base, ResumeIdReturn, PanicMsgIdResumedAnAwaitingFn, nullptr);
+    if (type_has_bits(result_type) && result_loc != nullptr) {
+        return get_handle_value(g, result_loc, result_type, ptr_result_type);
     }
+    return nullptr;
 }
 
 static LLVMValueRef ir_render_coro_resume(CodeGen *g, IrExecutable *executable,
         IrInstructionCoroResume *instruction)
 {
-    LLVMTypeRef usize_type_ref = g->builtin_types.entry_usize->llvm_type;
     LLVMValueRef frame = ir_llvm_value(g, instruction->frame);
     ZigType *frame_type = instruction->frame->value.type;
     assert(frame_type->id == ZigTypeIdAnyFrame);
-    LLVMValueRef fn_ptr_ptr = LLVMBuildStructGEP(g->builder, frame, coro_fn_ptr_index, "");
-    LLVMValueRef uncasted_fn_val = LLVMBuildLoad(g->builder, fn_ptr_ptr, "");
-    LLVMValueRef fn_val = LLVMBuildIntToPtr(g->builder, uncasted_fn_val, g->anyframe_fn_type, "");
-    LLVMValueRef arg_val = ir_want_runtime_safety(g, &instruction->base) ?
-        LLVMConstAllOnes(usize_type_ref) : LLVMGetUndef(usize_type_ref);
-    LLVMValueRef args[] = {frame, arg_val};
-    ZigLLVMBuildCall(g->builder, fn_val, args, 2, LLVMFastCallConv, ZigLLVM_FnInlineAuto, "");
+
+    gen_resume(g, nullptr, frame, ResumeIdManual, nullptr);
     return nullptr;
 }
 
@@ -5383,7 +5477,6 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
         case IrInstructionIdImplicitCast:
         case IrInstructionIdResolveResult:
         case IrInstructionIdResetResult:
-        case IrInstructionIdResultPtr:
         case IrInstructionIdContainerInitList:
         case IrInstructionIdSliceSrc:
         case IrInstructionIdRef:
@@ -5393,10 +5486,13 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
         case IrInstructionIdFrameType:
         case IrInstructionIdFrameSizeSrc:
         case IrInstructionIdAllocaGen:
+        case IrInstructionIdAwaitSrc:
             zig_unreachable();
 
         case IrInstructionIdDeclVarGen:
             return ir_render_decl_var(g, executable, (IrInstructionDeclVarGen *)instruction);
+        case IrInstructionIdReturnBegin:
+            return ir_render_return_begin(g, executable, (IrInstructionReturnBegin *)instruction);
         case IrInstructionIdReturn:
             return ir_render_return(g, executable, (IrInstructionReturn *)instruction);
         case IrInstructionIdBinOp:
@@ -5547,8 +5643,8 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
             return ir_render_coro_resume(g, executable, (IrInstructionCoroResume *)instruction);
         case IrInstructionIdFrameSizeGen:
             return ir_render_frame_size(g, executable, (IrInstructionFrameSizeGen *)instruction);
-        case IrInstructionIdAwait:
-            return ir_render_await(g, executable, (IrInstructionAwait *)instruction);
+        case IrInstructionIdAwaitGen:
+            return ir_render_await(g, executable, (IrInstructionAwaitGen *)instruction);
     }
     zig_unreachable();
 }
@@ -6777,16 +6873,19 @@ static void do_code_gen(CodeGen *g) {
             g->cur_async_awaiter_ptr = LLVMBuildStructGEP(g->builder, g->cur_ret_ptr, coro_awaiter_index, "");
             LLVMValueRef resume_index_ptr = LLVMBuildStructGEP(g->builder, g->cur_ret_ptr, coro_resume_index, "");
             g->cur_async_resume_index_ptr = resume_index_ptr;
-            LLVMValueRef err_ret_trace_val = nullptr;
-            uint32_t trace_field_index;
+
+            if (type_has_bits(fn_type_id->return_type)) {
+                g->cur_ret_ptr_ptr = LLVMBuildStructGEP(g->builder, g->cur_ret_ptr, coro_ret_start, "");
+            }
             if (codegen_fn_has_err_ret_tracing_arg(g, fn_type_id->return_type)) {
-                trace_field_index = frame_index_trace_arg(g, fn_type_id);
-                err_ret_trace_val = LLVMBuildStructGEP(g->builder, g->cur_ret_ptr, trace_field_index, "");
-                g->cur_err_ret_trace_val_arg = err_ret_trace_val;
-            } else if (codegen_fn_has_err_ret_tracing_stack(g, fn_table_entry)) {
-                trace_field_index = frame_index_trace_stack(g, fn_type_id);
-                err_ret_trace_val = LLVMBuildStructGEP(g->builder, g->cur_ret_ptr, trace_field_index, "");
-                g->cur_err_ret_trace_val_stack = err_ret_trace_val;
+                uint32_t trace_field_index = frame_index_trace_arg(g, fn_type_id->return_type);
+                g->cur_err_ret_trace_val_arg = LLVMBuildStructGEP(g->builder, g->cur_ret_ptr, trace_field_index, "");
+            }
+            uint32_t trace_field_index_stack = UINT32_MAX;
+            if (codegen_fn_has_err_ret_tracing_stack(g, fn_table_entry, true)) {
+                trace_field_index_stack = frame_index_trace_stack(g, fn_type_id);
+                g->cur_err_ret_trace_val_stack = LLVMBuildStructGEP(g->builder, g->cur_ret_ptr,
+                        trace_field_index_stack, "");
             }
 
             LLVMValueRef resume_index = LLVMBuildLoad(g->builder, resume_index_ptr, "");
@@ -6798,11 +6897,11 @@ static void do_code_gen(CodeGen *g) {
             LLVMAddCase(switch_instr, zero, entry_block->llvm_block);
             g->cur_resume_block_count += 1;
             LLVMPositionBuilderAtEnd(g->builder, entry_block->llvm_block);
-            if (err_ret_trace_val != nullptr) {
+            if (trace_field_index_stack != UINT32_MAX) {
                 LLVMValueRef trace_field_ptr = LLVMBuildStructGEP(g->builder, g->cur_ret_ptr,
-                        trace_field_index, "");
+                        trace_field_index_stack, "");
                 LLVMValueRef trace_field_addrs = LLVMBuildStructGEP(g->builder, g->cur_ret_ptr,
-                        trace_field_index + 1, "");
+                        trace_field_index_stack + 1, "");
 
                 LLVMValueRef index_ptr = LLVMBuildStructGEP(g->builder, trace_field_ptr, 0, "");
                 LLVMBuildStore(g->builder, zero, index_ptr);
@@ -9725,7 +9824,7 @@ bool codegen_fn_has_err_ret_tracing_arg(CodeGen *g, ZigType *return_type) {
          return_type->id == ZigTypeIdErrorSet);
 }
 
-bool codegen_fn_has_err_ret_tracing_stack(CodeGen *g, ZigFn *fn) {
+bool codegen_fn_has_err_ret_tracing_stack(CodeGen *g, ZigFn *fn, bool is_async) {
     return g->have_err_ret_tracing && fn->calls_or_awaits_errorable_fn &&
-        !codegen_fn_has_err_ret_tracing_arg(g, fn->type_entry->data.fn.fn_type_id.return_type);
+        (is_async || !codegen_fn_has_err_ret_tracing_arg(g, fn->type_entry->data.fn.fn_type_id.return_type));
 }
src/codegen.hpp
@@ -62,6 +62,6 @@ TargetSubsystem detect_subsystem(CodeGen *g);
 
 void codegen_release_caches(CodeGen *codegen);
 bool codegen_fn_has_err_ret_tracing_arg(CodeGen *g, ZigType *return_type);
-bool codegen_fn_has_err_ret_tracing_stack(CodeGen *g, ZigFn *fn);
+bool codegen_fn_has_err_ret_tracing_stack(CodeGen *g, ZigFn *fn, bool is_async);
 
 #endif
src/ir.cpp
@@ -525,6 +525,10 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionReturn *) {
     return IrInstructionIdReturn;
 }
 
+static constexpr IrInstructionId ir_instruction_id(IrInstructionReturnBegin *) {
+    return IrInstructionIdReturnBegin;
+}
+
 static constexpr IrInstructionId ir_instruction_id(IrInstructionCast *) {
     return IrInstructionIdCast;
 }
@@ -945,10 +949,6 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionResetResult *) {
     return IrInstructionIdResetResult;
 }
 
-static constexpr IrInstructionId ir_instruction_id(IrInstructionResultPtr *) {
-    return IrInstructionIdResultPtr;
-}
-
 static constexpr IrInstructionId ir_instruction_id(IrInstructionPtrOfArrayToSlice *) {
     return IrInstructionIdPtrOfArrayToSlice;
 }
@@ -1049,8 +1049,12 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionSuspendFinish *)
     return IrInstructionIdSuspendFinish;
 }
 
-static constexpr IrInstructionId ir_instruction_id(IrInstructionAwait *) {
-    return IrInstructionIdAwait;
+static constexpr IrInstructionId ir_instruction_id(IrInstructionAwaitSrc *) {
+    return IrInstructionIdAwaitSrc;
+}
+
+static constexpr IrInstructionId ir_instruction_id(IrInstructionAwaitGen *) {
+    return IrInstructionIdAwaitGen;
 }
 
 static constexpr IrInstructionId ir_instruction_id(IrInstructionCoroResume *) {
@@ -1109,18 +1113,32 @@ static IrInstruction *ir_build_cond_br(IrBuilder *irb, Scope *scope, AstNode *so
 }
 
 static IrInstruction *ir_build_return(IrBuilder *irb, Scope *scope, AstNode *source_node,
-        IrInstruction *return_value)
+        IrInstruction *operand)
 {
     IrInstructionReturn *return_instruction = ir_build_instruction<IrInstructionReturn>(irb, scope, source_node);
     return_instruction->base.value.type = irb->codegen->builtin_types.entry_unreachable;
     return_instruction->base.value.special = ConstValSpecialStatic;
-    return_instruction->value = return_value;
+    return_instruction->operand = operand;
+
+    if (operand != nullptr) ir_ref_instruction(operand, irb->current_basic_block);
+
+    return &return_instruction->base;
+}
+
+static IrInstruction *ir_build_return_begin(IrBuilder *irb, Scope *scope, AstNode *source_node,
+        IrInstruction *operand)
+{
+    IrInstructionReturnBegin *return_instruction = ir_build_instruction<IrInstructionReturnBegin>(irb, scope, source_node);
+    return_instruction->base.value.type = irb->codegen->builtin_types.entry_void;
+    return_instruction->base.value.special = ConstValSpecialStatic;
+    return_instruction->operand = operand;
 
-    if (return_value != nullptr) ir_ref_instruction(return_value, irb->current_basic_block);
+    ir_ref_instruction(operand, irb->current_basic_block);
 
     return &return_instruction->base;
 }
 
+
 static IrInstruction *ir_build_const_void(IrBuilder *irb, Scope *scope, AstNode *source_node) {
     IrInstructionConst *const_instruction = ir_build_instruction<IrInstructionConst>(irb, scope, source_node);
     const_instruction->base.value.type = irb->codegen->builtin_types.entry_void;
@@ -2525,11 +2543,12 @@ static IrInstruction *ir_build_align_of(IrBuilder *irb, Scope *scope, AstNode *s
 }
 
 static IrInstruction *ir_build_test_err_src(IrBuilder *irb, Scope *scope, AstNode *source_node,
-    IrInstruction *base_ptr, bool resolve_err_set)
+    IrInstruction *base_ptr, bool resolve_err_set, bool base_ptr_is_payload)
 {
     IrInstructionTestErrSrc *instruction = ir_build_instruction<IrInstructionTestErrSrc>(irb, scope, source_node);
     instruction->base_ptr = base_ptr;
     instruction->resolve_err_set = resolve_err_set;
+    instruction->base_ptr_is_payload = base_ptr_is_payload;
 
     ir_ref_instruction(base_ptr, irb->current_basic_block);
 
@@ -2971,18 +2990,6 @@ static IrInstruction *ir_build_reset_result(IrBuilder *irb, Scope *scope, AstNod
     return &instruction->base;
 }
 
-static IrInstruction *ir_build_result_ptr(IrBuilder *irb, Scope *scope, AstNode *source_node,
-        ResultLoc *result_loc, IrInstruction *result)
-{
-    IrInstructionResultPtr *instruction = ir_build_instruction<IrInstructionResultPtr>(irb, scope, source_node);
-    instruction->result_loc = result_loc;
-    instruction->result = result;
-
-    ir_ref_instruction(result, irb->current_basic_block);
-
-    return &instruction->base;
-}
-
 static IrInstruction *ir_build_opaque_type(IrBuilder *irb, Scope *scope, AstNode *source_node) {
     IrInstructionOpaqueType *instruction = ir_build_instruction<IrInstructionOpaqueType>(irb, scope, source_node);
 
@@ -3266,17 +3273,33 @@ static IrInstruction *ir_build_suspend_finish(IrBuilder *irb, Scope *scope, AstN
     return &instruction->base;
 }
 
-static IrInstruction *ir_build_await(IrBuilder *irb, Scope *scope, AstNode *source_node,
-        IrInstruction *frame)
+static IrInstruction *ir_build_await_src(IrBuilder *irb, Scope *scope, AstNode *source_node,
+        IrInstruction *frame, ResultLoc *result_loc)
 {
-    IrInstructionAwait *instruction = ir_build_instruction<IrInstructionAwait>(irb, scope, source_node);
+    IrInstructionAwaitSrc *instruction = ir_build_instruction<IrInstructionAwaitSrc>(irb, scope, source_node);
     instruction->frame = frame;
+    instruction->result_loc = result_loc;
 
     ir_ref_instruction(frame, irb->current_basic_block);
 
     return &instruction->base;
 }
 
+static IrInstruction *ir_build_await_gen(IrAnalyze *ira, IrInstruction *source_instruction,
+        IrInstruction *frame, ZigType *result_type, IrInstruction *result_loc)
+{
+    IrInstructionAwaitGen *instruction = ir_build_instruction<IrInstructionAwaitGen>(&ira->new_irb,
+            source_instruction->scope, source_instruction->source_node);
+    instruction->base.value.type = result_type;
+    instruction->frame = frame;
+    instruction->result_loc = result_loc;
+
+    ir_ref_instruction(frame, ira->new_irb.current_basic_block);
+    if (result_loc != nullptr) ir_ref_instruction(result_loc, ira->new_irb.current_basic_block);
+
+    return &instruction->base;
+}
+
 static IrInstruction *ir_build_coro_resume(IrBuilder *irb, Scope *scope, AstNode *source_node,
         IrInstruction *frame)
 {
@@ -3416,16 +3439,6 @@ static ScopeDeferExpr *get_scope_defer_expr(Scope *scope) {
     return nullptr;
 }
 
-static IrInstruction *ir_gen_async_return(IrBuilder *irb, Scope *scope, AstNode *node, IrInstruction *return_value,
-    bool is_generated_code)
-{
-    ir_mark_gen(ir_build_add_implicit_return_type(irb, scope, node, return_value));
-
-    IrInstruction *return_inst = ir_build_return(irb, scope, node, return_value);
-    return_inst->is_gen = is_generated_code;
-    return return_inst;
-}
-
 static IrInstruction *ir_gen_return(IrBuilder *irb, Scope *scope, AstNode *node, LVal lval, ResultLoc *result_loc) {
     assert(node->type == NodeTypeReturnExpr);
 
@@ -3467,19 +3480,16 @@ static IrInstruction *ir_gen_return(IrBuilder *irb, Scope *scope, AstNode *node,
                     return_value = ir_build_const_void(irb, scope, node);
                 }
 
+                ir_build_return_begin(irb, scope, node, return_value);
+
                 size_t defer_counts[2];
                 ir_count_defers(irb, scope, outer_scope, defer_counts);
                 bool have_err_defers = defer_counts[ReturnKindError] > 0;
                 if (have_err_defers || irb->codegen->have_err_ret_tracing) {
                     IrBasicBlock *err_block = ir_create_basic_block(irb, scope, "ErrRetErr");
                     IrBasicBlock *ok_block = ir_create_basic_block(irb, scope, "ErrRetOk");
-                    if (!have_err_defers) {
-                        ir_gen_defers_for_block(irb, scope, outer_scope, false);
-                    }
 
-                    IrInstruction *ret_ptr = ir_build_result_ptr(irb, scope, node, &result_loc_ret->base,
-                            return_value);
-                    IrInstruction *is_err = ir_build_test_err_src(irb, scope, node, ret_ptr, false);
+                    IrInstruction *is_err = ir_build_test_err_src(irb, scope, node, return_value, false, true);
 
                     bool should_inline = ir_should_inline(irb->exec, scope);
                     IrInstruction *is_comptime;
@@ -3493,28 +3503,26 @@ static IrInstruction *ir_gen_return(IrBuilder *irb, Scope *scope, AstNode *node,
                     IrBasicBlock *ret_stmt_block = ir_create_basic_block(irb, scope, "RetStmt");
 
                     ir_set_cursor_at_end_and_append_block(irb, err_block);
-                    if (have_err_defers) {
-                        ir_gen_defers_for_block(irb, scope, outer_scope, true);
-                    }
                     if (irb->codegen->have_err_ret_tracing && !should_inline) {
                         ir_build_save_err_ret_addr(irb, scope, node);
                     }
+                    ir_gen_defers_for_block(irb, scope, outer_scope, true);
                     ir_build_br(irb, scope, node, ret_stmt_block, is_comptime);
 
                     ir_set_cursor_at_end_and_append_block(irb, ok_block);
-                    if (have_err_defers) {
-                        ir_gen_defers_for_block(irb, scope, outer_scope, false);
-                    }
+                    ir_gen_defers_for_block(irb, scope, outer_scope, false);
                     ir_build_br(irb, scope, node, ret_stmt_block, is_comptime);
 
                     ir_set_cursor_at_end_and_append_block(irb, ret_stmt_block);
-                    IrInstruction *result = ir_gen_async_return(irb, scope, node, return_value, false);
+                    ir_mark_gen(ir_build_add_implicit_return_type(irb, scope, node, return_value));
+                    IrInstruction *result = ir_build_return(irb, scope, node, return_value);
                     result_loc_ret->base.source_instruction = result;
                     return result;
                 } else {
                     // generate unconditional defers
                     ir_gen_defers_for_block(irb, scope, outer_scope, false);
-                    IrInstruction *result = ir_gen_async_return(irb, scope, node, return_value, false);
+                    ir_mark_gen(ir_build_add_implicit_return_type(irb, scope, node, return_value));
+                    IrInstruction *result = ir_build_return(irb, scope, node, return_value);
                     result_loc_ret->base.source_instruction = result;
                     return result;
                 }
@@ -3525,7 +3533,7 @@ static IrInstruction *ir_gen_return(IrBuilder *irb, Scope *scope, AstNode *node,
                 IrInstruction *err_union_ptr = ir_gen_node_extra(irb, expr_node, scope, LValPtr, nullptr);
                 if (err_union_ptr == irb->codegen->invalid_instruction)
                     return irb->codegen->invalid_instruction;
-                IrInstruction *is_err_val = ir_build_test_err_src(irb, scope, node, err_union_ptr, true);
+                IrInstruction *is_err_val = ir_build_test_err_src(irb, scope, node, err_union_ptr, true, false);
 
                 IrBasicBlock *return_block = ir_create_basic_block(irb, scope, "ErrRetReturn");
                 IrBasicBlock *continue_block = ir_create_basic_block(irb, scope, "ErrRetContinue");
@@ -3539,10 +3547,10 @@ static IrInstruction *ir_gen_return(IrBuilder *irb, Scope *scope, AstNode *node,
                 ir_mark_gen(ir_build_cond_br(irb, scope, node, is_err_val, return_block, continue_block, is_comptime));
 
                 ir_set_cursor_at_end_and_append_block(irb, return_block);
+                IrInstruction *err_val_ptr = ir_build_unwrap_err_code(irb, scope, node, err_union_ptr);
+                IrInstruction *err_val = ir_build_load_ptr(irb, scope, node, err_val_ptr);
+                ir_build_return_begin(irb, scope, node, err_val);
                 if (!ir_gen_defers_for_block(irb, scope, outer_scope, true)) {
-                    IrInstruction *err_val_ptr = ir_build_unwrap_err_code(irb, scope, node, err_union_ptr);
-                    IrInstruction *err_val = ir_build_load_ptr(irb, scope, node, err_val_ptr);
-
                     ResultLocReturn *result_loc_ret = allocate<ResultLocReturn>(1);
                     result_loc_ret->base.id = ResultLocIdReturn;
                     ir_build_reset_result(irb, scope, node, &result_loc_ret->base);
@@ -3551,7 +3559,8 @@ static IrInstruction *ir_gen_return(IrBuilder *irb, Scope *scope, AstNode *node,
                     if (irb->codegen->have_err_ret_tracing && !should_inline) {
                         ir_build_save_err_ret_addr(irb, scope, node);
                     }
-                    IrInstruction *ret_inst = ir_gen_async_return(irb, scope, node, err_val, false);
+                    ir_mark_gen(ir_build_add_implicit_return_type(irb, scope, node, err_val));
+                    IrInstruction *ret_inst = ir_build_return(irb, scope, node, err_val);
                     result_loc_ret->base.source_instruction = ret_inst;
                 }
 
@@ -6081,7 +6090,8 @@ static IrInstruction *ir_gen_while_expr(IrBuilder *irb, Scope *scope, AstNode *n
                 LValPtr, nullptr);
         if (err_val_ptr == irb->codegen->invalid_instruction)
             return err_val_ptr;
-        IrInstruction *is_err = ir_build_test_err_src(irb, scope, node->data.while_expr.condition, err_val_ptr, true);
+        IrInstruction *is_err = ir_build_test_err_src(irb, scope, node->data.while_expr.condition, err_val_ptr,
+                true, false);
         IrBasicBlock *after_cond_block = irb->current_basic_block;
         IrInstruction *void_else_result = else_node ? nullptr : ir_mark_gen(ir_build_const_void(irb, scope, node));
         IrInstruction *cond_br_inst;
@@ -6897,7 +6907,7 @@ static IrInstruction *ir_gen_if_err_expr(IrBuilder *irb, Scope *scope, AstNode *
         return err_val_ptr;
 
     IrInstruction *err_val = ir_build_load_ptr(irb, scope, node, err_val_ptr);
-    IrInstruction *is_err = ir_build_test_err_src(irb, scope, node, err_val_ptr, true);
+    IrInstruction *is_err = ir_build_test_err_src(irb, scope, node, err_val_ptr, true, false);
 
     IrBasicBlock *ok_block = ir_create_basic_block(irb, scope, "TryOk");
     IrBasicBlock *else_block = ir_create_basic_block(irb, scope, "TryElse");
@@ -7513,7 +7523,7 @@ static IrInstruction *ir_gen_catch(IrBuilder *irb, Scope *parent_scope, AstNode
     if (err_union_ptr == irb->codegen->invalid_instruction)
         return irb->codegen->invalid_instruction;
 
-    IrInstruction *is_err = ir_build_test_err_src(irb, parent_scope, node, err_union_ptr, true);
+    IrInstruction *is_err = ir_build_test_err_src(irb, parent_scope, node, err_union_ptr, true, false);
 
     IrInstruction *is_comptime;
     if (ir_should_inline(irb->exec, parent_scope)) {
@@ -7830,7 +7840,9 @@ static IrInstruction *ir_gen_resume(IrBuilder *irb, Scope *scope, AstNode *node)
     return ir_build_coro_resume(irb, scope, node, target_inst);
 }
 
-static IrInstruction *ir_gen_await_expr(IrBuilder *irb, Scope *scope, AstNode *node) {
+static IrInstruction *ir_gen_await_expr(IrBuilder *irb, Scope *scope, AstNode *node, LVal lval,
+        ResultLoc *result_loc)
+{
     assert(node->type == NodeTypeAwaitExpr);
 
     ZigFn *fn_entry = exec_fn_entry(irb->exec);
@@ -7852,7 +7864,8 @@ static IrInstruction *ir_gen_await_expr(IrBuilder *irb, Scope *scope, AstNode *n
     if (target_inst == irb->codegen->invalid_instruction)
         return irb->codegen->invalid_instruction;
 
-    return ir_build_await(irb, scope, node, target_inst);
+    IrInstruction *await_inst = ir_build_await_src(irb, scope, node, target_inst, result_loc);
+    return ir_lval_wrap(irb, scope, await_inst, lval, result_loc);
 }
 
 static IrInstruction *ir_gen_suspend(IrBuilder *irb, Scope *parent_scope, AstNode *node) {
@@ -8016,7 +8029,7 @@ static IrInstruction *ir_gen_node_raw(IrBuilder *irb, AstNode *node, Scope *scop
         case NodeTypeResume:
             return ir_lval_wrap(irb, scope, ir_gen_resume(irb, scope, node), lval, result_loc);
         case NodeTypeAwaitExpr:
-            return ir_lval_wrap(irb, scope, ir_gen_await_expr(irb, scope, node), lval, result_loc);
+            return ir_gen_await_expr(irb, scope, node, lval, result_loc);
         case NodeTypeSuspend:
             return ir_lval_wrap(irb, scope, ir_gen_suspend(irb, scope, node), lval, result_loc);
         case NodeTypeEnumLiteral:
@@ -8088,8 +8101,10 @@ bool ir_gen(CodeGen *codegen, AstNode *node, Scope *scope, IrExecutable *ir_exec
         return false;
 
     if (!instr_is_unreachable(result)) {
+        ir_mark_gen(ir_build_return_begin(irb, scope, node, result));
         // no need for save_err_ret_addr because this cannot return error
-        ir_gen_async_return(irb, scope, result->source_node, result, true);
+        ir_mark_gen(ir_build_add_implicit_return_type(irb, scope, result->source_node, result));
+        ir_mark_gen(ir_build_return(irb, scope, result->source_node, result));
     }
 
     return true;
@@ -8181,18 +8196,19 @@ static ConstExprValue *ir_exec_const_result(CodeGen *codegen, IrExecutable *exec
         IrInstruction *instruction = bb->instruction_list.at(i);
         if (instruction->id == IrInstructionIdReturn) {
             IrInstructionReturn *ret_inst = (IrInstructionReturn *)instruction;
-            IrInstruction *value = ret_inst->value;
-            if (value->value.special == ConstValSpecialRuntime) {
-                exec_add_error_node(codegen, exec, value->source_node,
+            IrInstruction *operand = ret_inst->operand;
+            if (operand->value.special == ConstValSpecialRuntime) {
+                exec_add_error_node(codegen, exec, operand->source_node,
                         buf_sprintf("unable to evaluate constant expression"));
                 return &codegen->invalid_instruction->value;
             }
-            return &value->value;
+            return &operand->value;
         } else if (ir_has_side_effects(instruction)) {
             if (instr_is_comptime(instruction)) {
                 switch (instruction->id) {
                     case IrInstructionIdUnwrapErrPayload:
                     case IrInstructionIdUnionFieldPtr:
+                    case IrInstructionIdReturnBegin:
                         continue;
                     default:
                         break;
@@ -12593,12 +12609,32 @@ static IrInstruction *ir_analyze_instruction_add_implicit_return_type(IrAnalyze
     return ir_const_void(ira, &instruction->base);
 }
 
+static IrInstruction *ir_analyze_instruction_return_begin(IrAnalyze *ira, IrInstructionReturnBegin *instruction) {
+    IrInstruction *operand = instruction->operand->child;
+    if (type_is_invalid(operand->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *casted_operand = ir_implicit_cast(ira, operand, ira->explicit_return_type);
+    if (type_is_invalid(casted_operand->value.type)) {
+        AstNode *source_node = ira->explicit_return_type_source_node;
+        if (source_node != nullptr) {
+            ErrorMsg *msg = ira->codegen->errors.last();
+            add_error_note(ira->codegen, msg, source_node,
+                buf_sprintf("return type declared here"));
+        }
+        return ir_unreach_error(ira);
+    }
+
+    return ir_build_return_begin(&ira->new_irb, instruction->base.scope, instruction->base.source_node,
+            casted_operand);
+}
+
 static IrInstruction *ir_analyze_instruction_return(IrAnalyze *ira, IrInstructionReturn *instruction) {
-    IrInstruction *value = instruction->value->child;
-    if (type_is_invalid(value->value.type))
+    IrInstruction *operand = instruction->operand->child;
+    if (type_is_invalid(operand->value.type))
         return ir_unreach_error(ira);
 
-    if (!instr_is_comptime(value) && handle_is_ptr(ira->explicit_return_type)) {
+    if (!instr_is_comptime(operand) && handle_is_ptr(ira->explicit_return_type)) {
         // result location mechanism took care of it.
         IrInstruction *result = ir_build_return(&ira->new_irb, instruction->base.scope,
                 instruction->base.source_node, nullptr);
@@ -12606,26 +12642,21 @@ static IrInstruction *ir_analyze_instruction_return(IrAnalyze *ira, IrInstructio
         return ir_finish_anal(ira, result);
     }
 
-    IrInstruction *casted_value = ir_implicit_cast(ira, value, ira->explicit_return_type);
-    if (type_is_invalid(casted_value->value.type)) {
-        AstNode *source_node = ira->explicit_return_type_source_node;
-        if (source_node != nullptr) {
-            ErrorMsg *msg = ira->codegen->errors.last();
-            add_error_note(ira->codegen, msg, source_node,
-                buf_sprintf("return type declared here"));
-        }
+    IrInstruction *casted_operand = ir_implicit_cast(ira, operand, ira->explicit_return_type);
+    if (type_is_invalid(casted_operand->value.type)) {
+        // error already reported by IrInstructionReturnBegin
         return ir_unreach_error(ira);
     }
 
-    if (casted_value->value.special == ConstValSpecialRuntime &&
-        casted_value->value.type->id == ZigTypeIdPointer &&
-        casted_value->value.data.rh_ptr == RuntimeHintPtrStack)
+    if (casted_operand->value.special == ConstValSpecialRuntime &&
+        casted_operand->value.type->id == ZigTypeIdPointer &&
+        casted_operand->value.data.rh_ptr == RuntimeHintPtrStack)
     {
-        ir_add_error(ira, casted_value, buf_sprintf("function returns address of local variable"));
+        ir_add_error(ira, casted_operand, buf_sprintf("function returns address of local variable"));
         return ir_unreach_error(ira);
     }
     IrInstruction *result = ir_build_return(&ira->new_irb, instruction->base.scope,
-            instruction->base.source_node, casted_value);
+            instruction->base.source_node, casted_operand);
     result->value.type = ira->codegen->builtin_types.entry_unreachable;
     return ir_finish_anal(ira, result);
 }
@@ -22176,19 +22207,6 @@ static IrInstruction *ir_analyze_instruction_overflow_op(IrAnalyze *ira, IrInstr
     return result;
 }
 
-static IrInstruction *ir_analyze_instruction_result_ptr(IrAnalyze *ira, IrInstructionResultPtr *instruction) {
-    IrInstruction *result = instruction->result->child;
-    if (type_is_invalid(result->value.type))
-        return result;
-
-    if (instruction->result_loc->written && instruction->result_loc->resolved_loc != nullptr &&
-            !instr_is_comptime(result))
-    {
-        return instruction->result_loc->resolved_loc;
-    }
-    return ir_get_ref(ira, &instruction->base, result, true, false);
-}
-
 static void ir_eval_mul_add(IrAnalyze *ira, IrInstructionMulAdd *source_instr, ZigType *float_type,
     ConstExprValue *op1, ConstExprValue *op2, ConstExprValue *op3, ConstExprValue *out_val) {
     if (float_type->id == ZigTypeIdComptimeFloat) {
@@ -22313,11 +22331,16 @@ static IrInstruction *ir_analyze_instruction_test_err(IrAnalyze *ira, IrInstruct
     if (type_is_invalid(base_ptr->value.type))
         return ira->codegen->invalid_instruction;
 
-    IrInstruction *value = ir_get_deref(ira, &instruction->base, base_ptr, nullptr);
+    IrInstruction *value;
+    if (instruction->base_ptr_is_payload) {
+        value = base_ptr;
+    } else {
+        value = ir_get_deref(ira, &instruction->base, base_ptr, nullptr);
+    }
+
     ZigType *type_entry = value->value.type;
     if (type_is_invalid(type_entry))
         return ira->codegen->invalid_instruction;
-
     if (type_entry->id == ZigTypeIdErrorUnion) {
         if (instr_is_comptime(value)) {
             ConstExprValue *err_union_val = ir_resolve_const(ira, value, UndefBad);
@@ -24443,7 +24466,7 @@ static IrInstruction *ir_analyze_instruction_suspend_finish(IrAnalyze *ira,
     return ir_build_suspend_finish(&ira->new_irb, instruction->base.scope, instruction->base.source_node, begin);
 }
 
-static IrInstruction *ir_analyze_instruction_await(IrAnalyze *ira, IrInstructionAwait *instruction) {
+static IrInstruction *ir_analyze_instruction_await(IrAnalyze *ira, IrInstructionAwaitSrc *instruction) {
     IrInstruction *frame_ptr = instruction->frame->child;
     if (type_is_invalid(frame_ptr->value.type))
         return ira->codegen->invalid_instruction;
@@ -24484,9 +24507,17 @@ static IrInstruction *ir_analyze_instruction_await(IrAnalyze *ira, IrInstruction
         fn_entry->calls_or_awaits_errorable_fn = true;
     }
 
-    IrInstruction *result = ir_build_await(&ira->new_irb,
-            instruction->base.scope, instruction->base.source_node, frame);
-    result->value.type = result_type;
+    IrInstruction *result_loc;
+    if (type_has_bits(result_type)) {
+        result_loc = ir_resolve_result(ira, &instruction->base, instruction->result_loc,
+                result_type, nullptr, true, false, true);
+        if (result_loc != nullptr && (type_is_invalid(result_loc->value.type) || instr_is_unreachable(result_loc)))
+            return result_loc;
+    } else {
+        result_loc = nullptr;
+    }
+
+    IrInstruction *result = ir_build_await_gen(ira, &instruction->base, frame, result_type, result_loc);
     return ir_finish_anal(ira, result);
 }
 
@@ -24541,8 +24572,11 @@ static IrInstruction *ir_analyze_instruction_base(IrAnalyze *ira, IrInstruction
         case IrInstructionIdRefGen:
         case IrInstructionIdTestErrGen:
         case IrInstructionIdFrameSizeGen:
+        case IrInstructionIdAwaitGen:
             zig_unreachable();
 
+        case IrInstructionIdReturnBegin:
+            return ir_analyze_instruction_return_begin(ira, (IrInstructionReturnBegin *)instruction);
         case IrInstructionIdReturn:
             return ir_analyze_instruction_return(ira, (IrInstructionReturn *)instruction);
         case IrInstructionIdConst:
@@ -24749,8 +24783,6 @@ static IrInstruction *ir_analyze_instruction_base(IrAnalyze *ira, IrInstruction
             return ir_analyze_instruction_resolve_result(ira, (IrInstructionResolveResult *)instruction);
         case IrInstructionIdResetResult:
             return ir_analyze_instruction_reset_result(ira, (IrInstructionResetResult *)instruction);
-        case IrInstructionIdResultPtr:
-            return ir_analyze_instruction_result_ptr(ira, (IrInstructionResultPtr *)instruction);
         case IrInstructionIdOpaqueType:
             return ir_analyze_instruction_opaque_type(ira, (IrInstructionOpaqueType *)instruction);
         case IrInstructionIdSetAlignStack:
@@ -24807,8 +24839,8 @@ static IrInstruction *ir_analyze_instruction_base(IrAnalyze *ira, IrInstruction
             return ir_analyze_instruction_suspend_finish(ira, (IrInstructionSuspendFinish *)instruction);
         case IrInstructionIdCoroResume:
             return ir_analyze_instruction_coro_resume(ira, (IrInstructionCoroResume *)instruction);
-        case IrInstructionIdAwait:
-            return ir_analyze_instruction_await(ira, (IrInstructionAwait *)instruction);
+        case IrInstructionIdAwaitSrc:
+            return ir_analyze_instruction_await(ira, (IrInstructionAwaitSrc *)instruction);
     }
     zig_unreachable();
 }
@@ -24898,6 +24930,7 @@ bool ir_has_side_effects(IrInstruction *instruction) {
         case IrInstructionIdStorePtr:
         case IrInstructionIdCallSrc:
         case IrInstructionIdCallGen:
+        case IrInstructionIdReturnBegin:
         case IrInstructionIdReturn:
         case IrInstructionIdUnreachable:
         case IrInstructionIdSetCold:
@@ -24943,7 +24976,8 @@ bool ir_has_side_effects(IrInstruction *instruction) {
         case IrInstructionIdSuspendBegin:
         case IrInstructionIdSuspendFinish:
         case IrInstructionIdCoroResume:
-        case IrInstructionIdAwait:
+        case IrInstructionIdAwaitSrc:
+        case IrInstructionIdAwaitGen:
             return true;
 
         case IrInstructionIdPhi:
@@ -25041,7 +25075,6 @@ bool ir_has_side_effects(IrInstruction *instruction) {
         case IrInstructionIdHasDecl:
         case IrInstructionIdAllocaSrc:
         case IrInstructionIdAllocaGen:
-        case IrInstructionIdResultPtr:
             return false;
 
         case IrInstructionIdAsm:
src/ir_print.cpp
@@ -64,11 +64,15 @@ static void ir_print_other_block(IrPrint *irp, IrBasicBlock *bb) {
     }
 }
 
-static void ir_print_return(IrPrint *irp, IrInstructionReturn *return_instruction) {
+static void ir_print_return_begin(IrPrint *irp, IrInstructionReturnBegin *instruction) {
+    fprintf(irp->f, "@returnBegin(");
+    ir_print_other_instruction(irp, instruction->operand);
+    fprintf(irp->f, ")");
+}
+
+static void ir_print_return(IrPrint *irp, IrInstructionReturn *instruction) {
     fprintf(irp->f, "return ");
-    if (return_instruction->value != nullptr) {
-        ir_print_other_instruction(irp, return_instruction->value);
-    }
+    ir_print_other_instruction(irp, instruction->operand);
 }
 
 static void ir_print_const(IrPrint *irp, IrInstructionConst *const_instruction) {
@@ -1329,14 +1333,6 @@ static void ir_print_reset_result(IrPrint *irp, IrInstructionResetResult *instru
     fprintf(irp->f, ")");
 }
 
-static void ir_print_result_ptr(IrPrint *irp, IrInstructionResultPtr *instruction) {
-    fprintf(irp->f, "ResultPtr(");
-    ir_print_result_loc(irp, instruction->result_loc);
-    fprintf(irp->f, ",");
-    ir_print_other_instruction(irp, instruction->result);
-    fprintf(irp->f, ")");
-}
-
 static void ir_print_opaque_type(IrPrint *irp, IrInstructionOpaqueType *instruction) {
     fprintf(irp->f, "@OpaqueType()");
 }
@@ -1538,9 +1534,19 @@ static void ir_print_coro_resume(IrPrint *irp, IrInstructionCoroResume *instruct
     fprintf(irp->f, ")");
 }
 
-static void ir_print_await(IrPrint *irp, IrInstructionAwait *instruction) {
+static void ir_print_await_src(IrPrint *irp, IrInstructionAwaitSrc *instruction) {
     fprintf(irp->f, "@await(");
     ir_print_other_instruction(irp, instruction->frame);
+    fprintf(irp->f, ",");
+    ir_print_result_loc(irp, instruction->result_loc);
+    fprintf(irp->f, ")");
+}
+
+static void ir_print_await_gen(IrPrint *irp, IrInstructionAwaitGen *instruction) {
+    fprintf(irp->f, "@await(");
+    ir_print_other_instruction(irp, instruction->frame);
+    fprintf(irp->f, ",");
+    ir_print_other_instruction(irp, instruction->result_loc);
     fprintf(irp->f, ")");
 }
 
@@ -1549,6 +1555,9 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction) {
     switch (instruction->id) {
         case IrInstructionIdInvalid:
             zig_unreachable();
+        case IrInstructionIdReturnBegin:
+            ir_print_return_begin(irp, (IrInstructionReturnBegin *)instruction);
+            break;
         case IrInstructionIdReturn:
             ir_print_return(irp, (IrInstructionReturn *)instruction);
             break;
@@ -1921,9 +1930,6 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction) {
         case IrInstructionIdResetResult:
             ir_print_reset_result(irp, (IrInstructionResetResult *)instruction);
             break;
-        case IrInstructionIdResultPtr:
-            ir_print_result_ptr(irp, (IrInstructionResultPtr *)instruction);
-            break;
         case IrInstructionIdOpaqueType:
             ir_print_opaque_type(irp, (IrInstructionOpaqueType *)instruction);
             break;
@@ -2020,8 +2026,11 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction) {
         case IrInstructionIdCoroResume:
             ir_print_coro_resume(irp, (IrInstructionCoroResume *)instruction);
             break;
-        case IrInstructionIdAwait:
-            ir_print_await(irp, (IrInstructionAwait *)instruction);
+        case IrInstructionIdAwaitSrc:
+            ir_print_await_src(irp, (IrInstructionAwaitSrc *)instruction);
+            break;
+        case IrInstructionIdAwaitGen:
+            ir_print_await_gen(irp, (IrInstructionAwaitGen *)instruction);
             break;
     }
     fprintf(irp->f, "\n");
BRANCH_TODO
@@ -33,3 +33,6 @@
    - anyframe, anyframe->T
  * safety for double await
  * call graph analysis to have fewer stack trace frames
+ * grep for "coroutine" and "coro" and replace all that nomenclature with "async functions"
+ * when there are multiple calls to async functions in a function, reuse the same frame buffer, so that the
+   needed bytes is equal to the largest callee's frame