Commit d26905c102

Andrew Kelley <superjoe30@gmail.com>
2018-03-29 06:24:04
error return traces for the early return case
it would work but LLVM is not correctly spilling the addresses. See #821
1 parent 7186e92
src/all_types.hpp
@@ -1656,6 +1656,8 @@ struct CodeGen {
     LLVMValueRef coro_save_fn_val;
     LLVMValueRef coro_promise_fn_val;
     LLVMValueRef coro_alloc_helper_fn_val;
+    LLVMValueRef merge_err_ret_traces_fn_val;
+    LLVMValueRef add_error_return_trace_addr_fn_val;
     bool error_during_imports;
 
     const char **clang_argv;
@@ -2054,6 +2056,7 @@ enum IrInstructionId {
     IrInstructionIdAwaitBookkeeping,
     IrInstructionIdSaveErrRetAddr,
     IrInstructionIdAddImplicitReturnType,
+    IrInstructionIdMergeErrRetTraces,
 };
 
 struct IrInstruction {
@@ -2892,6 +2895,11 @@ struct IrInstructionExport {
 
 struct IrInstructionErrorReturnTrace {
     IrInstruction base;
+
+    enum Nullable { // selects which result type analysis gives this instruction
+        Null, // result is a nullable pointer to StackTrace; a constant null when the executable has no error return trace
+        NonNull, // result is a plain pointer to StackTrace; analysis asserts error return tracing is enabled
+    } nullable;
 };
 
 struct IrInstructionErrorUnion {
@@ -3024,6 +3032,13 @@ struct IrInstructionAddImplicitReturnType {
     IrInstruction *value;
 };
 
+struct IrInstructionMergeErrRetTraces { // IR instruction rendered as a call to the merge-error-return-traces helper
+    IrInstruction base;
+
+    IrInstruction *coro_promise_ptr; // pointer to the promise struct; codegen takes a struct GEP off this value
+    TypeStructField *resolved_field; // promise field whose gen_index codegen uses; IR gen passes nullptr, presumably resolved later during analysis
+};
+
 static const size_t slice_ptr_index = 0;
 static const size_t slice_len_index = 1;
 
@@ -3033,11 +3048,15 @@ static const size_t maybe_null_index = 1;
 static const size_t err_union_err_index = 0;
 static const size_t err_union_payload_index = 1;
 
+// TODO call graph analysis to find out what this number needs to be for every function
+static const size_t stack_trace_ptr_count = 30; // number of usize address slots allocated for a function's error return trace buffer
+
 #define ASYNC_ALLOC_FIELD_NAME "allocFn"
 #define ASYNC_FREE_FIELD_NAME "freeFn"
 #define AWAITER_HANDLE_FIELD_NAME "awaiter_handle"
 #define RESULT_FIELD_NAME "result"
 #define RESULT_PTR_FIELD_NAME "result_ptr"
+#define ERR_RET_TRACE_PTR_FIELD_NAME "err_ret_trace_ptr"
 
 
 enum FloatMode {
src/analyze.cpp
@@ -468,10 +468,26 @@ TypeTableEntry *get_promise_frame_type(CodeGen *g, TypeTableEntry *return_type)
 
     TypeTableEntry *awaiter_handle_type = get_maybe_type(g, g->builtin_types.entry_promise);
     TypeTableEntry *result_ptr_type = get_pointer_to_type(g, return_type, false);
-    const char *field_names[] = {AWAITER_HANDLE_FIELD_NAME, RESULT_FIELD_NAME, RESULT_PTR_FIELD_NAME};
-    TypeTableEntry *field_types[] = {awaiter_handle_type, return_type, result_ptr_type};
+
+    ZigList<const char *> field_names = {};
+    field_names.append(AWAITER_HANDLE_FIELD_NAME);
+    field_names.append(RESULT_FIELD_NAME);
+    field_names.append(RESULT_PTR_FIELD_NAME);
+    if (g->have_err_ret_tracing) {
+        field_names.append(ERR_RET_TRACE_PTR_FIELD_NAME);
+    }
+
+    ZigList<TypeTableEntry *> field_types = {};
+    field_types.append(awaiter_handle_type);
+    field_types.append(return_type);
+    field_types.append(result_ptr_type);
+    if (g->have_err_ret_tracing) {
+        field_types.append(get_ptr_to_stack_trace_type(g));
+    }
+
+    assert(field_names.length == field_types.length);
     Buf *name = buf_sprintf("AsyncFramePromise(%s)", buf_ptr(&return_type->name));
-    TypeTableEntry *entry = get_struct_type(g, buf_ptr(name), field_names, field_types, 3);
+    TypeTableEntry *entry = get_struct_type(g, buf_ptr(name), field_names.items, field_types.items, field_names.length);
 
     return_type->promise_frame_parent = entry;
     return entry;
src/codegen.cpp
@@ -1114,22 +1114,19 @@ static LLVMValueRef get_return_address_fn_val(CodeGen *g) {
     return g->return_address_fn_val;
 }
 
-static LLVMValueRef get_return_err_fn(CodeGen *g) {
-    if (g->return_err_fn != nullptr)
-        return g->return_err_fn;
-
-    assert(g->err_tag_type != nullptr);
+static LLVMValueRef get_add_error_return_trace_addr_fn(CodeGen *g) {
+    if (g->add_error_return_trace_addr_fn_val != nullptr)
+        return g->add_error_return_trace_addr_fn_val;
 
     LLVMTypeRef arg_types[] = {
-        // error return trace pointer
         get_ptr_to_stack_trace_type(g)->type_ref,
+        g->builtin_types.entry_usize->type_ref,
     };
-    LLVMTypeRef fn_type_ref = LLVMFunctionType(LLVMVoidType(), arg_types, 1, false);
+    LLVMTypeRef fn_type_ref = LLVMFunctionType(LLVMVoidType(), arg_types, 2, false);
 
-    Buf *fn_name = get_mangled_name(g, buf_create_from_str("__zig_return_error"), false);
+    Buf *fn_name = get_mangled_name(g, buf_create_from_str("__zig_add_err_ret_trace_addr"), false);
     LLVMValueRef fn_val = LLVMAddFunction(g->module, buf_ptr(fn_name), fn_type_ref);
-    addLLVMFnAttr(fn_val, "noinline"); // so that we can look at return address
-    addLLVMFnAttr(fn_val, "cold");
+    addLLVMFnAttr(fn_val, "alwaysinline");
     LLVMSetLinkage(fn_val, LLVMInternalLinkage);
     LLVMSetFunctionCallConv(fn_val, get_llvm_cc(g, CallingConventionUnspecified));
     addLLVMFnAttr(fn_val, "nounwind");
@@ -1151,6 +1148,8 @@ static LLVMValueRef get_return_err_fn(CodeGen *g) {
     // stack_trace.instruction_addresses[stack_trace.index % stack_trace.instruction_addresses.len] = return_address;
 
     LLVMValueRef err_ret_trace_ptr = LLVMGetParam(fn_val, 0);
+    LLVMValueRef address_value = LLVMGetParam(fn_val, 1);
+
     size_t index_field_index = g->stack_trace_type->data.structure.fields[0].gen_index;
     LLVMValueRef index_field_ptr = LLVMBuildStructGEP(g->builder, err_ret_trace_ptr, (unsigned)index_field_index, "");
     size_t addresses_field_index = g->stack_trace_type->data.structure.fields[1].gen_index;
@@ -1172,15 +1171,10 @@ static LLVMValueRef get_return_err_fn(CodeGen *g) {
     LLVMValueRef ptr_value = gen_load_untyped(g, ptr_field_ptr, 0, false, "");
     LLVMValueRef address_slot = LLVMBuildInBoundsGEP(g->builder, ptr_value, address_indices, 1, "");
 
-    LLVMValueRef zero = LLVMConstNull(g->builtin_types.entry_i32->type_ref);
-    LLVMValueRef return_address_ptr = LLVMBuildCall(g->builder, get_return_address_fn_val(g), &zero, 1, "");
-    LLVMValueRef return_address = LLVMBuildPtrToInt(g->builder, return_address_ptr, usize_type_ref, "");
-
-    LLVMValueRef address_value = LLVMBuildPtrToInt(g->builder, return_address, usize_type_ref, "");
     gen_store_untyped(g, address_value, address_slot, 0, false);
 
     // stack_trace.index += 1;
-    LLVMValueRef index_plus_one_val = LLVMBuildAdd(g->builder, index_val, LLVMConstInt(usize_type_ref, 1, false), "");
+    LLVMValueRef index_plus_one_val = LLVMBuildNUWAdd(g->builder, index_val, LLVMConstInt(usize_type_ref, 1, false), "");
     gen_store_untyped(g, index_plus_one_val, index_field_ptr, 0, false);
 
     // return;
@@ -1189,6 +1183,187 @@ static LLVMValueRef get_return_err_fn(CodeGen *g) {
     LLVMPositionBuilderAtEnd(g->builder, prev_block);
     LLVMSetCurrentDebugLocation(g->builder, prev_debug_location);
 
+    g->add_error_return_trace_addr_fn_val = fn_val;
+    return fn_val;
+}
+
+static LLVMValueRef get_merge_err_ret_traces_fn_val(CodeGen *g) { // Emits (once) and returns the internal helper that appends a source trace's addresses to a destination trace.
+    if (g->merge_err_ret_traces_fn_val) // already emitted: reuse the cached function
+        return g->merge_err_ret_traces_fn_val;
+
+    assert(g->stack_trace_type != nullptr);
+
+    LLVMTypeRef param_types[] = {
+        get_ptr_to_stack_trace_type(g)->type_ref, // param 0: destination stack trace
+        get_ptr_to_stack_trace_type(g)->type_ref, // param 1: source stack trace
+    };
+    LLVMTypeRef fn_type_ref = LLVMFunctionType(LLVMVoidType(), param_types, 2, false);
+
+    Buf *fn_name = get_mangled_name(g, buf_create_from_str("__zig_merge_error_return_traces"), false);
+    LLVMValueRef fn_val = LLVMAddFunction(g->module, buf_ptr(fn_name), fn_type_ref);
+    LLVMSetLinkage(fn_val, LLVMInternalLinkage);
+    LLVMSetFunctionCallConv(fn_val, get_llvm_cc(g, CallingConventionUnspecified));
+    addLLVMFnAttr(fn_val, "nounwind");
+    add_uwtable_attr(g, fn_val);
+    addLLVMArgAttr(fn_val, (unsigned)0, "nonnull");
+    addLLVMArgAttr(fn_val, (unsigned)0, "noalias");
+    addLLVMArgAttr(fn_val, (unsigned)0, "writeonly"); // NOTE(review): dest is forwarded to __zig_add_err_ret_trace_addr, which appears to load through it - confirm "writeonly" is valid
+    addLLVMArgAttr(fn_val, (unsigned)1, "nonnull");
+    addLLVMArgAttr(fn_val, (unsigned)1, "noalias");
+    addLLVMArgAttr(fn_val, (unsigned)1, "readonly");
+    if (g->build_mode == BuildModeDebug) {
+        ZigLLVMAddFunctionAttr(fn_val, "no-frame-pointer-elim", "true");
+        ZigLLVMAddFunctionAttr(fn_val, "no-frame-pointer-elim-non-leaf", nullptr);
+    }
+
+    // this is above the ZigLLVMClearCurrentDebugLocation
+    LLVMValueRef add_error_return_trace_addr_fn_val = get_add_error_return_trace_addr_fn(g); // fetched before this function saves and moves the builder position
+
+    LLVMBasicBlockRef entry_block = LLVMAppendBasicBlock(fn_val, "Entry");
+    LLVMBasicBlockRef prev_block = LLVMGetInsertBlock(g->builder); // remember the caller's insertion point
+    LLVMValueRef prev_debug_location = LLVMGetCurrentDebugLocation(g->builder); // and its debug location, both restored at the end
+    LLVMPositionBuilderAtEnd(g->builder, entry_block);
+    ZigLLVMClearCurrentDebugLocation(g->builder);
+
+    // var frame_index: usize = undefined;
+    // var frames_left: usize = undefined;
+    // if (src_stack_trace.index < src_stack_trace.instruction_addresses.len) {
+    //     frame_index = 0;
+    //     frames_left = src_stack_trace.index;
+    //     if (frames_left == 0) return;
+    // } else {
+    //     frame_index = (src_stack_trace.index + 1) % src_stack_trace.instruction_addresses.len;
+    //     frames_left = src_stack_trace.instruction_addresses.len;
+    // }
+    // while (true) {
+    //     __zig_add_err_ret_trace_addr(dest_stack_trace, src_stack_trace.instruction_addresses[frame_index]);
+    //     frames_left -= 1;
+    //     if (frames_left == 0) return;
+    //     frame_index = (frame_index + 1) % src_stack_trace.instruction_addresses.len;
+    // }
+    LLVMBasicBlockRef return_block = LLVMAppendBasicBlock(fn_val, "Return");
+
+    LLVMValueRef frame_index_ptr = LLVMBuildAlloca(g->builder, g->builtin_types.entry_usize->type_ref, "frame_index"); // stack slot for the loop's current source index
+    LLVMValueRef frames_left_ptr = LLVMBuildAlloca(g->builder, g->builtin_types.entry_usize->type_ref, "frames_left"); // stack slot for the remaining copy count
+
+    LLVMValueRef dest_stack_trace_ptr = LLVMGetParam(fn_val, 0);
+    LLVMValueRef src_stack_trace_ptr = LLVMGetParam(fn_val, 1);
+
+    size_t src_index_field_index = g->stack_trace_type->data.structure.fields[0].gen_index; // field 0 is used as the trace's index
+    size_t src_addresses_field_index = g->stack_trace_type->data.structure.fields[1].gen_index; // field 1 is used as the addresses slice
+    LLVMValueRef src_index_field_ptr = LLVMBuildStructGEP(g->builder, src_stack_trace_ptr,
+            (unsigned)src_index_field_index, "");
+    LLVMValueRef src_addresses_field_ptr = LLVMBuildStructGEP(g->builder, src_stack_trace_ptr,
+            (unsigned)src_addresses_field_index, "");
+    TypeTableEntry *slice_type = g->stack_trace_type->data.structure.fields[1].type_entry;
+    size_t ptr_field_index = slice_type->data.structure.fields[slice_ptr_index].gen_index;
+    LLVMValueRef src_ptr_field_ptr = LLVMBuildStructGEP(g->builder, src_addresses_field_ptr, (unsigned)ptr_field_index, "");
+    size_t len_field_index = slice_type->data.structure.fields[slice_len_index].gen_index;
+    LLVMValueRef src_len_field_ptr = LLVMBuildStructGEP(g->builder, src_addresses_field_ptr, (unsigned)len_field_index, "");
+    LLVMValueRef src_index_val = LLVMBuildLoad(g->builder, src_index_field_ptr, "");
+    LLVMValueRef src_ptr_val = LLVMBuildLoad(g->builder, src_ptr_field_ptr, "");
+    LLVMValueRef src_len_val = LLVMBuildLoad(g->builder, src_len_field_ptr, "");
+    LLVMValueRef no_wrap_bit = LLVMBuildICmp(g->builder, LLVMIntULT, src_index_val, src_len_val, ""); // index < len: the source buffer has not wrapped
+    LLVMBasicBlockRef no_wrap_block = LLVMAppendBasicBlock(fn_val, "NoWrap");
+    LLVMBasicBlockRef yes_wrap_block = LLVMAppendBasicBlock(fn_val, "YesWrap");
+    LLVMBasicBlockRef loop_block = LLVMAppendBasicBlock(fn_val, "Loop");
+    LLVMBuildCondBr(g->builder, no_wrap_bit, no_wrap_block, yes_wrap_block);
+
+    LLVMPositionBuilderAtEnd(g->builder, no_wrap_block); // NoWrap: copy `index` entries starting at slot 0
+    LLVMValueRef usize_zero = LLVMConstNull(g->builtin_types.entry_usize->type_ref);
+    LLVMBuildStore(g->builder, usize_zero, frame_index_ptr);
+    LLVMBuildStore(g->builder, src_index_val, frames_left_ptr);
+    LLVMValueRef frames_left_eq_zero_bit = LLVMBuildICmp(g->builder, LLVMIntEQ, src_index_val, usize_zero, "");
+    LLVMBuildCondBr(g->builder, frames_left_eq_zero_bit, return_block, loop_block); // empty source trace: return without copying
+
+    LLVMPositionBuilderAtEnd(g->builder, yes_wrap_block); // YesWrap: copy `len` entries starting at the computed slot
+    LLVMValueRef usize_one = LLVMConstInt(g->builtin_types.entry_usize->type_ref, 1, false);
+    LLVMValueRef plus_one = LLVMBuildNUWAdd(g->builder, src_index_val, usize_one, "");
+    LLVMValueRef mod_len = LLVMBuildURem(g->builder, plus_one, src_len_val, ""); // NOTE(review): the add helper writes slot index % len and then increments, so the oldest entry looks like index % len rather than (index + 1) % len - confirm
+    LLVMBuildStore(g->builder, mod_len, frame_index_ptr);
+    LLVMBuildStore(g->builder, src_len_val, frames_left_ptr);
+    LLVMBuildBr(g->builder, loop_block);
+
+    LLVMPositionBuilderAtEnd(g->builder, loop_block); // Loop: append one source address to dest
+    LLVMValueRef ptr_index = LLVMBuildLoad(g->builder, frame_index_ptr, "");
+    LLVMValueRef addr_ptr = LLVMBuildInBoundsGEP(g->builder, src_ptr_val, &ptr_index, 1, "");
+    LLVMValueRef this_addr_val = LLVMBuildLoad(g->builder, addr_ptr, "");
+    LLVMValueRef args[] = {dest_stack_trace_ptr, this_addr_val};
+    LLVMBuildCall(g->builder, add_error_return_trace_addr_fn_val, args, 2, "");
+    LLVMValueRef prev_frames_left = LLVMBuildLoad(g->builder, frames_left_ptr, "");
+    LLVMValueRef new_frames_left = LLVMBuildNUWSub(g->builder, prev_frames_left, usize_one, "");
+    LLVMValueRef done_bit = LLVMBuildICmp(g->builder, LLVMIntEQ, new_frames_left, usize_zero, "");
+    LLVMBasicBlockRef continue_block = LLVMAppendBasicBlock(fn_val, "Continue");
+    LLVMBuildCondBr(g->builder, done_bit, return_block, continue_block); // return once the remaining count reaches zero
+
+    LLVMPositionBuilderAtEnd(g->builder, return_block);
+    LLVMBuildRetVoid(g->builder);
+
+    LLVMPositionBuilderAtEnd(g->builder, continue_block); // Continue: store the decremented count and advance the index modulo len
+    LLVMBuildStore(g->builder, new_frames_left, frames_left_ptr);
+    LLVMValueRef prev_index = LLVMBuildLoad(g->builder, frame_index_ptr, "");
+    LLVMValueRef index_plus_one = LLVMBuildNUWAdd(g->builder, prev_index, usize_one, "");
+    LLVMValueRef index_mod_len = LLVMBuildURem(g->builder, index_plus_one, src_len_val, "");
+    LLVMBuildStore(g->builder, index_mod_len, frame_index_ptr);
+    LLVMBuildBr(g->builder, loop_block);
+
+    LLVMPositionBuilderAtEnd(g->builder, prev_block); // put the builder back where the caller was emitting
+    LLVMSetCurrentDebugLocation(g->builder, prev_debug_location);
+
+    g->merge_err_ret_traces_fn_val = fn_val; // cache for subsequent calls
+    return fn_val;
+
+}
+
+static LLVMValueRef get_return_err_fn(CodeGen *g) { // Emits (once) and returns the internal helper that records its own return address in the given trace.
+    if (g->return_err_fn != nullptr) // already emitted: reuse the cached function
+        return g->return_err_fn;
+
+    assert(g->err_tag_type != nullptr);
+
+    LLVMTypeRef arg_types[] = {
+        // error return trace pointer
+        get_ptr_to_stack_trace_type(g)->type_ref,
+    };
+    LLVMTypeRef fn_type_ref = LLVMFunctionType(LLVMVoidType(), arg_types, 1, false);
+
+    Buf *fn_name = get_mangled_name(g, buf_create_from_str("__zig_return_error"), false);
+    LLVMValueRef fn_val = LLVMAddFunction(g->module, buf_ptr(fn_name), fn_type_ref);
+    addLLVMFnAttr(fn_val, "noinline"); // so that we can look at return address
+    addLLVMFnAttr(fn_val, "cold");
+    LLVMSetLinkage(fn_val, LLVMInternalLinkage);
+    LLVMSetFunctionCallConv(fn_val, get_llvm_cc(g, CallingConventionUnspecified));
+    addLLVMFnAttr(fn_val, "nounwind");
+    add_uwtable_attr(g, fn_val);
+    addLLVMArgAttr(fn_val, (unsigned)0, "nonnull");
+    if (g->build_mode == BuildModeDebug) {
+        ZigLLVMAddFunctionAttr(fn_val, "no-frame-pointer-elim", "true");
+        ZigLLVMAddFunctionAttr(fn_val, "no-frame-pointer-elim-non-leaf", nullptr);
+    }
+
+    // this is above the ZigLLVMClearCurrentDebugLocation
+    LLVMValueRef add_error_return_trace_addr_fn_val = get_add_error_return_trace_addr_fn(g); // fetched before this function saves and moves the builder position
+
+    LLVMBasicBlockRef entry_block = LLVMAppendBasicBlock(fn_val, "Entry");
+    LLVMBasicBlockRef prev_block = LLVMGetInsertBlock(g->builder); // remember the caller's insertion point
+    LLVMValueRef prev_debug_location = LLVMGetCurrentDebugLocation(g->builder); // and its debug location, both restored below
+    LLVMPositionBuilderAtEnd(g->builder, entry_block);
+    ZigLLVMClearCurrentDebugLocation(g->builder);
+
+    LLVMValueRef err_ret_trace_ptr = LLVMGetParam(fn_val, 0);
+
+    LLVMTypeRef usize_type_ref = g->builtin_types.entry_usize->type_ref;
+    LLVMValueRef zero = LLVMConstNull(g->builtin_types.entry_i32->type_ref); // i32 0 argument for the return-address call
+    LLVMValueRef return_address_ptr = LLVMBuildCall(g->builder, get_return_address_fn_val(g), &zero, 1, "");
+    LLVMValueRef return_address = LLVMBuildPtrToInt(g->builder, return_address_ptr, usize_type_ref, ""); // the trace stores addresses as usize
+
+    LLVMValueRef args[] = { err_ret_trace_ptr, return_address };
+    LLVMBuildCall(g->builder, add_error_return_trace_addr_fn_val, args, 2, ""); // delegate the actual store to the shared add-address helper
+    LLVMBuildRetVoid(g->builder);
+
+    LLVMPositionBuilderAtEnd(g->builder, prev_block); // put the builder back where the caller was emitting
+    LLVMSetCurrentDebugLocation(g->builder, prev_debug_location);
+
     g->return_err_fn = fn_val;
     return fn_val;
 }
@@ -1641,7 +1816,6 @@ static LLVMValueRef ir_render_save_err_ret_addr(CodeGen *g, IrExecutable *execut
     };
     LLVMValueRef call_instruction = ZigLLVMBuildCall(g->builder, return_err_fn, args, 1,
             get_llvm_cc(g, CallingConventionUnspecified), ZigLLVM_FnInlineAuto, "");
-    LLVMSetTailCall(call_instruction, true);
     return call_instruction;
 }
 
@@ -4204,6 +4378,22 @@ static LLVMValueRef ir_render_atomic_rmw(CodeGen *g, IrExecutable *executable,
     return LLVMBuildIntToPtr(g->builder, uncasted_result, operand_type->type_ref, "");
 }
 
+static LLVMValueRef ir_render_merge_err_ret_traces(CodeGen *g, IrExecutable *executable,
+        IrInstructionMergeErrRetTraces *instruction)
+{ // Renders MergeErrRetTraces: calls the merge helper with the current trace as dest and the promise's stored trace as src.
+    assert(g->have_err_ret_tracing);
+
+    LLVMValueRef coro_promise_ptr = ir_llvm_value(g, instruction->coro_promise_ptr);
+    TypeStructField *field = instruction->resolved_field; // must be non-null by now; it is dereferenced on the next line
+    LLVMValueRef ptr_field_ptr = LLVMBuildStructGEP(g->builder, coro_promise_ptr, field->gen_index, ""); // address of the resolved field inside the promise
+    LLVMValueRef src_trace_ptr = LLVMBuildLoad(g->builder, ptr_field_ptr, ""); // the field holds a pointer to the source trace
+    LLVMValueRef dest_trace_ptr = get_cur_err_ret_trace_val(g, instruction->base.scope);
+
+    LLVMValueRef args[] = { dest_trace_ptr, src_trace_ptr };
+    LLVMBuildCall(g->builder, get_merge_err_ret_traces_fn_val(g), args, 2, "");
+    return nullptr; // no LLVM value is produced for this instruction
+}
+
 static void set_debug_location(CodeGen *g, IrInstruction *instruction) {
     AstNode *source_node = instruction->source_node;
     Scope *scope = instruction->scope;
@@ -4421,6 +4611,8 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
             return ir_render_atomic_rmw(g, executable, (IrInstructionAtomicRmw *)instruction);
         case IrInstructionIdSaveErrRetAddr:
             return ir_render_save_err_ret_addr(g, executable, (IrInstructionSaveErrRetAddr *)instruction);
+        case IrInstructionIdMergeErrRetTraces:
+            return ir_render_merge_err_ret_traces(g, executable, (IrInstructionMergeErrRetTraces *)instruction);
     }
     zig_unreachable();
 }
@@ -5313,12 +5505,12 @@ static void do_code_gen(CodeGen *g) {
         bool is_async = fn_table_entry->type_entry->data.fn.fn_type_id.cc == CallingConventionAsync;
         bool have_err_ret_trace_stack = g->have_err_ret_tracing && fn_table_entry->calls_or_awaits_errorable_fn &&
             (is_async || !have_err_ret_trace_arg);
-        if (have_err_ret_trace_stack) {
-            // TODO call graph analysis to find out what this number needs to be for every function
-            static const size_t stack_trace_ptr_count = 30;
-
+        bool have_exactly_one_err_ret_value = !have_err_ret_trace_stack && g->have_err_ret_tracing && is_async &&
+            type_can_fail(fn_table_entry->type_entry->data.fn.fn_type_id.return_type);
+        if (have_err_ret_trace_stack || have_exactly_one_err_ret_value) {
             TypeTableEntry *usize = g->builtin_types.entry_usize;
-            TypeTableEntry *array_type = get_array_type(g, usize, stack_trace_ptr_count);
+            uint32_t ret_addr_count = have_exactly_one_err_ret_value ? 1 : stack_trace_ptr_count;
+            TypeTableEntry *array_type = get_array_type(g, usize, ret_addr_count);
             LLVMValueRef err_ret_array_val = build_alloca(g, array_type, "error_return_trace_addresses",
                     get_abi_alignment(g, array_type));
             g->cur_err_ret_trace_val_stack = build_alloca(g, g->stack_trace_type, "error_return_trace", get_abi_alignment(g, g->stack_trace_type));
@@ -5341,7 +5533,7 @@ static void do_code_gen(CodeGen *g) {
 
             size_t len_field_index = slice_type->data.structure.fields[slice_len_index].gen_index;
             LLVMValueRef len_field_ptr = LLVMBuildStructGEP(g->builder, addresses_field_ptr, (unsigned)len_field_index, "");
-            gen_store(g, LLVMConstInt(usize->type_ref, stack_trace_ptr_count, false), len_field_ptr, get_pointer_to_type(g, usize, false));
+            gen_store(g, LLVMConstInt(usize->type_ref, ret_addr_count, false), len_field_ptr, get_pointer_to_type(g, usize, false));
         } else {
             g->cur_err_ret_trace_val_stack = nullptr;
         }
@@ -5943,6 +6135,8 @@ static void define_builtin_compile_vars(CodeGen *g) {
     os_path_join(g->cache_dir, buf_create_from_str(builtin_zig_basename), builtin_zig_path);
     Buf *contents = buf_alloc();
 
+    // Modifications to this struct must be coordinated with code that does anything with
+    // g->stack_trace_type. There are hard-coded references to the field indexes.
     buf_append_str(contents,
         "pub const StackTrace = struct {\n"
         "    index: usize,\n"
src/ir.cpp
@@ -725,6 +725,10 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionAddImplicitRetur
     return IrInstructionIdAddImplicitReturnType;
 }
 
+static constexpr IrInstructionId ir_instruction_id(IrInstructionMergeErrRetTraces *) { // overload mapping this instruction struct type to its enum id
+    return IrInstructionIdMergeErrRetTraces;
+}
+
 template<typename T>
 static T *ir_create_instruction(IrBuilder *irb, Scope *scope, AstNode *source_node) {
     T *special_instruction = allocate<T>(1);
@@ -972,6 +976,12 @@ static IrInstruction *ir_build_const_promise_init(IrBuilder *irb, Scope *scope,
     const_instruction->base.value.data.x_struct.fields[1].special = ConstValSpecialUndef;
     const_instruction->base.value.data.x_struct.fields[2].type = struct_type->data.structure.fields[2].type_entry;
     const_instruction->base.value.data.x_struct.fields[2].special = ConstValSpecialUndef;
+    if (irb->codegen->have_err_ret_tracing) {
+        assert(struct_type->data.structure.src_field_count == 4);
+
+        const_instruction->base.value.data.x_struct.fields[3].type = struct_type->data.structure.fields[3].type_entry;
+        const_instruction->base.value.data.x_struct.fields[3].special = ConstValSpecialUndef;
+    }
     return &const_instruction->base;
 }
 
@@ -2495,8 +2505,9 @@ static IrInstruction *ir_build_arg_type(IrBuilder *irb, Scope *scope, AstNode *s
     return &instruction->base;
 }
 
-static IrInstruction *ir_build_error_return_trace(IrBuilder *irb, Scope *scope, AstNode *source_node) {
+static IrInstruction *ir_build_error_return_trace(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstructionErrorReturnTrace::Nullable nullable) { // builds an ErrorReturnTrace instruction tagged with the requested nullability
     IrInstructionErrorReturnTrace *instruction = ir_build_instruction<IrInstructionErrorReturnTrace>(irb, scope, source_node);
+    instruction->nullable = nullable; // Null or NonNull; analysis reads this to pick the result type
 
     return &instruction->base;
 }
@@ -2717,6 +2728,18 @@ static IrInstruction *ir_build_add_implicit_return_type(IrBuilder *irb, Scope *s
     return &instruction->base;
 }
 
+static IrInstruction *ir_build_merge_err_ret_traces(IrBuilder *irb, Scope *scope, AstNode *source_node,
+        IrInstruction *coro_promise_ptr, TypeStructField *resolved_field)
+{ // Creates a MergeErrRetTraces instruction, stores both operands on it, and returns its base.
+    IrInstructionMergeErrRetTraces *instruction = ir_build_instruction<IrInstructionMergeErrRetTraces>(irb, scope, source_node);
+    instruction->coro_promise_ptr = coro_promise_ptr;
+    instruction->resolved_field = resolved_field; // may be nullptr; the await-expression IR gen passes nullptr here
+
+    ir_ref_instruction(coro_promise_ptr, irb->current_basic_block); // only the instruction operand is ref'd; resolved_field is not an IrInstruction
+
+    return &instruction->base;
+}
+
 static void ir_count_defers(IrBuilder *irb, Scope *inner_scope, Scope *outer_scope, size_t *results) {
     results[ReturnKindUnconditional] = 0;
     results[ReturnKindError] = 0;
@@ -2822,34 +2845,6 @@ static IrInstruction *ir_gen_async_return(IrBuilder *irb, Scope *scope, AstNode
     // the above blocks are rendered by ir_gen after the rest of codegen
 }
 
-static bool exec_have_err_ret_trace(CodeGen *g, IrExecutable *exec) {
-    if (!g->have_err_ret_tracing)
-        return false;
-    FnTableEntry *fn_entry = exec_fn_entry(exec);
-    if (fn_entry == nullptr)
-        return false;
-    if (exec->is_inline)
-        return false;
-    return type_can_fail(fn_entry->type_entry->data.fn.fn_type_id.return_type);
-}
-
-static void ir_gen_save_err_ret_addr(IrBuilder *irb, Scope *scope, AstNode *node) {
-    if (!exec_have_err_ret_trace(irb->codegen, irb->exec))
-        return;
-
-    bool is_async = exec_is_async(irb->exec);
-
-    if (is_async) {
-        //IrInstruction *err_ret_addr_ptr = ir_build_load_ptr(irb, scope, node, irb->exec->coro_err_ret_addr_ptr);
-        //IrInstruction *return_address_ptr = ir_build_instr_addr(irb, scope, node);
-        //IrInstruction *return_address_usize = ir_build_ptr_to_int(irb, scope, node, return_address_ptr);
-        //ir_build_store_ptr(irb, scope, node, err_ret_addr_ptr, return_address_usize);
-        return;
-    }
-
-    ir_build_save_err_ret_addr(irb, scope, node);
-}
-
 static IrInstruction *ir_gen_return(IrBuilder *irb, Scope *scope, AstNode *node, LVal lval) {
     assert(node->type == NodeTypeReturnExpr);
 
@@ -2895,8 +2890,9 @@ static IrInstruction *ir_gen_return(IrBuilder *irb, Scope *scope, AstNode *node,
 
                     IrInstruction *is_err = ir_build_test_err(irb, scope, node, return_value);
 
+                    bool should_inline = ir_should_inline(irb->exec, scope);
                     IrInstruction *is_comptime;
-                    if (ir_should_inline(irb->exec, scope)) {
+                    if (should_inline) {
                         is_comptime = ir_build_const_bool(irb, scope, node, true);
                     } else {
                         is_comptime = ir_build_test_comptime(irb, scope, node, is_err);
@@ -2909,7 +2905,9 @@ static IrInstruction *ir_gen_return(IrBuilder *irb, Scope *scope, AstNode *node,
                     if (have_err_defers) {
                         ir_gen_defers_for_block(irb, scope, outer_scope, true);
                     }
-                    ir_gen_save_err_ret_addr(irb, scope, node);
+                    if (irb->codegen->have_err_ret_tracing && !should_inline) {
+                        ir_build_save_err_ret_addr(irb, scope, node);
+                    }
                     ir_build_br(irb, scope, node, ret_stmt_block, is_comptime);
 
                     ir_set_cursor_at_end_and_append_block(irb, ok_block);
@@ -2938,7 +2936,8 @@ static IrInstruction *ir_gen_return(IrBuilder *irb, Scope *scope, AstNode *node,
                 IrBasicBlock *return_block = ir_create_basic_block(irb, scope, "ErrRetReturn");
                 IrBasicBlock *continue_block = ir_create_basic_block(irb, scope, "ErrRetContinue");
                 IrInstruction *is_comptime;
-                if (ir_should_inline(irb->exec, scope)) {
+                bool should_inline = ir_should_inline(irb->exec, scope);
+                if (should_inline) {
                     is_comptime = ir_build_const_bool(irb, scope, node, true);
                 } else {
                     is_comptime = ir_build_test_comptime(irb, scope, node, is_err_val);
@@ -2948,7 +2947,9 @@ static IrInstruction *ir_gen_return(IrBuilder *irb, Scope *scope, AstNode *node,
                 ir_set_cursor_at_end_and_append_block(irb, return_block);
                 ir_gen_defers_for_block(irb, scope, outer_scope, true);
                 IrInstruction *err_val = ir_build_unwrap_err_code(irb, scope, node, err_union_ptr);
-                ir_gen_save_err_ret_addr(irb, scope, node);
+                if (irb->codegen->have_err_ret_tracing && !should_inline) {
+                    ir_build_save_err_ret_addr(irb, scope, node);
+                }
                 ir_gen_async_return(irb, scope, node, err_val, false);
 
                 ir_set_cursor_at_end_and_append_block(irb, continue_block);
@@ -4242,7 +4243,7 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo
             }
         case BuiltinFnIdErrorReturnTrace:
             {
-                return ir_build_error_return_trace(irb, scope, node);
+                return ir_build_error_return_trace(irb, scope, node, IrInstructionErrorReturnTrace::Null);
             }
         case BuiltinFnIdAtomicRmw:
             {
@@ -6148,10 +6149,13 @@ static IrInstruction *ir_gen_await_expr(IrBuilder *irb, Scope *parent_scope, Ast
     IrInstruction *is_non_null = ir_build_test_nonnull(irb, parent_scope, node, maybe_await_handle);
     IrBasicBlock *yes_suspend_block = ir_create_basic_block(irb, parent_scope, "YesSuspend");
     IrBasicBlock *no_suspend_block = ir_create_basic_block(irb, parent_scope, "NoSuspend");
-    IrBasicBlock *merge_block = ir_create_basic_block(irb, parent_scope, "Merge");
+    IrBasicBlock *merge_block = ir_create_basic_block(irb, parent_scope, "MergeSuspend");
     ir_build_cond_br(irb, parent_scope, node, is_non_null, no_suspend_block, yes_suspend_block, const_bool_false);
 
     ir_set_cursor_at_end_and_append_block(irb, no_suspend_block);
+    if (irb->codegen->have_err_ret_tracing) {
+        ir_build_merge_err_ret_traces(irb, parent_scope, node, coro_promise_ptr, nullptr);
+    }
     Buf *result_field_name = buf_create_from_str(RESULT_FIELD_NAME);
     IrInstruction *promise_result_ptr = ir_build_field_ptr(irb, parent_scope, node, coro_promise_ptr, result_field_name);
     IrInstruction *no_suspend_result = ir_build_load_ptr(irb, parent_scope, node, promise_result_ptr);
@@ -6460,13 +6464,19 @@ bool ir_gen(CodeGen *codegen, AstNode *node, Scope *scope, IrExecutable *ir_exec
         irb->exec->coro_handle = ir_build_coro_begin(irb, coro_scope, node, coro_id, coro_mem_ptr);
 
         Buf *awaiter_handle_field_name = buf_create_from_str(AWAITER_HANDLE_FIELD_NAME);
-        irb->exec->coro_awaiter_field_ptr = ir_build_field_ptr(irb, coro_scope, node, coro_promise_ptr,
+        irb->exec->coro_awaiter_field_ptr = ir_build_field_ptr(irb, scope, node, coro_promise_ptr,
                 awaiter_handle_field_name);
         Buf *result_field_name = buf_create_from_str(RESULT_FIELD_NAME);
-        irb->exec->coro_result_field_ptr = ir_build_field_ptr(irb, coro_scope, node, coro_promise_ptr, result_field_name);
+        irb->exec->coro_result_field_ptr = ir_build_field_ptr(irb, scope, node, coro_promise_ptr, result_field_name);
         result_ptr_field_name = buf_create_from_str(RESULT_PTR_FIELD_NAME);
-        irb->exec->coro_result_ptr_field_ptr = ir_build_field_ptr(irb, coro_scope, node, coro_promise_ptr, result_ptr_field_name);
-        ir_build_store_ptr(irb, coro_scope, node, irb->exec->coro_result_ptr_field_ptr, irb->exec->coro_result_field_ptr);
+        irb->exec->coro_result_ptr_field_ptr = ir_build_field_ptr(irb, scope, node, coro_promise_ptr, result_ptr_field_name);
+        ir_build_store_ptr(irb, scope, node, irb->exec->coro_result_ptr_field_ptr, irb->exec->coro_result_field_ptr);
+        if (irb->codegen->have_err_ret_tracing) {
+            IrInstruction *err_ret_trace_ptr = ir_build_error_return_trace(irb, scope, node, IrInstructionErrorReturnTrace::NonNull);
+            Buf *err_ret_trace_ptr_field_name = buf_create_from_str(ERR_RET_TRACE_PTR_FIELD_NAME);
+            IrInstruction *coro_err_ret_trace_ptr_field_ptr = ir_build_field_ptr(irb, scope, node, coro_promise_ptr, err_ret_trace_ptr_field_name);
+            ir_build_store_ptr(irb, scope, node, coro_err_ret_trace_ptr_field_ptr, err_ret_trace_ptr);
+        }
 
 
         irb->exec->coro_early_final = ir_create_basic_block(irb, scope, "CoroEarlyFinal");
@@ -11579,18 +11589,25 @@ static bool exec_has_err_ret_trace(CodeGen *g, IrExecutable *exec) {
 static TypeTableEntry *ir_analyze_instruction_error_return_trace(IrAnalyze *ira,
         IrInstructionErrorReturnTrace *instruction)
 {
-    TypeTableEntry *ptr_to_stack_trace_type = get_ptr_to_stack_trace_type(ira->codegen);
-    TypeTableEntry *nullable_type = get_maybe_type(ira->codegen, ptr_to_stack_trace_type);
-    if (!exec_has_err_ret_trace(ira->codegen, ira->new_irb.exec)) {
-        ConstExprValue *out_val = ir_build_const_from(ira, &instruction->base);
-        out_val->data.x_maybe = nullptr;
+    if (instruction->nullable == IrInstructionErrorReturnTrace::Null) { // Null mode: the result type is the maybe-wrapped pointer-to-stack-trace type
+        TypeTableEntry *ptr_to_stack_trace_type = get_ptr_to_stack_trace_type(ira->codegen);
+        TypeTableEntry *nullable_type = get_maybe_type(ira->codegen, ptr_to_stack_trace_type);
+        if (!exec_has_err_ret_trace(ira->codegen, ira->new_irb.exec)) { // this executable has no error return trace: fold to a constant
+            ConstExprValue *out_val = ir_build_const_from(ira, &instruction->base);
+            out_val->data.x_maybe = nullptr; // constant maybe value with no payload
+            return nullable_type;
+        }
+        IrInstruction *new_instruction = ir_build_error_return_trace(&ira->new_irb, instruction->base.scope,
+                instruction->base.source_node, instruction->nullable); // rebuild in ira->new_irb, keeping the same nullable mode
+        ir_link_new_instruction(new_instruction, &instruction->base);
         return nullable_type;
+    } else { // NonNull mode: the result type is the plain (non-maybe) pointer-to-stack-trace type
+        assert(ira->codegen->have_err_ret_tracing); // NonNull is only expected when error return tracing is enabled
+        IrInstruction *new_instruction = ir_build_error_return_trace(&ira->new_irb, instruction->base.scope,
+                instruction->base.source_node, instruction->nullable); // rebuild in ira->new_irb, keeping the same nullable mode
+        ir_link_new_instruction(new_instruction, &instruction->base);
+        return get_ptr_to_stack_trace_type(ira->codegen);
     }
-
-    IrInstruction *new_instruction = ir_build_error_return_trace(&ira->new_irb, instruction->base.scope,
-            instruction->base.source_node);
-    ir_link_new_instruction(new_instruction, &instruction->base);
-    return nullable_type;
 }
 
 static TypeTableEntry *ir_analyze_instruction_error_union(IrAnalyze *ira,
@@ -17904,6 +17921,34 @@ static TypeTableEntry *ir_analyze_instruction_await_bookkeeping(IrAnalyze *ira,
     return out_val->type;
 }
 
+static TypeTableEntry *ir_analyze_instruction_merge_err_ret_traces(IrAnalyze *ira,
+        IrInstructionMergeErrRetTraces *instruction)
+{ // Analyzes a MergeErrRetTraces instruction; yields void, or the invalid type when the promise pointer's type is invalid.
+    IrInstruction *coro_promise_ptr = instruction->coro_promise_ptr->other; // NOTE(review): `other` is presumably the operand's counterpart in the analyzed IR -- confirm
+    if (type_is_invalid(coro_promise_ptr->value.type))
+        return ira->codegen->builtin_types.entry_invalid; // propagate the invalid type; nothing is built
+
+    assert(coro_promise_ptr->value.type->id == TypeTableEntryIdPointer); // operand must be a pointer ...
+    TypeTableEntry *promise_frame_type = coro_promise_ptr->value.type->data.pointer.child_type;
+    assert(promise_frame_type->id == TypeTableEntryIdStruct); // ... to a struct (the promise frame)
+    TypeTableEntry *promise_result_type = promise_frame_type->data.structure.fields[1].type_entry; // NOTE(review): hard-coded index 1 is presumably the RESULT_FIELD_NAME field -- confirm against get_promise_frame_type
+
+    if (!type_can_fail(promise_result_type)) { // result type cannot fail: replace the instruction with a void constant, no merge is built
+        ConstExprValue *out_val = ir_build_const_from(ira, &instruction->base);
+        out_val->type = ira->codegen->builtin_types.entry_void;
+        return out_val->type;
+    }
+
+    TypeStructField *field = find_struct_type_field(promise_frame_type, buf_create_from_str(ERR_RET_TRACE_PTR_FIELD_NAME)); // look the trace-pointer field up by name
+    assert(field != nullptr); // the frame struct is expected to have this field whenever the result type can fail
+
+    IrInstruction *result = ir_build_merge_err_ret_traces(&ira->new_irb, instruction->base.scope,
+            instruction->base.source_node, coro_promise_ptr, field); // rebuild in ira->new_irb with the field resolved
+    ir_link_new_instruction(result, &instruction->base);
+    result->value.type = ira->codegen->builtin_types.entry_void;
+    return result->value.type;
+}
+
 static TypeTableEntry *ir_analyze_instruction_save_err_ret_addr(IrAnalyze *ira, IrInstructionSaveErrRetAddr *instruction) {
     IrInstruction *result = ir_build_save_err_ret_addr(&ira->new_irb, instruction->base.scope,
             instruction->base.source_node);
@@ -18155,6 +18200,8 @@ static TypeTableEntry *ir_analyze_instruction_nocast(IrAnalyze *ira, IrInstructi
             return ir_analyze_instruction_save_err_ret_addr(ira, (IrInstructionSaveErrRetAddr *)instruction);
         case IrInstructionIdAddImplicitReturnType:
             return ir_analyze_instruction_add_implicit_return_type(ira, (IrInstructionAddImplicitReturnType *)instruction);
+        case IrInstructionIdMergeErrRetTraces:
+            return ir_analyze_instruction_merge_err_ret_traces(ira, (IrInstructionMergeErrRetTraces *)instruction);
     }
     zig_unreachable();
 }
@@ -18282,6 +18329,7 @@ bool ir_has_side_effects(IrInstruction *instruction) {
         case IrInstructionIdAwaitBookkeeping:
         case IrInstructionIdSaveErrRetAddr:
         case IrInstructionIdAddImplicitReturnType:
+        case IrInstructionIdMergeErrRetTraces:
             return true;
 
         case IrInstructionIdPhi:
src/ir_print.cpp
@@ -1024,7 +1024,16 @@ static void ir_print_export(IrPrint *irp, IrInstructionExport *instruction) {
 }
 
 static void ir_print_error_return_trace(IrPrint *irp, IrInstructionErrorReturnTrace *instruction) {
-    fprintf(irp->f, "@errorReturnTrace()");
+    fprintf(irp->f, "@errorReturnTrace("); // Output is "@errorReturnTrace(Null)" or "@errorReturnTrace(NonNull)", written to irp->f
+    switch (instruction->nullable) { // both enumerators are handled explicitly; there is no default case
+        case IrInstructionErrorReturnTrace::Null:
+            fprintf(irp->f, "Null");
+            break;
+        case IrInstructionErrorReturnTrace::NonNull:
+            fprintf(irp->f, "NonNull");
+            break;
+    }
+    fprintf(irp->f, ")");
 }
 
 static void ir_print_error_union(IrPrint *irp, IrInstructionErrorUnion *instruction) {
@@ -1179,6 +1188,16 @@ static void ir_print_add_implicit_return_type(IrPrint *irp, IrInstructionAddImpl
     fprintf(irp->f, ")");
 }
 
+static void ir_print_merge_err_ret_traces(IrPrint *irp, IrInstructionMergeErrRetTraces *instruction) { // Prints "@mergeErrRetTraces(<promise ptr>,[field '<name>'])" to irp->f
+    fprintf(irp->f, "@mergeErrRetTraces(");
+    ir_print_other_instruction(irp, instruction->coro_promise_ptr);
+    fprintf(irp->f, ","); // the comma is printed even when no field name follows
+    if (instruction->resolved_field != nullptr) { // the field name is printed only when a field has been resolved
+        fprintf(irp->f, "field '%s'", buf_ptr(instruction->resolved_field->name));
+    }
+    fprintf(irp->f, ")");
+}
+
 static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction) {
     ir_print_prefix(irp, instruction);
     switch (instruction->id) {
@@ -1559,6 +1578,9 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction) {
         case IrInstructionIdAddImplicitReturnType:
             ir_print_add_implicit_return_type(irp, (IrInstructionAddImplicitReturnType *)instruction);
             break;
+        case IrInstructionIdMergeErrRetTraces:
+            ir_print_merge_err_ret_traces(irp, (IrInstructionMergeErrRetTraces *)instruction);
+            break;
     }
     fprintf(irp->f, "\n");
 }