Commit c9b2210fcf

Andrew Kelley <andrew@ziglang.org>
2019-09-11 04:59:00
async function calls re-use frame buffers
See #3069
1 parent 7101e58
Changed files (2)
src/analyze.cpp
@@ -2538,6 +2538,8 @@ static Error resolve_enum_zero_bits(CodeGen *g, ZigType *enum_type) {
     enum_type->data.enumeration.resolve_loop_flag = false;
     enum_type->data.enumeration.resolve_status = ResolveStatusSizeKnown;
 
+    occupied_tag_values.deinit();
+
     return ErrorNone;
 }
 
@@ -5878,6 +5880,11 @@ static Error resolve_async_frame(CodeGen *g, ZigType *frame_type) {
         fn->err_code_spill = &alloca_gen->base;
     }
 
+    ZigType *largest_call_frame_type = nullptr;
+    // Later we'll change this to be largest_call_frame_type instead of void.
+    IrInstruction *all_calls_alloca = ir_create_alloca(g, &fn->fndef_scope->base, fn->body_node,
+            fn, g->builtin_types.entry_void, "@async_call_frame");
+
     for (size_t i = 0; i < fn->call_list.length; i += 1) {
         IrInstructionCallGen *call = fn->call_list.at(i);
         if (call->new_stack != nullptr) {
@@ -5921,9 +5928,21 @@ static Error resolve_async_frame(CodeGen *g, ZigType *frame_type) {
 
         mark_suspension_point(call->base.scope);
 
-        call->frame_result_loc = ir_create_alloca(g, call->base.scope, call->base.source_node, fn,
-                callee_frame_type, "");
+        if ((err = type_resolve(g, callee_frame_type, ResolveStatusSizeKnown))) {
+            return err;
+        }
+        if (largest_call_frame_type == nullptr ||
+            callee_frame_type->abi_size > largest_call_frame_type->abi_size)
+        {
+            largest_call_frame_type = callee_frame_type;
+        }
+
+        call->frame_result_loc = all_calls_alloca;
     }
+    if (largest_call_frame_type != nullptr) {
+        all_calls_alloca->value.type = get_pointer_to_type(g, largest_call_frame_type, false);
+    }
+
     // Since this frame is async, an await might represent a suspend point, and
     // therefore need to spill. It also needs to mark expr scopes as having to spill.
     // For example: foo() + await z
src/codegen.cpp
@@ -3863,6 +3863,7 @@ static LLVMValueRef ir_render_call(CodeGen *g, IrExecutable *executable, IrInstr
     ZigList<ZigType *> gen_param_types = {};
     LLVMValueRef result_loc = instruction->result_loc ? ir_llvm_value(g, instruction->result_loc) : nullptr;
     LLVMValueRef zero = LLVMConstNull(usize_type_ref);
+    LLVMValueRef frame_result_loc_uncasted = nullptr;
     LLVMValueRef frame_result_loc;
     LLVMValueRef awaiter_init_val;
     LLVMValueRef ret_ptr;
@@ -3871,7 +3872,10 @@ static LLVMValueRef ir_render_call(CodeGen *g, IrExecutable *executable, IrInstr
             if (instruction->modifier == CallModifierAsync) {
                 frame_result_loc = result_loc;
             } else {
-                frame_result_loc = ir_llvm_value(g, instruction->frame_result_loc);
+                frame_result_loc_uncasted = ir_llvm_value(g, instruction->frame_result_loc);
+                src_assert(instruction->fn_entry != nullptr, instruction->base.source_node);
+                frame_result_loc = LLVMBuildBitCast(g->builder, frame_result_loc_uncasted,
+                        LLVMPointerType(get_llvm_type(g, instruction->fn_entry->frame_type), 0), "");
             }
         } else {
             if (instruction->new_stack->value.type->id == ZigTypeIdPointer &&
@@ -4138,6 +4142,13 @@ static LLVMValueRef ir_render_call(CodeGen *g, IrExecutable *executable, IrInstr
                 }
             }
 
+            if (frame_result_loc_uncasted != nullptr && instruction->fn_entry != nullptr) {
+                // Instead of a spill, we do the bitcast again. The uncasted LLVM IR instruction will
+                // be an Alloca from the entry block, so it does not need to be spilled.
+                frame_result_loc = LLVMBuildBitCast(g->builder, frame_result_loc_uncasted,
+                        LLVMPointerType(get_llvm_type(g, instruction->fn_entry->frame_type), 0), "");
+            }
+
             LLVMValueRef result_ptr = LLVMBuildStructGEP(g->builder, frame_result_loc, frame_ret_start + 2, "");
             return LLVMBuildLoad(g->builder, result_ptr, "");
         }