Commit 7bb6393b59

Isaac Freund <isaac.freund@coil.com>
2021-12-07 19:35:46
stage1: implement @prefetch() builtin
1 parent 175463d
src/stage1/all_types.hpp
@@ -898,6 +898,18 @@ struct AstNodeFnCallExpr {
     bool seen; // used by @compileLog
 };
 
+// Must stay in sync with std.builtin.PrefetchOptions.Rw. Codegen passes these values
+// unchanged as the rw argument of llvm.prefetch.
+enum PrefetchRw {
+    PrefetchRwRead = 0,
+    PrefetchRwWrite = 1,
+};
+
+// Must stay in sync with std.builtin.PrefetchOptions.Cache (values go straight to llvm.prefetch).
+enum PrefetchCache {
+    PrefetchCacheInstruction = 0,
+    PrefetchCacheData = 1,
+};
+
 struct AstNodeArrayAccessExpr {
     AstNode *array_ref_expr;
     AstNode *subscript;
@@ -1818,6 +1830,7 @@ enum BuiltinFnId {
     BuiltinFnIdReduce,
     BuiltinFnIdMaximum,
     BuiltinFnIdMinimum,
+    BuiltinFnIdPrefetch,
 };
 
 struct BuiltinFnEntry {
@@ -2021,6 +2034,7 @@ struct CodeGen {
     LLVMValueRef return_err_fn;
     LLVMValueRef wasm_memory_size;
     LLVMValueRef wasm_memory_grow;
+    LLVMValueRef prefetch;
     LLVMTypeRef anyframe_fn_type;
 
     // reminder: hash tables must be initialized before use
@@ -2647,6 +2661,7 @@ enum Stage1ZirInstId : uint8_t {
     Stage1ZirInstIdWasmMemorySize,
     Stage1ZirInstIdWasmMemoryGrow,
     Stage1ZirInstIdSrc,
+    Stage1ZirInstIdPrefetch,
 };
 
 // ir_render_* functions in codegen.cpp consume Gen instructions and produce LLVM IR.
@@ -2743,6 +2758,7 @@ enum Stage1AirInstId : uint8_t {
     Stage1AirInstIdWasmMemorySize,
     Stage1AirInstIdWasmMemoryGrow,
     Stage1AirInstIdExtern,
+    Stage1AirInstIdPrefetch,
 };
 
 struct Stage1ZirInst {
@@ -3683,6 +3699,24 @@ struct Stage1ZirInstSrc {
     Stage1ZirInst base;
 };
 
+struct Stage1ZirInstPrefetch {
+    Stage1ZirInst base;
+    // The two operands of @prefetch(ptr, options), stored by ir_build_prefetch.
+    Stage1ZirInst *ptr;
+    Stage1ZirInst *options;
+};
+
+struct Stage1AirInstPrefetch {
+    Stage1AirInst base;
+    // ptr is the analyzed pointer operand; rw, locality and cache are copied from the constant options value.
+    Stage1AirInst *ptr;
+    PrefetchRw rw;
+    // Must be in the range 0-3 inclusive
+    uint8_t locality;
+    PrefetchCache cache;
+};
+
+
 struct Stage1ZirInstSlice {
     Stage1ZirInst base;
 
src/stage1/astgen.cpp
@@ -349,6 +349,8 @@ void destroy_instruction_src(Stage1ZirInst *inst) {
             return heap::c_allocator.destroy(reinterpret_cast<Stage1ZirInstWasmMemoryGrow *>(inst));
         case Stage1ZirInstIdSrc:
             return heap::c_allocator.destroy(reinterpret_cast<Stage1ZirInstSrc *>(inst));
+        case Stage1ZirInstIdPrefetch:
+            return heap::c_allocator.destroy(reinterpret_cast<Stage1ZirInstPrefetch *>(inst));
     }
     zig_unreachable();
 }
@@ -941,6 +943,10 @@ static constexpr Stage1ZirInstId ir_inst_id(Stage1ZirInstSrc *) {
     return Stage1ZirInstIdSrc;
 }
 
+static constexpr Stage1ZirInstId ir_inst_id(Stage1ZirInstPrefetch *) {
+    return Stage1ZirInstIdPrefetch; // selected purely by overload on the (unnamed) pointer type
+}
+
 template<typename T>
 static T *ir_create_instruction(Stage1AstGen *ag, Scope *scope, AstNode *source_node) {
     T *special_instruction = heap::c_allocator.create<T>();
@@ -2870,6 +2876,21 @@ static Stage1ZirInst *ir_build_src(Stage1AstGen *ag, Scope *scope, AstNode *sour
     return &instruction->base;
 }
 
+// Creates the ZIR instruction for @prefetch(ptr, options) and calls ir_ref_instruction
+// on each operand, passing ag->current_basic_block.
+static Stage1ZirInst *ir_build_prefetch(Stage1AstGen *ag, Scope *scope, AstNode *source_node,
+        Stage1ZirInst *ptr, Stage1ZirInst *options)
+{
+    Stage1ZirInstPrefetch *inst = ir_build_instruction<Stage1ZirInstPrefetch>(ag, scope, source_node);
+    inst->ptr = ptr;
+    ir_ref_instruction(ptr, ag->current_basic_block);
+    inst->options = options;
+    ir_ref_instruction(options, ag->current_basic_block);
+
+    return &inst->base;
+}
+
+
 static void ir_count_defers(Stage1AstGen *ag, Scope *inner_scope, Scope *outer_scope, size_t *results) {
     results[ReturnKindUnconditional] = 0;
     results[ReturnKindError] = 0;
@@ -5416,6 +5437,29 @@ static Stage1ZirInst *astgen_builtin_fn_call(Stage1AstGen *ag, Scope *scope, Ast
                 Stage1ZirInst *src_inst = ir_build_src(ag, scope, node);
                 return ir_lval_wrap(ag, scope, src_inst, lval, result_loc);
             }
+        case BuiltinFnIdPrefetch:
+            {
+                // The options argument is generated against a result location that casts to the
+                // builtin PrefetchOptions type; the pointer argument is generated as-is.
+                ZigType *options_type = get_builtin_type(ag->codegen, "PrefetchOptions");
+                Stage1ZirInst *options_type_inst = ir_build_const_type(ag, scope, node, options_type);
+                ResultLocCast *result_loc_cast = ir_build_cast_result_loc(ag, options_type_inst, no_result_loc());
+
+                AstNode *ptr_node = node->data.fn_call_expr.params.at(0);
+                Stage1ZirInst *ptr_value = astgen_node(ag, ptr_node, scope);
+                if (ptr_value == ag->codegen->invalid_inst_src)
+                    return ptr_value;
+
+                AstNode *options_node = node->data.fn_call_expr.params.at(1);
+                Stage1ZirInst *options_value = astgen_node_extra(ag, options_node, scope, LValNone, &result_loc_cast->base);
+                if (options_value == ag->codegen->invalid_inst_src)
+                    return options_value;
+
+                Stage1ZirInst *casted_options_value = ir_build_implicit_cast(ag, scope, options_node, options_value, result_loc_cast);
+
+                Stage1ZirInst *prefetch_inst = ir_build_prefetch(ag, scope, node, ptr_value, casted_options_value);
+                return ir_lval_wrap(ag, scope, prefetch_inst, lval, result_loc);
+            }
     }
     zig_unreachable();
 }
src/stage1/codegen.cpp
@@ -1139,6 +1139,24 @@ static LLVMValueRef gen_wasm_memory_grow(CodeGen *g) {
     return g->wasm_memory_grow;
 }
 
+// Returns the llvm.prefetch.p0i8 intrinsic, declaring it in g->module on first use
+// and caching the declaration in g->prefetch.
+static LLVMValueRef gen_prefetch(CodeGen *g) {
+    if (g->prefetch == nullptr) {
+        // declare void @llvm.prefetch.p0i8(i8*, i32, i32, i32)
+        LLVMTypeRef arg_types[] = {
+            LLVMPointerType(LLVMInt8Type(), 0),
+            LLVMInt32Type(),
+            LLVMInt32Type(),
+            LLVMInt32Type(),
+        };
+        LLVMTypeRef signature = LLVMFunctionType(LLVMVoidType(), arg_types, 4, false);
+        g->prefetch = LLVMAddFunction(g->module, "llvm.prefetch.p0i8", signature);
+        assert(LLVMGetIntrinsicID(g->prefetch));
+    }
+    return g->prefetch;
+}
+
 static LLVMValueRef get_stacksave_fn_val(CodeGen *g) {
     if (g->stacksave_fn_val)
         return g->stacksave_fn_val;
@@ -5899,6 +5917,52 @@ static LLVMValueRef ir_render_wasm_memory_grow(CodeGen *g, Stage1Air *executable
     return val;
 }
 
+// Emits a call to llvm.prefetch for @prefetch, or emits nothing and returns nullptr for the
+// target/option combinations that hit the LLVM codegen bug described in the body.
+static LLVMValueRef ir_render_prefetch(CodeGen *g, Stage1Air *executable, Stage1AirInstPrefetch *instruction) {
+    static_assert(PrefetchRwRead == 0, "");
+    static_assert(PrefetchRwWrite == 1, "");
+    assert(instruction->rw == PrefetchRwRead || instruction->rw == PrefetchRwWrite);
+
+    assert(instruction->locality <= 3); // locality is unsigned, so a `>= 0` check would always be true
+
+    static_assert(PrefetchCacheInstruction == 0, "");
+    static_assert(PrefetchCacheData == 1, "");
+    assert(instruction->cache == PrefetchCacheData || instruction->cache == PrefetchCacheInstruction);
+
+    // LLVM fails during codegen of instruction cache prefetches for these architectures.
+    // This is an LLVM bug as the prefetch intrinsic should be a noop if not supported by the target.
+    // To work around this, simply don't emit llvm.prefetch in this case.
+    // See https://bugs.llvm.org/show_bug.cgi?id=21037
+    if (instruction->cache == PrefetchCacheInstruction) {
+        switch (g->zig_target->arch) {
+            case ZigLLVM_x86:
+            case ZigLLVM_x86_64:
+                return nullptr;
+            default:
+                break;
+        }
+    }
+
+    // Another case of the same LLVM bug described above
+    if (instruction->rw == PrefetchRwWrite && instruction->cache == PrefetchCacheInstruction) {
+        switch (g->zig_target->arch) {
+            case ZigLLVM_arm:
+                return nullptr;
+            default:
+                break;
+        }
+    }
+
+    LLVMValueRef params[] = {
+        LLVMBuildBitCast(g->builder, ir_llvm_value(g, instruction->ptr), LLVMPointerType(LLVMInt8Type(), 0), ""),
+        LLVMConstInt(LLVMInt32Type(), instruction->rw, false),
+        LLVMConstInt(LLVMInt32Type(), instruction->locality, false),
+        LLVMConstInt(LLVMInt32Type(), instruction->cache, false),
+    };
+    return LLVMBuildCall(g->builder, gen_prefetch(g), params, 4, "");
+}
+
 static LLVMValueRef ir_render_slice(CodeGen *g, Stage1Air *executable, Stage1AirInstSlice *instruction) {
     Error err;
 
@@ -7150,6 +7214,8 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, Stage1Air *executable, Sta
             return ir_render_wasm_memory_grow(g, executable, (Stage1AirInstWasmMemoryGrow *) instruction);
         case Stage1AirInstIdExtern:
             return ir_render_extern(g, executable, (Stage1AirInstExtern *) instruction);
+        case Stage1AirInstIdPrefetch:
+            return ir_render_prefetch(g, executable, (Stage1AirInstPrefetch *) instruction);
     }
     zig_unreachable();
 }
@@ -9120,6 +9186,7 @@ static void define_builtin_fns(CodeGen *g) {
     create_builtin_fn(g, BuiltinFnIdReduce, "reduce", 2);
     create_builtin_fn(g, BuiltinFnIdMaximum, "maximum", 2);
     create_builtin_fn(g, BuiltinFnIdMinimum, "minimum", 2);
+    create_builtin_fn(g, BuiltinFnIdPrefetch, "prefetch", 2);
 }
 
 static const char *bool_to_str(bool b) {
src/stage1/ir.cpp
@@ -467,6 +467,8 @@ void destroy_instruction_gen(Stage1AirInst *inst) {
             return heap::c_allocator.destroy(reinterpret_cast<Stage1AirInstWasmMemoryGrow *>(inst));
         case Stage1AirInstIdExtern:
             return heap::c_allocator.destroy(reinterpret_cast<Stage1AirInstExtern *>(inst));
+        case Stage1AirInstIdPrefetch:
+            return heap::c_allocator.destroy(reinterpret_cast<Stage1AirInstPrefetch *>(inst));
     }
     zig_unreachable();
 }
@@ -1115,6 +1117,10 @@ static constexpr Stage1AirInstId ir_inst_id(Stage1AirInstExtern *) {
     return Stage1AirInstIdExtern;
 }
 
+static constexpr Stage1AirInstId ir_inst_id(Stage1AirInstPrefetch *) {
+    return Stage1AirInstIdPrefetch; // selected purely by overload on the (unnamed) pointer type
+}
+
 template<typename T>
 static T *ir_create_inst_gen(IrBuilderGen *irb, Scope *scope, AstNode *source_node) {
     T *special_instruction = heap::c_allocator.create<T>();
@@ -24853,6 +24859,52 @@ static Stage1AirInst *ir_analyze_instruction_src(IrAnalyze *ira, Stage1ZirInstSr
     return ir_const_move(ira, instruction->base.scope, instruction->base.source_node, result);
 }
 
+// Analyzes @prefetch: the options operand is cast to the builtin PrefetchOptions type, resolved
+// as a constant, and its rw/locality/cache fields are copied into a void AIR instruction.
+// NOTE(review): `ptr` is only checked for an invalid type, not for being a pointer — confirm.
+static Stage1AirInst *ir_analyze_instruction_prefetch(IrAnalyze *ira, Stage1ZirInstPrefetch *instruction) {
+    Stage1AirInst *ptr = instruction->ptr->child;
+    if (type_is_invalid(ptr->value->type))
+        return ira->codegen->invalid_inst_gen;
+
+    Stage1AirInst *uncasted_options = instruction->options->child;
+    if (type_is_invalid(uncasted_options->value->type))
+        return ira->codegen->invalid_inst_gen;
+
+    ZigType *prefetch_options_type = get_builtin_type(ira->codegen, "PrefetchOptions");
+    Stage1AirInst *options = ir_implicit_cast(ira, uncasted_options, prefetch_options_type);
+    if (type_is_invalid(options->value->type))
+        return ira->codegen->invalid_inst_gen;
+
+    ZigValue *options_const = ir_resolve_const(ira, options, UndefBad);
+    if (!options_const)
+        return ira->codegen->invalid_inst_gen;
+
+    ZigValue *rw_field = get_const_field(ira, options->source_node, options_const, "rw", 0);
+    if (!rw_field)
+        return ira->codegen->invalid_inst_gen;
+    PrefetchRw rw = static_cast<PrefetchRw>(bigint_as_u8(&rw_field->data.x_enum_tag));
+
+    ZigValue *locality_field = get_const_field(ira, options->source_node, options_const, "locality", 1);
+    if (!locality_field)
+        return ira->codegen->invalid_inst_gen;
+    uint8_t locality = bigint_as_u8(&locality_field->data.x_bigint);
+    assert(locality <= 3);
+
+    ZigValue *cache_field = get_const_field(ira, options->source_node, options_const, "cache", 2);
+    if (!cache_field)
+        return ira->codegen->invalid_inst_gen;
+    PrefetchCache cache = static_cast<PrefetchCache>(bigint_as_u8(&cache_field->data.x_enum_tag));
+
+    Stage1AirInstPrefetch *prefetch = ir_build_inst_void<Stage1AirInstPrefetch>(&ira->new_irb, instruction->base.scope, instruction->base.source_node);
+    prefetch->ptr = ptr;
+    prefetch->rw = rw;
+    prefetch->locality = locality;
+    prefetch->cache = cache;
+    ir_ref_inst_gen(ptr);
+    return &prefetch->base;
+}
+
 static Stage1AirInst *ir_analyze_instruction_base(IrAnalyze *ira, Stage1ZirInst *instruction) {
     switch (instruction->id) {
         case Stage1ZirInstIdInvalid:
@@ -25138,6 +25190,8 @@ static Stage1AirInst *ir_analyze_instruction_base(IrAnalyze *ira, Stage1ZirInst
             return ir_analyze_instruction_wasm_memory_grow(ira, (Stage1ZirInstWasmMemoryGrow *)instruction);
         case Stage1ZirInstIdSrc:
             return ir_analyze_instruction_src(ira, (Stage1ZirInstSrc *)instruction);
+        case Stage1ZirInstIdPrefetch:
+            return ir_analyze_instruction_prefetch(ira, (Stage1ZirInstPrefetch *)instruction);
     }
     zig_unreachable();
 }
@@ -25305,6 +25359,7 @@ bool ir_inst_gen_has_side_effects(Stage1AirInst *instruction) {
         case Stage1AirInstIdSpillBegin:
         case Stage1AirInstIdWasmMemoryGrow:
         case Stage1AirInstIdExtern:
+        case Stage1AirInstIdPrefetch:
             return true;
 
         case Stage1AirInstIdPhi:
@@ -25444,6 +25499,7 @@ bool ir_inst_src_has_side_effects(Stage1ZirInst *instruction) {
         case Stage1ZirInstIdAwait:
         case Stage1ZirInstIdSpillBegin:
         case Stage1ZirInstIdWasmMemoryGrow:
+        case Stage1ZirInstIdPrefetch:
             return true;
 
         case Stage1ZirInstIdPhi:
src/stage1/ir_print.cpp
@@ -371,6 +371,8 @@ const char* ir_inst_src_type_str(Stage1ZirInstId id) {
             return "SrcWasmMemoryGrow";
         case Stage1ZirInstIdSrc:
             return "SrcSrc";
+        case Stage1ZirInstIdPrefetch:
+            return "SrcPrefetch";
     }
     zig_unreachable();
 }
@@ -559,6 +561,8 @@ const char* ir_inst_gen_type_str(Stage1AirInstId id) {
             return "GenWasmMemoryGrow";
         case Stage1AirInstIdExtern:
             return "GenExtern";
+        case Stage1AirInstIdPrefetch:
+            return "GenPrefetch";
     }
     zig_unreachable();
 }
@@ -2436,6 +2440,18 @@ static void ir_print_extern(IrPrintSrc *irp, Stage1ZirInstExtern *instruction) {
     fprintf(irp->f, ")");
 }
 
+static void ir_print_prefetch(IrPrintSrc *irp, Stage1ZirInstPrefetch *instruction) {
+    fprintf(irp->f, "@prefetch("); // overall output: `@prefetch(<ptr>,<options>)`, no space after the comma
+    ir_print_other_inst_src(irp, instruction->ptr);
+    fprintf(irp->f, ",");
+    ir_print_other_inst_src(irp, instruction->options);
+    fprintf(irp->f, ")");
+}
+
+static void ir_print_prefetch(IrPrintGen *irp, Stage1AirInstPrefetch *instruction) {
+    fprintf(irp->f, "@prefetch(...)"); // literal "..."; ptr, rw, locality and cache are not printed
+}
+
 static void ir_print_error_return_trace(IrPrintSrc *irp, Stage1ZirInstErrorReturnTrace *instruction) {
     fprintf(irp->f, "@errorReturnTrace(");
     switch (instruction->optional) {
@@ -3108,6 +3124,9 @@ static void ir_print_inst_src(IrPrintSrc *irp, Stage1ZirInst *instruction, bool
         case Stage1ZirInstIdSrc:
             ir_print_builtin_src(irp, (Stage1ZirInstSrc *)instruction);
             break;
+        case Stage1ZirInstIdPrefetch:
+            ir_print_prefetch(irp, (Stage1ZirInstPrefetch *)instruction);
+            break;
     }
     fprintf(irp->f, "\n");
 }
@@ -3387,6 +3406,9 @@ static void ir_print_inst_gen(IrPrintGen *irp, Stage1AirInst *instruction, bool
         case Stage1AirInstIdExtern:
             ir_print_extern(irp, (Stage1AirInstExtern *)instruction);
             break;
+        case Stage1AirInstIdPrefetch:
+            ir_print_prefetch(irp, (Stage1AirInstPrefetch *)instruction);
+            break;
 
     }
     fprintf(irp->f, "\n");
test/behavior/prefetch.zig
@@ -0,0 +1,27 @@
+const std = @import("std");
+
+test "@prefetch()" {
+    var a: u32 = 42;
+    // Empty options literal: no field is specified explicitly.
+    @prefetch(&a, .{});
+    // Data cache, read access, every locality value from 3 down to 0.
+    @prefetch(&a, .{ .rw = .read, .locality = 3, .cache = .data });
+    @prefetch(&a, .{ .rw = .read, .locality = 2, .cache = .data });
+    @prefetch(&a, .{ .rw = .read, .locality = 1, .cache = .data });
+    @prefetch(&a, .{ .rw = .read, .locality = 0, .cache = .data });
+    // Data cache, write access.
+    @prefetch(&a, .{ .rw = .write, .locality = 3, .cache = .data });
+    @prefetch(&a, .{ .rw = .write, .locality = 2, .cache = .data });
+    @prefetch(&a, .{ .rw = .write, .locality = 1, .cache = .data });
+    @prefetch(&a, .{ .rw = .write, .locality = 0, .cache = .data });
+    // Instruction cache, read access.
+    @prefetch(&a, .{ .rw = .read, .locality = 3, .cache = .instruction });
+    @prefetch(&a, .{ .rw = .read, .locality = 2, .cache = .instruction });
+    @prefetch(&a, .{ .rw = .read, .locality = 1, .cache = .instruction });
+    @prefetch(&a, .{ .rw = .read, .locality = 0, .cache = .instruction });
+    // Instruction cache, write access.
+    @prefetch(&a, .{ .rw = .write, .locality = 3, .cache = .instruction });
+    @prefetch(&a, .{ .rw = .write, .locality = 2, .cache = .instruction });
+    @prefetch(&a, .{ .rw = .write, .locality = 1, .cache = .instruction });
+    @prefetch(&a, .{ .rw = .write, .locality = 0, .cache = .instruction });
+}
test/behavior.zig
@@ -169,6 +169,7 @@ test {
                 _ = @import("behavior/optional_stage1.zig");
                 _ = @import("behavior/pointers_stage1.zig");
                 _ = @import("behavior/popcount_stage1.zig");
+                _ = @import("behavior/prefetch.zig");
                 _ = @import("behavior/ptrcast_stage1.zig");
                 _ = @import("behavior/reflection.zig");
                 _ = @import("behavior/saturating_arithmetic_stage1.zig");