Commit d410693dad

Andrew Kelley <andrew@ziglang.org>
2022-05-31 09:19:23
LLVM: elide some loads when lowering
Generally, the load instruction may need to make a copy of an isByRef=true value, such as in the case of the following code:

```zig
pub fn swap(comptime T: type, a: *T, b: *T) void {
    const tmp = a.*;
    a.* = b.*;
    b.* = tmp;
}
```

However, it only needs to do so if there are any instructions which can possibly write to memory. When calling functions with isByRef=true parameters, the AIR code that is generated looks like loads followed directly by call. This allows for a peephole optimization when lowering loads: if the load instruction operates on an isByRef=true type and dies before any side effects occur, then we can safely lower the load as a no-op that returns its operand.

This is one out of three changes I intend to make to address #11498. However I will put these changes in separate branches and merge them separately so that we can have three independent points on the perf charts.
1 parent 26aea8c
Changed files (2)
src/codegen/llvm.zig
@@ -3885,7 +3885,7 @@ pub const FuncGen = struct {
 
     fn genBody(self: *FuncGen, body: []const Air.Inst.Index) Error!void {
         const air_tags = self.air.instructions.items(.tag);
-        for (body) |inst| {
+        for (body) |inst, i| {
             const opt_value: ?*const llvm.Value = switch (air_tags[inst]) {
                 // zig fmt: off
                 .add       => try self.airAdd(inst),
@@ -3976,7 +3976,7 @@ pub const FuncGen = struct {
                 .fptrunc        => try self.airFptrunc(inst),
                 .fpext          => try self.airFpext(inst),
                 .ptrtoint       => try self.airPtrToInt(inst),
-                .load           => try self.airLoad(inst),
+                .load           => try self.airLoad(inst, body, i + 1),
                 .loop           => try self.airLoop(inst),
                 .not            => try self.airNot(inst),
                 .ret            => try self.airRet(inst),
@@ -6982,11 +6982,33 @@ pub const FuncGen = struct {
         return null;
     }
 
-    fn airLoad(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airLoad(
+        self: *FuncGen,
+        inst: Air.Inst.Index,
+        body: []const Air.Inst.Index,
+        body_i: usize,
+    ) !?*const llvm.Value {
         const ty_op = self.air.instructions.items(.data)[inst].ty_op;
         const ptr_ty = self.air.typeOf(ty_op.operand);
-        if (!ptr_ty.isVolatilePtr() and self.liveness.isUnused(inst))
-            return null;
+        elide: {
+            const ptr_info = ptr_ty.ptrInfo().data;
+            if (ptr_info.@"volatile") break :elide;
+            if (self.liveness.isUnused(inst)) return null;
+            if (!isByRef(ptr_info.pointee_type)) break :elide;
+
+            // It would be valid to fall back to the code below here that simply calls
+            // load(). However, as an optimization, we want to avoid unnecessary copies
+            // of isByRef=true types. Here, we scan forward in the current block,
+            // looking to see if this load dies before any side effects occur.
+            // In such case, we can safely return the operand without making a copy.
+            for (body[body_i..]) |body_inst| {
+                switch (self.liveness.categorizeOperand(self.air, body_inst, inst)) {
+                    .none => continue,
+                    .write, .noret, .complex => break :elide,
+                    .tomb => return try self.resolveInst(ty_op.operand),
+                }
+            } else unreachable;
+        }
         const ptr = try self.resolveInst(ty_op.operand);
         return self.load(ptr, ptr_ty);
     }
src/Liveness.zig
@@ -112,6 +112,402 @@ pub fn clearOperandDeath(l: Liveness, inst: Air.Inst.Index, operand: OperandInt)
     l.tomb_bits[usize_index] &= ~mask;
 }
 
+const OperandCategory = enum {
+    /// The operand lives on, but this instruction cannot possibly mutate memory.
+    none,
+    /// The operand lives on and this instruction can mutate memory.
+    write,
+    /// The operand dies at this instruction.
+    tomb,
+    /// The operand lives on, and this instruction is noreturn.
+    noret,
+    /// This instruction is too complicated for analysis, no information is available.
+    complex,
+};
+
+/// Given an instruction that we are examining, and an operand that we are looking for,
+/// returns a classification.
+pub fn categorizeOperand(
+    l: Liveness,
+    air: Air,
+    inst: Air.Inst.Index,
+    operand: Air.Inst.Index,
+) OperandCategory {
+    const air_tags = air.instructions.items(.tag);
+    const air_datas = air.instructions.items(.data);
+    const operand_ref = Air.indexToRef(operand);
+    switch (air_tags[inst]) {
+        .add,
+        .addwrap,
+        .add_sat,
+        .sub,
+        .subwrap,
+        .sub_sat,
+        .mul,
+        .mulwrap,
+        .mul_sat,
+        .div_float,
+        .div_trunc,
+        .div_floor,
+        .div_exact,
+        .rem,
+        .mod,
+        .bit_and,
+        .bit_or,
+        .xor,
+        .cmp_lt,
+        .cmp_lte,
+        .cmp_eq,
+        .cmp_gte,
+        .cmp_gt,
+        .cmp_neq,
+        .bool_and,
+        .bool_or,
+        .array_elem_val,
+        .slice_elem_val,
+        .ptr_elem_val,
+        .shl,
+        .shl_exact,
+        .shl_sat,
+        .shr,
+        .shr_exact,
+        .min,
+        .max,
+        => {
+            const o = air_datas[inst].bin_op;
+            if (o.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            if (o.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
+            return .none;
+        },
+
+        .store,
+        .atomic_store_unordered,
+        .atomic_store_monotonic,
+        .atomic_store_release,
+        .atomic_store_seq_cst,
+        .set_union_tag,
+        => {
+            const o = air_datas[inst].bin_op;
+            if (o.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
+            if (o.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .write);
+            return .write;
+        },
+
+        .arg,
+        .alloc,
+        .ret_ptr,
+        .constant,
+        .const_ty,
+        .breakpoint,
+        .dbg_stmt,
+        .dbg_inline_begin,
+        .dbg_inline_end,
+        .dbg_block_begin,
+        .dbg_block_end,
+        .unreach,
+        .ret_addr,
+        .frame_addr,
+        .wasm_memory_size,
+        .err_return_trace,
+        => return .none,
+
+        .fence => return .write,
+
+        .not,
+        .bitcast,
+        .load,
+        .fpext,
+        .fptrunc,
+        .intcast,
+        .trunc,
+        .optional_payload,
+        .optional_payload_ptr,
+        .wrap_optional,
+        .unwrap_errunion_payload,
+        .unwrap_errunion_err,
+        .unwrap_errunion_payload_ptr,
+        .unwrap_errunion_err_ptr,
+        .wrap_errunion_payload,
+        .wrap_errunion_err,
+        .slice_ptr,
+        .slice_len,
+        .ptr_slice_len_ptr,
+        .ptr_slice_ptr_ptr,
+        .struct_field_ptr_index_0,
+        .struct_field_ptr_index_1,
+        .struct_field_ptr_index_2,
+        .struct_field_ptr_index_3,
+        .array_to_slice,
+        .float_to_int,
+        .int_to_float,
+        .get_union_tag,
+        .clz,
+        .ctz,
+        .popcount,
+        .byte_swap,
+        .bit_reverse,
+        .splat,
+        => {
+            const o = air_datas[inst].ty_op;
+            if (o.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            return .none;
+        },
+
+        .optional_payload_ptr_set,
+        .errunion_payload_ptr_set,
+        => {
+            const o = air_datas[inst].ty_op;
+            if (o.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
+            return .write;
+        },
+
+        .is_null,
+        .is_non_null,
+        .is_null_ptr,
+        .is_non_null_ptr,
+        .is_err,
+        .is_non_err,
+        .is_err_ptr,
+        .is_non_err_ptr,
+        .ptrtoint,
+        .bool_to_int,
+        .tag_name,
+        .error_name,
+        .sqrt,
+        .sin,
+        .cos,
+        .tan,
+        .exp,
+        .exp2,
+        .log,
+        .log2,
+        .log10,
+        .fabs,
+        .floor,
+        .ceil,
+        .round,
+        .trunc_float,
+        .cmp_lt_errors_len,
+        => {
+            const o = air_datas[inst].un_op;
+            if (o == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            return .none;
+        },
+
+        .ret,
+        .ret_load,
+        => {
+            const o = air_datas[inst].un_op;
+            if (o == operand_ref) return matchOperandSmallIndex(l, inst, 0, .noret);
+            return .noret;
+        },
+
+        .set_err_return_trace => {
+            const o = air_datas[inst].un_op;
+            if (o == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
+            return .write;
+        },
+
+        .add_with_overflow,
+        .sub_with_overflow,
+        .mul_with_overflow,
+        .shl_with_overflow,
+        .ptr_add,
+        .ptr_sub,
+        .ptr_elem_ptr,
+        .slice_elem_ptr,
+        .slice,
+        => {
+            const ty_pl = air_datas[inst].ty_pl;
+            const extra = air.extraData(Air.Bin, ty_pl.payload).data;
+            if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
+            return .none;
+        },
+
+        .dbg_var_ptr,
+        .dbg_var_val,
+        => {
+            const o = air_datas[inst].pl_op.operand;
+            if (o == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            return .none;
+        },
+
+        .prefetch => {
+            const prefetch = air_datas[inst].prefetch;
+            if (prefetch.ptr == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            return .none;
+        },
+
+        .call, .call_always_tail, .call_never_tail, .call_never_inline => {
+            const inst_data = air_datas[inst].pl_op;
+            const callee = inst_data.operand;
+            const extra = air.extraData(Air.Call, inst_data.payload);
+            const args = @ptrCast([]const Air.Inst.Ref, air.extra[extra.end..][0..extra.data.args_len]);
+            if (args.len + 1 <= bpi - 1) {
+                if (callee == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
+                for (args) |arg, i| {
+                    if (arg == operand_ref) return matchOperandSmallIndex(l, inst, @intCast(OperandInt, i + 1), .write);
+                }
+                return .write;
+            }
+            var bt = l.iterateBigTomb(inst);
+            if (bt.feed()) {
+                if (callee == operand_ref) return .tomb;
+            } else {
+                if (callee == operand_ref) return .write;
+            }
+            for (args) |arg| {
+                if (bt.feed()) {
+                    if (arg == operand_ref) return .tomb;
+                } else {
+                    if (arg == operand_ref) return .write;
+                }
+            }
+            return .write;
+        },
+        .select => {
+            const pl_op = air_datas[inst].pl_op;
+            const extra = air.extraData(Air.Bin, pl_op.payload).data;
+            if (pl_op.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
+            if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 2, .none);
+            return .none;
+        },
+        .shuffle => {
+            const extra = air.extraData(Air.Shuffle, air_datas[inst].ty_pl.payload).data;
+            if (extra.a == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            if (extra.b == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
+            return .none;
+        },
+        .reduce => {
+            const reduce = air_datas[inst].reduce;
+            if (reduce.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            return .none;
+        },
+        .cmp_vector => {
+            const extra = air.extraData(Air.VectorCmp, air_datas[inst].ty_pl.payload).data;
+            if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
+            return .none;
+        },
+        .aggregate_init => {
+            const ty_pl = air_datas[inst].ty_pl;
+            const aggregate_ty = air.getRefType(ty_pl.ty);
+            const len = @intCast(usize, aggregate_ty.arrayLen());
+            const elements = @ptrCast([]const Air.Inst.Ref, air.extra[ty_pl.payload..][0..len]);
+
+            if (elements.len <= bpi - 1) {
+                for (elements) |elem, i| {
+                    if (elem == operand_ref) return matchOperandSmallIndex(l, inst, @intCast(OperandInt, i), .none);
+                }
+                return .none;
+            }
+
+            var bt = l.iterateBigTomb(inst);
+            for (elements) |elem| {
+                if (bt.feed()) {
+                    if (elem == operand_ref) return .tomb;
+                } else {
+                    if (elem == operand_ref) return .write;
+                }
+            }
+            return .write;
+        },
+        .union_init => {
+            const extra = air.extraData(Air.UnionInit, air_datas[inst].ty_pl.payload).data;
+            if (extra.init == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            return .none;
+        },
+        .struct_field_ptr, .struct_field_val => {
+            const extra = air.extraData(Air.StructField, air_datas[inst].ty_pl.payload).data;
+            if (extra.struct_operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            return .none;
+        },
+        .field_parent_ptr => {
+            const extra = air.extraData(Air.FieldParentPtr, air_datas[inst].ty_pl.payload).data;
+            if (extra.field_ptr == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            return .none;
+        },
+        .cmpxchg_strong, .cmpxchg_weak => {
+            const extra = air.extraData(Air.Cmpxchg, air_datas[inst].ty_pl.payload).data;
+            if (extra.ptr == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
+            if (extra.expected_value == operand_ref) return matchOperandSmallIndex(l, inst, 1, .write);
+            if (extra.new_value == operand_ref) return matchOperandSmallIndex(l, inst, 2, .write);
+            return .write;
+        },
+        .mul_add => {
+            const pl_op = air_datas[inst].pl_op;
+            const extra = air.extraData(Air.Bin, pl_op.payload).data;
+            if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
+            if (pl_op.operand == operand_ref) return matchOperandSmallIndex(l, inst, 2, .none);
+            return .none;
+        },
+        .atomic_load => {
+            const ptr = air_datas[inst].atomic_load.ptr;
+            if (ptr == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            return .none;
+        },
+        .atomic_rmw => {
+            const pl_op = air_datas[inst].pl_op;
+            const extra = air.extraData(Air.AtomicRmw, pl_op.payload).data;
+            if (pl_op.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
+            if (extra.operand == operand_ref) return matchOperandSmallIndex(l, inst, 1, .write);
+            return .write;
+        },
+        .memset,
+        .memcpy,
+        => {
+            const pl_op = air_datas[inst].pl_op;
+            const extra = air.extraData(Air.Bin, pl_op.payload).data;
+            if (pl_op.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
+            if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .write);
+            if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 2, .write);
+            return .write;
+        },
+
+        .br => {
+            const br = air_datas[inst].br;
+            if (br.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .noret);
+            return .noret;
+        },
+        .assembly => {
+            return .complex;
+        },
+        .block => {
+            return .complex;
+        },
+        .loop => {
+            return .complex;
+        },
+        .cond_br => {
+            return .complex;
+        },
+        .switch_br => {
+            return .complex;
+        },
+        .wasm_memory_grow => {
+            const pl_op = air_datas[inst].pl_op;
+            if (pl_op.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
+            return .none;
+        },
+    }
+}
+
+fn matchOperandSmallIndex(
+    l: Liveness,
+    inst: Air.Inst.Index,
+    operand: OperandInt,
+    default: OperandCategory,
+) OperandCategory {
+    if (operandDies(l, inst, operand)) {
+        return .tomb;
+    } else {
+        return default;
+    }
+}
+
 /// Higher level API.
 pub const CondBrSlices = struct {
     then_deaths: []const Air.Inst.Index,