Commit caa4e30ef4

Jakub Konka <kubkon@jakubkonka.com>
2022-03-01 15:21:10
x64: impl airMemset using inline memset
1 parent 5a6f439
Changed files (3)
src
arch
test
src/arch/x86_64/CodeGen.zig
@@ -4413,9 +4413,12 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
                 const reg = try self.copyToTmpRegister(ty, mcv);
                 return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
             }
-            try self.genInlineMemset(stack_offset, ty, .{ .immediate = 0xaa }, .{
-                .dest_stack_base = .rsp,
-            });
+            try self.genInlineMemset(
+                .{ .stack_offset = stack_offset },
+                .{ .immediate = 0xaa },
+                .{ .immediate = abi_size },
+                .{ .dest_stack_base = .rsp },
+            );
         },
         .compare_flags_unsigned,
         .compare_flags_signed,
@@ -4519,7 +4522,12 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl
                 2 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaa }, opts),
                 4 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaa }, opts),
                 8 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }, opts),
-                else => return self.genInlineMemset(stack_offset, ty, .{ .immediate = 0xaa }, opts),
+                else => |x| return self.genInlineMemset(
+                    .{ .stack_offset = stack_offset },
+                    .{ .immediate = 0xaa },
+                    .{ .immediate = x },
+                    opts,
+                ),
             }
         },
         .compare_flags_unsigned,
@@ -4701,8 +4709,8 @@ fn genInlineMemcpy(
 
     const dst_addr_reg = try self.register_manager.allocReg(null);
     switch (dst_ptr) {
-        .got_load => unreachable,
         .memory,
+        .got_load,
         .direct_load,
         => {
             try self.loadMemPtrIntoRegister(dst_addr_reg, Type.usize, dst_ptr);
@@ -4737,8 +4745,8 @@ fn genInlineMemcpy(
 
     const src_addr_reg = try self.register_manager.allocReg(null);
     switch (src_ptr) {
-        .got_load => unreachable,
         .memory,
+        .got_load,
         .direct_load,
         => {
             try self.loadMemPtrIntoRegister(src_addr_reg, Type.usize, src_ptr);
@@ -4872,39 +4880,52 @@ fn genInlineMemcpy(
 
 fn genInlineMemset(
     self: *Self,
-    stack_offset: i32,
-    ty: Type,
+    dst_ptr: MCValue,
     value: MCValue,
+    len: MCValue,
     opts: InlineMemcpyOpts,
 ) InnerError!void {
     try self.register_manager.getReg(.rax, null);
+    self.register_manager.freezeRegs(&.{.rax});
+    defer self.register_manager.unfreezeRegs(&.{.rax});
 
-    const abi_size = ty.abiSize(self.target.*);
-    const negative_offset = @bitCast(u32, -stack_offset);
-
-    // We are actually counting `abi_size` bytes; however, we reuse the index register
-    // as both the counter and offset scaler, hence we need to subtract one from `abi_size`
-    // and count until -1.
-    if (abi_size > math.maxInt(i32)) {
-        // movabs rax, abi_size - 1
-        const payload = try self.addExtra(Mir.Imm64.encode(abi_size - 1));
-        _ = try self.addInst(.{
-            .tag = .movabs,
-            .ops = (Mir.Ops{
-                .reg1 = .rax,
-            }).encode(),
-            .data = .{ .payload = payload },
-        });
-    } else {
-        // mov rax, abi_size - 1
-        _ = try self.addInst(.{
-            .tag = .mov,
-            .ops = (Mir.Ops{
-                .reg1 = .rax,
-            }).encode(),
-            .data = .{ .imm = @truncate(u32, abi_size - 1) },
-        });
+    const addr_reg = try self.register_manager.allocReg(null);
+    switch (dst_ptr) {
+        .memory,
+        .got_load,
+        .direct_load,
+        => {
+            try self.loadMemPtrIntoRegister(addr_reg, Type.usize, dst_ptr);
+        },
+        .ptr_stack_offset, .stack_offset => |off| {
+            _ = try self.addInst(.{
+                .tag = .lea,
+                .ops = (Mir.Ops{
+                    .reg1 = addr_reg.to64(),
+                    .reg2 = opts.dest_stack_base orelse .rbp,
+                }).encode(),
+                .data = .{ .imm = @bitCast(u32, -off) },
+            });
+        },
+        .register => |reg| {
+            _ = try self.addInst(.{
+                .tag = .mov,
+                .ops = (Mir.Ops{
+                    .reg1 = registerAlias(addr_reg, @divExact(reg.size(), 8)),
+                    .reg2 = reg,
+                }).encode(),
+                .data = undefined,
+            });
+        },
+        else => {
+            return self.fail("TODO implement memset when dest is {}", .{dst_ptr});
+        },
     }
+    self.register_manager.freezeRegs(&.{addr_reg});
+    defer self.register_manager.unfreezeRegs(&.{addr_reg});
+
+    try self.genSetReg(Type.usize, .rax, len);
+    try self.genBinMathOpMir(.sub, Type.usize, .{ .register = .rax }, .{ .immediate = 1 });
 
     // loop:
     // cmp rax, -1
@@ -4930,13 +4951,13 @@ fn genInlineMemset(
             }
             // mov byte ptr [addr_reg + rax], imm
             const payload = try self.addExtra(Mir.ImmPair{
-                .dest_off = negative_offset,
+                .dest_off = 0,
                 .operand = @truncate(u32, x),
             });
             _ = try self.addInst(.{
                 .tag = .mov_mem_index_imm,
                 .ops = (Mir.Ops{
-                    .reg1 = opts.dest_stack_base orelse .rbp,
+                    .reg1 = addr_reg,
                 }).encode(),
                 .data = .{ .payload = payload },
             });
@@ -5301,8 +5322,24 @@ fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOr
 }
 
 fn airMemset(self: *Self, inst: Air.Inst.Index) !void {
-    _ = inst;
-    return self.fail("TODO implement airMemset for {}", .{self.target.cpu.arch});
+    const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+    const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
+
+    const dst_ptr = try self.resolveInst(pl_op.operand);
+    dst_ptr.freezeIfRegister(&self.register_manager);
+    defer dst_ptr.unfreezeIfRegister(&self.register_manager);
+
+    const src_val = try self.resolveInst(extra.lhs);
+    src_val.freezeIfRegister(&self.register_manager);
+    defer src_val.unfreezeIfRegister(&self.register_manager);
+
+    const len = try self.resolveInst(extra.rhs);
+    len.freezeIfRegister(&self.register_manager);
+    defer len.unfreezeIfRegister(&self.register_manager);
+
+    try self.genInlineMemset(dst_ptr, src_val, len, .{});
+
+    return self.finishAir(inst, .none, .{ pl_op.operand, .none, .none });
 }
 
 fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
@@ -5313,6 +5350,7 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
     dst_ptr.freezeIfRegister(&self.register_manager);
     defer dst_ptr.unfreezeIfRegister(&self.register_manager);
 
+    const src_ty = self.air.typeOf(extra.lhs);
     const src_ptr = try self.resolveInst(extra.lhs);
     src_ptr.freezeIfRegister(&self.register_manager);
     defer src_ptr.unfreezeIfRegister(&self.register_manager);
@@ -5321,8 +5359,30 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
     len.freezeIfRegister(&self.register_manager);
     defer len.unfreezeIfRegister(&self.register_manager);
 
-    log.warn("dst_ptr = {}, src_ptr = {}, len = {}", .{ dst_ptr, src_ptr, len });
-    try self.genInlineMemcpy(dst_ptr, src_ptr, len, .{});
+    // TODO Is this the only condition for pointer dereference for memcpy?
+    const src: MCValue = blk: {
+        switch (src_ptr) {
+            .got_load, .direct_load, .memory => {
+                const reg = try self.register_manager.allocReg(null);
+                try self.loadMemPtrIntoRegister(reg, src_ty, src_ptr);
+                _ = try self.addInst(.{
+                    .tag = .mov,
+                    .ops = (Mir.Ops{
+                        .reg1 = reg,
+                        .reg2 = reg,
+                        .flags = 0b01,
+                    }).encode(),
+                    .data = .{ .imm = 0 },
+                });
+                break :blk MCValue{ .register = reg };
+            },
+            else => break :blk src_ptr,
+        }
+    };
+    src.freezeIfRegister(&self.register_manager);
+    defer src.unfreezeIfRegister(&self.register_manager);
+
+    try self.genInlineMemcpy(dst_ptr, src, len, .{});
 
     return self.finishAir(inst, .none, .{ pl_op.operand, .none, .none });
 }
test/behavior/basic.zig
@@ -341,7 +341,6 @@ fn f2(x: bool) []const u8 {
 test "memcpy and memset intrinsics" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
 
     try testMemcpyMemset();
test/behavior/struct.zig
@@ -80,12 +80,11 @@ const StructWithNoFields = struct {
 const StructFoo = struct {
     a: i32,
     b: bool,
-    c: f32,
+    c: u64,
 };
 
 test "structs" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
 
     var foo: StructFoo = undefined;