Commit c4b83ea021

gracefu <81774659+gracefuu@users.noreply.github.com>
2021-04-09 07:51:00
stage2 x86_64: implement integer mul
This was also an experiment to see if it were easier to implement a new feature when using the instruction encoder. Verdict: It's not that much easier, but I think it's certainly much more readable, because the description of the Instruction annotates what each field means. Right now, precise knowledge of x86_64 instructions is still required because things like when to set the 64-bit flag, how to read x86_64 instruction references, etc. are still not automatically done for you. In the future, this interface might make it sligtly easier to write an assembler for x86_64, by abstracting the bit-fiddling aspects of instruction encoding.
1 parent 5bd464e
Changed files (4)
src/codegen.zig
@@ -1079,6 +1079,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             if (inst.base.isUnused())
                 return MCValue.dead;
             switch (arch) {
+                .x86_64 => return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs),
                 .arm, .armeb => return try self.genArmMul(&inst.base, inst.lhs, inst.rhs),
                 else => return self.fail(inst.base.src, "TODO implement mul for {}", .{self.target.cpu.arch}),
             }
@@ -1574,6 +1575,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 .sub, .subwrap => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 5, 0x28),
                 .xor, .not => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 6, 0x30),
 
+                .mul, .mulwrap => try self.genX8664Imul(inst.src, inst.ty, dst_mcv, src_mcv),
                 else => unreachable,
             }
 
@@ -1795,6 +1797,153 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             }
         }
 
+        /// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
+        fn genX8664Imul(
+            self: *Self,
+            src: LazySrcLoc,
+            dst_ty: Type,
+            dst_mcv: MCValue,
+            src_mcv: MCValue,
+        ) !void {
+            switch (dst_mcv) {
+                .none => unreachable,
+                .undef => unreachable,
+                .dead, .unreach, .immediate => unreachable,
+                .compare_flags_unsigned => unreachable,
+                .compare_flags_signed => unreachable,
+                .ptr_stack_offset => unreachable,
+                .ptr_embedded_in_code => unreachable,
+                .register => |dst_reg| {
+                    switch (src_mcv) {
+                        .none => unreachable,
+                        .undef => try self.genSetReg(src, dst_ty, dst_reg, .undef),
+                        .dead, .unreach => unreachable,
+                        .ptr_stack_offset => unreachable,
+                        .ptr_embedded_in_code => unreachable,
+                        .register => |src_reg| {
+                            // register, register
+                            //
+                            // Use the following imul opcode
+                            // 0F AF /r: IMUL r32/64, r/m32/64
+                            try self.encodeX8664Instruction(src, Instruction{
+                                .operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
+                                .primary_opcode_2b = 0xaf,
+                                // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+                                //       https://github.com/ziglang/zig/issues/6515
+                                .modrm = @as(
+                                    ?Instruction.ModrmEffectiveAddress,
+                                    Instruction.ModrmEffectiveAddress{ .reg = src_reg },
+                                ),
+                                .reg = dst_reg,
+                            });
+                        },
+                        .immediate => |imm| {
+                            // register, immediate:
+                            // depends on size of immediate.
+                            //
+                            // immediate fits in i8:
+                            // 6B /r ib: IMUL r32/64, r/m32/64, imm8
+                            //
+                            // immediate fits in i32:
+                            // 69 /r id: IMUL r32/64, r/m32/64, imm32
+                            //
+                            // immediate is huge:
+                            // split into 2 instructions
+                            // 1) copy the 64 bit immediate into a tmp register
+                            // 2) perform register,register mul
+                            // 0F AF /r: IMUL r32/64, r/m32/64
+                            if (math.minInt(i8) <= imm and imm <= math.maxInt(i8)) {
+                                try self.encodeX8664Instruction(src, Instruction{
+                                    .operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
+                                    .primary_opcode_1b = 0x6B,
+                                    .reg = dst_reg,
+                                    // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+                                    //       https://github.com/ziglang/zig/issues/6515
+                                    .modrm = @as(
+                                        ?Instruction.ModrmEffectiveAddress,
+                                        Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
+                                    ),
+                                    .immediate_bytes = 1,
+                                    .immediate = imm,
+                                });
+                            } else if (math.minInt(i32) <= imm and imm <= math.maxInt(i32)) {
+                                try self.encodeX8664Instruction(src, Instruction{
+                                    .operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
+                                    .primary_opcode_1b = 0x69,
+                                    .reg = dst_reg,
+                                    // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+                                    //       https://github.com/ziglang/zig/issues/6515
+                                    .modrm = @as(
+                                        ?Instruction.ModrmEffectiveAddress,
+                                        Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
+                                    ),
+                                    .immediate_bytes = 4,
+                                    .immediate = imm,
+                                });
+                            } else {
+                                const src_reg = try self.copyToTmpRegister(src, dst_ty, src_mcv);
+                                return self.genX8664Imul(src, dst_ty, dst_mcv, MCValue{ .register = src_reg });
+                            }
+                        },
+                        .embedded_in_code, .memory, .stack_offset => {
+                            return self.fail(src, "TODO implement x86 multiply source memory", .{});
+                        },
+                        .compare_flags_unsigned => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
+                        },
+                        .compare_flags_signed => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
+                        },
+                    }
+                },
+                .stack_offset => |off| {
+                    switch (src_mcv) {
+                        .none => unreachable,
+                        .undef => return self.genSetStack(src, dst_ty, off, .undef),
+                        .dead, .unreach => unreachable,
+                        .ptr_stack_offset => unreachable,
+                        .ptr_embedded_in_code => unreachable,
+                        .register => |src_reg| {
+                            // copy dst to a register
+                            const dst_reg = try self.copyToTmpRegister(src, dst_ty, dst_mcv);
+                            // multiply into dst_reg
+                            // register, register
+                            // Use the following imul opcode
+                            // 0F AF /r: IMUL r32/64, r/m32/64
+                            try self.encodeX8664Instruction(src, Instruction{
+                                .operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
+                                .primary_opcode_2b = 0xaf,
+                                // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+                                //       https://github.com/ziglang/zig/issues/6515
+                                .modrm = @as(
+                                    ?Instruction.ModrmEffectiveAddress,
+                                    Instruction.ModrmEffectiveAddress{ .reg = src_reg },
+                                ),
+                                .reg = dst_reg,
+                            });
+                            // copy dst_reg back out
+                            return self.genSetStack(src, dst_ty, off, MCValue{ .register = dst_reg });
+                        },
+                        .immediate => |imm| {
+                            return self.fail(src, "TODO implement x86 multiply source immediate", .{});
+                        },
+                        .embedded_in_code, .memory, .stack_offset => {
+                            return self.fail(src, "TODO implement x86 multiply source memory", .{});
+                        },
+                        .compare_flags_unsigned => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
+                        },
+                        .compare_flags_signed => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
+                        },
+                    }
+                },
+                .embedded_in_code, .memory => {
+                    return self.fail(src, "TODO implement x86 multiply destination memory", .{});
+                },
+            }
+        }
+
         fn genX8664ModRMRegToStack(self: *Self, src: LazySrcLoc, ty: Type, off: u32, reg: Register, opcode: u8) !void {
             const abi_size = ty.abiSize(self.target.*);
             const adj_off = off + abi_size;
src/Module.zig
@@ -4330,6 +4330,33 @@ pub fn intSub(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
     }
 }
 
+pub fn intMul(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
+    // TODO is this a performance issue? maybe we should try the operation without
+    // resorting to BigInt first.
+    var lhs_space: Value.BigIntSpace = undefined;
+    var rhs_space: Value.BigIntSpace = undefined;
+    const lhs_bigint = lhs.toBigInt(&lhs_space);
+    const rhs_bigint = rhs.toBigInt(&rhs_space);
+    const limbs = try allocator.alloc(
+        std.math.big.Limb,
+        lhs_bigint.limbs.len + rhs_bigint.limbs.len + 1,
+    );
+    var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
+    var limbs_buffer = try allocator.alloc(
+        std.math.big.Limb,
+        std.math.big.int.calcMulLimbsBufferLen(lhs_bigint.limbs.len, rhs_bigint.limbs.len, 1),
+    );
+    defer allocator.free(limbs_buffer);
+    result_bigint.mul(lhs_bigint, rhs_bigint, limbs_buffer, allocator);
+    const result_limbs = result_bigint.limbs[0..result_bigint.len];
+
+    if (result_bigint.positive) {
+        return Value.Tag.int_big_positive.create(allocator, result_limbs);
+    } else {
+        return Value.Tag.int_big_negative.create(allocator, result_limbs);
+    }
+}
+
 pub fn floatAdd(
     arena: *Allocator,
     float_type: Type,
@@ -4396,6 +4423,39 @@ pub fn floatSub(
     }
 }
 
+pub fn floatMul(
+    arena: *Allocator,
+    float_type: Type,
+    src: LazySrcLoc,
+    lhs: Value,
+    rhs: Value,
+) !Value {
+    switch (float_type.tag()) {
+        .f16 => {
+            @panic("TODO add __trunctfhf2 to compiler-rt");
+            //const lhs_val = lhs.toFloat(f16);
+            //const rhs_val = rhs.toFloat(f16);
+            //return Value.Tag.float_16.create(arena, lhs_val * rhs_val);
+        },
+        .f32 => {
+            const lhs_val = lhs.toFloat(f32);
+            const rhs_val = rhs.toFloat(f32);
+            return Value.Tag.float_32.create(arena, lhs_val * rhs_val);
+        },
+        .f64 => {
+            const lhs_val = lhs.toFloat(f64);
+            const rhs_val = rhs.toFloat(f64);
+            return Value.Tag.float_64.create(arena, lhs_val * rhs_val);
+        },
+        .f128, .comptime_float, .c_longdouble => {
+            const lhs_val = lhs.toFloat(f128);
+            const rhs_val = rhs.toFloat(f128);
+            return Value.Tag.float_128.create(arena, lhs_val * rhs_val);
+        },
+        else => unreachable,
+    }
+}
+
 pub fn simplePtrType(
     mod: *Module,
     arena: *Allocator,
src/Sema.zig
@@ -3885,6 +3885,13 @@ fn analyzeArithmetic(
                         try Module.floatSub(sema.arena, scalar_type, src, lhs_val, rhs_val);
                     break :blk val;
                 },
+                .mul => blk: {
+                    const val = if (is_int)
+                        try Module.intMul(sema.arena, lhs_val, rhs_val)
+                    else
+                        try Module.floatMul(sema.arena, scalar_type, src, lhs_val, rhs_val);
+                    break :blk val;
+                },
                 else => return sema.mod.fail(&block.base, src, "TODO Implement arithmetic operand '{s}'", .{@tagName(zir_tag)}),
             };
 
test/stage2/test.zig
@@ -358,6 +358,81 @@ pub fn addCases(ctx: *TestContext) !void {
         , &[_][]const u8{":2:15: error: incompatible types: 'bool' and 'comptime_int'"});
     }
 
+    {
+        var case = ctx.exe("multiplying numbers at runtime and comptime", linux_x64);
+        case.addCompareOutput(
+            \\export fn _start() noreturn {
+            \\    mul(3, 4);
+            \\
+            \\    exit();
+            \\}
+            \\
+            \\fn mul(a: u32, b: u32) void {
+            \\    if (a * b != 12) unreachable;
+            \\}
+            \\
+            \\fn exit() noreturn {
+            \\    asm volatile ("syscall"
+            \\        :
+            \\        : [number] "{rax}" (231),
+            \\          [arg1] "{rdi}" (0)
+            \\        : "rcx", "r11", "memory"
+            \\    );
+            \\    unreachable;
+            \\}
+        ,
+            "",
+        );
+        // comptime function call
+        case.addCompareOutput(
+            \\export fn _start() noreturn {
+            \\    exit();
+            \\}
+            \\
+            \\fn mul(a: u32, b: u32) u32 {
+            \\    return a * b;
+            \\}
+            \\
+            \\const x = mul(3, 4);
+            \\
+            \\fn exit() noreturn {
+            \\    asm volatile ("syscall"
+            \\        :
+            \\        : [number] "{rax}" (231),
+            \\          [arg1] "{rdi}" (x - 12)
+            \\        : "rcx", "r11", "memory"
+            \\    );
+            \\    unreachable;
+            \\}
+        ,
+            "",
+        );
+        // Inline function call
+        case.addCompareOutput(
+            \\export fn _start() noreturn {
+            \\    var x: usize = 5;
+            \\    const y = mul(2, 3, x);
+            \\    exit(y - 30);
+            \\}
+            \\
+            \\fn mul(a: usize, b: usize, c: usize) callconv(.Inline) usize {
+            \\    return a * b * c;
+            \\}
+            \\
+            \\fn exit(code: usize) noreturn {
+            \\    asm volatile ("syscall"
+            \\        :
+            \\        : [number] "{rax}" (231),
+            \\          [arg1] "{rdi}" (code)
+            \\        : "rcx", "r11", "memory"
+            \\    );
+            \\    unreachable;
+            \\}
+        ,
+            "",
+        );
+    }
+
     {
         var case = ctx.exe("assert function", linux_x64);
         case.addCompareOutput(
@@ -741,6 +816,7 @@ pub fn addCases(ctx: *TestContext) !void {
         case.addCompareOutput(
             \\export fn _start() noreturn {
             \\    assert(add(3, 4) == 1221);
+            \\    assert(mul(3, 4) == 21609);
             \\
             \\    exit();
             \\}
@@ -774,6 +850,32 @@ pub fn addCases(ctx: *TestContext) !void {
             \\    return z;
             \\}
             \\
+            \\fn mul(a: u32, b: u32) u32 {
+            \\    const x: u32 = blk: {
+            \\        const c = a * a * a * a; // 81
+            \\        const d = a * a * a * b; // 108
+            \\        const e = a * a * b * a; // 108
+            \\        const f = a * a * b * b; // 144
+            \\        const g = a * b * a * a; // 108
+            \\        const h = a * b * a * b; // 144
+            \\        const i = a * b * b * a; // 144
+            \\        const j = a * b * b * b; // 192
+            \\        const k = b * a * a * a; // 108
+            \\        const l = b * a * a * b; // 144
+            \\        const m = b * a * b * a; // 144
+            \\        const n = b * a * b * b; // 192
+            \\        const o = b * b * a * a; // 144
+            \\        const p = b * b * a * b; // 192
+            \\        const q = b * b * b * a; // 192
+            \\        const r = b * b * b * b; // 256
+            \\        const s = c + d + e + f + g + h + i + j + k + l + m + n + o + p + q + r; // 2401
+            \\        break :blk s;
+            \\    };
+            \\    const y = x * a; // 7203
+            \\    const z = y * a; // 21609
+            \\    return z;
+            \\}
+            \\
             \\pub fn assert(ok: bool) void {
             \\    if (!ok) unreachable; // assertion failure
             \\}